Table.query
A small library that given a set of data it transparently inserts it into a sqlite database and then allows for very flexible querying (SQL). Usefull for ad-hoc data tasks that read data from some file or api that require quick analysis. Adding data into a sqlite database will also allow for the data to be bigger than memory can fit. Inspired by R’s sqldf.
Use Case: Parse data from a log file or a web service and then need to do some data manipulation and summaries like: joining with other data, filtering, pivoting (group by), augment data with calculated columns, calculate sums, counts, averages, etc…
Leverages the power of sql for data analyses inside ruby with a minimal API:
> require 'table'
> Table.new ["user", "value"], [["bob", 3], ["eve", 1]], "tbl"
> Table.query "select sum(value) from tbl"
[[4]]
Features:
-
automatically infers the data type (numeric vs text)
-
shortcut to get a column
> tbl = Table.new ["user", "value"], [["bob", 3], ["eve", 1]], "tbl" > tbl.user ["bob", "eve"]
-
augment your data by adding columns:
> tbl = Table.new ["user", "value"], [["bob", 3], ["eve", 1]], "tbl" > tbl.add "sex", ["male", "female"] > Table.query "select user, sex from tbl" [["bob", "male"], ["eve", "female"]]
-
by naming the query(value at the end) create a new table:
> Table.new ["user", "value"], [["bob", 3], ["eve", 1]], "tbl" > tbl2 = Table.query "select sum(value) as total from tbl", "tbl2" > tbl2.total [4]
-
direct access to db driver when needed:
> Table.new ["user", "value"], [["bob", 3], ["eve", 1]], "tbl" > Table.with_db {|db| db.execute("update tbl set value=5 where user='eve'") } > Table.query "select sum(value) from age" [[8]]
-
persists in file table.db thus also means is then accessible by other tools; R, Excel, Java, etc…
-
small :)
Code
require 'sqlite3'
class Table
attr_accessor :name
def self.with_db(&block)
db = SQLite3::Database.new("./table.db")
yield db if block_given?
db.close
end
def self.query q, new_table_name=nil
if new_table_name
val = []
Table.with_db {|db| val = db.execute2(q) }
Table.new val.shift, val, new_table_name
else
Table.with_db {|db| return db.execute(q) }
end
end
# new
def initialize header, data, name
@table = name
# sql to create new table
sql = "create table #{@table} ("
header.each_with_index do |h, i|
# iterates each row, looking for a non empty value
val = ""
data.each do |row|
if row[i] != nil and row[i] != ""
val = row[i]
break
end
end
if val.class == Fixnum or val.class == Float
sql << "#{h} NUMERIC,"
else
sql << "#{h} TEXT,"
end
end
sql << ");"
Table.with_db do |db|
db.execute( "drop table if exists #{@table};" ) # remove if exists
db.execute(sql.gsub(",);", ");")) # create new table
data.each do |row| # insert data
db.execute( "insert into #{@table} values ( '#{row.join("','")}' );" )
end
end
end
# column
def method_missing(m, *args, &block)
Table.with_db do |db|
return db.execute( "select #{m} from #{@table}" ).flatten
end
end
# add
def add col, data
Table.with_db do |db|
if data[0].class == Fixnum or data[0].class == Float
db.execute( "alter table #{@table} add #{col} NUMERIC;" )
else
db.execute( "alter table #{@table} add #{col} TEXT;" )
end
# add specified value to each row
data.each_with_index do |val, i|
db.execute( "update #{@table} set #{col} = '#{val}' where ROWID = #{i+1};" )
end
end
end
# list tables
def list
Table.with_db do |db|
return @db
.execute("select name from sqlite_master where type='table' ORDER BY name;")
.flatten
end
end
end
if __FILE__ == $0
require "test/unit"
class TestTable < Test::Unit::TestCase
def setup
require 'fileutils'
File.delete "./table.db" if File.exists? "./table.db"
end
def test_insert
Table.new ["id", "v1", "v2"], [[1, 23, "a"], [2, 34, "b"]], "test"
assert_equal [[23.0, "a"], [34.0, "b"]], Table.query("select v1,v2 from test")
end
def test_method_missing
tbl = Table.new ["id", "col1", "col2"], [[1, 23, "a"], [2, 34, "b"]], "test"
assert_equal [23.0, 34.0], tbl.col1
end
def test_add
tbl = Table.new ["id", "col1"], [[1, 23], [2, 34]], "test"
tbl.add("col2", tbl.col1.map{|v|v+1} ) # v1+=1 as v2
assert_equal [24.0, 35.0], tbl.col2
tbl.add("col3", ["random", "stuff"])
assert_equal ["random", "stuff"], tbl.col3
end
def test_join
Table.new ["id", "v1", "v2"], [[1, 23, "a"], [2, 34, "b"]], "tbl1"
Table.new ["id", "v3", "v4"], [[1, 24, "c"], [2, 36, "d"]], "tbl2"
sql = "select tbl1.id,v1,v4 from tbl1 join tbl2 on tbl1.id = tbl2.id"
assert_equal [[1, 23, "c"], [2, 34, "d"]], Table.query(sql)
end
def test_alias
Table.new ["g", "v"], [["a",11], ["a",9], ["b",2], ["b",2]], "tbl"
tbl2 = Table.query("select g, sum(v) as value from tbl group by g", "tbl2")
assert_equal ["a", "b"], tbl2.g
assert_equal [20.0, 4.0], tbl2.value
end
def test_join_with_new_table
Table.new ["id", "v1", "v2"], [[1, 11, "a"], [2, 12, "b"]], "tbl1"
Table.new ["id", "v3", "v4"], [[1, 21, "c"], [2, 22, "d"]], "tbl2"
sql = "select tbl1.id,v1,v4 from tbl1 join tbl2 on tbl1.id = tbl2.id"
Table.query(sql, "tbl3")
assert_equal [[1, 11, "c"], [2, 12, "d"]], Table.query("select * from tbl3")
end
def test_ad_hoc
Table.new ["id", "v1", "v2"], [[1, 23, "a"], [2, 34, "b"]], "test"
val = nil
Table.with_db {|db| val = db.execute("select v1 from test limit 1") }
assert_equal [23.0], val.flatten
end
def test_date
Table.new ["ts", "v"], [[Date.today, 23], [Date.today+10, 34]], "test"
sql = "select v from test where ts < '#{Date.today+2}'"
assert_equal [23.0], Table.query(sql).flatten
end
def test_time
Table.new ["ts", "v"], [[Time.now, 10], [Time.now+10, 20]], "test"
sql = "select v from test where ts < '#{Time.now+2}'"
assert_equal [10.0], Table.query(sql).flatten
end
end
end