summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Harel Ben-Attia <harelba@gmail.com> 2015-08-08 08:59:19 -0400
committer Harel Ben-Attia <harelba@gmail.com> 2015-08-08 08:59:19 -0400
commit 55d062b61a5b84ff8bc1ca883c452628afa417d3 (patch)
tree e4d8958917df2c80bb5c6882060bcf077bf1695e
parent 3a1897e9074813d0d79498c0fbab0de3434d639e (diff)
Added initial percentile implementation - no tests for it yet
-rwxr-xr-x bin/q 30
1 files changed, 29 insertions, 1 deletions
diff --git a/bin/q b/bin/q
index ded2ee9..3c09901 100755
--- a/bin/q
+++ b/bin/q
@@ -48,10 +48,11 @@ import csv
import hashlib
import uuid
import cStringIO
+import math
csv.field_size_limit(sys.maxsize)
-DEBUG = True
+DEBUG = False
def get_stdout_encoding(encoding_override=None):
if encoding_override is not None and encoding_override != 'none':
@@ -82,6 +83,32 @@ class Sqlite3DBResults(object):
self.query_column_names = query_column_names
self.results = results
def percentile(l, p):
    """Return the p-th percentile of an ascending-sorted sequence.

    Uses linear interpolation between the two closest ranks.

    l -- sequence of numeric values, already sorted in ascending order
         (this function does not sort it).
    p -- percentile as a fraction in [0, 1]: 0 is the minimum, 0.5 the
         median, 1 the maximum.

    Returns None when l is empty, or when p is None or outside [0, 1].
    When the rank p * (len(l) - 1) lands exactly on an element, that
    element is returned unchanged; otherwise the result is the weighted
    average of the two neighbouring elements.
    """
    # TODO Alpha implementation, need to provide multiple interpolation methods, and add tests
    if not l:
        return None
    # Reject an invalid p up front: a negative rank would silently index
    # from the END of the list (returning a wrong value), and a rank past
    # the last element would raise IndexError.
    if p is None or not (0 <= p <= 1):
        return None
    k = p * (len(l) - 1)
    # math.floor/ceil return floats on Python 2, so convert once for indexing.
    f = int(math.floor(k))
    c = int(math.ceil(k))
    if f == c:
        # Rank is a whole number: no interpolation needed.
        return l[f]
    # Weight each neighbour by its distance from the fractional rank.
    return (c - k) * l[f] + (k - f) * l[c]
+
class StrictPercentile(object):
    """Aggregate that computes a linearly-interpolated percentile.

    Follows the step()/finalize() protocol expected by sqlite3's
    create_aggregate, taking two arguments per row: the value to
    accumulate and the requested percentile p as a fraction in [0, 1].
    """

    def __init__(self):
        # Non-NULL values seen so far, in arrival order (sorted in finalize).
        self.values = []
        # Requested percentile; taken from the first row that supplies one.
        self.p = None

    def step(self, value, p):
        """Accumulate one row.

        value -- the row's value; None (SQL NULL) is ignored, as with
                 conventional SQL aggregates.
        p     -- the requested percentile; only the first non-None p is
                 kept, later ones are ignored.
        """
        if self.p is None:
            self.p = p
        # Skip NULLs: a None in the sample would either break the
        # interpolation arithmetic or be returned as the "percentile".
        if value is not None:
            self.values.append(value)

    def finalize(self):
        """Return the percentile of the accumulated values.

        Returns None when no non-NULL value was seen, or when p is
        missing or outside [0, 1].
        """
        # Check p for None explicitly rather than relying on how None
        # compares against numbers (a TypeError on Python 3).
        if not self.values or self.p is None or not (0 <= self.p <= 1):
            return None
        return percentile(sorted(self.values), self.p)
+
class Sqlite3DB(object):
def __init__(self, show_sql=SHOW_SQL):
@@ -97,6 +124,7 @@ class Sqlite3DB(object):
def add_user_functions(self):
    """Register the custom SQL functions on self.conn.

    Adds the scalar functions regexp(2 args) and sha1(1 arg), and the
    aggregate percentile(2 args) implemented by StrictPercentile.
    """
    # Scalar functions: (SQL name, number of arguments, Python callable).
    scalar_functions = (
        ("regexp", 2, regexp),
        ("sha1", 1, sha1),
    )
    for sql_name, arg_count, func in scalar_functions:
        self.conn.create_function(sql_name, arg_count, func)
    # Aggregates are registered with a class, not a plain callable.
    self.conn.create_aggregate("percentile", 2, StrictPercentile)
def is_numeric_type(self, column_type):
return column_type in self.numeric_column_types