summaryrefslogtreecommitdiffstats
path: root/pkg/data/histogram.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/data/histogram.go')
-rw-r--r--pkg/data/histogram.go108
1 files changed, 108 insertions, 0 deletions
diff --git a/pkg/data/histogram.go b/pkg/data/histogram.go
new file mode 100644
index 0000000..e16d000
--- /dev/null
+++ b/pkg/data/histogram.go
@@ -0,0 +1,108 @@
+package data
+
+import (
+ "fmt"
+ "math"
+)
+
+import "strconv"
+
+const maxDigits = 6
+
+func ff(x float64) string {
+ minExact := strconv.FormatFloat(x, 'g', -1, 64)
+ fixed := strconv.FormatFloat(x, 'g', maxDigits, 64)
+ if len(minExact) < len(fixed) {
+ return minExact
+ }
+ return fixed
+}
+
+type Bin struct {
+ LeftInclusive float64
+ Right float64
+ RightInclusive bool
+ Count uint64
+}
+
+func (b *Bin) String() string {
+ if b.RightInclusive {
+ return fmt.Sprintf("[%s,%s]", ff(b.LeftInclusive), ff(b.Right))
+ }
+ return fmt.Sprintf("[%s,%s)", ff(b.LeftInclusive), ff(b.Right))
+}
+
+type Bins struct {
+ Number int
+ min, max float64
+ numPoints int
+}
+
+func (b *Bins) ChooseSqrt() {
+ b.Number = int(math.Sqrt(float64(b.numPoints)))
+}
+
+func (b *Bins) ChooseSturges() {
+ b.Number = int(math.Ceil(math.Log2(float64(b.numPoints))) + 1)
+}
+
+func (b *Bins) ChooseRice() {
+ b.Number = int(math.Ceil(2 * math.Pow(float64(b.numPoints), 1.0/3.0)))
+}
+
+func NewBins(points []float64) *Bins {
+ bins := new(Bins)
+ bins.numPoints = len(points)
+ bins.Number = 5
+ bins.min = math.Inf(1)
+ bins.max = math.Inf(-1)
+ for _, x := range points {
+ bins.min = math.Min(bins.min, x)
+ bins.max = math.Max(bins.max, x)
+ }
+ return bins
+}
+
+func (b *Bins) left(i int) float64 {
+ return b.min + ((b.max - b.min) / float64(b.Number) * float64(i))
+}
+
+func (b *Bins) right(i int) float64 {
+ return b.left(i + 1)
+}
+
+func (b *Bins) All() (out []Bin) {
+ if b.max == b.min {
+ b.Number = 1
+ }
+ for i := 0; i < b.Number; i++ {
+ out = append(out, Bin{
+ LeftInclusive: b.left(i),
+ Right: b.right(i),
+ })
+ }
+ out[b.Number-1].RightInclusive = true
+ return
+}
+
+func (b *Bins) Point(x float64) int {
+ if b.max == b.min {
+ return 0
+ }
+ i := int((x - b.min) / (b.max - b.min) * float64(b.Number))
+ if i >= b.Number {
+ i--
+ }
+ return i
+}
+
+func Histogram(points []float64, bins *Bins) (out []Bin) {
+ out = bins.All()
+ for _, b := range out {
+ b.Count = 0
+ }
+ for _, x := range points {
+ out[bins.Point(x)].Count++
+ }
+ return
+}