From 6ded61572365c84b872e95d3eea4f73e707e7105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Schw=C3=B6rer?= Date: Fri, 12 Jul 2024 16:33:42 +0200 Subject: [PATCH] v0.0.482 mathext.Percentile --- exerr/dataType.go | 1 + go.mod | 8 +- go.sum | 8 ++ goextVersion.go | 4 +- mathext/statistics.go | 55 ++++++++- mathext/statistics_test.go | 238 +++++++++++++++++++++++++++++++++++++ 6 files changed, 307 insertions(+), 7 deletions(-) create mode 100644 mathext/statistics_test.go diff --git a/exerr/dataType.go b/exerr/dataType.go index 79700f1..8e1583e 100644 --- a/exerr/dataType.go +++ b/exerr/dataType.go @@ -23,6 +23,7 @@ var ( TypeInternal = NewType("INTERNAL_ERROR", langext.Ptr(500)) TypePanic = NewType("PANIC", langext.Ptr(500)) TypeNotImplemented = NewType("NOT_IMPLEMENTED", langext.Ptr(500)) + TypeAssert = NewType("ASSERT", langext.Ptr(500)) TypeMongoQuery = NewType("MONGO_QUERY", langext.Ptr(500)) TypeCursorTokenDecode = NewType("CURSOR_TOKEN_DECODE", langext.Ptr(500)) diff --git a/go.mod b/go.mod index 0a280e2..5b05c9a 100644 --- a/go.mod +++ b/go.mod @@ -9,9 +9,9 @@ require ( github.com/rs/xid v1.5.0 github.com/rs/zerolog v1.33.0 go.mongodb.org/mongo-driver v1.16.0 - golang.org/x/crypto v0.24.0 - golang.org/x/sys v0.21.0 - golang.org/x/term v0.21.0 + golang.org/x/crypto v0.25.0 + golang.org/x/sys v0.22.0 + golang.org/x/term v0.22.0 ) require ( @@ -55,7 +55,7 @@ require ( github.com/youmark/pkcs8 v0.0.0-20240424034433-3c2c7870ae76 // indirect golang.org/x/arch v0.8.0 // indirect golang.org/x/image v0.18.0 // indirect - golang.org/x/net v0.26.0 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/text v0.16.0 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 513d4a5..060f794 100644 --- a/go.sum +++ b/go.sum @@ -249,6 +249,8 @@ golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= golang.org/x/crypto v0.23.0/go.mod 
h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= +golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U= golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -278,6 +280,8 @@ golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= @@ -307,6 +311,8 @@ golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= @@ -321,6 +327,8 @@ golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/goextVersion.go b/goextVersion.go index 9d52344..27c1463 100644 --- a/goextVersion.go +++ b/goextVersion.go @@ -1,5 +1,5 @@ package goext -const GoextVersion = "0.0.481" +const GoextVersion = "0.0.482" -const GoextVersionTimestamp = "2024-07-04T16:24:49+0200" +const GoextVersionTimestamp = "2024-07-12T16:33:42+0200" diff --git a/mathext/statistics.go b/mathext/statistics.go index 52216c7..65f5db3 100644 --- a/mathext/statistics.go +++ b/mathext/statistics.go @@ -1,6 +1,9 @@ package mathext -import "gogs.mikescher.com/BlackForestBytes/goext/langext" +import ( + "gogs.mikescher.com/BlackForestBytes/goext/exerr" + "gogs.mikescher.com/BlackForestBytes/goext/langext" +) func Sum[T langext.NumberConstraint](v []T) T { total := T(0) @@ -41,3 +44,53 @@ func ArrMax[T langext.OrderedConstraint](v []T) T { } return r } + +func MustPercentile[T langext.NumberConstraint](rawdata []T, percentile float64) T { + v, err := Percentile(rawdata, percentile) + if err != nil { + panic(err) + } + return v +} + +func Percentile[T 
langext.NumberConstraint](rawdata []T, percentile float64) (T, error) { + v, err := FloatPercentile(rawdata, percentile) + if err != nil { + return T(0), err + } + return T(v), nil + +} + +func FloatPercentile[T langext.NumberConstraint](rawdata []T, percentile float64) (float64, error) { + if len(rawdata) == 0 { + return 0, exerr.New(exerr.TypeAssert, "no data to calculate percentile").Any("percentile", percentile).Build() + } + + if percentile < 0 || percentile > 100 { + return 0, exerr.New(exerr.TypeAssert, "percentile out of range").Any("percentile", percentile).Build() + } + + data := langext.ArrCopy(rawdata) + langext.Sort(data) + + idxFloat := float64(len(data)-1) * (percentile / float64(100)) + + idxInt := int(idxFloat) + + // exact match on index + if idxFloat == float64(idxInt) { + return float64(data[idxInt]), nil + } + + // linear interpolation + v1 := data[idxInt] + v2 := data[idxInt+1] + + weight := idxFloat - float64(idxInt) + + valFloat := (float64(v1) * (1 - weight)) + (float64(v2) * weight) + + return valFloat, nil + +} diff --git a/mathext/statistics_test.go b/mathext/statistics_test.go new file mode 100644 index 0000000..ede5924 --- /dev/null +++ b/mathext/statistics_test.go @@ -0,0 +1,238 @@ +package mathext + +import ( + "math" + "testing" +) + +func TestSumIntsHappyPath(t *testing.T) { + values := []int{1, 2, 3, 4, 5} + expected := 15 + result := Sum(values) + if result != expected { + t.Errorf("Sum of %v; expected %v, got %v", values, expected, result) + } +} + +func TestSumFloatsHappyPath(t *testing.T) { + values := []float64{1.1, 2.2, 3.3} + expected := 6.6 + result := Sum(values) + if result != expected { + t.Errorf("Sum of %v; expected %v, got %v", values, expected, result) + } +} + +func TestMeanOfInts(t *testing.T) { + values := []float64{1, 2, 3, 4, 5} + expected := 3.0 + result := Mean(values) + if result != expected { + t.Errorf("Mean of %v; expected %v, got %v", values, expected, result) + } +} + +func 
TestMedianOddNumberOfElements(t *testing.T) { + values := []float64{1, 2, 3, 4, 5} + expected := 3.0 + result := Median(values) + if result != expected { + t.Errorf("Median of %v; expected %v, got %v", values, expected, result) + } +} + +func TestMedianEvenNumberOfElements(t *testing.T) { + values := []float64{1, 2, 3, 4, 5, 6} + expected := 3.5 + result := Median(values) + if result != expected { + t.Errorf("Median of %v; expected %v, got %v", values, expected, result) + } +} + +func TestArrMinInts(t *testing.T) { + values := []int{5, 3, 9, 1, 4} + expected := 1 + result := ArrMin(values) + if result != expected { + t.Errorf("ArrMin of %v; expected %v, got %v", values, expected, result) + } +} + +func TestArrMaxInts(t *testing.T) { + values := []int{5, 3, 9, 1, 4} + expected := 9 + result := ArrMax(values) + if result != expected { + t.Errorf("ArrMax of %v; expected %v, got %v", values, expected, result) + } +} + +func TestPercentileValidInput(t *testing.T) { + values := []int{1, 2, 3, 4, 5} + percentile := 50.0 + expected := 3 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileOutOfRange(t *testing.T) { + values := []int{1, 2, 3, 4, 5} + percentile := 150.0 + _, err := Percentile(values, percentile) + if err == nil { + t.Errorf("Expected error for percentile %v out of range, got nil", percentile) + } +} + +func TestPercentileValueInArray(t *testing.T) { + values := []int{1, 3, 5, 7, 9} + percentile := 40.0 + expected := 4 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestFloatPercentileValueInArray(t *testing.T) { + values := []int{1, 3, 5, 7, 9} + percentile := 40.0 + expected := 4.2 + result, err := FloatPercentile(values, 
percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileInterpolation(t *testing.T) { + values := []float64{1.0, 2.0, 3.0, 4.0, 5.0} + percentile := 25.0 + expected := 2.0 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileSingleValue(t *testing.T) { + values := []int{10} + percentile := 50.0 + expected := 10 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileExactlyBetweenTwoValues(t *testing.T) { + values := []float64{1, 2, 3, 4, 5} + percentile := 62.5 // Exactly between 3 and 4 + expected := 3.5 + result, err := FloatPercentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileTwoThirdsBetweenTwoValues(t *testing.T) { + values := []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + percentile := 66.666666666666 + expected := 6.666666666666667 // Since 2/3 of the way between 6 and 7 is 6.666... 
+ result, err := Percentile(values, percentile) + if err != nil || math.Abs(result-expected) > 1e-9 { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileBetweenTwoValues1(t *testing.T) { + values := []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + percentile := 11.0 + expected := 1.1 + result, err := Percentile(values, percentile) + if err != nil || math.Abs(result-expected) > 1e-9 { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileBetweenTwoValues2(t *testing.T) { + values := []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + percentile := 9.0 + expected := 0.9 + result, err := Percentile(values, percentile) + if err != nil || math.Abs(result-expected) > 1e-9 { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileUnsortedInput(t *testing.T) { + values := []float64{5, 1, 4, 2, 3} // Unsorted input + percentile := 50.0 + expected := 3.0 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileUnsortedInputLowPercentile(t *testing.T) { + values := []float64{10, 6, 7, 3, 2, 9, 8, 1, 4, 5} // Unsorted input + percentile := 10.0 + expected := 1.9 // Expecting interpolation between 1 and 2 + result, err := Percentile(values, percentile) + if err != nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestPercentileUnsortedInputHighPercentile(t *testing.T) { + values := []float64{10, 6, 7, 3, 2, 9, 8, 1, 4, 5} // Unsorted input + percentile := 90.0 + expected := 9.1 // Expecting interpolation between 9 and 10 + result, err := Percentile(values, percentile) + if err != 
nil || result != expected { + t.Errorf("Percentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestFloatPercentileExactValueFromInput(t *testing.T) { + values := []float64{1.5, 2.5, 3.5, 4.5, 5.5} + percentile := 50.0 // Exact value from input array should be 3.5 + expected := 3.5 + result, err := FloatPercentile(values, percentile) + if err != nil || result != expected { + t.Errorf("FloatPercentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestFloatPercentileInterpolatedValue(t *testing.T) { + values := []float64{1.0, 2.0, 3.0, 4.0, 5.0} + percentile := 87.5 // Interpolated value between 4.0 and 5.0 + expected := 4.5 + result, err := FloatPercentile(values, percentile) + if err != nil || result != expected { + t.Errorf("FloatPercentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestFloatPercentileUnsortedInputExactValue(t *testing.T) { + values := []float64{5.5, 1.5, 4.5, 2.5, 3.5} // Unsorted input + percentile := 50.0 + expected := 3.5 + result, err := FloatPercentile(values, percentile) + if err != nil || result != expected { + t.Errorf("FloatPercentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +} + +func TestFloatPercentileUnsortedInputInterpolatedValue(t *testing.T) { + values := []float64{10.5, 6.5, 7.5, 3.5, 2.5, 9.5, 8.5, 1.5, 4.5, 5.5} + percentile := 80.0 // Interpolated value between 8.5 and 9.5 + expected := 8.7 + result, err := FloatPercentile(values, percentile) + if err != nil || math.Abs(result-expected) > 1e-9 { + t.Errorf("FloatPercentile %v of %v; expected %v, got %v, err: %v", percentile, values, expected, result, err) + } +}