mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-04-11 18:13:00 +00:00
Several packages are only used while running the e2e suite. These packages are less important to update, as the they can not influence the final executable that is part of the Ceph-CSI container-image. By moving these dependencies out of the main Ceph-CSI go.mod, it is easier to identify if a reported CVE affects Ceph-CSI, or only the testing (like most of the Kubernetes CVEs). Signed-off-by: Niels de Vos <ndevos@ibm.com>
86 lines
1.8 KiB
Go
86 lines
1.8 KiB
Go
package compress
|
|
|
|
import "math"
|
|
|
|
// Estimate returns a normalized compressibility estimate of block b.
|
|
// Values close to zero are likely uncompressible.
|
|
// Values above 0.1 are likely to be compressible.
|
|
// Values above 0.5 are very compressible.
|
|
// Very small lengths will return 0.
|
|
func Estimate(b []byte) float64 {
|
|
if len(b) < 16 {
|
|
return 0
|
|
}
|
|
|
|
// Correctly predicted order 1
|
|
hits := 0
|
|
lastMatch := false
|
|
var o1 [256]byte
|
|
var hist [256]int
|
|
c1 := byte(0)
|
|
for _, c := range b {
|
|
if c == o1[c1] {
|
|
// We only count a hit if there was two correct predictions in a row.
|
|
if lastMatch {
|
|
hits++
|
|
}
|
|
lastMatch = true
|
|
} else {
|
|
lastMatch = false
|
|
}
|
|
o1[c1] = c
|
|
c1 = c
|
|
hist[c]++
|
|
}
|
|
|
|
// Use x^0.6 to give better spread
|
|
prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
|
|
|
|
// Calculate histogram distribution
|
|
variance := float64(0)
|
|
avg := float64(len(b)) / 256
|
|
|
|
for _, v := range hist {
|
|
Δ := float64(v) - avg
|
|
variance += Δ * Δ
|
|
}
|
|
|
|
stddev := math.Sqrt(float64(variance)) / float64(len(b))
|
|
exp := math.Sqrt(1 / float64(len(b)))
|
|
|
|
// Subtract expected stddev
|
|
stddev -= exp
|
|
if stddev < 0 {
|
|
stddev = 0
|
|
}
|
|
stddev *= 1 + exp
|
|
|
|
// Use x^0.4 to give better spread
|
|
entropy := math.Pow(stddev, 0.4)
|
|
|
|
// 50/50 weight between prediction and histogram distribution
|
|
return math.Pow((prediction+entropy)/2, 0.9)
|
|
}
|
|
|
|
// ShannonEntropyBits returns the number of bits minimum required to represent
|
|
// an entropy encoding of the input bytes.
|
|
// https://en.wiktionary.org/wiki/Shannon_entropy
|
|
func ShannonEntropyBits(b []byte) int {
|
|
if len(b) == 0 {
|
|
return 0
|
|
}
|
|
var hist [256]int
|
|
for _, c := range b {
|
|
hist[c]++
|
|
}
|
|
shannon := float64(0)
|
|
invTotal := 1.0 / float64(len(b))
|
|
for _, v := range hist[:] {
|
|
if v > 0 {
|
|
n := float64(v)
|
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
|
}
|
|
}
|
|
return int(math.Ceil(shannon))
|
|
}
|