pkg/vendor/github.com/ulikunitz/xz/internal/randtxt/probs.go

// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package randtxt supports the generation of random text using a
// trigram model for the English language.
package randtxt

import (
	"math"
	"math/rand"
	"sort"
)

// ngram is a single entry of the language model: the text of an n-gram
// together with two base-2 logarithms that are converted back into
// probabilities with math.Exp2.
type ngram struct {
	// s is the text of the n-gram.
	s string
	// lgP is the base-2 logarithm of the probability of the n-gram,
	// lgQ the base-2 logarithm of its conditional probability.
	lgP, lgQ float64
}

// ngrams is a language model held as a slice of ngram entries. It
// implements sort.Interface, ordering the entries by their text.
type ngrams []ngram

// Len returns the number of entries of the language model.
func (lm ngrams) Len() int {
	return len(lm)
}

// Swap exchanges the entries at the positions i and j.
func (lm ngrams) Swap(i, j int) {
	lm[j], lm[i] = lm[i], lm[j]
}

// Less reports whether the text of entry i sorts before the text of
// entry j.
func (lm ngrams) Less(i, j int) bool {
	return lm[j].s > lm[i].s
}

// Sort orders the language model by the text of its n-grams.
func (lm ngrams) Sort() {
	sort.Sort(lm)
}

// Search returns the index of the first entry whose text is not less
// than g. That is the position of g if it is part of the model and
// the position where it would be inserted otherwise; the result is
// len(lm) if all entries sort before g. The model must be sorted.
func (lm ngrams) Search(g string) int {
	reached := func(k int) bool { return g <= lm[k].s }
	return sort.Search(len(lm), reached)
}

// prob couples a string, usually an ngram, with a probability value.
type prob struct {
	// s is the string, usually an ngram.
	s string
	// p is the probability value that belongs to s.
	p float64
}

// probs is a slice of prob values. It implements sort.Interface with
// an ordering by the string field s; the wrapper byProb supplies the
// ordering by the probability field p.
type probs []prob

// Len returns the number of entries of the slice.
func (ps probs) Len() int {
	return len(ps)
}

// Swap exchanges the entries at the positions i and j.
func (ps probs) Swap(i, j int) {
	ps[j], ps[i] = ps[i], ps[j]
}

// Less reports whether the string of entry i sorts before the string
// of entry j.
func (ps probs) Less(i, j int) bool {
	return ps[j].s > ps[i].s
}

// SortByNgram sorts the slice by ngram, field s.
func (ps probs) SortByNgram() {
	sort.Sort(ps)
}

// SortByProb sorts the slice by probability, field p.
func (ps probs) SortByProb() {
	sort.Sort(byProb{probs: ps})
}

// SearchNgram returns the index of the first entry whose string is not
// less than g: the position of g or the position where it would be
// inserted. The slice must be sorted by ngram.
func (ps probs) SearchNgram(g string) int {
	reached := func(k int) bool { return g <= ps[k].s }
	return sort.Search(len(ps), reached)
}

// SearchProb returns the index of the first entry whose probability is
// not less than p: the position of p or the position where it would be
// inserted. The result is len(ps) if all probabilities are less than
// p. The slice must be sorted by probability.
func (ps probs) SearchProb(p float64) int {
	reached := func(k int) bool { return p <= ps[k].p }
	return sort.Search(len(ps), reached)
}

// byProb wraps a probs slice to sort it by probability, field p. Len
// and Swap are taken over from the embedded slice.
type byProb struct {
	probs
}

// Less reports whether the probability of entry i is less than the
// probability of entry j.
func (b byProb) Less(i, j int) bool {
	return b.probs[j].p > b.probs[i].p
}

// cdf sets up a cumulative distribution function over n entries and
// returns it as a probs slice. The callback p supplies entry i; its
// field p is used as a weight. The weights are normalized by their
// sum, so they don't need to add up to one. In the result the field s
// of entry i is unchanged and the field p holds the normalized sum of
// the weights of the entries 0 to i.
//
// Rounding can leave the accumulated sum slightly above or below one.
// Intermediate values are therefore capped at 1.0 and the last entry
// is set to exactly 1.0, so that SearchProb finds an entry for every
// probability up to one instead of returning an index behind the end
// of the slice.
//
// The function panics if the accumulated values are not in ascending
// order. We should have returned an actual function.
func cdf(n int, p func(i int) prob) probs {
	prs := make(probs, n)
	sum := 0.0
	for i := range prs {
		prs[i] = p(i)
		sum += prs[i].p
	}
	// q scales the weights so that they add up to one.
	q := 1.0 / sum
	x := 0.0
	for i := range prs {
		x += prs[i].p * q
		if x > 1.0 {
			x = 1.0
		}
		prs[i].p = x
	}
	if !sort.IsSorted(byProb{prs}) {
		panic("cdf not sorted")
	}
	if n > 0 {
		// Close the gap that rounding may have left below 1.0. This
		// happens after the check above to keep its outcome unchanged;
		// the order is preserved because no value exceeds 1.0.
		prs[n-1].p = 1.0
	}
	return prs
}

// pCDFOfLM builds the cumulative distribution function for a complete
// language model. The weight of each ngram is its probability, which
// is recovered from the logarithm lgP with math.Exp2.
func pCDFOfLM(lm ngrams) probs {
	entry := func(i int) prob {
		g := lm[i]
		return prob{s: g.s, p: math.Exp2(g.lgP)}
	}
	return cdf(len(lm), entry)
}

// cCDF builds the cumulative distribution function for a slice of
// ngrams using their conditional probabilities, which are recovered
// from the logarithm lgQ with math.Exp2.
func cCDF(s ngrams) probs {
	entry := func(i int) prob {
		g := s[i]
		return prob{s: g.s, p: math.Exp2(g.lgQ)}
	}
	return cdf(len(s), entry)
}

// comap maps a two-letter prefix to the conditional distribution
// function of the trigrams starting with that prefix, which is the
// distribution of the last character.
type comap map[string]probs

// comapOfLM converts a language model into a map of conditional
// distribution functions, one for each two-letter prefix.
//
// The model must be sorted and the run of trigrams for each prefix
// must end with the trigram that has Z as last letter; the function
// panics otherwise.
func comapOfLM(lm ngrams) comap {
	if !sort.IsSorted(lm) {
		panic("lm is not sorted")
	}
	m := make(comap, 26*26)
	first := 0
	for first < len(lm) {
		prefix := lm[first].s[:2]
		// The entry with the last letter Z marks the end of the run
		// of trigrams that share the prefix.
		final := prefix + "Z"
		last := lm.Search(final)
		if last >= len(lm) || lm[last].s != final {
			panic("unexpected search result")
		}
		m[prefix] = cCDF(lm[first : last+1])
		first = last + 1
	}
	return m
}

// trigram selects a trigram with the prefix g2. The probability value
// p, expected in the range [0.0,1.0), picks the first entry of the
// distribution for g2 whose cumulative probability is not less than p.
//
// The method panics with an index error if the map has no distribution
// for g2 or if p is greater than all of its cumulative probabilities.
func (c comap) trigram(g2 string, p float64) string {
	dist := c[g2]
	return dist[dist.SearchProb(p)].s
}

// Distribution functions derived from the language model englm3. They
// are computed once when the package is initialized.
var (
	// pcdf is the cumulative distribution function over all trigrams
	// of englm3, built by pCDFOfLM from the lgP values ("normal"
	// probabilities). NewReader uses it to pick the first trigram.
	pcdf = pCDFOfLM(englm3)
	// cmap maps each two-letter prefix of englm3 to the conditional
	// distribution function of its trigrams, built by comapOfLM. Read
	// uses it to pick the next letter.
	cmap = comapOfLM(englm3)
)

// Reader generates a stream of text of uppercase letters. The trigrams
// of the text are distributed according to a language model of the
// English language.
type Reader struct {
	// rnd supplies the random values that select the next trigram.
	rnd *rand.Rand
	// g3 is the current trigram; its last two letters are the prefix
	// for the selection of the next one.
	g3 string
}

// NewReader creates a new reader. The argument src must create a
// uniformly distributed stream of random values. The first random
// value is consumed to select the initial trigram from the
// distribution of all trigrams.
func NewReader(src rand.Source) *Reader {
	r := &Reader{rnd: rand.New(src)}
	start := pcdf.SearchProb(r.rnd.Float64())
	r.g3 = pcdf[start].s
	return r
}

// Read fills p with random text. It consumes one random value for each
// byte and stores the last letter of the trigram selected with it. The
// Read function will always return len(p) bytes and will never return
// an error.
func (r *Reader) Read(p []byte) (n int, err error) {
	for i := 0; i < len(p); i++ {
		// The last two letters of the current trigram are the
		// prefix of the next one.
		next := cmap.trigram(r.g3[1:], r.rnd.Float64())
		r.g3 = next
		p[i] = next[2]
	}
	return len(p), nil
}
vendor 2018-06-17 06:46:58 +00:00			`// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package randtxt supports the generation of random text using a`
			`// trigram model for the English language.`
			`package randtxt`

			`import (`
			`"math"`
			`"math/rand"`
			`"sort"`
			`)`

			`// ngram stores an entry from the language model.`
			`type ngram struct {`
			`s string`
			`lgP float64`
			`lgQ float64`
			`}`

			`// ngrams represents a slice of ngram values and is used to represent a`
			`// language model.`
			`type ngrams []ngram`

			`func (s ngrams) Len() int { return len(s) }`
			`func (s ngrams) Swap(i, j int) { s[i], s[j] = s[j], s[i] }`
			`func (s ngrams) Less(i, j int) bool { return s[i].s < s[j].s }`

			`// Sorts the language model in the sequence of their ngrams.`
			`func (s ngrams) Sort() { sort.Sort(s) }`

			`// Search is looking for an ngram or the position where it would be`
			`// inserted.`
			`func (s ngrams) Search(g string) int {`
			`return sort.Search(len(s), func(k int) bool { return s[k].s >= g })`
			`}`

			`// prob represents a string, usually an ngram, and a probability value.`
			`type prob struct {`
			`s string`
			`p float64`
			`}`

			`// probs is a slice of prob values that can be sorted and searched.`
			`type probs []prob`

			`func (s probs) Len() int { return len(s) }`
			`func (s probs) Swap(i, j int) { s[i], s[j] = s[j], s[i] }`
			`func (s probs) Less(i, j int) bool { return s[i].s < s[j].s }`

			`// SortByNgram sorts the probs slice by ngram, field s.`
			`func (s probs) SortByNgram() { sort.Sort(s) }`

			`// SortByProb sorts the probs slice by probability, field p.`
			`func (s probs) SortByProb() { sort.Sort(byProb{s}) }`

			`// SearchNgram searches for an ngram or the position where it would be`
			`// inserted.`
			`func (s probs) SearchNgram(g string) int {`
			`return sort.Search(len(s), func(k int) bool { return s[k].s >= g })`
			`}`

			`// SearchProb searches ngrams for a specific probability or where it`
			`// would be inserted.`
			`func (s probs) SearchProb(p float64) int {`
			`return sort.Search(len(s), func(k int) bool { return s[k].p >= p })`
			`}`

			`// byProb is used to sort probs slice by probability, field p.`
			`type byProb struct {`
			`probs`
			`}`

			`func (s byProb) Less(i, j int) bool {`
			`return s.probs[i].p < s.probs[j].p`
			`}`

			`// cdf can be used to setup a cumulative distribution function`
			`// represented by a probs slice. We should have returned an actual`
			`// function.`
			`func cdf(n int, p func(i int) prob) probs {`
			`prs := make(probs, n)`
			`sum := 0.0`
			`for i := range prs {`
			`pr := p(i)`
			`sum += pr.p`
			`prs[i] = pr`
			`}`
			`q := 1.0 / sum`
			`x := 0.0`
			`for i, pr := range prs {`
			`x += pr.p * q`
			`if x > 1.0 {`
			`x = 1.0`
			`}`
			`prs[i].p = x`
			`}`
			`if !sort.IsSorted(byProb{prs}) {`
			`panic("cdf not sorted")`
			`}`
			`return prs`
			`}`

			`// pCDFOfLM converts a language model into a cumulative distribution`
			`// function represented by probs.`
			`func pCDFOfLM(lm ngrams) probs {`
			`return cdf(len(lm), func(i int) prob {`
			`return prob{lm[i].s, math.Exp2(lm[i].lgP)}`
			`})`
			`}`

			`// cCDF converts a ngrams slice into a cumulative distribution function`
			`// using the conditional probability lgQ.`
			`func cCDF(s ngrams) probs {`
			`return cdf(len(s), func(i int) prob {`
			`return prob{s[i].s, math.Exp2(s[i].lgQ)}`
			`})`
			`}`

			`// comap contains a map of conditional distribution function for the`
			`// last character.`
			`type comap map[string]probs`

			`// comapOfLM converts a language model in a map of conditional`
			`// distribution functions.`
			`func comapOfLM(lm ngrams) comap {`
			`if !sort.IsSorted(lm) {`
			`panic("lm is not sorted")`
			`}`
			`m := make(comap, 26*26)`
			`for i := 0; i < len(lm); {`
			`j := i`
			`g := lm[i].s`
			`g2 := g[:2]`
			`z := g2 + "Z"`
			`i = lm.Search(z)`
			`if i >= len(lm) || lm[i].s != z {`
			`panic("unexpected search result")`
			`}`
			`i++`
			`m[g2] = cCDF(lm[j:i])`
			`}`
			`return m`
			`}`

			`// trigram returns the trigram with prefix g2 using a probability value`
			`// in the range [0.0,1.0).`
			`func (c comap) trigram(g2 string, p float64) string {`
			`prs := c[g2]`
			`i := prs.SearchProb(p)`
			`return prs[i].s`
			`}`

			`var (`
			`// CDF for normal probabilities`
			`pcdf = pCDFOfLM(englm3)`
			`// map of two letter conditionals`
			`cmap = comapOfLM(englm3)`
			`)`

			`// Reader generates a stream of text of uppercase letters with trigrams`
			`// distributed according to a language model of the English language.`
			`type Reader struct {`
			`rnd *rand.Rand`
			`g3 string`
			`}`

			`// NewReader creates a new reader. The argument src must create a uniformly`
			`// distributed stream of random values.`
			`func NewReader(src rand.Source) *Reader {`
			`rnd := rand.New(src)`
			`i := pcdf.SearchProb(rnd.Float64())`
			`return &Reader{rnd, pcdf[i].s}`
			`}`

			`// Read reads random text. The Read function will always return len(p)`
			`// bytes and will never return an error.`
			`func (r *Reader) Read(p []byte) (n int, err error) {`
			`for i := range p {`
			`r.g3 = cmap.trigram(r.g3[1:], r.rnd.Float64())`
			`p[i] = r.g3[2]`
			`}`
			`return len(p), nil`
			`}`