vendor

2018-07-06 19:13:18 +11:00
parent d76b49b3c5
commit 21d3f45969
442 changed files with 75578 additions and 0 deletions
--- a/vendor/github.com/ulikunitz/xz/internal/randtxt/englm3.go
+++ b/vendor/github.com/ulikunitz/xz/internal/randtxt/englm3.go
--- a/vendor/github.com/ulikunitz/xz/internal/randtxt/groupreader.go
+++ b/vendor/github.com/ulikunitz/xz/internal/randtxt/groupreader.go
@ -0,0 +1,82 @@
+// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package randtxt
+
+import (
+	"bufio"
+	"io"
+	"unicode"
+)
+
+// GroupReader reformats the bytes of an underlying reader into groups
+// of 5 characters. Groups are separated by a space and a newline is
+// written after every GroupsPerLine groups.
+type GroupReader struct {
+	// R supplies the bytes that are grouped.
+	R io.ByteReader
+	// GroupsPerLine is the number of groups per output line. Values
+	// below 1 make Read use 8.
+	GroupsPerLine int
+	// off is the output position Read uses to decide where the
+	// separators go.
+	off int64
+	// eof is set once R has reported io.EOF.
+	eof bool
+}
+
+// NewGroupReader returns a GroupReader that reads from r through a
+// bufio.Reader. GroupsPerLine is left at its zero value.
+func NewGroupReader(r io.Reader) *GroupReader {
+	gr := new(GroupReader)
+	gr.R = bufio.NewReader(r)
+	return gr
+}
+
+// Read fills p with the data of the internal reader arranged in groups
+// of 5 characters. A GroupsPerLine value below 1 is treated as 8.
+// Spaces of the input are written as '_' and non-printable bytes as
+// '-'. When the input ends, the output is terminated by a single
+// newline and io.EOF is returned together with the final bytes. Every
+// later call returns 0, io.EOF.
+func (r *GroupReader) Read(p []byte) (n int, err error) {
+	if r.eof {
+		return 0, io.EOF
+	}
+	perLine := r.GroupsPerLine
+	if perLine < 1 {
+		perLine = 8
+	}
+	// Each group occupies six output bytes: five characters plus the
+	// separator that follows them.
+	lineLen := int64(perLine * 6)
+	// A separator is never stored in the final slot of a buffer longer
+	// than one byte; presumably this keeps a trailing separator inside
+	// the buffer where the end-of-input handling below can rewrite it.
+	last := len(p) - 1
+	holdBack := len(p) > 1
+	for i := range p {
+		var sep byte
+		switch {
+		case r.off%lineLen == lineLen-1:
+			sep = '\n'
+		case r.off%6 == 5:
+			sep = ' '
+		}
+		if sep != 0 {
+			if holdBack && i == last {
+				return i, nil
+			}
+			p[i] = sep
+			r.off++
+			continue
+		}
+		b, rerr := r.R.ReadByte()
+		if rerr == io.EOF {
+			r.eof = true
+			if i > 0 {
+				// A separator written just before the end of the
+				// input becomes the terminating newline.
+				if prev := p[i-1]; prev == ' ' || prev == '\n' {
+					p[i-1] = '\n'
+					return i, io.EOF
+				}
+			}
+			p[i] = '\n'
+			return i + 1, io.EOF
+		}
+		if rerr != nil {
+			return i, rerr
+		}
+		if b == ' ' {
+			b = '_'
+		} else if !unicode.IsPrint(rune(b)) {
+			b = '-'
+		}
+		p[i] = b
+		r.off++
+	}
+	return len(p), nil
+}
--- a/vendor/github.com/ulikunitz/xz/internal/randtxt/probs.go
+++ b/vendor/github.com/ulikunitz/xz/internal/randtxt/probs.go
@ -0,0 +1,185 @@
+// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package randtxt supports the generation of random text using a
+// trigram model for the English language.
+package randtxt
+
+import (
+	"math"
+	"math/rand"
+	"sort"
+)
+
+// ngram is one entry of the language model: the n-gram string s and
+// the two base-2 logarithms lgP and lgQ that are turned into
+// probabilities with math.Exp2 when the distributions are built.
+type ngram struct {
+	s        string
+	lgP, lgQ float64
+}
+
+// ngrams is a slice of ngram values and represents a language model.
+// It implements sort.Interface with the entries ordered by the n-gram
+// string.
+type ngrams []ngram
+
+// Len returns the number of entries.
+func (lm ngrams) Len() int {
+	return len(lm)
+}
+
+// Swap exchanges the entries at positions i and j.
+func (lm ngrams) Swap(i, j int) {
+	lm[j], lm[i] = lm[i], lm[j]
+}
+
+// Less reports whether the n-gram at position i sorts before the one at
+// position j.
+func (lm ngrams) Less(i, j int) bool {
+	a, b := lm[i].s, lm[j].s
+	return a < b
+}
+
+// Sort orders the language model by n-gram string.
+func (lm ngrams) Sort() {
+	sort.Sort(lm)
+}
+
+// Search returns the index of the n-gram g in the sorted model or, if g
+// is not present, the index at which it would have to be inserted.
+func (lm ngrams) Search(g string) int {
+	atOrAfter := func(k int) bool {
+		return !(lm[k].s < g)
+	}
+	return sort.Search(len(lm), atOrAfter)
+}
+
+// prob pairs a string, usually an ngram, with a probability value.
+type prob struct {
+	s string
+	p float64
+}
+
+// probs is a slice of prob values. It implements sort.Interface with
+// the entries ordered by the string field s; byProb provides the
+// ordering by probability.
+type probs []prob
+
+// Len returns the number of entries.
+func (ps probs) Len() int {
+	return len(ps)
+}
+
+// Swap exchanges the entries at positions i and j.
+func (ps probs) Swap(i, j int) {
+	ps[j], ps[i] = ps[i], ps[j]
+}
+
+// Less reports whether the string at position i sorts before the one at
+// position j.
+func (ps probs) Less(i, j int) bool {
+	a, b := ps[i].s, ps[j].s
+	return a < b
+}
+
+// SortByNgram sorts the probs slice by ngram, field s.
+func (ps probs) SortByNgram() {
+	sort.Sort(ps)
+}
+
+// SortByProb sorts the probs slice by probability, field p.
+func (ps probs) SortByProb() {
+	sort.Sort(byProb{probs: ps})
+}
+
+// SearchNgram returns the index of the ngram g in a slice sorted by
+// ngram or, if g is not present, the index at which it would be
+// inserted.
+func (ps probs) SearchNgram(g string) int {
+	atOrAfter := func(k int) bool {
+		return !(ps[k].s < g)
+	}
+	return sort.Search(len(ps), atOrAfter)
+}
+
+// SearchProb returns the first index of a slice sorted by probability
+// whose value is at least p, or len(ps) if there is no such entry.
+func (ps probs) SearchProb(p float64) int {
+	reached := func(k int) bool {
+		return ps[k].p >= p
+	}
+	return sort.Search(len(ps), reached)
+}
+
+// byProb wraps a probs slice so that sorting uses the probability,
+// field p. Len and Swap come from the embedded slice.
+type byProb struct {
+	probs
+}
+
+// Less reports whether the probability at position i is smaller than
+// the one at position j.
+func (b byProb) Less(i, j int) bool {
+	lo, hi := b.probs[i].p, b.probs[j].p
+	return lo < hi
+}
+
+// cdf sets up a cumulative distribution function represented by a probs
+// slice. The function p is called for every index in [0,n) and returns
+// the string and the weight of that entry. The weights are normalized
+// by their sum and replaced by the running total, so the p fields of
+// the result are non-decreasing and the last one is exactly 1.0. The
+// function panics if the result is not sorted by probability. We should
+// have returned an actual function.
+func cdf(n int, p func(i int) prob) probs {
+	prs := make(probs, n)
+	sum := 0.0
+	for i := range prs {
+		pr := p(i)
+		sum += pr.p
+		prs[i] = pr
+	}
+	q := 1.0 / sum
+	x := 0.0
+	for i, pr := range prs {
+		x += pr.p * q
+		// Rounding must not push the running total above 1.
+		if x > 1.0 {
+			x = 1.0
+		}
+		prs[i].p = x
+	}
+	// Rounding can also leave the final total slightly below 1.0. A
+	// SearchProb call with a value in [0.0,1.0) above that total would
+	// then return len(prs) and the caller would index out of range, so
+	// the last entry is pinned to exactly 1.0.
+	if n > 0 {
+		prs[n-1].p = 1.0
+	}
+	if !sort.IsSorted(byProb{prs}) {
+		panic("cdf not sorted")
+	}
+	return prs
+}
+
+// pCDFOfLM builds the cumulative distribution function of a language
+// model. The weight of every entry is 2 raised to its lgP value.
+func pCDFOfLM(lm ngrams) probs {
+	entry := func(i int) prob {
+		g := lm[i]
+		return prob{s: g.s, p: math.Exp2(g.lgP)}
+	}
+	return cdf(len(lm), entry)
+}
+
+// cCDF builds a cumulative distribution function from an ngrams slice
+// using the conditional probability: the weight of every entry is 2
+// raised to its lgQ value.
+func cCDF(s ngrams) probs {
+	entry := func(i int) prob {
+		g := s[i]
+		return prob{s: g.s, p: math.Exp2(g.lgQ)}
+	}
+	return cdf(len(s), entry)
+}
+
+// comap maps a two-character prefix to the conditional distribution
+// function of the trigrams starting with that prefix.
+type comap map[string]probs
+
+// comapOfLM converts a sorted language model into a map of conditional
+// distribution functions, one per two-character prefix. It panics if lm
+// is not sorted or if the run of entries for a prefix does not end with
+// the entry prefix+"Z".
+func comapOfLM(lm ngrams) comap {
+	if !sort.IsSorted(lm) {
+		panic("lm is not sorted")
+	}
+	m := make(comap, 26*26)
+	start := 0
+	for start < len(lm) {
+		prefix := lm[start].s[:2]
+		// The entry prefix+"Z" marks the end of the run that shares
+		// this prefix.
+		final := prefix + "Z"
+		end := lm.Search(final)
+		if end >= len(lm) || lm[end].s != final {
+			panic("unexpected search result")
+		}
+		end++
+		m[prefix] = cCDF(lm[start:end])
+		start = end
+	}
+	return m
+}
+
+// trigram returns the trigram with prefix g2 that is selected by the
+// probability value p, which is expected in the range [0.0,1.0). The
+// prefix g2 must be a key of the map.
+func (c comap) trigram(g2 string, p float64) string {
+	dist := c[g2]
+	return dist[dist.SearchProb(p)].s
+}
+
+// pcdf is the CDF for the normal probabilities of englm3.
+var pcdf = pCDFOfLM(englm3)
+
+// cmap is the map of two letter conditionals of englm3.
+var cmap = comapOfLM(englm3)
+
+// Reader generates a stream of text whose trigrams are distributed
+// according to a language model of the English language.
+type Reader struct {
+	// rnd supplies the random values that select the trigrams.
+	rnd *rand.Rand
+	// g3 is the current trigram; its last two characters are the prefix
+	// of the next one.
+	g3 string
+}
+
+// NewReader creates a new reader. The argument src must create a
+// uniformly distributed stream of random values. The starting trigram
+// is drawn from the unconditional distribution pcdf.
+func NewReader(src rand.Source) *Reader {
+	rnd := rand.New(src)
+	first := pcdf[pcdf.SearchProb(rnd.Float64())]
+	return &Reader{rnd: rnd, g3: first.s}
+}
+
+// Read fills p with random text. It always returns len(p) and a nil
+// error. Every byte is the last character of a trigram drawn from the
+// conditional distribution of the previous two characters.
+func (r *Reader) Read(p []byte) (n int, err error) {
+	for i := 0; i < len(p); i++ {
+		next := cmap.trigram(r.g3[1:], r.rnd.Float64())
+		r.g3 = next
+		p[i] = next[2]
+	}
+	return len(p), nil
+}
--- a/vendor/github.com/ulikunitz/xz/internal/randtxt/probs_test.go
+++ b/vendor/github.com/ulikunitz/xz/internal/randtxt/probs_test.go
@ -0,0 +1,37 @@
+// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package randtxt
+
+import (
+	"bufio"
+	"io"
+	"math/rand"
+	"testing"
+)
+
+// TestReader logs 195 bytes of random text, formatted by a GroupReader,
+// line by line and fails if the scanner reports an error.
+func TestReader(t *testing.T) {
+	src := NewReader(rand.NewSource(13))
+	grouped := NewGroupReader(io.LimitReader(src, 195))
+	sc := bufio.NewScanner(grouped)
+	for sc.Scan() {
+		t.Log(sc.Text())
+	}
+	err := sc.Err()
+	if err != nil {
+		t.Fatalf("scanner error %s", err)
+	}
+}
+
+func TestComap(t *testing.T) {
+	prs := cmap["TH"]
+	for _, p := range prs[3:6] {
+		t.Logf("%v", p)
+	}
+	p := 0.2
+	x := cmap.trigram("TH", p)
+	if x != "THE" {
+		t.Fatalf("cmap.trigram(%q, %.1f) returned %q; want %q",
+			"TH", p, x, "THE")
+	}
+}