rebase: update to latest github.com/openshift/api version

Also vendor all dependencies.

Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
Niels de Vos
2024-01-16 14:00:59 +01:00
committed by mergify[bot]
parent ab87045afb
commit ce603fb47e
547 changed files with 335074 additions and 4 deletions

359
api/vendor/golang.org/x/text/unicode/bidi/bidi.go generated vendored Normal file
View File

@ -0,0 +1,359 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go gen_ranges.go
// Package bidi contains functionality for bidirectional text support.
//
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"
// TODO
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.
import (
"bytes"
)
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
// A Direction indicates the overall flow of text.
type Direction int
const (
// LeftToRight indicates the text contains no right-to-left characters and
// that either there are some left-to-right characters or the option
// DefaultDirection(LeftToRight) was passed.
LeftToRight Direction = iota
// RightToLeft indicates the text contains no left-to-right characters and
// that either there are some right-to-left characters or the option
// DefaultDirection(RightToLeft) was passed.
RightToLeft
// Mixed indicates text contains both left-to-right and right-to-left
// characters.
Mixed
// Neutral means that text contains no left-to-right and right-to-left
// characters and that no default direction has been set.
Neutral
)
type options struct {
defaultDirection Direction
}
// An Option is an option for Bidi processing.
type Option func(*options)
// ICU allows the user to define embedding levels. This may be used, for example,
// to use hierarchical structure of markup languages to define embeddings.
// The following option may be a way to expose this functionality in this API.
// // LevelFunc sets a function that associates nesting levels with the given text.
// // The levels function will be called with monotonically increasing values for p.
// func LevelFunc(levels func(p int) int) Option {
// panic("unimplemented")
// }
// DefaultDirection sets the default direction for a Paragraph. The direction is
// overridden if the text contains directional characters.
func DefaultDirection(d Direction) Option {
return func(opts *options) {
opts.defaultDirection = d
}
}
// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
p []byte
o Ordering
opts []Option
types []Class
pairTypes []bracketType
pairValues []rune
runes []rune
options options
}
// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
// separator (bidi class B).
//
// The function p.Order() needs these values to be set, so this preparation could be postponed.
// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
// separator, the whole input needs to be processed anyway and should not be done twice.
//
// The function has the same return values as SetBytes() / SetString()
func (p *Paragraph) prepareInput() (n int, err error) {
p.runes = bytes.Runes(p.p)
bytecount := 0
// clear slices from previous SetString or SetBytes
p.pairTypes = nil
p.pairValues = nil
p.types = nil
for _, r := range p.runes {
props, i := LookupRune(r)
bytecount += i
cls := props.Class()
if cls == B {
return bytecount, nil
}
p.types = append(p.types, cls)
if props.IsOpeningBracket() {
p.pairTypes = append(p.pairTypes, bpOpen)
p.pairValues = append(p.pairValues, r)
} else if props.IsBracket() {
// this must be a closing bracket,
// since IsOpeningBracket is not true
p.pairTypes = append(p.pairTypes, bpClose)
p.pairValues = append(p.pairValues, r)
} else {
p.pairTypes = append(p.pairTypes, bpNone)
p.pairValues = append(p.pairValues, 0)
}
}
return bytecount, nil
}
// SetBytes configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
p.p = b
p.opts = opts
return p.prepareInput()
}
// SetString configures s for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If s contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from s including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
p.p = []byte(s)
p.opts = opts
return p.prepareInput()
}
// IsLeftToRight reports whether the principle direction of rendering for this
// paragraphs is left-to-right. If this returns false, the principle direction
// of rendering is right-to-left.
func (p *Paragraph) IsLeftToRight() bool {
return p.Direction() == LeftToRight
}
// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (p *Paragraph) Direction() Direction {
return p.o.Direction()
}
// TODO: what happens if the position is > len(input)? This should return an error.
// RunAt reports the Run at the given position of the input text.
//
// This method can be used for computing line breaks on paragraphs.
func (p *Paragraph) RunAt(pos int) Run {
c := 0
runNumber := 0
for i, r := range p.o.runes {
c += len(r)
if pos < c {
runNumber = i
}
}
return p.o.Run(runNumber)
}
func calculateOrdering(levels []level, runes []rune) Ordering {
var curDir Direction
prevDir := Neutral
prevI := 0
o := Ordering{}
// lvl = 0,2,4,...: left to right
// lvl = 1,3,5,...: right to left
for i, lvl := range levels {
if lvl%2 == 0 {
curDir = LeftToRight
} else {
curDir = RightToLeft
}
if curDir != prevDir {
if i > 0 {
o.runes = append(o.runes, runes[prevI:i])
o.directions = append(o.directions, prevDir)
o.startpos = append(o.startpos, prevI)
}
prevI = i
prevDir = curDir
}
}
o.runes = append(o.runes, runes[prevI:])
o.directions = append(o.directions, prevDir)
o.startpos = append(o.startpos, prevI)
return o
}
// Order computes the visual ordering of all the runs in a Paragraph.
func (p *Paragraph) Order() (Ordering, error) {
if len(p.types) == 0 {
return Ordering{}, nil
}
for _, fn := range p.opts {
fn(&p.options)
}
lvl := level(-1)
if p.options.defaultDirection == RightToLeft {
lvl = 1
}
para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
if err != nil {
return Ordering{}, err
}
levels := para.getLevels([]int{len(p.types)})
p.o = calculateOrdering(levels, p.runes)
return p.o, nil
}
// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
func (p *Paragraph) Line(start, end int) (Ordering, error) {
lineTypes := p.types[start:end]
para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
if err != nil {
return Ordering{}, err
}
levels := para.getLevels([]int{len(lineTypes)})
o := calculateOrdering(levels, p.runes[start:end])
return o, nil
}
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
type Ordering struct {
runes [][]rune
directions []Direction
startpos []int
}
// Direction reports the directionality of the runs.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (o *Ordering) Direction() Direction {
return o.directions[0]
}
// NumRuns returns the number of runs.
func (o *Ordering) NumRuns() int {
return len(o.runes)
}
// Run returns the ith run within the ordering.
func (o *Ordering) Run(i int) Run {
r := Run{
runes: o.runes[i],
direction: o.directions[i],
startpos: o.startpos[i],
}
return r
}
// TODO: perhaps with options.
// // Reorder creates a reader that reads the runes in visual order per character.
// // Modifiers remain after the runes they modify.
// func (l *Runs) Reorder() io.Reader {
// panic("unimplemented")
// }
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
runes []rune
direction Direction
startpos int
}
// String returns the text of the run in its original order.
func (r *Run) String() string {
return string(r.runes)
}
// Bytes returns the text of the run in its original order.
func (r *Run) Bytes() []byte {
return []byte(r.String())
}
// TODO: methods for
// - Display order
// - headers and footers
// - bracket replacement.
// Direction reports the direction of the run.
func (r *Run) Direction() Direction {
return r.direction
}
// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
// originating Paragraph value.
func (r *Run) Pos() (start, end int) {
return r.startpos, r.startpos + len(r.runes) - 1
}
// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
func AppendReverse(out, in []byte) []byte {
ret := make([]byte, len(in)+len(out))
copy(ret, out)
inRunes := bytes.Runes(in)
for i, r := range inRunes {
prop, _ := LookupRune(r)
if prop.IsBracket() {
inRunes[i] = prop.reverseBracket(r)
}
}
for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
}
copy(ret[len(out):], string(inRunes))
return ret
}
// ReverseString reverses the order of characters in s and returns a new string.
// Modifiers will still follow the runes they modify. Brackets are replaced with
// their counterparts.
func ReverseString(s string) string {
input := []rune(s)
li := len(input)
ret := make([]rune, li)
for i, r := range input {
prop, _ := LookupRune(r)
if prop.IsBracket() {
ret[li-i-1] = prop.reverseBracket(r)
} else {
ret[li-i-1] = r
}
}
return string(ret)
}

335
api/vendor/golang.org/x/text/unicode/bidi/bracket.go generated vendored Normal file
View File

@ -0,0 +1,335 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"container/list"
"fmt"
"sort"
)
// This file contains a port of the reference implementation of the
// Bidi Parentheses Algorithm:
// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
//
// The implementation in this file covers definitions BD14-BD16 and rule N0
// of UAX#9.
//
// Some preprocessing is done for each rune before data is passed to this
// algorithm:
// - opening and closing brackets are identified
// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
// is identical for the opening and closing bracket. It is left to do these
// mappings.
// - The BPA algorithm requires that bracket characters that are canonical
// equivalents of each other be able to be substituted for each other.
// It is the responsibility of the caller to do this canonicalization.
//
// In implementing BD16, this implementation departs slightly from the "logical"
// algorithm defined in UAX#9. In particular, the stack referenced there
// supports operations that go beyond a "basic" stack. An equivalent
// implementation based on a linked list is used here.
// Bidi_Paired_Bracket_Type
// BD14. An opening paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Open.
//
// BD15. A closing paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Close.
type bracketType byte
const (
bpNone bracketType = iota
bpOpen
bpClose
)
// bracketPair holds a pair of index values for opening and closing bracket
// location of a bracket pair.
type bracketPair struct {
opener int
closer int
}
func (b *bracketPair) String() string {
return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
}
// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
type bracketPairs []bracketPair
func (b bracketPairs) Len() int { return len(b) }
func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
//
// For each rune, it takes the indexes into the original string, the class the
// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
// takes the direction type for the start-of-sentence and the embedding level.
//
// The identifiers for bracket types are the rune of the canonicalized opening
// bracket for brackets (open or close) or 0 for runes that are not brackets.
func resolvePairedBrackets(s *isolatingRunSequence) {
p := bracketPairer{
sos: s.sos,
openers: list.New(),
codesIsolatedRun: s.types,
indexes: s.indexes,
}
dirEmbed := L
if s.level&1 != 0 {
dirEmbed = R
}
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
p.resolveBrackets(dirEmbed, s.p.initialTypes)
}
type bracketPairer struct {
sos Class // direction corresponding to start of sequence
// The following is a restatement of BD 16 using non-algorithmic language.
//
// A bracket pair is a pair of characters consisting of an opening
// paired bracket and a closing paired bracket such that the
// Bidi_Paired_Bracket property value of the former equals the latter,
// subject to the following constraints.
// - both characters of a pair occur in the same isolating run sequence
// - the closing character of a pair follows the opening character
// - any bracket character can belong at most to one pair, the earliest possible one
// - any bracket character not part of a pair is treated like an ordinary character
// - pairs may nest properly, but their spans may not overlap otherwise
// Bracket characters with canonical decompositions are supposed to be
// treated as if they had been normalized, to allow normalized and non-
// normalized text to give the same result. In this implementation that step
// is pushed out to the caller. The caller has to ensure that the pairValue
// slices contain the rune of the opening bracket after normalization for
// any opening or closing bracket.
openers *list.List // list of positions for opening brackets
// bracket pair positions sorted by location of opening bracket
pairPositions bracketPairs
codesIsolatedRun []Class // directional bidi codes for an isolated run
indexes []int // array of index values into the original string
}
// matchOpener reports whether characters at given positions form a matching
// bracket pair.
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
}
const maxPairingDepth = 63
// locateBrackets locates matching bracket pairs according to BD16.
//
// This implementation uses a linked list instead of a stack, because, while
// elements are added at the front (like a push) they are not generally removed
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
// traverse the run
// do that explicitly (not in a for-each) so we can record position
for i, index := range p.indexes {
// look at the bracket type for each character
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
// continue scanning
continue
}
switch pairTypes[index] {
case bpOpen:
// check if maximum pairing depth reached
if p.openers.Len() == maxPairingDepth {
p.openers.Init()
return
}
// remember opener location, most recent first
p.openers.PushFront(i)
case bpClose:
// see if there is a match
count := 0
for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
count++
opener := elem.Value.(int)
if p.matchOpener(pairValues, opener, i) {
// if the opener matches, add nested pair to the ordered list
p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
// remove up to and including matched opener
for ; count > 0; count-- {
p.openers.Remove(p.openers.Front())
}
break
}
}
sort.Sort(p.pairPositions)
// if we get here, the closing bracket matched no openers
// and gets ignored
}
}
}
// Bracket pairs within an isolating run sequence are processed as units so
// that both the opening and the closing paired bracket in a pair resolve to
// the same direction.
//
// N0. Process bracket pairs in an isolating run sequence sequentially in
// the logical order of the text positions of the opening paired brackets
// using the logic given below. Within this scope, bidirectional types EN
// and AN are treated as R.
//
// Identify the bracket pairs in the current isolating run sequence
// according to BD16. For each bracket-pair element in the list of pairs of
// text positions:
//
// a Inspect the bidirectional types of the characters enclosed within the
// bracket pair.
//
// b If any strong type (either L or R) matching the embedding direction is
// found, set the type for both brackets in the pair to match the embedding
// direction.
//
// o [ e ] o -> o e e e o
//
// o [ o e ] -> o e o e e
//
// o [ NI e ] -> o e NI e e
//
// c Otherwise, if a strong type (opposite the embedding direction) is
// found, test for adjacent strong types as follows: 1 First, check
// backwards before the opening paired bracket until the first strong type
// (L, R, or sos) is found. If that first preceding strong type is opposite
// the embedding direction, then set the type for both brackets in the pair
// to that type. 2 Otherwise, set the type for both brackets in the pair to
// the embedding direction.
//
// o [ o ] e -> o o o o e
//
// o [ o NI ] o -> o o o NI o o
//
// e [ o ] o -> e e o e o
//
// e [ o ] e -> e e o e e
//
// e ( o [ o ] NI ) e -> e e o o o o NI e e
//
// d Otherwise, do not set the type for the current bracket pair. Note that
// if the enclosed text contains no strong types the paired brackets will
// both resolve to the same level when resolved individually using rules N1
// and N2.
//
// e ( NI ) o -> e ( NI ) o
// getStrongTypeN0 maps character's directional code to strong type as required
// by rule N0.
//
// TODO: have separate type for "strong" directionality.
func (p *bracketPairer) getStrongTypeN0(index int) Class {
switch p.codesIsolatedRun[index] {
// in the scope of N0, number types are treated as R
case EN, AN, AL, R:
return R
case L:
return L
default:
return ON
}
}
// classifyPairContent reports the strong types contained inside a Bracket Pair,
// assuming the given embedding direction.
//
// It returns ON if no strong type is found. If a single strong type is found,
// it returns this type. Otherwise it returns the embedding direction.
//
// TODO: use separate type for "strong" directionality.
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
dirOpposite := ON
for i := loc.opener + 1; i < loc.closer; i++ {
dir := p.getStrongTypeN0(i)
if dir == ON {
continue
}
if dir == dirEmbed {
return dir // type matching embedding direction found
}
dirOpposite = dir
}
// return ON if no strong type found, or class opposite to dirEmbed
return dirOpposite
}
// classBeforePair determines which strong types are present before a Bracket
// Pair. Return R or L if strong type found, otherwise ON.
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
for i := loc.opener - 1; i >= 0; i-- {
if dir := p.getStrongTypeN0(i); dir != ON {
return dir
}
}
// no strong types found, return sos
return p.sos
}
// assignBracketType implements rule N0 for a single bracket pair.
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
// rule "N0, a", inspect contents of pair
dirPair := p.classifyPairContent(loc, dirEmbed)
// dirPair is now L, R, or N (no strong type found)
// the following logical tests are performed out of order compared to
// the statement of the rules but yield the same results
if dirPair == ON {
return // case "d" - nothing to do
}
if dirPair != dirEmbed {
// case "c": strong type found, opposite - check before (c.1)
dirPair = p.classBeforePair(loc)
if dirPair == dirEmbed || dirPair == ON {
// no strong opposite type found before - use embedding (c.2)
dirPair = dirEmbed
}
}
// else: case "b", strong type found matching embedding,
// no explicit action needed, as dirPair is already set to embedding
// direction
// set the bracket types to the type found
p.setBracketsToType(loc, dirPair, initialTypes)
}
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
p.codesIsolatedRun[loc.opener] = dirPair
p.codesIsolatedRun[loc.closer] = dirPair
for i := loc.opener + 1; i < loc.closer; i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
for i := loc.closer + 1; i < len(p.indexes); i++ {
index := p.indexes[i]
if initialTypes[index] != NSM {
break
}
p.codesIsolatedRun[i] = dirPair
}
}
// resolveBrackets implements rule N0 for a list of pairs.
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
for _, loc := range p.pairPositions {
p.assignBracketType(loc, dirEmbed, initialTypes)
}
}

1071
api/vendor/golang.org/x/text/unicode/bidi/core.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

206
api/vendor/golang.org/x/text/unicode/bidi/prop.go generated vendored Normal file
View File

@ -0,0 +1,206 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import "unicode/utf8"
// Properties provides access to BiDi properties of runes.
type Properties struct {
entry uint8
last uint8
}
var trie = newBidiTrie(0)
// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// return Class(p.entry & 0x0F)
// }
// Class returns the Bidi class for p.
func (p Properties) Class() Class {
c := Class(p.entry & 0x0F)
if c == Control {
c = controlByteToClass[p.last&0xF]
}
return c
}
// IsBracket reports whether the rune is a bracket.
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
// IsOpeningBracket reports whether the rune is an opening bracket.
// IsBracket must return true.
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
// TODO: find a better API and expose.
func (p Properties) reverseBracket(r rune) rune {
return xorMasks[p.entry>>xorMaskShift] ^ r
}
var controlByteToClass = [16]Class{
0xD: LRO, // U+202D LeftToRightOverride,
0xE: RLO, // U+202E RightToLeftOverride,
0xA: LRE, // U+202A LeftToRightEmbedding,
0xB: RLE, // U+202B RightToLeftEmbedding,
0xC: PDF, // U+202C PopDirectionalFormat,
0x6: LRI, // U+2066 LeftToRightIsolate,
0x7: RLI, // U+2067 RightToLeftIsolate,
0x8: FSI, // U+2068 FirstStrongIsolate,
0x9: PDI, // U+2069 PopDirectionalIsolate,
}
// LookupRune returns properties for r.
func LookupRune(r rune) (p Properties, size int) {
var buf [4]byte
n := utf8.EncodeRune(buf[:], r)
return Lookup(buf[:n])
}
// TODO: these lookup methods are based on the generated trie code. The returned
// sizes have slightly different semantics from the generated code, in that it
// always returns size==1 for an illegal UTF-8 byte (instead of the length
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
// leave invalid UTF-8 untouched, in which case it has performance benefits to
// do so (without changing the semantics). Bidi requires the semantics used here
// for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations, for
// convenience sake.
// This unrolled code also boosts performance of the secure/bidirule package by
// about 30%.
// So, to remove this code:
// - add option to trie generator to define return type.
// - always return 1 byte size for ill-formed UTF-8 runes.
// Lookup returns properties for the first rune in s and the width in bytes of
// its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding.
func Lookup(s []byte) (p Properties, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return Properties{entry: bidiValues[c0]}, 1
case c0 < 0xC2:
return Properties{}, 1
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
o = uint32(i)<<6 + uint32(c2)
i = bidiIndex[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
}
// Illegal rune
return Properties{}, 1
}
// LookupString returns properties for the first rune in s and the width in
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
// complete the encoding.
func LookupString(s string) (p Properties, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return Properties{entry: bidiValues[c0]}, 1
case c0 < 0xC2:
return Properties{}, 1
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return Properties{}, 0
}
i := bidiIndex[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return Properties{}, 1
}
o := uint32(i)<<6 + uint32(c1)
i = bidiIndex[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return Properties{}, 1
}
o = uint32(i)<<6 + uint32(c2)
i = bidiIndex[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return Properties{}, 1
}
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
}
// Illegal rune
return Properties{}, 1
}

1816
api/vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1888
api/vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1924
api/vendor/golang.org/x/text/unicode/bidi/tables12.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1956
api/vendor/golang.org/x/text/unicode/bidi/tables13.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

2043
api/vendor/golang.org/x/text/unicode/bidi/tables15.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

1782
api/vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

48
api/vendor/golang.org/x/text/unicode/bidi/trieval.go generated vendored Normal file
View File

@ -0,0 +1,48 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package bidi
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
numClass
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
unknownClass = ^Class(0)
)
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
// 3..0 Class type
const (
openMask = 0x10
xorMaskShift = 5
)

View File

@ -0,0 +1,512 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
const (
maxNonStarters = 30
// The maximum number of characters needed for a buffer is
// maxNonStarters + 1 for the starter + 1 for the GCJ
maxBufferSize = maxNonStarters + 2
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int
const (
// Indicates a rune was successfully added to the segment.
ssSuccess ssState = iota
// Indicates a rune starts a new segment and should not be added.
ssStarter
// Indicates a rune caused a segment overflow and a CGJ should be inserted.
ssOverflow
)
// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties) {
*ss = streamSafe(p.nTrailingNonStarters())
}
// insert returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func (ss *streamSafe) next(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
n := p.nLeadingNonStarters()
if *ss += streamSafe(n); *ss > maxNonStarters {
*ss = 0
return ssOverflow
}
// The Stream-Safe Text Processing prescribes that the counting can stop
// as soon as a starter is encountered. However, there are some starters,
// like Jamo V and T, that can combine with other runes, leaving their
// successive non-starters appended to the previous, possibly causing an
// overflow. We will therefore consider any rune with a non-zero nLead to
// be a non-starter. Note that it always hold that if nLead > 0 then
// nLead == nTrail.
if n == 0 {
*ss = streamSafe(p.nTrailingNonStarters())
return ssStarter
}
return ssSuccess
}
// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
c := *ss + streamSafe(p.nTrailingNonStarters())
if c > maxNonStarters {
return ssOverflow
}
*ss = c
if p.nLeadingNonStarters() == 0 {
return ssStarter
}
return ssSuccess
}
func (ss streamSafe) isMax() bool {
return ss == maxNonStarters
}
// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
// reorderBuffer is used to normalize a single segment. Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties // Per character info.
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
nbyte uint8 // Number or bytes.
ss streamSafe // For limiting length of non-starter sequence.
nrune int // Number of runeInfos.
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.src.setBytes(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.src.setString(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
rb.out = out
rb.flushF = f
}
// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
}
func (rb *reorderBuffer) doFlush() bool {
if rb.f.composing {
rb.compose()
}
res := rb.flushF(rb)
rb.reset()
return res
}
// appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
rb.out = append(rb.out, rb.byte[start:end]...)
}
return true
}
// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
out = append(out, rb.byte[start:end]...)
}
rb.reset()
return out
}
// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
p := 0
for i := 0; i < rb.nrune; i++ {
runep := rb.rune[i]
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
}
rb.reset()
return p
}
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties) {
n := rb.nrune
b := rb.rune[:]
cc := info.ccc
if cc > 0 {
// Find insertion position + move elements to make room.
for ; n > 0; n-- {
if b[n-1].ccc <= cc {
break
}
b[n] = b[n-1]
}
}
rb.nrune += 1
pos := uint8(rb.nbyte)
rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
}
// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int
const (
iSuccess insertErr = -iota
iShortDst
iShortSrc
)
// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments are encountered, they leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
return iSuccess
}
if info.hasDecomposition() {
return rb.insertDecomposed(info.Decomposition())
}
rb.insertSingle(src, i, info)
return iSuccess
}
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
}
if info.hasDecomposition() {
// TODO: inline.
rb.insertDecomposed(info.Decomposition())
} else {
rb.insertSingle(src, i, info)
}
}
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp)
// As the streamSafe accounting already handles the counting for modifiers,
// we don't have to call next. However, we do need to keep the accounting
// intact when flushing the buffer.
for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
return iShortDst
}
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
rb.insertOrdered(info)
}
return iSuccess
}
// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
rb.insertOrdered(info)
}
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ() {
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
inf := rb.rune[n]
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return r
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
inf := rb.rune[n]
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}
// For Hangul we combine algorithmically, instead of using tables.
const (
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
hangulBase0 = 0xEA
hangulBase1 = 0xB0
hangulBase2 = 0x80
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
hangulEnd0 = 0xED
hangulEnd1 = 0x9E
hangulEnd2 = 0xA4
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
jamoLBase0 = 0xE1
jamoLBase1 = 0x84
jamoLEnd = 0x1113
jamoVBase = 0x1161
jamoVEnd = 0x1176
jamoTBase = 0x11A7
jamoTEnd = 0x11C3
jamoTCount = 28
jamoVCount = 21
jamoVTCount = 21 * 28
jamoLVTCount = 19 * 21 * 28
)
const hangulUTF8Size = 3
func isHangul(b []byte) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
func isHangulString(b string) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
// True if (rune & 0xff00) == jamoLBase
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}
func isHangulWithoutJamoT(b []byte) bool {
c, _ := utf8.DecodeRune(b)
c -= hangulBase
return c < jamoLVTCount && c%jamoTCount == 0
}
// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
const JamoUTF8Len = 3
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
if x != 0 {
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
return 3 * JamoUTF8Len
}
return 2 * JamoUTF8Len
}
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
s = k - 1
}
if s != k-1 && cccB >= cccC {
// b[i] is blocked by greater-equal cccX below it
b[k] = b[i]
k++
} else {
l := rb.runeAt(s) // also used to compare to hangulBase
v := rb.runeAt(i) // also used to compare to jamoT
switch {
case jamoLBase <= l && l < jamoLEnd &&
jamoVBase <= v && v < jamoVEnd:
// 11xx plus 116x to LV
rb.assignRune(s, hangulBase+
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
case hangulBase <= l && l < hangulEnd &&
jamoTBase < v && v < jamoTEnd &&
((l-hangulBase)%jamoTCount) == 0:
// ACxx plus 11Ax to LVT
rb.assignRune(s, l+v-jamoTBase)
default:
b[k] = b[i]
k++
}
}
}
rb.nrune = k
}
// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// Lazily load the map used by the combine func below, but do
// it outside of the loop.
recompMapOnce.Do(buildRecompMap)
// UAX #15, section X5 , including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
bn := rb.nrune
if bn == 0 {
return
}
k := 1
b := rb.rune[:]
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul(s, i, k)
return
}
ii := b[i]
// We can only use combineForward as a filter if we later
// get the info for the combined character. This is more
// expensive than using the filter. Using combinesBackward()
// is safe.
if ii.combinesBackward() {
cccB := b[k-1].ccc
cccC := ii.ccc
blocked := false // b[i] blocked by starter or greater or equal CCC?
if cccB == 0 {
s = k - 1
} else {
blocked = s != k-1 && cccB >= cccC
}
if !blocked {
combined := combine(rb.runeAt(s), rb.runeAt(i))
if combined != 0 {
rb.assignRune(s, combined)
continue
}
}
}
b[k] = b[i]
k++
}
rb.nrune = k
}

279
api/vendor/golang.org/x/text/unicode/norm/forminfo.go generated vendored Normal file
View File

@ -0,0 +1,279 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "encoding/binary"
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 15: 1 (inverse of NFD_QC bit of qcInfo)
// 13..7: qcInfo (see below). isYesD is always true (no decomposition).
// 6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
// <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.
const (
qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo
headerLenMask = 0x3F // extract the length value from the header byte
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)
// Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8 // start position in reorderBuffer; used in composition.go
size uint8 // length of UTF-8 encoding of this rune
ccc uint8 // leading canonical combining class (ccc if not decomposition)
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
nLead uint8 // number of leading non-starters.
flags qcInfo // quick check flags
index uint16
}
// functions dispatchable per form
type lookupFunc func(b input, i int) Properties
// formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
nextMain iterFunc
}
var formTable = []*formInfo{{
form: NFC,
composing: true,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextComposed,
}, {
form: NFD,
composing: false,
compatibility: false,
info: lookupInfoNFC,
nextMain: nextDecomposed,
}, {
form: NFKC,
composing: true,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextComposed,
}, {
form: NFKD,
composing: false,
compatibility: true,
info: lookupInfoNFKC,
nextMain: nextDecomposed,
}}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary
// after 'a'. However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.
// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool {
if p.ccc == 0 && !p.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
// is always non-zero if different from info.ccc and that we can return
// false at this point. This is verified by maketables.
return false
}
// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func (p Properties) BoundaryAfter() bool {
// TODO: loosen these conditions.
return p.isInert()
}
// We pack quick check data in 4 bits:
//
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 }
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
func (p Properties) isInert() bool {
return p.flags&qcInfoMask == 0 && p.ccc == 0
}
func (p Properties) multiSegment() bool {
return p.index >= firstMulti && p.index < endMulti
}
func (p Properties) nLeadingNonStarters() uint8 {
return p.nLead
}
func (p Properties) nTrailingNonStarters() uint8 {
return uint8(p.flags & 0x03)
}
// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {
// TODO: create the decomposition for Hangul?
if p.index == 0 {
return nil
}
i := p.index
n := decomps[i] & headerLenMask
i++
return decomps[i : i+uint16(n)]
}
// Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int {
return int(p.size)
}
// CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8 {
if p.index >= firstCCCZeroExcept {
return 0
}
return ccc[p.ccc]
}
// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8 {
return ccc[p.ccc]
}
// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}
func buildRecompMap() {
recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
var buf [8]byte
for i := 0; i < len(recompMapPacked); i += 8 {
copy(buf[:], recompMapPacked[i:i+8])
key := binary.BigEndian.Uint32(buf[:4])
val := binary.BigEndian.Uint32(buf[4:])
recompMap[key] = rune(val)
}
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the combined rune or 0 if it doesn't exist.
//
// The caller is responsible for calling
// recompMapOnce.Do(buildRecompMap) sometime before this is called.
func combine(a, b rune) rune {
key := uint32(uint16(a))<<16 + uint32(uint16(b))
if recompMap == nil {
panic("caller error") // see func comment
}
return recompMap[key]
}
func lookupInfoNFC(b input, i int) Properties {
v, sz := b.charinfoNFC(i)
return compInfo(v, sz)
}
func lookupInfoNFKC(b input, i int) Properties {
v, sz := b.charinfoNFKC(i)
return compInfo(v, sz)
}
// Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookup(s))
}
return compInfo(nfkcData.lookup(s))
}
// PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties {
if f == NFC || f == NFD {
return compInfo(nfcData.lookupString(s))
}
return compInfo(nfkcData.lookupString(s))
}
// compInfo converts the information contained in v and sz
// to a Properties. See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) Properties {
if v == 0 {
return Properties{size: uint8(sz)}
} else if v >= 0x8000 {
p := Properties{
size: uint8(sz),
ccc: uint8(v),
tccc: uint8(v),
flags: qcInfo(v >> 8),
}
if p.ccc > 0 || p.combinesBackward() {
p.nLead = uint8(p.flags & 0x3)
}
return p
}
// has decomposition
h := decomps[v]
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
p := Properties{size: uint8(sz), flags: f, index: v}
if v >= firstCCC {
v += uint16(h&headerLenMask) + 1
c := decomps[v]
p.tccc = c >> 2
p.flags |= qcInfo(c & 0x3)
if v >= firstLeadingCCC {
p.nLead = c & 0x3
if v >= firstStarterWithNLead {
// We were tricked. Remove the decomposition.
p.flags &= 0x03
p.index = 0
return p
}
p.ccc = decomps[v+1]
}
}
return p
}

109
api/vendor/golang.org/x/text/unicode/norm/input.go generated vendored Normal file
View File

@ -0,0 +1,109 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
type input struct {
str string
bytes []byte
}
func inputBytes(str []byte) input {
return input{bytes: str}
}
func inputString(str string) input {
return input{str: str}
}
func (in *input) setBytes(str []byte) {
in.str = ""
in.bytes = str
}
func (in *input) setString(str string) {
in.str = str
in.bytes = nil
}
func (in *input) _byte(p int) byte {
if in.bytes == nil {
return in.str[p]
}
return in.bytes[p]
}
func (in *input) skipASCII(p, max int) int {
if in.bytes == nil {
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
}
} else {
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
}
}
return p
}
func (in *input) skipContinuationBytes(p int) int {
if in.bytes == nil {
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
}
} else {
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
}
}
return p
}
func (in *input) appendSlice(buf []byte, b, e int) []byte {
if in.bytes != nil {
return append(buf, in.bytes[b:e]...)
}
for i := b; i < e; i++ {
buf = append(buf, in.str[i])
}
return buf
}
func (in *input) copySlice(buf []byte, b, e int) int {
if in.bytes == nil {
return copy(buf, in.str[b:e])
}
return copy(buf, in.bytes[b:e])
}
func (in *input) charinfoNFC(p int) (uint16, int) {
if in.bytes == nil {
return nfcData.lookupString(in.str[p:])
}
return nfcData.lookup(in.bytes[p:])
}
func (in *input) charinfoNFKC(p int) (uint16, int) {
if in.bytes == nil {
return nfkcData.lookupString(in.str[p:])
}
return nfkcData.lookup(in.bytes[p:])
}
func (in *input) hangul(p int) (r rune) {
var size int
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, size = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, size = utf8.DecodeRune(in.bytes[p:])
}
if size != hangulUTF8Size {
return 0
}
return r
}

458
api/vendor/golang.org/x/text/unicode/norm/iter.go generated vendored Normal file
View File

@ -0,0 +1,458 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"unicode/utf8"
)
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
// sequence of starter and non-starter runes for the purpose of normalization.
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
asciiF iterFunc
p int // current position in input source
multiSeg []byte // remainder of multi-segment decomposition
}
type iterFunc func(*Iter) []byte
// Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.init(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.initString(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
case 0:
abs = offset
case 1:
abs = int64(i.p) + offset
case 2:
abs = int64(i.rb.nsrc) + offset
default:
return 0, fmt.Errorf("norm: invalid whence")
}
if abs < 0 {
return 0, fmt.Errorf("norm: negative position")
}
if int(abs) >= i.rb.nsrc {
i.setDone()
return int64(i.p), nil
}
i.p = int(abs)
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
return abs, nil
}
// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
func (i *Iter) returnSlice(a, b int) []byte {
if i.rb.src.bytes == nil {
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
}
return i.rb.src.bytes[a:b]
}
// Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int {
return i.p
}
func (i *Iter) setDone() {
i.next = nextDone
i.p = i.rb.nsrc
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.p >= i.rb.nsrc
}
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte {
return i.next(i)
}
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
p0 := i.p
i.setDone()
return i.rb.src.bytes[p0:p]
}
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p
i.p = p
return i.rb.src.bytes[p0:p]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextASCIIString(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.buf[0] = i.rb.src.str[i.p]
i.setDone()
return i.buf[:1]
}
if i.rb.src.str[p] < utf8.RuneSelf {
i.buf[0] = i.rb.src.str[i.p]
i.p = p
return i.buf[:1]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextHangul(i *Iter) []byte {
p := i.p
next := p + hangulUTF8Size
if next >= i.rb.nsrc {
i.setDone()
} else if i.rb.src.hangul(next) == 0 {
i.rb.ss.next(i.info)
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
i.p = next
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
}
func nextDone(i *Iter) []byte {
return nil
}
// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
func nextMulti(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
// treat last segment as normal decomposition
i.next = i.rb.f.nextMain
return i.next(i)
}
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
func nextMultiNorm(i *Iter) []byte {
j := 0
d := i.multiSeg
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):]
return seg
}
i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size)
}
i.multiSeg = nil
i.next = nextComposed
return doNormComposed(i)
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte) {
outp := 0
inCopyStart, outCopyStart := i.p, 0
for {
if sz := int(i.info.size); sz <= 1 {
i.rb.ss = 0
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
i.setDone()
return i.returnSlice(p, i.p)
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
return i.returnSlice(p, i.p)
}
outp++
} else if d := i.info.Decomposition(); d != nil {
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
// Case 1: there is a leftover to copy. In this case the decomposition
// must begin with a modifier and should always be appended.
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
p := outp + len(d)
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
// TODO: this condition should not be possible, but we leave it
// in for defensive purposes.
if p > len(i.buf) {
return i.buf[:outp]
}
} else if i.info.multiSegment() {
// outp must be 0 as multi-segment decompositions always
// start a new segment.
if i.multiSeg == nil {
i.multiSeg = d
i.next = nextMulti
return nextMulti(i)
}
// We are in the last segment. Treat as normal decomposition.
d = i.multiSeg
i.multiSeg = nil
p = len(d)
}
prevCC := i.info.tccc
if i.p += sz; i.p >= i.rb.nsrc {
i.setDone()
i.info = Properties{} // Force BoundaryBefore to succeed.
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
switch i.rb.ss.next(i.info) {
case ssOverflow:
i.next = nextCGJDecompose
fallthrough
case ssStarter:
if outp > 0 {
copy(i.buf[outp:], d)
return i.buf[:p]
}
return d
}
copy(i.buf[outp:], d)
outp = p
inCopyStart, outCopyStart = i.p, outp
if i.info.ccc < prevCC {
goto doNorm
}
continue
} else if r := i.rb.src.hangul(i.p); r != 0 {
outp = decomposeHangul(i.buf[:], r)
i.p += hangulUTF8Size
inCopyStart, outCopyStart = i.p, outp
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src.hangul(i.p) != 0 {
i.next = nextHangul
return i.buf[:outp]
}
} else {
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
i.setDone()
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJDecompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
if outCopyStart == 0 {
return i.returnSlice(inCopyStart, i.p)
} else if inCopyStart < i.p {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
}
return i.buf[:outp]
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
i.rb.insertDecomposed(i.buf[0:outp])
return doNormDecomposed(i)
}
func doNormDecomposed(i *Iter) []byte {
for {
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
break
}
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
}
// new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])]
}
func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0
i.rb.insertCGJ()
i.next = nextDecomposed
i.rb.ss.first(i.info)
buf := doNormDecomposed(i)
return buf
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
for {
if !i.info.isYesC() {
goto doNorm
}
prevCC = i.info.tccc
sz := int(i.info.size)
if sz == 0 {
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.rb.ss = 0
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if v := i.rb.ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJCompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
return i.returnSlice(startp, i.p)
doNorm:
// reset to start position
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
if i.info.multiSegment() {
d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0)
i.rb.insertUnsafe(input{bytes: d}, 0, info)
i.multiSeg = d[int(info.size):]
i.next = nextMultiNorm
return nextMultiNorm(i)
}
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}
func doNormComposed(i *Iter) []byte {
// First rune should already be inserted.
for {
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if s := i.rb.ss.next(i.info); s == ssStarter {
break
} else if s == ssOverflow {
i.next = nextCGJCompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
}
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
return seg
}
func nextCGJCompose(i *Iter) []byte {
i.rb.ss = 0 // instead of first
i.rb.insertCGJ()
i.next = nextComposed
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
// If we ever change that, insert a check here.
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}

610
api/vendor/golang.org/x/text/unicode/norm/normalize.go generated vendored Normal file
View File

@ -0,0 +1,610 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test
// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int
const (
NFC Form = iota
NFD
NFKC
NFKD
)
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
src := inputBytes(b)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
return doAppendInner(&rb, n)
}
// String returns f(s).
func (f Form) String(s string) string {
src := inputString(s)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
return string(doAppendInner(&rb, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
src := inputBytes(b)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
rb.setFlusher(nil, cmpNormalBytes)
for bp < len(b) {
rb.out = b[bp:]
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
}
return true
}
func cmpNormalBytes(rb *reorderBuffer) bool {
b := rb.out
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if int(info.size) > len(b) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if b[0] != rb.byte[p] {
return false
}
b = b[1:]
}
}
return true
}
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
src := inputString(s)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
rb.setFlusher(nil, func(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(s) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if s[bp] != rb.byte[p] {
return false
}
bp++
}
}
return true
})
for bp < len(s) {
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
}
return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It returns the
// patched buffer and whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool {
info, p := lastRuneStart(&rb.f, rb.out)
if p == -1 || info.size == 0 {
return true
}
end := p + int(info.size)
extra := len(rb.out) - end
if extra > 0 {
// Potentially allocating memory. However, this only
// happens with ill-formed UTF-8.
x := make([]byte, 0)
x = append(x, rb.out[len(rb.out)-extra:]...)
rb.out = rb.out[:end]
decomposeToLastBoundary(rb)
rb.doFlush()
rb.out = append(rb.out, x...)
return false
}
buf := rb.out[p:]
rb.out = rb.out[:p]
decomposeToLastBoundary(rb)
if s := rb.ss.next(info); s == ssStarter {
rb.doFlush()
rb.ss.first(info)
} else if s == ssOverflow {
rb.doFlush()
rb.insertCGJ()
rb.ss = 0
}
rb.insertUnsafe(inputBytes(buf), 0, info)
return true
}
func appendQuick(rb *reorderBuffer, i int) int {
if rb.nsrc == i {
return i
}
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
rb.out = rb.src.appendSlice(rb.out, i, end)
return end
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
return f.doAppend(out, inputBytes(src), len(src))
}
func (f Form) doAppend(out []byte, src input, n int) []byte {
if n == 0 {
return out
}
ft := formTable[f]
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
if len(out) == 0 {
p, _ := ft.quickSpan(src, 0, n, true)
out = src.appendSlice(out, 0, p)
if p == n {
return out
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
return doAppendInner(&rb, p)
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppend(&rb, out, 0)
}
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
rb.setFlusher(out, appendFlush)
src, n := rb.src, rb.nsrc
doMerge := len(out) > 0
if q := src.skipContinuationBytes(p); q > p {
// Move leading non-starters to destination.
rb.out = src.appendSlice(rb.out, p, q)
p = q
doMerge = patchTail(rb)
}
fd := &rb.f
if doMerge {
var info Properties
if p < n {
info = fd.info(src, p)
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
if p == 0 {
decomposeToLastBoundary(rb)
}
p = decomposeSegment(rb, p, true)
}
}
if info.size == 0 {
rb.doFlush()
// Append incomplete UTF-8 encoding.
return src.appendSlice(rb.out, p, n)
}
if rb.nrune > 0 {
return doAppendInner(rb, p)
}
}
p = appendQuick(rb, p)
return doAppendInner(rb, p)
}
func doAppendInner(rb *reorderBuffer, p int) []byte {
for n := rb.nsrc; p < n; {
p = decomposeSegment(rb, p, true)
p = appendQuick(rb, p)
}
return rb.out
}
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
return f.doAppend(out, inputString(src), len(src))
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
return n
}
// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
if n < len(b) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
if n < len(s) {
if !ok {
err = transform.ErrEndOfSpan
} else {
err = transform.ErrShortSrc
}
}
return n, err
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might be become
// non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
var lastCC uint8
ss := streamSafe(0)
lastSegStart := i
for n = end; i < n; {
if j := src.skipASCII(i, n); i != j {
i = j
lastSegStart = i - 1
lastCC = 0
ss = 0
continue
}
info := f.info(src, i)
if info.size == 0 {
if atEOF {
// include incomplete runes
return n, true
}
return lastSegStart, true
}
// This block needs to be before the next, because it is possible to
// have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) {
case ssStarter:
lastSegStart = i
case ssOverflow:
return lastSegStart, false
case ssSuccess:
if lastCC > info.ccc {
return lastSegStart, false
}
}
if f.composing {
if !info.isYesC() {
break
}
} else {
if !info.isYesD() {
break
}
}
lastCC = info.ccc
i += int(info.size)
}
if i == n {
if !atEOF {
n = lastSegStart
}
return n, true
}
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
return n
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
return f.firstBoundary(inputBytes(b), len(b))
}
func (f Form) firstBoundary(src input, nsrc int) int {
i := src.skipContinuationBytes(0)
if i >= nsrc {
return -1
}
fd := formTable[f]
ss := streamSafe(0)
// We should call ss.first here, but we can't as the first rune is
// skipped already. This means FirstBoundary can't really determine
// CGJ insertion points correctly. Luckily it doesn't have to.
for {
info := fd.info(src, i)
if info.size == 0 {
return -1
}
if s := ss.next(info); s != ssSuccess {
return i
}
i += int(info.size)
if i >= nsrc {
if !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
}
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
return f.firstBoundary(inputString(s), len(s))
}
// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
return f.nextBoundary(inputBytes(b), len(b), atEOF)
}
// NextBoundaryInString reports the index of the boundary between the first and
// next segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
return f.nextBoundary(inputString(s), len(s), atEOF)
}
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
if nsrc == 0 {
if atEOF {
return 0
}
return -1
}
fd := formTable[f]
info := fd.info(src, 0)
if info.size == 0 {
if atEOF {
return 1
}
return -1
}
ss := streamSafe(0)
ss.first(info)
for i := int(info.size); i < nsrc; i += int(info.size) {
info = fd.info(src, i)
if info.size == 0 {
if atEOF {
return i
}
return -1
}
// TODO: Using streamSafe to determine the boundary isn't the same as
// using BoundaryBefore. Determine which should be used.
if s := ss.next(info); s != ssSuccess {
return i
}
}
if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
return lastBoundary(formTable[f], b)
}
func lastBoundary(fd *formInfo, b []byte) int {
i := len(b)
info, p := lastRuneStart(fd, b)
if p == -1 {
return -1
}
if info.size == 0 { // ends with incomplete rune
if p == 0 { // starts with incomplete rune
return -1
}
i = p
info, p = lastRuneStart(fd, b[:i])
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
return i
}
}
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
return i
}
if info.BoundaryAfter() {
return i
}
ss := streamSafe(0)
v := ss.backwards(info)
for i = p; i >= 0 && v != ssStarter; i = p {
info, p = lastRuneStart(fd, b[:i])
if v = ss.backwards(info); v == ssOverflow {
break
}
if p+int(info.size) != i {
if p == -1 { // no boundary found
return -1
}
return i // boundary after an illegal UTF-8 encoding
}
}
return i
}
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
// Force one character to be consumed.
info := rb.f.info(rb.src, sp)
if info.size == 0 {
return 0
}
if s := rb.ss.next(info); s == ssStarter {
// TODO: this could be removed if we don't support merging.
if rb.nrune > 0 {
goto end
}
} else if s == ssOverflow {
rb.insertCGJ()
goto end
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
for {
sp += int(info.size)
if sp >= rb.nsrc {
if !atEOF && !info.BoundaryAfter() {
return int(iShortSrc)
}
break
}
info = rb.f.info(rb.src, sp)
if info.size == 0 {
if !atEOF {
return int(iShortSrc)
}
break
}
if s := rb.ss.next(info); s == ssStarter {
break
} else if s == ssOverflow {
rb.insertCGJ()
break
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
}
end:
if !rb.doFlush() {
return int(iShortDst)
}
return sp
}
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
p := len(buf) - 1
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
}
if p < 0 {
return Properties{}, -1
}
return fd.info(inputBytes(buf), p), p
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer) {
fd := &rb.f
info, i := lastRuneStart(fd, rb.out)
if int(info.size) != len(rb.out)-i {
// illegal trailing continuation bytes
return
}
if info.BoundaryAfter() {
return
}
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
padd := 0
ss := streamSafe(0)
p := len(rb.out)
for {
add[padd] = info
v := ss.backwards(info)
if v == ssOverflow {
// Note that if we have an overflow, it the string we are appending to
// is not correctly normalized. In this case the behavior is undefined.
break
}
padd++
p -= int(info.size)
if v == ssStarter || p < 0 {
break
}
info, i = lastRuneStart(fd, rb.out[:p])
if int(info.size) != p-i {
break
}
}
rb.ss = ss
// Copy bytes for insertion as we may need to overwrite rb.out.
var buf [maxBufferSize * utf8.UTFMax]byte
cp := buf[:copy(buf[:], rb.out[p:])]
rb.out = rb.out[:p]
for padd--; padd >= 0; padd-- {
info = add[padd]
rb.insertUnsafe(inputBytes(cp), 0, info)
cp = cp[info.size:]
}
}

125
api/vendor/golang.org/x/text/unicode/norm/readwriter.go generated vendored Normal file
View File

@ -0,0 +1,125 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "io"
type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}
// Write implements the standard write interface. If the last characters are
// not at a normalization boundary, the bytes will be buffered for the next
// write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error) {
// Process data in pieces to keep w.buf size bounded.
const chunk = 4000
for len(data) > 0 {
// Normalize into w.buf.
m := len(data)
if m > chunk {
m = chunk
}
w.rb.src = inputBytes(data[:m])
w.rb.nsrc = m
w.buf = doAppend(&w.rb, w.buf, 0)
data = data[m:]
n += m
// Write out complete prefix, save remainder.
// Note that lastBoundary looks back at most 31 runes.
i := lastBoundary(&w.rb.f, w.buf)
if i == -1 {
i = 0
}
if i > 0 {
if _, err = w.w.Write(w.buf[:i]); err != nil {
break
}
bn := copy(w.buf, w.buf[i:])
w.buf = w.buf[:bn]
}
}
return n, err
}
// Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error {
if len(w.buf) > 0 {
_, err := w.w.Write(w.buf)
if err != nil {
return err
}
}
return nil
}
// Writer returns a new writer that implements Write(b)
// by writing f(b) to w. The returned writer may use an
// internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
wr := &normWriter{rb: reorderBuffer{}, w: w}
wr.rb.init(f, nil)
return wr
}
type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}
// Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error) {
for {
if r.lastBoundary-r.bufStart > 0 {
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
r.bufStart += n
if r.lastBoundary-r.bufStart > 0 {
return n, nil
}
return n, r.err
}
if r.err != nil {
return 0, r.err
}
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
r.outbuf = r.outbuf[0:outn]
r.bufStart = 0
n, err := r.r.Read(r.inbuf)
r.rb.src = inputBytes(r.inbuf[0:n])
r.rb.nsrc, r.err = n, err
if n > 0 {
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
}
if err == io.EOF {
r.lastBoundary = len(r.outbuf)
} else {
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
if r.lastBoundary == -1 {
r.lastBoundary = 0
}
}
}
}
// Reader returns a new reader that implements Read
// by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader {
const chunk = 4000
buf := make([]byte, chunk)
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
rr.rb.init(f, buf)
return rr
}

7658
api/vendor/golang.org/x/text/unicode/norm/tables10.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

7694
api/vendor/golang.org/x/text/unicode/norm/tables11.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

7711
api/vendor/golang.org/x/text/unicode/norm/tables12.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

7761
api/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

7908
api/vendor/golang.org/x/text/unicode/norm/tables15.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

7638
api/vendor/golang.org/x/text/unicode/norm/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

88
api/vendor/golang.org/x/text/unicode/norm/transform.go generated vendored Normal file
View File

@ -0,0 +1,88 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// Reset implements the Reset method of the transform.Transformer interface.
func (Form) Reset() {}
// Transform implements the Transform method of the transform.Transformer
// interface. It may need to write segments of up to MaxSegmentSize at once.
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
// least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Cap the maximum number of src bytes to check.
b := src
eof := atEOF
if ns := len(dst); ns < len(b) {
err = transform.ErrShortDst
eof = false
b = b[:ns]
}
i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof)
n := copy(dst, b[:i])
if !ok {
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
return nDst + n, nSrc + n, err
}
if err == nil && n < len(src) && !atEOF {
err = transform.ErrShortSrc
}
return n, n, err
}
func flushTransform(rb *reorderBuffer) bool {
// Write out (must fully fit in dst, or else it is an ErrShortDst).
if len(rb.out) < rb.nrune*utf8.UTFMax {
return false
}
rb.out = rb.out[rb.flushCopy(rb.out):]
return true
}
var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc}
// transform implements the transform.Transformer interface. It is only called
// when quickSpan does not pass for a given string.
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// TODO: get rid of reorderBuffer. See CL 23460044.
rb := reorderBuffer{}
rb.init(f, src)
for {
// Load segment into reorder buffer.
rb.setFlusher(dst[nDst:], flushTransform)
end := decomposeSegment(&rb, nSrc, atEOF)
if end < 0 {
return nDst, nSrc, errs[-end]
}
nDst = len(dst) - len(rb.out)
nSrc = end
// Next quickSpan.
end = rb.nsrc
eof := atEOF
if n := nSrc + len(dst) - nDst; n < end {
err = transform.ErrShortDst
end = n
eof = false
}
end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof)
n := copy(dst[nDst:], rb.src.bytes[nSrc:end])
nSrc += n
nDst += n
if ok {
if err == nil && n < rb.nsrc && !atEOF {
err = transform.ErrShortSrc
}
return nDst, nSrc, err
}
}
}

54
api/vendor/golang.org/x/text/unicode/norm/trie.go generated vendored Normal file
View File

@ -0,0 +1,54 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type sparseBlocks struct {
values []valueRange
offset []uint16
}
var nfcSparse = sparseBlocks{
values: nfcSparseValues[:],
offset: nfcSparseOffset[:],
}
var nfkcSparse = sparseBlocks{
values: nfkcSparseValues[:],
offset: nfkcSparseOffset[:],
}
var (
nfcData = newNfcTrie(0)
nfkcData = newNfkcTrie(0)
)
// lookup determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
offset := t.offset[n]
header := t.values[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.values[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}