rebase: ParseAcceptLanguage takes a long time to parse complex tags

A vulnerability was found in golang.org/x/text/language package which
could cause a denial of service. An attacker can craft an
Accept-Language header which ParseAcceptLanguage will take significant
time to parse.
Version v0.3.8 of golang.org/x/text fixes a vulnerability.

See-also: https://go.dev/issue/56152
See-also: https://bugzilla.redhat.com/CVE-2022-32149
Signed-off-by: Niels de Vos <ndevos@redhat.com>
This commit is contained in:
Niels de Vos 2022-10-17 08:49:59 +02:00 committed by mergify[bot]
parent b3837d44ce
commit e08005f402
27 changed files with 667 additions and 583 deletions

2
go.mod
View File

@ -151,7 +151,7 @@ require (
go.uber.org/zap v1.21.0 // indirect go.uber.org/zap v1.21.0 // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect golang.org/x/text v0.3.8 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
google.golang.org/appengine v1.6.7 // indirect google.golang.org/appengine v1.6.7 // indirect

3
go.sum
View File

@ -1522,8 +1522,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

3
vendor/golang.org/x/text/AUTHORS generated vendored
View File

@ -1,3 +0,0 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

View File

@ -1,3 +0,0 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

View File

@ -128,7 +128,8 @@ const (
// The entry is pointed to by the exception index in an entry. It has the // The entry is pointed to by the exception index in an entry. It has the
// following format: // following format:
// //
// Header // Header:
//
// byte 0: // byte 0:
// 7..6 unused // 7..6 unused
// 5..4 CCC type (same bits as entry) // 5..4 CCC type (same bits as entry)
@ -149,6 +150,7 @@ const (
// A length of 0 indicates a mapping to zero-length string. // A length of 0 indicates a mapping to zero-length string.
// //
// Body bytes: // Body bytes:
//
// case folding bytes // case folding bytes
// lowercase mapping bytes // lowercase mapping bytes
// uppercase mapping bytes // uppercase mapping bytes
@ -156,6 +158,7 @@ const (
// closure mapping bytes (for NFKC_Casefold). (TODO) // closure mapping bytes (for NFKC_Casefold). (TODO)
// //
// Fallbacks: // Fallbacks:
//
// missing fold -> lower // missing fold -> lower
// missing title -> upper // missing title -> upper
// all missing -> original rune // all missing -> original rune

View File

@ -93,8 +93,11 @@ var canonical = [numEncodings]string{
var nameMap = map[string]htmlEncoding{ var nameMap = map[string]htmlEncoding{
"unicode-1-1-utf-8": utf8, "unicode-1-1-utf-8": utf8,
"unicode11utf8": utf8,
"unicode20utf8": utf8,
"utf-8": utf8, "utf-8": utf8,
"utf8": utf8, "utf8": utf8,
"x-unicode20utf8": utf8,
"866": ibm866, "866": ibm866,
"cp866": ibm866, "cp866": ibm866,
"csibm866": ibm866, "csibm866": ibm866,
@ -307,7 +310,13 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn-ext": replacement, "iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement, "iso-2022-kr": replacement,
"replacement": replacement, "replacement": replacement,
"unicodefffe": utf16be,
"utf-16be": utf16be, "utf-16be": utf16be,
"csunicode": utf16le,
"iso-10646-ucs-2": utf16le,
"ucs-2": utf16le,
"unicode": utf16le,
"unicodefeff": utf16le,
"utf-16": utf16le, "utf-16": utf16le,
"utf-16le": utf16le, "utf-16le": utf16le,
"x-user-defined": xUserDefined, "x-user-defined": xUserDefined,

View File

@ -905,6 +905,14 @@ const (
// https://www.unicode.org/notes/tn6/ // https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020 BOCU1 MIB = 1020
// UTF7IMAP is the MIB identifier with IANA name UTF-7-IMAP.
//
// Note: This charset is used to encode Unicode in IMAP mailbox names;
// see section 5.1.3 of rfc3501 . It should never be used
// outside this context. A name has been assigned so that charset processing
// implementations can refer to it in a consistent way.
UTF7IMAP MIB = 1021
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
// //
// Extended ISO 8859-1 Latin-1 for Windows 3.0. // Extended ISO 8859-1 Latin-1 for Windows 3.0.

View File

@ -55,6 +55,8 @@ loop:
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936. // says to treat "gbk" as Code Page 936.
// GBKs decoder is gb18030s decoder. https://encoding.spec.whatwg.org/#gbk-decoder
// If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder
case c0 == 0x80: case c0 == 0x80:
r, size = '€', 1 r, size = '€', 1
@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936. // says to treat "gbk" as Code Page 936.
if r == '€' { // GBKs encoder is gb18030s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder
// If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder
if !e.gb18030 && r == '€' {
r = 0x80 r = 0x80
goto write1 goto write1
} }

View File

@ -966,7 +966,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c, 0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3, 0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
0x40200000, 0x4020004c, 0x40700000, 0x40800000, 0x40200000, 0x4020004c, 0x40700000, 0x40800000,
0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba, 0x4085a000, 0x4085a0ba, 0x408e8000, 0x408e80ba,
0x40c00000, 0x40c000b3, 0x41200000, 0x41200111, 0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
0x41600000, 0x4160010f, 0x41c00000, 0x41d00000, 0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
// Entry 280 - 29F // Entry 280 - 29F
@ -994,7 +994,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8, 0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000, 0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000,
0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000, 0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000,
0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4, 0x4be5a000, 0x4be5a0b4, 0x4bef1000, 0x4bef10b4,
0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000, 0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
// Entry 2E0 - 2FF // Entry 2E0 - 2FF
0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000, 0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
@ -1012,4 +1012,4 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix" const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
// Total table size 3147 bytes (3KiB); checksum: BE816D44 // Total table size 3147 bytes (3KiB); checksum: 6772C83C

View File

@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0 return r.typ()&iso3166UserAssigned != 0
} }
type Script uint8 type Script uint16
// getScriptID returns the script id for string s. It assumes that s // getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. // is of the format [A-Z][a-z]{3}.

View File

@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) {
} else if n >= 4 { } else if n >= 4 {
return Und, ErrSyntax return Und, ErrSyntax
} else { // the usual case } else { // the usual case
t, end = parseTag(scan) t, end = parseTag(scan, true)
if n := len(scan.token); n == 1 { if n := len(scan.token); n == 1 {
t.pExt = uint16(end) t.pExt = uint16(end)
end = parseExtensions(scan) end = parseExtensions(scan)
@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) {
// parseTag parses language, script, region and variants. // parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed. // It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) { // If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
var e error var e error
// TODO: set an error if an unknown lang, script or region is encountered. // TODO: set an error if an unknown lang, script or region is encountered.
t.LangID, e = getLangID(scan.token) t.LangID, e = getLangID(scan.token)
@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) {
for len(scan.token) == 3 && isAlpha(scan.token[0]) { for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>. // to a tag of the form <extlang>.
if doNorm {
lang, e := getLangID(scan.token) lang, e := getLangID(scan.token)
if lang != 0 { if lang != 0 {
t.LangID = lang t.LangID = lang
copy(scan.b[langStart:], lang.String()) langStr := lang.String()
scan.b[langStart+3] = '-' copy(scan.b[langStart:], langStr)
scan.start = langStart + 4 scan.b[langStart+len(langStr)] = '-'
scan.start = langStart + len(langStr) + 1
} }
scan.gobble(e) scan.gobble(e)
}
end = scan.scan() end = scan.scan()
} }
if len(scan.token) == 4 && isAlpha(scan.token[0]) { if len(scan.token) == 4 && isAlpha(scan.token[0]) {
@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int {
case 't': // https://www.ietf.org/rfc/rfc6497.txt case 't': // https://www.ietf.org/rfc/rfc6497.txt
scan.scan() scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan) _, end = parseTag(scan, false)
scan.toLower(start, end) scan.toLower(start, end)
} }
for len(scan.token) == 2 && !isAlpha(scan.token[1]) { for len(scan.token) == 2 && !isAlpha(scan.token[1]) {

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,7 @@
// and provides the user with the best experience // and provides the user with the best experience
// (see https://blog.golang.org/matchlang). // (see https://blog.golang.org/matchlang).
// //
// // # Matching preferred against supported languages
// Matching preferred against supported languages
// //
// A Matcher for an application that supports English, Australian English, // A Matcher for an application that supports English, Australian English,
// Danish, and standard Mandarin can be created as follows: // Danish, and standard Mandarin can be created as follows:
@ -48,8 +47,7 @@
// For instance, it will know that a reader of Bokmål Danish can read Norwegian // For instance, it will know that a reader of Bokmål Danish can read Norwegian
// and will know that Cantonese ("yue") is a good match for "zh-HK". // and will know that Cantonese ("yue") is a good match for "zh-HK".
// //
// // # Using match results
// Using match results
// //
// To guarantee a consistent user experience to the user it is important to // To guarantee a consistent user experience to the user it is important to
// use the same language tag for the selection of any locale-specific services. // use the same language tag for the selection of any locale-specific services.
@ -70,8 +68,7 @@
// Match and MatchString both return the index of the matched supported tag // Match and MatchString both return the index of the matched supported tag
// to simplify associating such data with the matched tag. // to simplify associating such data with the matched tag.
// //
// // # Canonicalization
// Canonicalization
// //
// If one uses the Matcher to compare languages one does not need to // If one uses the Matcher to compare languages one does not need to
// worry about canonicalization. // worry about canonicalization.
@ -92,10 +89,9 @@
// equivalence relations. The CanonType type can be used to alter the // equivalence relations. The CanonType type can be used to alter the
// canonicalization form. // canonicalization form.
// //
// References // # References
// //
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47 // BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
//
package language // import "golang.org/x/text/language" package language // import "golang.org/x/text/language"
// TODO: explanation on how to match languages for your own locale-specific // TODO: explanation on how to match languages for your own locale-specific

View File

@ -545,7 +545,7 @@ type bestMatch struct {
// match as the preferred match. // match as the preferred match.
// //
// If pin is true and have and tag are a strong match, it will henceforth only // If pin is true and have and tag are a strong match, it will henceforth only
// consider matches for this language. This corresponds to the nothing that most // consider matches for this language. This corresponds to the idea that most
// users have a strong preference for the first defined language. A user can // users have a strong preference for the first defined language. A user can
// still prefer a second language over a dialect of the preferred language by // still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should

View File

@ -147,6 +147,7 @@ func update(b *language.Builder, part ...interface{}) (err error) {
} }
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
var errTagListTooLarge = errors.New("tag list exceeds max length")
// ParseAcceptLanguage parses the contents of an Accept-Language header as // ParseAcceptLanguage parses the contents of an Accept-Language header as
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
@ -164,6 +165,10 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
} }
}() }()
if strings.Count(s, "-") > 1000 {
return nil, nil, errTagListTooLarge
}
var entry string var entry string
for s != "" { for s != "" {
if entry, s = split(s, ','); entry == "" { if entry, s = split(s, ','); entry == "" {

View File

@ -39,12 +39,12 @@ const (
_Hani = 57 _Hani = 57
_Hans = 59 _Hans = 59
_Hant = 60 _Hant = 60
_Qaaa = 143 _Qaaa = 147
_Qaai = 151 _Qaai = 155
_Qabx = 192 _Qabx = 196
_Zinh = 245 _Zinh = 252
_Zyyy = 250 _Zyyy = 257
_Zzzz = 251 _Zzzz = 258
) )
var regionToGroups = []uint8{ // 358 elements var regionToGroups = []uint8{ // 358 elements
@ -265,9 +265,9 @@ var matchScript = []scriptIntelligibility{ // 26 elements
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa}, 13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa}, 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd4, haveScript: 0x5a, distance: 0xa},
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa}, 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe3, haveScript: 0x5a, distance: 0xa},
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa}, 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5a, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa}, 19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa}, 20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},

View File

@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
if t == NSM { if t == NSM {
s.types[i] = precedingCharacterType s.types[i] = precedingCharacterType
} else { } else {
if t.in(LRI, RLI, FSI, PDI) { // if t.in(LRI, RLI, FSI, PDI) {
precedingCharacterType = ON // precedingCharacterType = ON
} // }
precedingCharacterType = t precedingCharacterType = t
} }
} }

View File

@ -110,6 +110,7 @@ func (p Properties) BoundaryAfter() bool {
} }
// We pack quick check data in 4 bits: // We pack quick check data in 4 bits:
//
// 5: Combines forward (0 == false, 1 == true) // 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11) // 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. // 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.

View File

@ -27,6 +27,7 @@ import (
// the bytes or string x converted to the given form. // the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can // A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides: // proceed independently on both sides:
//
// f(x) == append(f(x[0:n]), f(x[n:])...) // f(x) == append(f(x[0:n]), f(x[n:])...)
// //
// References: https://unicode.org/reports/tr15/ and // References: https://unicode.org/reports/tr15/ and

View File

@ -7315,7 +7315,7 @@ const recompMapPacked = "" +
"\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C "\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C
"\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D "\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D
"\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E "\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E
"\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F "\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F
"\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80 "\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80
"\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81 "\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81
"\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82 "\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82
@ -7342,7 +7342,7 @@ const recompMapPacked = "" +
"\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97 "\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97
"\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98 "\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98
"\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99 "\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99
"\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B "\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
"\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0 "\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0
"\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1 "\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1
"\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2 "\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2

View File

@ -1146,21 +1146,31 @@ var widthIndex = [1408]uint8{
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding> // <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
//
// -> A (U+FF21 -> U+0041) // -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042) // -> B (U+FF22 -> U+0042)
// ... // ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 } // { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41. // E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42. // E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View File

@ -1158,21 +1158,31 @@ var widthIndex = [1408]uint8{
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding> // <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
//
// -> A (U+FF21 -> U+0041) // -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042) // -> B (U+FF22 -> U+0042)
// ... // ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 } // { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41. // E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42. // E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View File

@ -1178,21 +1178,31 @@ var widthIndex = [1408]uint8{
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding> // <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
//
// -> A (U+FF21 -> U+0041) // -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042) // -> B (U+FF22 -> U+0042)
// ... // ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 } // { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41. // E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42. // E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View File

@ -1179,21 +1179,31 @@ var widthIndex = [1408]uint8{
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding> // <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
//
// -> A (U+FF21 -> U+0041) // -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042) // -> B (U+FF22 -> U+0042)
// ... // ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 } // { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41. // E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42. // E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View File

@ -1114,21 +1114,31 @@ var widthIndex = [1408]uint8{
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding> // <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
//
// -> A (U+FF21 -> U+0041) // -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042) // -> B (U+FF22 -> U+0042)
// ... // ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 } // { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41. // E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42. // E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

2
vendor/modules.txt vendored
View File

@ -651,7 +651,7 @@ golang.org/x/sys/windows
# golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 # golang.org/x/term v0.0.0-20210927222741-03fcf44c2211
## explicit; go 1.17 ## explicit; go 1.17
golang.org/x/term golang.org/x/term
# golang.org/x/text v0.3.7 # golang.org/x/text v0.3.8
## explicit; go 1.17 ## explicit; go 1.17
golang.org/x/text/cases golang.org/x/text/cases
golang.org/x/text/encoding golang.org/x/text/encoding