rebase: ParseAcceptLanguage takes a long time to parse complex tags

A vulnerability was found in the golang.org/x/text/language package
that could cause a denial of service: an attacker can craft an
Accept-Language header that ParseAcceptLanguage takes significant
time to parse.
Version v0.3.8 of golang.org/x/text fixes the vulnerability.

See-also: https://go.dev/issue/56152
See-also: https://bugzilla.redhat.com/CVE-2022-32149
Signed-off-by: Niels de Vos <ndevos@redhat.com>
(cherry picked from commit e08005f402)
Niels de Vos 2022-10-17 08:49:59 +02:00 committed by mergify[bot]
parent f9adcde538
commit 763aa3df03
27 changed files with 667 additions and 583 deletions

go.mod

@ -150,7 +150,7 @@ require (
go.uber.org/zap v1.21.0 // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
-golang.org/x/text v0.3.7 // indirect
+golang.org/x/text v0.3.8 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
google.golang.org/appengine v1.6.7 // indirect

go.sum

@ -1492,8 +1492,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

vendor/golang.org/x/text/AUTHORS (generated, vendored)

@ -1,3 +0,0 @@
-# This source code refers to The Go Authors for copyright purposes.
-# The master list of authors is in the main Go distribution,
-# visible at http://tip.golang.org/AUTHORS.

View File

@ -1,3 +0,0 @@
-# This source code was written by the Go contributors.
-# The master list of contributors is in the main Go distribution,
-# visible at http://tip.golang.org/CONTRIBUTORS.

View File

@ -14,19 +14,19 @@ package cases
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..4 unsigned exception index
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
@ -128,37 +128,40 @@ const (
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header:
//
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
//
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
//
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.

View File

@ -93,8 +93,11 @@ var canonical = [numEncodings]string{
var nameMap = map[string]htmlEncoding{
"unicode-1-1-utf-8": utf8,
+"unicode11utf8": utf8,
+"unicode20utf8": utf8,
"utf-8": utf8,
"utf8": utf8,
+"x-unicode20utf8": utf8,
"866": ibm866,
"cp866": ibm866,
"csibm866": ibm866,
@ -307,7 +310,13 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn-ext": replacement, "iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement, "iso-2022-kr": replacement,
"replacement": replacement, "replacement": replacement,
"unicodefffe": utf16be,
"utf-16be": utf16be, "utf-16be": utf16be,
"csunicode": utf16le,
"iso-10646-ucs-2": utf16le,
"ucs-2": utf16le,
"unicode": utf16le,
"unicodefeff": utf16le,
"utf-16": utf16le, "utf-16": utf16le,
"utf-16le": utf16le, "utf-16le": utf16le,
"x-user-defined": xUserDefined, "x-user-defined": xUserDefined,

View File

@ -905,6 +905,14 @@ const (
// https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020
+// UTF7IMAP is the MIB identifier with IANA name UTF-7-IMAP.
+//
+// Note: This charset is used to encode Unicode in IMAP mailbox names;
+// see section 5.1.3 of rfc3501 . It should never be used
+// outside this context. A name has been assigned so that charset processing
+// implementations can refer to it in a consistent way.
+UTF7IMAP MIB = 1021
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
//
// Extended ISO 8859-1 Latin-1 for Windows 3.0.

View File

@ -55,6 +55,8 @@ loop:
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
+// GBK's decoder is gb18030's decoder. https://encoding.spec.whatwg.org/#gbk-decoder
+// If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder
case c0 == 0x80:
	r, size = '€', 1
@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
-if r == '€' {
+// GBK's encoder is gb18030's encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder
+// If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder
+if !e.gb18030 && r == '€' {
	r = 0x80
	goto write1
}
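
The new !e.gb18030 condition matches the WHATWG rule cited in the added comments: only plain GBK short-circuits U+20AC to the single byte 0x80, while GB18030 uses its own table mapping. A hedged sketch comparing the two public encodings (expected byte values noted in comments, not verified against every x/text release):

package main

import (
	"fmt"

	"golang.org/x/text/encoding/simplifiedchinese"
)

func main() {
	// Encode the euro sign with both encodings and compare the raw bytes.
	gbk, err := simplifiedchinese.GBK.NewEncoder().Bytes([]byte("€"))
	fmt.Printf("GBK:     % X (err=%v)\n", gbk, err) // expected: 80

	gb18030, err := simplifiedchinese.GB18030.NewEncoder().Bytes([]byte("€"))
	fmt.Printf("GB18030: % X (err=%v)\n", gb18030, err) // expected: A2 E3, the table mapping
}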

View File

@ -966,7 +966,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
0x40200000, 0x4020004c, 0x40700000, 0x40800000,
-0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba,
+0x4085a000, 0x4085a0ba, 0x408e8000, 0x408e80ba,
0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
// Entry 280 - 29F
@ -994,7 +994,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000,
0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000,
-0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4,
+0x4be5a000, 0x4be5a0b4, 0x4bef1000, 0x4bef10b4,
0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
// Entry 2E0 - 2FF
0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
@ -1012,4 +1012,4 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
-// Total table size 3147 bytes (3KiB); checksum: BE816D44
+// Total table size 3147 bytes (3KiB); checksum: 6772C83C

View File

@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0 return r.typ()&iso3166UserAssigned != 0
} }
type Script uint8 type Script uint16
// getScriptID returns the script id for string s. It assumes that s // getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. // is of the format [A-Z][a-z]{3}.

View File

@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) {
} else if n >= 4 {
	return Und, ErrSyntax
} else { // the usual case
-	t, end = parseTag(scan)
+	t, end = parseTag(scan, true)
	if n := len(scan.token); n == 1 {
		t.pExt = uint16(end)
		end = parseExtensions(scan)
@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) {
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
-func parseTag(scan *scanner) (t Tag, end int) {
+// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
+func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
	var e error
	// TODO: set an error if an unknown lang, script or region is encountered.
	t.LangID, e = getLangID(scan.token)
@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) {
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
	// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
	// to a tag of the form <extlang>.
-	lang, e := getLangID(scan.token)
-	if lang != 0 {
-		t.LangID = lang
-		copy(scan.b[langStart:], lang.String())
-		scan.b[langStart+3] = '-'
-		scan.start = langStart + 4
+	if doNorm {
+		lang, e := getLangID(scan.token)
+		if lang != 0 {
+			t.LangID = lang
+			langStr := lang.String()
+			copy(scan.b[langStart:], langStr)
+			scan.b[langStart+len(langStr)] = '-'
+			scan.start = langStart + len(langStr) + 1
+		}
+		scan.gobble(e)
	}
-	scan.gobble(e)
	end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int {
case 't': // https://www.ietf.org/rfc/rfc6497.txt
	scan.scan()
	if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
-		_, end = parseTag(scan)
+		_, end = parseTag(scan, false)
		scan.toLower(start, end)
	}
	for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
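
The doNorm flag added above keeps the long-standing <lang>-<extlang> normalization for ordinary tags but skips it for the tag embedded in a -t- extension. A hedged sketch of the user-visible effect through the public language.Parse API; the exact canonical strings depend on the CLDR data compiled into the vendored tables:

package main

import (
	"fmt"

	"golang.org/x/text/language"
)

func main() {
	// <lang>-<extlang> in the main tag is still normalized to <extlang>,
	// e.g. "zh-cmn-Hans" is expected to come back as "cmn-Hans".
	base, err := language.Parse("zh-cmn-Hans")
	fmt.Println(base, err)

	// Inside a -t- (transformed content) extension, parseTag now runs with
	// doNorm=false, so the embedded tag should be kept as written.
	ext, err := language.Parse("en-t-zh-cmn")
	fmt.Println(ext, err)
}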

File diff suppressed because it is too large.

View File

@ -74,7 +74,7 @@ type AcceptRange struct {
// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b
//
// AcceptRanges[First[b[0]]>>AcceptShift]
//
// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting
// at b[0].

View File

@ -10,18 +10,17 @@
// and provides the user with the best experience
// (see https://blog.golang.org/matchlang).
//
-//
-// Matching preferred against supported languages
+// # Matching preferred against supported languages
//
// A Matcher for an application that supports English, Australian English,
// Danish, and standard Mandarin can be created as follows:
//
// var matcher = language.NewMatcher([]language.Tag{
// 	language.English, // The first language is used as fallback.
// 	language.MustParse("en-AU"),
// 	language.Danish,
// 	language.Chinese,
// })
//
// This list of supported languages is typically implied by the languages for
// which there exists translations of the user interface.
@ -30,14 +29,14 @@
// language tags.
// The MatchString finds best matches for such strings:
//
// handler(w http.ResponseWriter, r *http.Request) {
// 	lang, _ := r.Cookie("lang")
// 	accept := r.Header.Get("Accept-Language")
// 	tag, _ := language.MatchStrings(matcher, lang.String(), accept)
//
// 	// tag should now be used for the initialization of any
// 	// locale-specific service.
// }
//
// The Matcher's Match method can be used to match Tags directly.
//
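
Combining the two documented snippets above into a runnable sketch; the matcher list and the MatchStrings call come from the doc text, while the cookie and Accept-Language values are hard-coded stand-ins for request data:

package main

import (
	"fmt"

	"golang.org/x/text/language"
)

var matcher = language.NewMatcher([]language.Tag{
	language.English, // The first language is used as fallback.
	language.MustParse("en-AU"),
	language.Danish,
	language.Chinese,
})

func main() {
	// In the handler shown above these values come from a cookie and from
	// r.Header.Get("Accept-Language"); here they are fixed for illustration.
	lang := "da"
	accept := "en-AU;q=0.8, en;q=0.7"
	tag, index, confidence := language.MatchStrings(matcher, lang, accept)
	fmt.Println(tag, index, confidence) // tag initializes any locale-specific service
}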
@ -48,8 +47,7 @@
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
// and will know that Cantonese ("yue") is a good match for "zh-HK".
//
-//
-// Using match results
+// # Using match results
//
// To guarantee a consistent user experience to the user it is important to
// use the same language tag for the selection of any locale-specific services.
@ -58,9 +56,9 @@
// More subtly confusing is using the wrong sorting order or casing
// algorithm for a certain language.
//
// All the packages in x/text that provide locale-specific services
// (e.g. collate, cases) should be initialized with the tag that was
// obtained at the start of an interaction with the user.
//
// Note that Tag that is returned by Match and MatchString may differ from any
// of the supported languages, as it may contain carried over settings from
@ -70,8 +68,7 @@
// Match and MatchString both return the index of the matched supported tag
// to simplify associating such data with the matched tag.
//
-//
-// Canonicalization
+// # Canonicalization
//
// If one uses the Matcher to compare languages one does not need to
// worry about canonicalization.
@ -92,10 +89,9 @@
// equivalence relations. The CanonType type can be used to alter the
// canonicalization form.
//
-// References
+// # References
//
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
-//
package language // import "golang.org/x/text/language"
// TODO: explanation on how to match languages for your own locale-specific // TODO: explanation on how to match languages for your own locale-specific

View File

@ -545,7 +545,7 @@ type bestMatch struct {
// match as the preferred match.
//
// If pin is true and have and tag are a strong match, it will henceforth only
-// consider matches for this language. This corresponds to the nothing that most
+// consider matches for this language. This corresponds to the idea that most
// users have a strong preference for the first defined language. A user can
// still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should

View File

@ -147,6 +147,7 @@ func update(b *language.Builder, part ...interface{}) (err error) {
}
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
+var errTagListTooLarge = errors.New("tag list exceeds max length")
// ParseAcceptLanguage parses the contents of an Accept-Language header as
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
@ -164,6 +165,10 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
	}
}()
+if strings.Count(s, "-") > 1000 {
+	return nil, nil, errTagListTooLarge
+}
var entry string
for s != "" {
	if entry, s = split(s, ','); entry == "" {
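
The new guard rejects pathological inputs before the expensive parsing work begins. A hedged sketch of the caller-visible behavior, assuming the vendored golang.org/x/text/language at v0.3.8; the 1000-separator threshold and errTagListTooLarge are internal details, so callers only observe a non-nil error:

package main

import (
	"fmt"
	"strings"

	"golang.org/x/text/language"
)

func main() {
	// A normal header parses into tags and weights.
	tags, weights, err := language.ParseAcceptLanguage("en-AU, en;q=0.9, da;q=0.8")
	fmt.Println(tags, weights, err)

	// A crafted header with thousands of subtags is the kind of input that
	// previously took a very long time; with the guard it should fail fast.
	hostile := "en" + strings.Repeat("-aaa", 2000) + ";q=0.9"
	_, _, err = language.ParseAcceptLanguage(hostile)
	fmt.Println(err) // expected: a parse error instead of a long stall
}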

View File

@ -39,12 +39,12 @@ const (
_Hani = 57
_Hans = 59
_Hant = 60
-_Qaaa = 143
-_Qaai = 151
-_Qabx = 192
-_Zinh = 245
-_Zyyy = 250
-_Zzzz = 251
+_Qaaa = 147
+_Qaai = 155
+_Qabx = 196
+_Zinh = 252
+_Zyyy = 257
+_Zzzz = 258
)
var regionToGroups = []uint8{ // 358 elements var regionToGroups = []uint8{ // 358 elements
@ -265,9 +265,9 @@ var matchScript = []scriptIntelligibility{ // 26 elements
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
-16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa},
-17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa},
-18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa},
+16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd4, haveScript: 0x5a, distance: 0xa},
+17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe3, haveScript: 0x5a, distance: 0xa},
+18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5a, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},

View File

@ -193,14 +193,14 @@ func (p *paragraph) run() {
//
// At the end of this function:
//
// - The member variable matchingPDI is set to point to the index of the
// matching PDI character for each isolate initiator character. If there is
// no matching PDI, it is set to the length of the input text. For other
// characters, it is set to -1.
// - The member variable matchingIsolateInitiator is set to point to the
// index of the matching isolate initiator character for each PDI character.
// If there is no matching isolate initiator, or the character is not a PDI,
// it is set to -1.
func (p *paragraph) determineMatchingIsolates() {
	p.matchingPDI = make([]int, p.Len())
	p.matchingIsolateInitiator = make([]int, p.Len())
@ -435,7 +435,7 @@ func maxLevel(a, b level) level {
}
// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
// either L or R, for each isolating run sequence.
func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
	length := len(indexes)
	types := make([]Class, length)
types := make([]Class, length) types := make([]Class, length)
@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
if t == NSM {
	s.types[i] = precedingCharacterType
} else {
-	if t.in(LRI, RLI, FSI, PDI) {
-		precedingCharacterType = ON
-	}
+	// if t.in(LRI, RLI, FSI, PDI) {
+	// 	precedingCharacterType = ON
+	// }
	precedingCharacterType = t
}
@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level {
// Lines are concatenated from left to right. So for example, the fifth
// character from the left on the third line is
//
// getReordering(linebreaks)[linebreaks[1] + 4]
//
// (linebreaks[1] is the position after the last character of the second
// line, which is also the index of the first character on the third line,

View File

@ -110,10 +110,11 @@ func (p Properties) BoundaryAfter() bool {
}
// We pack quick check data in 4 bits:
//
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.

View File

@ -18,16 +18,17 @@ import (
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.

View File

@ -7315,7 +7315,7 @@ const recompMapPacked = "" +
"\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C "\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C
"\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D "\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D
"\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E "\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E
"\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F "\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F
"\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80 "\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80
"\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81 "\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81
"\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82 "\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82
@ -7342,7 +7342,7 @@ const recompMapPacked = "" +
"\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97 "\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97
"\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98 "\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98
"\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99 "\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99
"\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B "\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
"\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0 "\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0
"\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1 "\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1
"\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2 "\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2

View File

@ -1146,21 +1146,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{
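
The xor arithmetic in this comment is easy to check directly; a tiny self-contained sketch, independent of the width tables themselves:

package main

import "fmt"

func main() {
	// Last byte of the stored entry xor-ed with the last byte of the
	// original rune's UTF-8 encoding recovers the mapped ASCII byte.
	fmt.Printf("%X\n", 0xE0^0xA1) // 41, i.e. 'A' for U+FF21
	fmt.Printf("%X\n", 0xE0^0xA2) // 42, i.e. 'B' for U+FF22
}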

View File

@ -1158,21 +1158,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{

View File

@ -1178,21 +1178,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{

View File

@ -1179,21 +1179,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{

View File

@ -1114,21 +1114,31 @@ var widthIndex = [1408]uint8{
}
// inverseData contains 4-byte entries of the following format:
//
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//
// -> A (U+FF21 -> U+0041)
// -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.
var inverseData = [150][4]byte{

vendor/modules.txt (vendored)

@ -651,7 +651,7 @@ golang.org/x/sys/windows
# golang.org/x/term v0.0.0-20210927222741-03fcf44c2211
## explicit; go 1.17
golang.org/x/term
-# golang.org/x/text v0.3.7
+# golang.org/x/text v0.3.8
## explicit; go 1.17
golang.org/x/text/cases
golang.org/x/text/encoding