rebase: ParseAcceptLanguage takes a long time to parse complex tags

A vulnerability was found in golang.org/x/text/language package which
could cause a denial of service. An attacker can craft an
Accept-Language header which ParseAcceptLanguage will take significant
time to parse.
Version v0.3.8 of golang.org/x/text fixes a vulnerability.

See-also: https://go.dev/issue/56152
See-also: https://bugzilla.redhat.com/CVE-2022-32149
Signed-off-by: Niels de Vos <ndevos@redhat.com>
This commit is contained in:
Niels de Vos
2022-10-17 08:49:59 +02:00
committed by mergify[bot]
parent b3837d44ce
commit e08005f402
27 changed files with 667 additions and 583 deletions

View File

@ -93,8 +93,11 @@ var canonical = [numEncodings]string{
var nameMap = map[string]htmlEncoding{
"unicode-1-1-utf-8": utf8,
"unicode11utf8": utf8,
"unicode20utf8": utf8,
"utf-8": utf8,
"utf8": utf8,
"x-unicode20utf8": utf8,
"866": ibm866,
"cp866": ibm866,
"csibm866": ibm866,
@ -307,7 +310,13 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement,
"replacement": replacement,
"unicodefffe": utf16be,
"utf-16be": utf16be,
"csunicode": utf16le,
"iso-10646-ucs-2": utf16le,
"ucs-2": utf16le,
"unicode": utf16le,
"unicodefeff": utf16le,
"utf-16": utf16le,
"utf-16le": utf16le,
"x-user-defined": xUserDefined,

View File

@ -905,6 +905,14 @@ const (
// https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020
// UTF7IMAP is the MIB identifier with IANA name UTF-7-IMAP.
//
// Note: This charset is used to encode Unicode in IMAP mailbox names;
// see section 5.1.3 of rfc3501 . It should never be used
// outside this context. A name has been assigned so that charset processing
// implementations can refer to it in a consistent way.
UTF7IMAP MIB = 1021
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
//
// Extended ISO 8859-1 Latin-1 for Windows 3.0.

View File

@ -55,6 +55,8 @@ loop:
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
// GBKs decoder is gb18030s decoder. https://encoding.spec.whatwg.org/#gbk-decoder
// If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder
case c0 == 0x80:
r, size = '€', 1
@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
if r == '€' {
// GBKs encoder is gb18030s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder
// If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder
if !e.gb18030 && r == '€' {
r = 0x80
goto write1
}