Vendor cleanup

Signed-off-by: Madhu Rajanna <mrajanna@redhat.com>
This commit is contained in:
Madhu Rajanna
2019-01-16 18:11:54 +05:30
parent 661818bd79
commit 0f836c62fa
16816 changed files with 20 additions and 4611100 deletions

View File

@ -1,16 +0,0 @@
# Copyright 2013 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Add the generator binary to CLEANFILES (presumably consumed by a clean rule
# defined in an including makefile -- TODO confirm).
CLEANFILES+=maketables
# Compile the table generator from its single Go source file.
maketables: maketables.go
go build $^
# Regenerate tables.go: run the generator, then format and simplify the output.
tables: maketables
./maketables > tables.go
gofmt -w -s tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

View File

@ -1,154 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"fmt"
"reflect"
"testing"
)
// TestSupported inspects each accessor of Supported through reflection. A
// result set is accepted when its length equals the recorded count and every
// element formats to a distinct string.
func TestSupported(t *testing.T) {
	// Accessor method name -> number of results on record.
	want := map[string]int{
		"BaseLanguages": numLanguages,
		"Scripts":       numScripts,
		"Regions":       numRegions,
		"Tags":          0,
	}
	coverage := reflect.ValueOf(Supported)
	for method, count := range want {
		result := coverage.MethodByName(method).Call(nil)[0]
		total := result.Len()
		if total != count {
			t.Errorf("len(%s()) was %d; want %d", method, total, count)
		}
		seen := make(map[string]bool)
		for idx := 0; idx < total; idx++ {
			// Formatting an invalid value either crashes or collides with
			// the text of an earlier entry.
			text := fmt.Sprint(result.Index(idx).Interface())
			if seen[text] {
				t.Errorf("%s: duplicate entry %q", method, text)
			}
			seen[text] = true
		}
		if len(seen) != total {
			t.Errorf("%s: # unique entries was %d; want %d", method, len(seen), total)
		}
	}
}
// TestNewCoverage builds Coverage values from slices and from functions
// returning slices, and checks what each accessor reports. When only tags are
// supplied, the base languages are expected to be derived from those tags.
func TestNewCoverage(t *testing.T) {
	bases := []Base{{0}, {3}, {7}}
	scripts := []Script{{11}, {17}, {23}}
	regions := []Region{{101}, {103}, {107}}
	tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}

	// Function-valued variants of the inputs above.
	fbases := func() []Base { return bases }
	fscripts := func() []Script { return scripts }
	fregions := func() []Region { return regions }
	ftags := func() []Tag { return tags }

	// Bases expected when they are inferred from tags.
	derived := []Base{{_en}, {_pt}}

	cases := []struct {
		desc    string
		list    []interface{}
		bases   []Base
		scripts []Script
		regions []Region
		tags    []Tag
	}{
		{desc: "empty"},
		{desc: "bases", list: []interface{}{bases}, bases: bases},
		{desc: "scripts", list: []interface{}{scripts}, scripts: scripts},
		{desc: "regions", list: []interface{}{regions}, regions: regions},
		{desc: "bases derives from tags", list: []interface{}{tags}, bases: derived, tags: tags},
		{desc: "tags and bases", list: []interface{}{tags, bases}, bases: bases, tags: tags},
		{
			desc:    "fully specified",
			list:    []interface{}{tags, bases, scripts, regions},
			bases:   bases,
			scripts: scripts,
			regions: regions,
			tags:    tags,
		},
		{desc: "bases func", list: []interface{}{fbases}, bases: bases},
		{desc: "scripts func", list: []interface{}{fscripts}, scripts: scripts},
		{desc: "regions func", list: []interface{}{fregions}, regions: regions},
		{desc: "tags func", list: []interface{}{ftags}, bases: derived, tags: tags},
		{desc: "tags and bases", list: []interface{}{ftags, fbases}, bases: bases, tags: tags},
		{
			desc:    "fully specified",
			list:    []interface{}{ftags, fbases, fscripts, fregions},
			bases:   bases,
			scripts: scripts,
			regions: regions,
			tags:    tags,
		},
	}
	for i, tc := range cases {
		cov := NewCoverage(tc.list...)
		if got := cov.BaseLanguages(); !reflect.DeepEqual(got, tc.bases) {
			t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tc.desc, got, tc.bases)
		}
		if got := cov.Scripts(); !reflect.DeepEqual(got, tc.scripts) {
			t.Errorf("%d:%s: Scripts was %v; want %v", i, tc.desc, got, tc.scripts)
		}
		if got := cov.Regions(); !reflect.DeepEqual(got, tc.regions) {
			t.Errorf("%d:%s: Regions was %v; want %v", i, tc.desc, got, tc.regions)
		}
		if got := cov.Tags(); !reflect.DeepEqual(got, tc.tags) {
			t.Errorf("%d:%s: Tags was %v; want %v", i, tc.desc, got, tc.tags)
		}
	}
}

View File

@ -1,92 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains sets of data for specific languages. Users can use these
// to create smaller collections of supported languages and reduce total table
// size.
// The variable names defined here correspond to those in package language.
// Each variable exposes the Dictionary for one language; the trailing comment
// gives the corresponding language tag. Several regional or script variants
// (for example AmericanEnglish or SimplifiedChinese) are aliases that point at
// the same Dictionary as their base language.
var (
Afrikaans *Dictionary = &af // af
Amharic *Dictionary = &am // am
Arabic *Dictionary = &ar // ar
ModernStandardArabic *Dictionary = Arabic // ar-001
Azerbaijani *Dictionary = &az // az
Bulgarian *Dictionary = &bg // bg
Bengali *Dictionary = &bn // bn
Catalan *Dictionary = &ca // ca
Czech *Dictionary = &cs // cs
Danish *Dictionary = &da // da
German *Dictionary = &de // de
Greek *Dictionary = &el // el
English *Dictionary = &en // en
AmericanEnglish *Dictionary = English // en-US
BritishEnglish *Dictionary = English // en-GB
Spanish *Dictionary = &es // es
EuropeanSpanish *Dictionary = Spanish // es-ES
LatinAmericanSpanish *Dictionary = Spanish // es-419
Estonian *Dictionary = &et // et
Persian *Dictionary = &fa // fa
Finnish *Dictionary = &fi // fi
Filipino *Dictionary = &fil // fil
French *Dictionary = &fr // fr
Gujarati *Dictionary = &gu // gu
Hebrew *Dictionary = &he // he
Hindi *Dictionary = &hi // hi
Croatian *Dictionary = &hr // hr
Hungarian *Dictionary = &hu // hu
Armenian *Dictionary = &hy // hy
Indonesian *Dictionary = &id // id
Icelandic *Dictionary = &is // is
Italian *Dictionary = &it // it
Japanese *Dictionary = &ja // ja
Georgian *Dictionary = &ka // ka
Kazakh *Dictionary = &kk // kk
Khmer *Dictionary = &km // km
Kannada *Dictionary = &kn // kn
Korean *Dictionary = &ko // ko
Kirghiz *Dictionary = &ky // ky
Lao *Dictionary = &lo // lo
Lithuanian *Dictionary = &lt // lt
Latvian *Dictionary = &lv // lv
Macedonian *Dictionary = &mk // mk
Malayalam *Dictionary = &ml // ml
Mongolian *Dictionary = &mn // mn
Marathi *Dictionary = &mr // mr
Malay *Dictionary = &ms // ms
Burmese *Dictionary = &my // my
Nepali *Dictionary = &ne // ne
Dutch *Dictionary = &nl // nl
Norwegian *Dictionary = &no // no
Punjabi *Dictionary = &pa // pa
Polish *Dictionary = &pl // pl
Portuguese *Dictionary = &pt // pt
BrazilianPortuguese *Dictionary = Portuguese // pt-BR
EuropeanPortuguese *Dictionary = &ptPT // pt-PT
Romanian *Dictionary = &ro // ro
Russian *Dictionary = &ru // ru
Sinhala *Dictionary = &si // si
Slovak *Dictionary = &sk // sk
Slovenian *Dictionary = &sl // sl
Albanian *Dictionary = &sq // sq
Serbian *Dictionary = &sr // sr
SerbianLatin *Dictionary = &srLatn // sr-Latn
Swedish *Dictionary = &sv // sv
Swahili *Dictionary = &sw // sw
Tamil *Dictionary = &ta // ta
Telugu *Dictionary = &te // te
Thai *Dictionary = &th // th
Turkish *Dictionary = &tr // tr
Ukrainian *Dictionary = &uk // uk
Urdu *Dictionary = &ur // ur
Uzbek *Dictionary = &uz // uz
Vietnamese *Dictionary = &vi // vi
Chinese *Dictionary = &zh // zh
SimplifiedChinese *Dictionary = Chinese // zh-Hans
TraditionalChinese *Dictionary = &zhHant // zh-Hant
Zulu *Dictionary = &zu // zu
)

View File

@ -1,39 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
import (
"fmt"
"testing"
"golang.org/x/text/internal/testtext"
)
// TestLinking compares the code size of a program that goes through the
// generic Tags factory with one that references only the English Dictionary.
// The dictionary-only program must be at least 1.5MB smaller.
func TestLinking(t *testing.T) {
	const minSavings = 1.5 * 1024 * 1024

	generic := getSize(t, `display.Tags(language.English).Name(language.English)`)
	single := getSize(t, `display.English.Languages().Name(language.English)`)
	saved := generic - single
	if saved < minSavings {
		t.Errorf("size(base) - size(compact) = %d - %d = was %d; want > 1.5MB", generic, single, saved)
	}
}
func getSize(t *testing.T, main string) int {
size, err := testtext.CodeSize(fmt.Sprintf(body, main))
if err != nil {
t.Skipf("skipping link size test; binary size could not be determined: %v", err)
}
return size
}
// body is the source template for the programs measured by getSize. Its single
// %s verb is replaced with the statement placed inside main.
const body = `package main
import (
"golang.org/x/text/language"
"golang.org/x/text/language/display"
)
func main() {
%s
}
`

View File

@ -1,420 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run maketables.go -output tables.go
// Package display provides display names for languages, scripts and regions in
// a requested language.
//
// The data is based on CLDR's localeDisplayNames. It includes the names of the
// draft level "contributed" or "approved". The resulting tables are quite
// large. The display package is designed so that users can reduce the linked-in
// table sizes by cherry picking the languages one wishes to support. There is a
// Dictionary defined for a selected set of common languages for this purpose.
package display // import "golang.org/x/text/language/display"
import (
"fmt"
"strings"
"golang.org/x/text/internal/format"
"golang.org/x/text/language"
)
/*
TODO:
All fairly low priority at the moment:
- Include alternative and variants as an option (using func options).
- Option for returning the empty string for undefined values.
- Support variants, currencies, time zones, option names and other data
provided in CLDR.
- Do various optimizations:
- Reduce size of offset tables.
- Consider compressing infrequently used languages and decompress on demand.
*/
// A Formatter formats a tag in the current language. It is used in conjunction
// with the message package.
type Formatter struct {
// lookup returns the name of x; tag is the index that Format obtains by
// matching the formatting language against the package matcher. An empty
// result means no name was found.
lookup func(tag int, x interface{}) string
// x is the value whose name is rendered.
x interface{}
}
// Format implements "golang.org/x/text/internal/format".Formatter.
//
// It writes the name of f.x as found for the language of state. If no name is
// found, it writes the name found for language.Und followed by the value in
// parentheses, or "[language: <value>]" when that lookup is empty as well.
func (f Formatter) Format(state format.State, verb rune) {
	// TODO: there are a lot of inefficiencies in this code. Fix it once
	// language.Tag has embedded compact tags.
	_, idx, _ := matcher.Match(state.Language())

	if name := f.lookup(idx, f.x); name != "" {
		state.Write([]byte(name))
		return
	}

	// TODO: use language-specific punctuation.
	// TODO: use codePattern instead of language?
	unknown := f.lookup(idx, language.Und)
	if unknown == "" {
		fmt.Fprintf(state, "[language: %v]", f.x)
		return
	}
	fmt.Fprintf(state, "%v (%v)", unknown, f.x)
}
// Language returns a Formatter that renders the name for lang in the current
// language. lang may be a language.Base or a language.Tag.
// It renders lang in the default language if no translation for the current
// language is supported.
func Language(lang interface{}) Formatter {
	return Formatter{lookup: langFunc, x: lang}
}
// Region returns a Formatter that renders the name for region in the current
// language. region may be a language.Region or a language.Tag.
// It renders region in the default language if no translation for the current
// language is supported.
func Region(region interface{}) Formatter {
	return Formatter{lookup: regionFunc, x: region}
}
// Script returns a Formatter that renders the name for script in the current
// language. script may be a language.Script or a language.Tag.
// It renders script in the default language if no translation for the current
// language is supported.
func Script(script interface{}) Formatter {
	return Formatter{lookup: scriptFunc, x: script}
}
// Tag returns a Formatter that renders the name for tag in the current
// language. tag may be a language.Tag.
// It renders tag in the default language if no translation for the current
// language is supported.
func Tag(tag interface{}) Formatter {
	return Formatter{lookup: tagFunc, x: tag}
}
// A Namer is used to get the name for a given value, such as a Tag, Language,
// Script or Region.
type Namer interface {
// Name returns a display string for the given value. A Namer returns an
// empty string for values it does not support. A Namer may support naming
// an unspecified value. For example, when getting the name for a region for
// a tag that does not have a defined Region, it may return the name for an
// unknown region. It is up to the user to filter out calls to Name for
// values for which no name string is wanted.
Name(x interface{}) string
}
// These variables are populated by init.
var (
// Supported lists the languages for which names are defined.
Supported language.Coverage
// The set of all possible values for which names are defined. Note that not
// all Namer implementations will cover all the values of a given type.
// A Namer will return the empty string for unsupported values.
Values language.Coverage
// matcher is built from the supported tags. The index it returns from
// Match is used to select a dictionary in the header tables.
matcher language.Matcher
)
func init() {
tags := make([]language.Tag, numSupported)
s := supported
for i := range tags {
p := strings.IndexByte(s, '|')
tags[i] = language.Raw.Make(s[:p])
s = s[p+1:]
}
matcher = language.NewMatcher(tags)
Supported = language.NewCoverage(tags)
Values = language.NewCoverage(langTagSet.Tags, supportedScripts, supportedRegions)
}
// Languages returns a Namer for naming languages. It returns nil if there is no
// data for the given tag. The type passed to Name must be either language.Base
// or language.Tag. Note that the result may differ between passing a tag or its
// base language. For example, for English, passing "nl-BE" would return Flemish
// whereas passing "nl" returns "Dutch".
func Languages(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	return languageNamer(index)
}
type languageNamer int
func langFunc(i int, x interface{}) string {
return nameLanguage(languageNamer(i), x)
}
func (n languageNamer) name(i int) string {
return lookup(langHeaders[:], int(n), i)
}
// Name implements the Namer interface for language names.
func (n languageNamer) Name(x interface{}) string {
return nameLanguage(n, x)
}
// nonEmptyIndex follows the parents table upward from index until it reaches
// a header with non-empty data. It returns that index, or -1 if the chain
// ends without finding one.
func nonEmptyIndex(h []header, index int) int {
	for index != -1 {
		if h[index].data != "" {
			break
		}
		index = int(parents[index])
	}
	return index
}
// Scripts returns a Namer for naming scripts. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Script or a language.Tag. It will not attempt to infer a script for
// tags with an unspecified script.
func Scripts(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	index = nonEmptyIndex(scriptHeaders[:], index)
	if index == -1 {
		return nil
	}
	return scriptNamer(index)
}
type scriptNamer int
func scriptFunc(i int, x interface{}) string {
return nameScript(scriptNamer(i), x)
}
func (n scriptNamer) name(i int) string {
return lookup(scriptHeaders[:], int(n), i)
}
// Name implements the Namer interface for script names.
func (n scriptNamer) Name(x interface{}) string {
return nameScript(n, x)
}
// Regions returns a Namer for naming regions. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Region or a language.Tag. It will not attempt to infer a region for
// tags with an unspecified region.
func Regions(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	index = nonEmptyIndex(regionHeaders[:], index)
	if index == -1 {
		return nil
	}
	return regionNamer(index)
}
type regionNamer int
func regionFunc(i int, x interface{}) string {
return nameRegion(regionNamer(i), x)
}
func (n regionNamer) name(i int) string {
return lookup(regionHeaders[:], int(n), i)
}
// Name implements the Namer interface for region names.
func (n regionNamer) Name(x interface{}) string {
return nameRegion(n, x)
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// be appended within parentheses. It returns nil if there is no data for the
// given tag. The type passed to Name must be a tag.
func Tags(t language.Tag) Namer {
	_, index, conf := matcher.Match(t)
	if conf == language.No {
		return nil
	}
	return tagNamer(index)
}
type tagNamer int
func tagFunc(i int, x interface{}) string {
return nameTag(languageNamer(i), scriptNamer(i), regionNamer(i), x)
}
// Name implements the Namer interface for tag names.
func (n tagNamer) Name(x interface{}) string {
return nameTag(languageNamer(n), scriptNamer(n), regionNamer(n), x)
}
// lookup returns the name of entry want from dictionary dict of table. When a
// dictionary has no name for the entry, the search continues with its parent
// as given by the parents table. The empty string is returned once the chain
// is exhausted (parent index -1).
func lookup(table []header, dict, want int) string {
	for ; dict != -1; dict = int(parents[dict]) {
		if name := table[dict].name(want); name != "" {
			return name
		}
	}
	return ""
}
// A Dictionary holds a collection of Namers for a single language. One can
// reduce the amount of data linked in to a binary by only referencing
// Dictionaries for the languages one needs to support instead of using the
// generic Namer factories.
type Dictionary struct {
// parent is consulted when this Dictionary has no name for an entry; a nil
// parent ends the search.
parent *Dictionary
// lang, script and region hold the name tables for the respective kinds
// of values.
lang header
script header
region header
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// be appended within parentheses. The type passed to Name must be a tag.
func (d *Dictionary) Tags() Namer {
	return dictTags{d: d}
}

// dictTags names full tags using a single Dictionary.
type dictTags struct {
	d *Dictionary
}

// Name implements the Namer interface for tag names.
func (n dictTags) Name(x interface{}) string {
	langs := dictLanguages{d: n.d}
	scripts := dictScripts{d: n.d}
	regions := dictRegions{d: n.d}
	return nameTag(langs, scripts, regions, x)
}
// Languages returns a Namer for naming languages. The type passed to Name must
// be either language.Base or language.Tag. Note that the result may differ
// between passing a tag or its base language. For example, for English,
// passing "nl-BE" would return Flemish whereas passing "nl" returns "Dutch".
func (d *Dictionary) Languages() Namer {
	return dictLanguages{d: d}
}

// dictLanguages names languages using a single Dictionary and its parents.
type dictLanguages struct {
	d *Dictionary
}

// name returns the first non-empty language name for entry i found while
// walking from the Dictionary up through its parents, or "" if there is none.
func (n dictLanguages) name(i int) string {
	cur := n.d
	for cur != nil {
		if s := cur.lang.name(i); s != "" {
			return s
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for language names.
func (n dictLanguages) Name(x interface{}) string {
	return nameLanguage(n, x)
}
// Scripts returns a Namer for naming scripts. The type passed to Name must be
// either a language.Script or a language.Tag. It will not attempt to infer a
// script for tags with an unspecified script.
func (d *Dictionary) Scripts() Namer {
	return dictScripts{d: d}
}

// dictScripts names scripts using a single Dictionary and its parents.
type dictScripts struct {
	d *Dictionary
}

// name returns the first non-empty script name for entry i found while
// walking from the Dictionary up through its parents, or "" if there is none.
func (n dictScripts) name(i int) string {
	cur := n.d
	for cur != nil {
		if s := cur.script.name(i); s != "" {
			return s
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for script names.
func (n dictScripts) Name(x interface{}) string {
	return nameScript(n, x)
}
// Regions returns a Namer for naming regions. The type passed to Name must be
// either a language.Region or a language.Tag. It will not attempt to infer a
// region for tags with an unspecified region.
func (d *Dictionary) Regions() Namer {
	return dictRegions{d: d}
}

// dictRegions names regions using a single Dictionary and its parents.
type dictRegions struct {
	d *Dictionary
}

// name returns the first non-empty region name for entry i found while
// walking from the Dictionary up through its parents, or "" if there is none.
func (n dictRegions) name(i int) string {
	cur := n.d
	for cur != nil {
		if s := cur.region.name(i); s != "" {
			return s
		}
		cur = cur.parent
	}
	return ""
}

// Name implements the Namer interface for region names.
func (n dictRegions) Name(x interface{}) string {
	return nameRegion(n, x)
}
// A SelfNamer implements a Namer that returns the name of a language in that
// same language. It provides a very compact mechanism to provide a
// comprehensive list of languages to users in their native language.
type SelfNamer struct {
// Supported defines the values supported by this Namer.
Supported language.Coverage
}
var (
// Self is a shared instance of a SelfNamer.
Self *SelfNamer = &self
// self backs Self; its coverage is built from the tags of selfTagSet.
self = SelfNamer{language.NewCoverage(selfTagSet.Tags)}
)
// Name returns the name of a given language tag in the language identified by
// this tag. It supports both the language.Base and language.Tag types.
// It returns the empty string if selfTagSet has no entry for the value, or if
// the script of the entry found differs from the script getScript reports for
// the base language.
func (n SelfNamer) Name(x interface{}) string {
// The error from Compose is deliberately ignored; t is used as returned.
t, _ := language.All.Compose(x)
base, scr, reg := t.Raw()
// baseScript caches getScript(base); the zero Script means "not computed yet".
baseScript := language.Script{}
if (scr == language.Script{} && reg != language.Region{}) {
// For looking up in the self dictionary, we need to select the
// maximized script. This is even the case if the script isn't
// specified.
s1, _ := t.Script()
// Only pin the script when it differs from the one for the bare base.
if baseScript = getScript(base); baseScript != s1 {
scr = s1
}
}
// index also returns a script and region, which overwrite scr and reg.
i, scr, reg := selfTagSet.index(base, scr, reg)
if i == -1 {
return ""
}
// Only return the display name if the script matches the expected script.
if (scr != language.Script{}) {
if (baseScript == language.Script{}) {
baseScript = getScript(base)
}
if baseScript != scr {
return ""
}
}
return selfHeaders[0].name(i)
}
// getScript returns the script that a tag composed from only the base
// language b reports, i.e. the maximized script for b. Errors from Compose and
// the confidence from Script are ignored.
func getScript(b language.Base) language.Script {
	bare, _ := language.Raw.Compose(b)
	script, _ := bare.Script()
	return script
}

View File

@ -1,714 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
import (
"fmt"
"reflect"
"strings"
"testing"
"unicode"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/message"
)
// TODO: test that tables are properly dropped by the linker for various use
// cases.
// Fixture values used as test inputs in this file. The names encode the code
// each one is parsed from.
var (
firstLang2aa = language.MustParseBase("aa")
lastLang2zu = language.MustParseBase("zu")
firstLang3ace = language.MustParseBase("ace")
lastLang3zza = language.MustParseBase("zza")
firstTagAr001 = language.MustParse("ar-001")
lastTagZhHant = language.MustParse("zh-Hant")
)
// TestValues tests that every language, region and script listed in Values
// has a name in at least one language. It does so by checking the English
// Namers, as English is assumed to be the most comprehensive. It also checks
// that those Namers return "" for values that are not listed in Values.
func TestValues(t *testing.T) {
	type testcase struct {
		kind string
		n    Namer
	}

	// checkDefined reports an error for each Namer that has no name for x.
	checkDefined := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			label := fmt.Sprintf("%s.Name(%s)", tc.kind, x)
			testtext.Run(t, label, func(t *testing.T) {
				if tc.n.Name(x) != "" {
					return
				}
				// As of version 28 there is no data for az-Arab in English,
				// although there is useful data in other languages.
				if x.(fmt.Stringer).String() == "az-Arab" {
					return
				}
				t.Errorf("supported but no result")
			})
		}
	}

	// checkUnsupported aborts the test if any Namer has a name for x.
	checkUnsupported := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			got := tc.n.Name(x)
			if got == "" {
				continue
			}
			t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", tc.kind, x, got)
		}
	}

	// Languages and tags.
	tagNamers := []testcase{
		{"Languages(en)", Languages(language.English)},
		{"Tags(en)", Tags(language.English)},
		{"English.Languages()", English.Languages()},
		{"English.Tags()", English.Tags()},
	}
	knownTags := map[language.Tag]bool{}
	for _, tag := range Values.Tags() {
		checkDefined(tag, tagNamers)
		knownTags[tag] = true
	}
	for _, base := range language.Supported.BaseLanguages() {
		tag, _ := language.All.Compose(base)
		if knownTags[tag] {
			continue
		}
		checkUnsupported(tag, tagNamers)
	}

	// Regions.
	regionNamers := []testcase{
		{"Regions(en)", Regions(language.English)},
		{"English.Regions()", English.Regions()},
	}
	knownRegions := map[language.Region]bool{}
	for _, r := range Values.Regions() {
		checkDefined(r, regionNamers)
		knownRegions[r] = true
	}
	for _, r := range language.Supported.Regions() {
		canon := r.Canonicalize()
		if knownRegions[canon] {
			continue
		}
		checkUnsupported(canon, regionNamers)
	}

	// Scripts.
	scriptNamers := []testcase{
		{"Scripts(en)", Scripts(language.English)},
		{"English.Scripts()", English.Scripts()},
	}
	knownScripts := map[language.Script]bool{}
	for _, s := range Values.Scripts() {
		checkDefined(s, scriptNamers)
		knownScripts[s] = true
	}
	for _, s := range language.Supported.Scripts() {
		// Canonicalize the script.
		tag, _ := language.DeprecatedScript.Compose(s)
		_, canon, _ := tag.Raw()
		if knownScripts[canon] {
			continue
		}
		checkUnsupported(canon, scriptNamers)
	}
}
// TestSupported tests that at least one Namer with data exists for every tag
// in Supported, that no tag is listed twice, and that no Namer is returned for
// base languages the matcher does not match. The defined helper also verifies
// the documented claim that a returned Namer has at least some data.
func TestSupported(t *testing.T) {
	supportedTags := Supported.Tags()
	if got := len(supportedTags); got != numSupported {
		t.Errorf("number of supported was %d; want %d", got, numSupported)
	}

	factories := []struct {
		kind string
		fn   func(language.Tag) Namer
	}{
		{"Tags", Tags},
		{"Languages", Languages},
		{"Regions", Regions},
		{"Scripts", Scripts},
	}

	// Every supported tag needs at least one Namer and must be listed once.
	seen := make(map[language.Tag]bool)
	for _, tag := range supportedTags {
		hasData := false
		// Do not stop at the first hit: defined also reports Namers that are
		// non-nil but empty, so every factory has to be checked.
		for _, f := range factories {
			if defined(t, f.kind, f.fn(tag), tag) {
				hasData = true
			}
		}
		if !hasData {
			t.Errorf("%s: supported, but no data available", tag)
		}
		if seen[tag] {
			t.Errorf("%s: included in Supported.Tags more than once", tag)
		}
		seen[tag] = true
	}

	// Tags we do not claim to support must not yield a Namer.
	for _, base := range language.Supported.BaseLanguages() {
		tag, _ := language.All.Compose(base)
		// Skip tags that are supported after matching.
		_, _, conf := matcher.Match(tag)
		if conf != language.No {
			continue
		}
		for _, f := range factories {
			if defined(t, f.kind, f.fn(tag), tag) {
				t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", f.kind, tag)
			}
		}
	}
}
// defined reports whether n is a proper Namer: it must be non-nil and return a
// non-empty name for at least one value of the given kind in Values. A non-nil
// Namer without any content is reported as a test error.
func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
	if n == nil {
		return false
	}
	switch kind {
	case "Tags":
		for _, v := range Values.Tags() {
			if n.Name(v) != "" {
				return true
			}
		}
	case "Languages":
		for _, v := range Values.BaseLanguages() {
			if n.Name(v) != "" {
				return true
			}
		}
	case "Regions":
		for _, v := range Values.Regions() {
			if n.Name(v) != "" {
				return true
			}
		}
	case "Scripts":
		for _, v := range Values.Scripts() {
			if n.Name(v) != "" {
				return true
			}
		}
	}
	t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
	return false
}
// TestCoverage checks that the English Namers return a non-empty name for
// every value in Values, and that no two values share a name.
func TestCoverage(t *testing.T) {
	en := language.English
	cases := []struct {
		n Namer
		x interface{}
	}{
		{Languages(en), Values.Tags()},
		{Scripts(en), Values.Scripts()},
		{Regions(en), Values.Regions()},
	}
	for i, tc := range cases {
		// Maps each name to the value that most recently produced it.
		byName := make(map[string]interface{})
		values := reflect.ValueOf(tc.x)
		for j, n := 0, values.Len(); j < n; j++ {
			x := values.Index(j).Interface()
			// As of version 28 there is no data for az-Arab in English,
			// although there is useful data in other languages.
			if x.(fmt.Stringer).String() == "az-Arab" {
				continue
			}
			name := tc.n.Name(x)
			switch prev := byName[name]; {
			case name == "":
				t.Errorf("%d:%d:%s: missing content", i, j, x)
			case prev != nil:
				t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, name, x, prev)
			}
			byName[name] = x
		}
	}
}
// TestUpdate tests whether the Dictionary variables for certain regional
// variants need to be updated. For some languages, some of the headers may be
// empty or identical to the parent. Each listed Dictionary is compared with
// the generated header tables at the index its tag matches, so that a
// divergence after a table update is detected.
func TestUpdate(t *testing.T) {
	cases := []struct {
		d   *Dictionary
		tag string
	}{
		{ModernStandardArabic, "ar-001"},
		{AmericanEnglish, "en-US"},
		{EuropeanSpanish, "es-ES"},
		{BrazilianPortuguese, "pt-BR"},
		{SimplifiedChinese, "zh-Hans"},
	}
	for _, tc := range cases {
		_, idx, _ := matcher.Match(language.MustParse(tc.tag))
		dict := tc.d
		if !reflect.DeepEqual(dict.lang, langHeaders[idx]) {
			t.Errorf("%s: lang table update needed", tc.tag)
		}
		if !reflect.DeepEqual(dict.script, scriptHeaders[idx]) {
			t.Errorf("%s: script table update needed", tc.tag)
		}
		if !reflect.DeepEqual(dict.region, regionHeaders[idx]) {
			t.Errorf("%s: region table update needed", tc.tag)
		}
	}
}
// TestIndex checks tagIndex.index and tagIndex.len. Each tagIndex holds three
// strings of concatenated codes of width 2, 3 and 4. Codes must be numbered
// consecutively across the three strings, len must equal the total number of
// codes, and codes that are absent must yield -1.
func TestIndex(t *testing.T) {
	absent := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}
	cases := []tagIndex{
		{"", "", ""},
		{"bb", "", ""},
		{"", "bbb", ""},
		{"", "", "Bbbb"},
		{"bb", "bbb", "Bbbb"},
		{"bbccddyy", "bbbcccdddyyy", "BbbbCcccDdddYyyy"},
	}
	for i, tc := range cases {
		// Walk every code in the tagIndex in order and count them.
		want := 0
		for width := 2; width <= 4; width++ {
			codes := tc[width-2]
			for off := 0; off < len(codes); off += width {
				code := codes[off : off+width]
				if got := tc.index(code); got != want {
					t.Errorf("%d:%s: index was %d; want %d", i, code, got, want)
				}
				want++
			}
		}
		if got := tc.len(); got != want {
			t.Errorf("%d: len was %d; want %d", i, got, want)
		}
		for _, code := range absent {
			if got := tc.index(code); got != -1 {
				t.Errorf("%d:%s: index was %d; want -1", i, code, got)
			}
		}
	}
}
// TestTag checks full tag descriptions as returned by the Tags Namer and as
// rendered by a message printer through the Tag Formatter. A name of the form
// "a|b" expects a from the Namer and b from the Formatter; see splitName.
func TestTag(t *testing.T) {
	cases := []struct {
		dict string
		tag  string
		name string
	}{
		// sr is in Value.Languages(), but is not supported by agq.
		{"agq", "sr", "|[language: sr]"},
		{"nl", "nl", "Nederlands"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: check if this is a CLDR bug.
		// {"nl", "nl-BE", "Vlaams"},
		{"nl", "nl-BE", "Nederlands (België)"},
		{"nl", "vls", "West-Vlaams"},
		{"en", "nl-BE", "Flemish"},
		{"en", "en", "English"},
		{"en", "en-GB", "British English"},
		{"en", "en-US", "American English"}, // American English in CLDR 24+
		{"ru", "ru", "русский"},
		{"ru", "ru-RU", "русский (Россия)"},
		{"ru", "ru-Cyrl", "русский (кириллица)"},
		{"en", lastLang2zu.String(), "Zulu"},
		{"en", firstLang2aa.String(), "Afar"},
		{"en", lastLang3zza.String(), "Zaza"},
		{"en", firstLang3ace.String(), "Achinese"},
		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
		{"en", lastTagZhHant.String(), "Traditional Chinese"},
		{"en", "aaa", "|Unknown language (aaa)"},
		{"en", "zzj", "|Unknown language (zzj)"},
		// If full tag doesn't match, try without script or region.
		{"en", "aa-Hans", "Afar (Simplified Han)"},
		{"en", "af-Arab", "Afrikaans (Arabic)"},
		{"en", "zu-Cyrl", "Zulu (Cyrillic)"},
		{"en", "aa-GB", "Afar (United Kingdom)"},
		{"en", "af-NA", "Afrikaans (Namibia)"},
		{"en", "zu-BR", "Zulu (Brazil)"},
		// Correct inheritance and language selection.
		{"zh", "zh-TW", "中文 (台湾)"},
		{"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
		{"zh-Hant", "zh-TW", "中文 (台灣)"},
		{"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
		// Some rather arbitrary interpretations for Serbian. This is arguably
		// correct and consistent with the way zh-[Hant-]TW is handled. It will
		// also give results more in line with the expectations if users
		// explicitly use "sh".
		{"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
		{"sr-Latn", "sr-Latn-ME", "srpskohrvatski (Crna Gora)"},
		// Double script and region
		{"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
	}
	for _, tc := range cases {
		t.Run(tc.dict+"/"+tc.tag, func(t *testing.T) {
			wantName, wantFormatted := splitName(tc.name)
			dict := language.MustParse(tc.dict)
			tag := language.Raw.MustParse(tc.tag)

			got := Tags(dict).Name(tag)
			if got != wantName {
				// There are inconsistencies w.r.t. capitalization in the tests
				// due to CLDR's update procedure which treats modern and other
				// languages differently.
				// See http://unicode.org/cldr/trac/ticket/8051.
				// TODO: use language capitalization to sanitize the strings.
				t.Errorf("Name(%s) = %q; want %q", tag, got, wantName)
			}

			printer := message.NewPrinter(dict)
			formatted := printer.Sprint(Tag(tag))
			if formatted != wantFormatted {
				t.Errorf("Tag(%s) = %q; want %q", tag, formatted, wantFormatted)
			}
		})
	}
}
// splitName splits an expectation of the form "name|formatName". name is the
// text before the first '|' and formatName the text between the first and
// second '|'. Without a '|', both results are the whole input.
func splitName(names string) (name, formatName string) {
	// Three fields are enough: anything after a second '|' is ignored.
	parts := strings.SplitN(names, "|", 3)
	name = parts[0]
	if len(parts) == 1 {
		return name, name
	}
	return name, parts[1]
}
// TestLanguage checks language names as returned by the Languages Namer and
// as rendered by a message printer through the Language Formatter. Tags of at
// most three characters are additionally checked as a language.Base. A name of
// the form "a|b" expects a from the Namer and b from the Formatter; see
// splitName.
func TestLanguage(t *testing.T) {
	cases := []struct {
		dict string
		tag  string
		name string
	}{
		// sr is in Value.Languages(), but is not supported by agq.
		{"agq", "sr", "|[language: sr]"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: this is probably incorrect.
		// West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
		// own language, whereas Vlaams is generally Dutch. So expect to have
		// to change these tests back.
		{"nl", "nl", "Nederlands"},
		{"nl", "vls", "West-Vlaams"},
		{"nl", "nl-BE", "Nederlands"},
		{"en", "pt", "Portuguese"},
		{"en", "pt-PT", "European Portuguese"},
		{"en", "pt-BR", "Brazilian Portuguese"},
		{"en", "en", "English"},
		{"en", "en-GB", "British English"},
		{"en", "en-US", "American English"}, // American English in CLDR 24+
		{"en", lastLang2zu.String(), "Zulu"},
		{"en", firstLang2aa.String(), "Afar"},
		{"en", lastLang3zza.String(), "Zaza"},
		{"en", firstLang3ace.String(), "Achinese"},
		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
		{"en", lastTagZhHant.String(), "Traditional Chinese"},
		{"en", "aaa", "|Unknown language (aaa)"},
		{"en", "zzj", "|Unknown language (zzj)"},
		// If full tag doesn't match, try without script or region.
		{"en", "aa-Hans", "Afar"},
		{"en", "af-Arab", "Afrikaans"},
		{"en", "zu-Cyrl", "Zulu"},
		{"en", "aa-GB", "Afar"},
		{"en", "af-NA", "Afrikaans"},
		{"en", "zu-BR", "Zulu"},
		{"agq", "zh-Hant", "|[language: zh-Hant]"},
		{"en", "sh", "Serbo-Croatian"},
		{"en", "sr-Latn", "Serbo-Croatian"},
		{"en", "sr", "Serbian"},
		{"en", "sr-ME", "Serbian"},
		{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
	}
	for _, tc := range cases {
		testtext.Run(t, tc.dict+"/"+tc.tag, func(t *testing.T) {
			wantName, wantFormatted := splitName(tc.name)
			dict := language.MustParse(tc.dict)
			tag := language.Raw.MustParse(tc.tag)
			printer := message.NewPrinter(dict)
			namer := Languages(dict)

			if got := namer.Name(tag); got != wantName {
				t.Errorf("Name(%v) = %q; want %q", tag, got, wantName)
			}
			if got := printer.Sprint(Language(tag)); got != wantFormatted {
				t.Errorf("Language(%v) = %q; want %q", tag, got, wantFormatted)
			}

			// Only bare language codes can also be exercised as a Base.
			if len(tc.tag) > 3 {
				return
			}
			base := language.MustParseBase(tc.tag)
			if got := namer.Name(base); got != wantName {
				t.Errorf("Name(%v) = %q; want %q", base, got, wantName)
			}
			if got := printer.Sprint(Language(base)); got != wantFormatted {
				t.Errorf("Language(%v) = %q; want %q", base, got, wantFormatted)
			}
		})
	}
}
// TestScript checks the names produced by Scripts(dict) and by the Script
// formatter. Inputs that start with an upper-case letter are parsed as a
// script and checked both as a language.Script and as a tag composed from it;
// all other inputs are parsed as a raw language tag.
func TestScript(t *testing.T) {
	type testCase struct {
		dict string
		scr  string
		name string
	}
	cases := []testCase{
		{"nl", "Arab", "Arabisch"},
		{"en", "Arab", "Arabic"},
		{"en", "Zzzz", "Unknown Script"},
		{"zh-Hant", "Hang", "韓文字"},
		{"zh-Hant-HK", "Hang", "韓文字"},
		{"zh", "Arab", "阿拉伯文"},
		{"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
		{"zh-Hant", "Arab", "阿拉伯文"},
		{"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
		// Canonicalized form
		{"en", "Qaai", "Inherited"},    // deprecated script, now is Zinh
		{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
		{"en", "en", "Unknown Script"},
		// Don't introduce scripts with canonicalization.
		// NOTE: this row repeats the en/sh case listed above.
		{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
	}
	for _, tc := range cases {
		t.Run(tc.dict+"/"+tc.scr, func(t *testing.T) {
			wantName, wantFmt := splitName(tc.name)
			dict := language.MustParse(tc.dict)
			printer := message.NewPrinter(dict)
			namer := Scripts(dict)

			var tag language.Tag
			if isScript := unicode.IsUpper(rune(tc.scr[0])); !isScript {
				tag = language.Raw.MustParse(tc.scr)
			} else {
				script := language.MustParseScript(tc.scr)
				if got := namer.Name(script); got != wantName {
					t.Errorf("Name(%v) = %q; want %q", script, got, wantName)
				}
				if got := printer.Sprint(Script(script)); got != wantFmt {
					t.Errorf("Script(%v) = %q; want %q", script, got, wantFmt)
				}
				tag, _ = language.Raw.Compose(script)
			}

			// The tag form must yield the same names.
			if got := namer.Name(tag); got != wantName {
				t.Errorf("Name(%v) = %q; want %q", tag, got, wantName)
			}
			if got := printer.Sprint(Script(tag)); got != wantFmt {
				t.Errorf("Script(%v) = %q; want %q", tag, got, wantFmt)
			}
		})
	}
}
// TestRegion checks the names produced by Regions(dict) and by the Region
// formatter. Inputs that start with an upper-case letter are parsed as a
// region and checked both as a language.Region and as a tag composed from it;
// all other inputs are parsed as a raw language tag.
func TestRegion(t *testing.T) {
	type testCase struct {
		dict string
		reg  string
		name string
	}
	cases := []testCase{
		{"nl", "NL", "Nederland"},
		{"en", "US", "United States"},
		{"en", "ZZ", "Unknown Region"},
		{"en-GB", "NL", "Netherlands"},
		// Canonical equivalents
		{"en", "UK", "United Kingdom"},
		// No region
		{"en", "pt", "Unknown Region"},
		{"en", "und", "Unknown Region"},
		// Don't introduce regions with canonicalization.
		{"en", "mo", "Unknown Region"},
	}
	for _, tc := range cases {
		t.Run(tc.dict+"/"+tc.reg, func(t *testing.T) {
			want := tc.name
			dict := language.MustParse(tc.dict)
			printer := message.NewPrinter(dict)
			namer := Regions(dict)

			var tag language.Tag
			if isRegion := unicode.IsUpper(rune(tc.reg[0])); !isRegion {
				tag = language.Raw.MustParse(tc.reg)
			} else {
				region := language.MustParseRegion(tc.reg)
				if got := namer.Name(region); got != want {
					t.Errorf("Name(%v) = %q; want %q", region, got, want)
				}
				if got := printer.Sprint(Region(region)); got != want {
					t.Errorf("Region(%v) = %q; want %q", region, got, want)
				}
				tag, _ = language.Raw.Compose(region)
			}

			// The tag form must yield the same names.
			if got := namer.Name(tag); got != want {
				t.Errorf("Name(%v) = %q; want %q", tag, got, want)
			}
			if got := printer.Sprint(Region(tag)); got != want {
				t.Errorf("Region(%v) = %q; want %q", tag, got, want)
			}
		})
	}
}
// TestSelf checks the names the Self namer returns for a table of tags. An
// empty expected name means no name is expected for that tag.
func TestSelf(t *testing.T) {
	type testCase struct {
		tag  string
		name string
	}
	cases := []testCase{
		{"nl", "Nederlands"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: check if this is a CLDR bug.
		// {"nl-BE", "Vlaams"},
		{"nl-BE", "Nederlands"},
		{"en-GB", "British English"},
		{lastLang2zu.String(), "isiZulu"},
		{firstLang2aa.String(), ""},  // not defined
		{lastLang3zza.String(), ""},  // not defined
		{firstLang3ace.String(), ""}, // not defined
		{firstTagAr001.String(), "العربية الرسمية الحديثة"},
		{"ar", "العربية"},
		{lastTagZhHant.String(), "繁體中文"},
		{"aaa", ""},
		{"zzj", ""},
		// Drop entries that are not in the requested script, even if there is
		// an entry for the language.
		{"aa-Hans", ""},
		{"af-Arab", ""},
		{"zu-Cyrl", ""},
		// Append the country name in the language of the matching language.
		{"af-NA", "Afrikaans"},
		{"zh", "中文"},
		// zh-TW should match zh-Hant instead of zh!
		{"zh-TW", "繁體中文"},
		{"zh-Hant", "繁體中文"},
		{"zh-Hans", "简体中文"},
		{"zh-Hant-TW", "繁體中文"},
		{"zh-Hans-TW", "简体中文"},
		// Take the entry for sr which has the matching script.
		// TODO: Capitalization changed as of CLDR 26, but change seems
		// arbitrary. Revisit capitalization with revision 27. See
		// http://unicode.org/cldr/trac/ticket/8051.
		{"sr", "српски"},
		// TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
		// Croatian. This is an artifact of the current algorithm, which is the
		// way it is to have the preferred behavior for other languages such as
		// Chinese. We can hardwire this case in the table generator or package
		// code, but we first check if CLDR can be updated.
		// {"sr-ME", "Srpski"}, // Is Srpskohrvatski
		{"sr-Latn-ME", "srpskohrvatski"},
		{"sr-Cyrl-ME", "српски"},
		{"sr-NL", "српски"},
		// NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
		// dominant script. We do not have data for kk-Arab and we chose to not
		// fall back in such cases.
		{"kk-CN", ""},
	}
	for i, tc := range cases {
		tag := language.Raw.MustParse(tc.tag)
		if got := Self.Name(tag); got != tc.name {
			t.Errorf("%d:%s: was %q; want %q", i, tc.tag, got, tc.name)
		}
	}
}
// TestEquivalence checks that each namer returns the same name for the tags
// "ro-MD" and "mo".
func TestEquivalence(t *testing.T) {
	type testCase struct {
		desc  string
		namer Namer
	}
	cases := []testCase{
		{"Self", Self},
		{"Tags", Tags(language.Romanian)},
		{"Languages", Languages(language.Romanian)},
		{"Scripts", Scripts(language.Romanian)},
	}
	for _, c := range cases {
		t.Run(c.desc, func(t *testing.T) {
			roMD := language.Raw.MustParse("ro-MD")
			nameRo := c.namer.Name(roMD)
			nameMo := c.namer.Name(language.Raw.MustParse("mo"))
			if nameRo == nameMo {
				return
			}
			t.Errorf("%q != %q", nameRo, nameMo)
		})
	}
}
// TestDictionaryLang checks the Tags namer of several predefined dictionaries
// and, for tags that consist of a base language only, the Languages namer too.
func TestDictionaryLang(t *testing.T) {
	type testCase struct {
		d    *Dictionary
		tag  string
		name string
	}
	cases := []testCase{
		{English, "en", "English"},
		{Portuguese, "af", "africâner"},
		{EuropeanPortuguese, "af", "africanês"},
		{English, "nl-BE", "Flemish"},
	}
	for i, tc := range cases {
		tag := language.MustParse(tc.tag)
		if got := tc.d.Tags().Name(tag); got != tc.name {
			t.Errorf("%d:%v: got %s; want %s", i, tag, got, tc.name)
		}
		// Only exercise Languages() when the tag equals its own base.
		base, _ := language.Compose(tag.Base())
		if base != tag {
			continue
		}
		if got := tc.d.Languages().Name(base); got != tc.name {
			t.Errorf("%d:%v: got %s; want %s", i, tag, got, tc.name)
		}
	}
}
// TestDictionaryRegion checks the Regions namer of several predefined
// dictionaries.
func TestDictionaryRegion(t *testing.T) {
	type testCase struct {
		d      *Dictionary
		region string
		name   string
	}
	cases := []testCase{
		{English, "FR", "France"},
		{Portuguese, "009", "Oceania"},
		{EuropeanPortuguese, "009", "Oceânia"},
	}
	for i, tc := range cases {
		region := language.MustParseRegion(tc.region)
		got := tc.d.Regions().Name(region)
		if got == tc.name {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, region, got, tc.name)
	}
}
// TestDictionaryScript checks the Scripts namer of several predefined
// dictionaries.
func TestDictionaryScript(t *testing.T) {
	type testCase struct {
		d      *Dictionary
		script string
		name   string
	}
	cases := []testCase{
		{English, "Cyrl", "Cyrillic"},
		{EuropeanPortuguese, "Gujr", "guzerate"},
	}
	for i, tc := range cases {
		script := language.MustParseScript(tc.script)
		got := tc.d.Scripts().Name(script)
		if got == tc.name {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, script, got, tc.name)
	}
}

View File

@ -1,116 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display_test
import (
"fmt"
"golang.org/x/text/language"
"golang.org/x/text/language/display"
"golang.org/x/text/message"
)
// ExampleFormatter prints one sentence through printers for English and
// Dutch, with the region and language names passed as display formatters.
func ExampleFormatter() {
	message.SetString(language.Dutch, "In %v people speak %v.", "In %v spreekt men %v.")

	french := language.French
	france, _ := french.Region()

	locales := []string{"en", "nl"}
	for i := range locales {
		printer := message.NewPrinter(language.Make(locales[i]))
		printer.Printf("In %v people speak %v.", display.Region(france), display.Language(french))
		printer.Println()
	}

	// Output:
	// In France people speak French.
	// In Frankrijk spreekt men Frans.
}
// ExampleNamer prints, for a list of supported tags, the English name of the
// language followed by the name of the language in the language itself.
func ExampleNamer() {
	supported := []string{
		"en-US", "en-GB", "ja", "zh", "zh-Hans", "zh-Hant", "pt", "pt-PT", "ko", "ar", "el", "ru", "uk", "pa",
	}

	en := display.English.Languages()

	for _, s := range supported {
		t := language.MustParse(s)
		// %-20s left-justifies the English name in a 20-column field, so the
		// expected output below must carry the same padding: example output
		// comparison only ignores leading and trailing space.
		fmt.Printf("%-20s (%s)\n", en.Name(t), display.Self.Name(t))
	}

	// Output:
	// American English     (American English)
	// British English      (British English)
	// Japanese             (日本語)
	// Chinese              (中文)
	// Simplified Chinese   (简体中文)
	// Traditional Chinese  (繁體中文)
	// Portuguese           (português)
	// European Portuguese  (português europeu)
	// Korean               (한국어)
	// Arabic               (العربية)
	// Greek                (Ελληνικά)
	// Russian              (русский)
	// Ukrainian            (українська)
	// Punjabi              (ਪੰਜਾਬੀ)
}
// ExampleTags prints the English display names of several Dutch tags that
// differ in script and region.
func ExampleTags() {
	namer := display.Tags(language.English)

	ids := []string{"nl", "nl-BE", "nl-CW", "nl-Arab", "nl-Cyrl-RU"}
	for _, id := range ids {
		fmt.Println(namer.Name(language.Make(id)))
	}

	// Output:
	// Dutch
	// Flemish
	// Dutch (Curaçao)
	// Dutch (Arabic)
	// Dutch (Cyrillic, Russia)
}
// ExampleDictionary shows how to reduce the amount of data linked into your
// binary by only using the predefined Dictionary variables of the languages you
// wish to support.
func ExampleDictionary() {
	// The two slices are parallel: dicts[i] is the dictionary for tags[i].
	tags := []language.Tag{
		language.English,
		language.German,
		language.Japanese,
		language.Russian,
	}
	dicts := []*display.Dictionary{
		display.English,
		display.German,
		display.Japanese,
		display.Russian,
	}

	matcher := language.NewMatcher(tags)

	getDict := func(t language.Tag) *display.Dictionary {
		_, idx, conf := matcher.Match(t)
		// Skip this check if you want to support a fall-back language, which
		// will be the first one passed to NewMatcher.
		if conf != language.No {
			return dicts[idx]
		}
		return nil
	}

	// The matcher will match Swiss German to German.
	swissGerman := language.Make("gsw")
	n := getDict(swissGerman).Languages()
	for _, t := range []language.Tag{language.German, language.Make("de-CH"), language.Make("gsw")} {
		fmt.Println(n.Name(t))
	}

	// Output:
	// Deutsch
	// Schweizer Hochdeutsch
	// Schweizerdeutsch
}

View File

@ -1,251 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains common lookup code that is shared between the various
// implementations of Namer and Dictionaries.
import (
"fmt"
"sort"
"strings"
"golang.org/x/text/language"
)
// namer is the lookup interface used by the name* helpers in this file: it
// maps a table index to a display string.
type namer interface {
	// name gets the string for the given index. It should walk the
	// inheritance chain if a value is not present in the base index.
	name(idx int) string
}
// nameLanguage returns the name n has for the language described by x. If
// there is no name for the tag itself, its parents are tried in turn; "" is
// returned once the parent chain reaches language.Und.
func nameLanguage(n namer, x interface{}) string {
	tag, _ := language.All.Compose(x)
	for {
		idx, _, _ := langTagSet.index(tag.Raw())
		str := n.name(idx)
		if str != "" {
			return str
		}
		tag = tag.Parent()
		if tag == language.Und {
			return ""
		}
	}
}
// nameScript returns the name n has for the script of x, after composing x
// into a tag with the language.DeprecatedScript canonicalization.
func nameScript(n namer, x interface{}) string {
	tag, _ := language.DeprecatedScript.Compose(x)
	_, script, _ := tag.Raw()
	idx := scriptIndex.index(script.String())
	return n.name(idx)
}
// nameRegion returns the name n has for the region of x, after composing x
// into a tag with the language.DeprecatedRegion canonicalization.
func nameRegion(n namer, x interface{}) string {
	tag, _ := language.DeprecatedRegion.Compose(x)
	_, _, region := tag.Raw()
	idx := regionIndex.index(region.String())
	return n.name(idx)
}
// nameTag returns the display name for the tag x. The language part comes from
// langN; any script or region that is not covered by the matched language
// entry is looked up in scrN and regN and appended in parentheses. It returns
// "" if x is not a language.Tag or if neither the tag nor any of its parents
// has a language name.
func nameTag(langN, scrN, regN namer, x interface{}) string {
	t, ok := x.(language.Tag)
	if !ok {
		return ""
	}
	// Canonicalize with every rule except script suppression. If
	// canonicalization fails the tag is used as given.
	const form = language.All &^ language.SuppressScript
	if c, err := form.Canonicalize(t); err == nil {
		t = c
	}
	// Keep the script and region of the canonical tag: they become the
	// "excess" parts once the lookup falls back to a parent.
	_, sRaw, rRaw := t.Raw()
	// scr and reg are the parts not covered by the entry at index i.
	i, scr, reg := langTagSet.index(t.Raw())
	for i != -1 {
		if str := langN.name(i); str != "" {
			if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
				ss, sr := "", ""
				if hasS {
					ss = scrN.name(scriptIndex.index(scr.String()))
				}
				if hasR {
					sr = regN.name(regionIndex.index(reg.String()))
				}
				// TODO: use patterns in CLDR or at least confirm they are the
				// same for all languages.
				if ss != "" && sr != "" {
					return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
				}
				// Exactly one of ss and sr is non-empty here, so concatenating
				// them yields that one name.
				if ss != "" || sr != "" {
					return fmt.Sprintf("%s (%s%s)", str, ss, sr)
				}
			}
			return str
		}
		// No name at this level: retry with the parent tag and treat the
		// original script and region as the parts still to be appended.
		scr, reg = sRaw, rRaw
		if t = t.Parent(); t == language.Und {
			return ""
		}
		i, _, _ = langTagSet.index(t.Raw())
	}
	return ""
}
// header contains the data and indexes for a single namer.
// data contains a series of strings concatenated into one. index contains the
// offsets for a string in data. For example, consider a header that defines
// strings for the languages de, el, en, fi, and nl:
//
//	header{
//		data: "GermanGreekEnglishDutch",
//		index: []uint16{ 0, 6, 11, 18, 18, 23 },
//	}
//
// For a language with index i, the string is defined by
// data[index[i]:index[i+1]]. So the number of elements in index is always one
// greater than the number of languages for which header defines a value.
// A string for a language may be empty, which means the name is undefined. In
// the above example, the name for fi (Finnish) is undefined.
type header struct {
	data  string
	index []uint16
}

// name returns the string stored for index i, or "" when i does not address
// an entry of the header.
func (h *header) name(i int) string {
	if i < 0 || i >= len(h.index)-1 {
		return ""
	}
	lo, hi := h.index[i], h.index[i+1]
	return h.data[lo:hi]
}
// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
	// single indexes the entries that are looked up by base language alone.
	single tagIndex
	// long lists the entries that carry a script or region after the base
	// language. It is searched with sort.SearchStrings, and indices into it
	// are reported offset by single.len().
	long []string
}
var (
	// langTagSet is used for indexing the language strings of a dictionary.
	langTagSet = tagSet{
		single: langIndex,
		long:   langTagsLong,
	}
	// selfTagSet is used for indexing the language strings in their own
	// language.
	selfTagSet = tagSet{
		single: selfIndex,
		long:   selfTagsLong,
	}
	// zzzz and zz are the script "Zzzz" and the region "ZZ"; tagSet.index
	// treats them the same as an absent script or region.
	zzzz = language.MustParseScript("Zzzz")
	zz   = language.MustParseRegion("ZZ")
)
// index returns the index of the tag for the given base, script and region or
// its parent if the tag is not available. If the match is for a parent entry,
// the excess script and region are returned.
// The returned index is -1 if not even the base language is present.
func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
	lang := base.String()
	index := -1
	// The long list only needs to be consulted if there is a script or region.
	if (scr != language.Script{} || reg != language.Region{}) {
		// Treat the unknown script and region as if they were not specified.
		if scr == zzzz {
			scr = language.Script{}
		}
		if reg == zz {
			reg = language.Region{}
		}
		// Find the first long entry that is >= lang and scan the run of
		// entries that share the prefix.
		i := sort.SearchStrings(ts.long, lang)
		// All entries have either a script or a region and not both.
		scrStr, regStr := scr.String(), reg.String()
		for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
			// s is the part of the entry that follows the base language and
			// its one-byte separator.
			if s := ts.long[i][len(lang)+1:]; s == scrStr {
				// The entry accounts for the script; only the region, if any,
				// remains as excess.
				scr = language.Script{}
				index = i + ts.single.len()
				break
			} else if s == regStr {
				// The entry accounts for the region; only the script, if any,
				// remains as excess.
				reg = language.Region{}
				index = i + ts.single.len()
				break
			}
		}
	}
	// Fall back to the entry for the base language alone.
	if index == -1 {
		index = ts.single.index(lang)
	}
	return index, scr, reg
}
// Tags returns all tags of the set: first the entries of single, then the
// entries of long, each parsed with language.Raw.
func (ts *tagSet) Tags() []language.Tag {
	total := ts.single.len() + len(ts.long)
	out := make([]language.Tag, 0, total)
	add := func(s string) {
		out = append(out, language.Raw.MustParse(s))
	}
	ts.single.keys(add)
	for i := range ts.long {
		add(ts.long[i])
	}
	return out
}
// supportedScripts returns the scripts listed in scriptIndex, in index order.
func supportedScripts() []language.Script {
	scripts := make([]language.Script, 0, scriptIndex.len())
	collect := func(key string) {
		scripts = append(scripts, language.MustParseScript(key))
	}
	scriptIndex.keys(collect)
	return scripts
}
// supportedRegions returns the regions listed in regionIndex, in index order.
func supportedRegions() []language.Region {
	regions := make([]language.Region, 0, regionIndex.len())
	collect := func(key string) {
		regions = append(regions, language.MustParseRegion(key))
	}
	regionIndex.keys(collect)
	return regions
}
// tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
// for each length, which can be used in combination with binary search to get
// the index associated with a tag.
// For example, a tagIndex{
//
//	"arenesfrruzh",  // 6 2-byte tags.
//	"barwae",        // 2 3-byte tags.
//	"",
//
// }
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
// "wae" had an index of 7.
type tagIndex [3]string

// index returns the position of s among all tags of the tagIndex, counting
// the 2-byte tags first, then the 3-byte and then the 4-byte tags. It returns
// -1 if s is not 2 to 4 bytes long or is not present.
func (t *tagIndex) index(s string) int {
	width := len(s)
	if width < 2 || width > 4 {
		return -1
	}
	bucket := t[width-2]
	// Binary search over the fixed-width records of the bucket.
	pos := sort.Search(len(bucket)/width, func(i int) bool {
		return bucket[i*width:(i+1)*width] >= s
	})
	start, end := pos*width, (pos+1)*width
	if end > len(bucket) || bucket[start:end] != s {
		return -1
	}
	// Add the number of tags for smaller sizes.
	for w := 2; w < width; w++ {
		pos += len(t[w-2]) / w
	}
	return pos
}

// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
	for i := range t {
		width := i + 2
		n += len(t[i]) / width
	}
	return n
}

// keys calls f for each tag.
func (t *tagIndex) keys(f func(key string)) {
	for i, rest := range *t {
		width := i + 2
		for len(rest) > 0 {
			f(rest[:width])
			rest = rest[width:]
		}
	}
}

View File

@ -1,602 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generator for display name tables.
package main
import (
"bytes"
"flag"
"fmt"
"log"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
// Command line flags of the generator.
var (
	// test: usage text says it tests existing tables.
	test = flag.Bool("test", false,
		"test existing tables; can be used to compare web data with package data.")
	// outputFile is the name of the Go file that is written.
	outputFile = flag.String("output", "tables.go", "output file")
	// stats: usage text says it prints statistics to stderr.
	stats = flag.Bool("stats", false, "prints statistics to stderr")
	// short selects the "short" alternative of a name instead of the
	// "stand-alone" one (see filter).
	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
	// draft is the minimal CLDR draft level of the data that is kept (see
	// filter).
	draft = flag.String("draft",
		"contributed",
		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
	// pkg is the package name for the generated file.
	pkg = flag.String("package",
		"display",
		"the name of the package in which the generated file is to be included")

	// tags restricts the tags for which data is included; it starts out empty,
	// which tagSet.contains treats as "all tags".
	tags = newTagSet("tags",
		[]language.Tag{},
		"space-separated list of tags to include or empty for all")
	// dict is the set of tags for which a Dictionary is generated; it defaults
	// to the list returned by dictTags.
	dict = newTagSet("dict",
		dictTags(),
		"space-separated list or tags for which to include a Dictionary. "+
			`"" means the common list from go.text/language.`)
)
// dictTags returns the default list of tags for which a Dictionary is
// generated.
func dictTags() (tag []language.Tag) {
	// TODO: replace with language.Common.Tags() once supported.
	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
		"zh zh-Hans zh-Hant zu"

	ids := strings.Split(str, " ")
	tag = make([]language.Tag, 0, len(ids))
	for i := range ids {
		tag = append(tag, language.MustParse(ids[i]))
	}
	return tag
}
// main decodes the localeDisplayNames section of the CLDR core zip and writes
// the generated display name tables to the file named by the -output flag, in
// the package named by the -package flag.
func main() {
	gen.Init()

	// Read the CLDR zip file.
	r := gen.OpenCLDRCoreZip()
	defer r.Close()

	d := &cldr.Decoder{}
	d.SetDirFilter("main", "supplemental")
	d.SetSectionFilter("localeDisplayNames")
	data, err := d.DecodeZip(r)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	// Use the -package flag instead of a hard-coded package name; its default
	// is "display", so the output is unchanged unless the flag is set. The
	// flag is read at the same point as -output.
	defer w.WriteGoFile(*outputFile, *pkg)

	gen.WriteCLDRVersion(w)

	b := builder{
		w:     w,
		data:  data,
		group: make(map[string]*group),
	}
	b.generate()
}
// tagForm is the canonicalization used to parse the tags handled by this
// generator.
const tagForm = language.All

// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
type tagSet map[language.Tag]bool
// newTagSet creates a tagSet holding tags and registers it as the command
// line flag called name with the given usage text.
func newTagSet(name string, tags []language.Tag, usage string) tagSet {
	set := make(tagSet)
	for i := range tags {
		set[tags[i]] = true
	}
	flag.Var(set, name, usage)
	return set
}
// String implements the String method of the flag.Value interface. It returns
// the tags of the set sorted by their string form and separated by spaces.
func (f tagSet) String() string {
	names := make([]string, 0, len(f))
	for tag := range f {
		names = append(names, tag.String())
	}
	sort.Strings(names)
	return strings.Join(names, " ")
}
// Set implements Set from the flag.Value interface. Every non-empty,
// space-separated field of s is parsed with tagForm and added to the set; the
// first parse error is returned.
func (f tagSet) Set(s string) error {
	for _, field := range strings.Split(s, " ") {
		if field == "" {
			continue
		}
		tag, err := tagForm.Parse(field)
		if err != nil {
			return err
		}
		f[tag] = true
	}
	return nil
}
// contains reports whether t is selected by the set. An empty set selects
// every tag.
func (f tagSet) contains(t language.Tag) bool {
	return len(f) == 0 || f[t]
}
// builder is used to create all tables with display name information.
type builder struct {
	// w receives the generated code.
	w *gen.CodeWriter
	// data is the decoded CLDR data the tables are built from.
	data *cldr.CLDR

	// fromLocs is reset by setData.
	fromLocs []string

	// destination tags for the current locale.
	toTags     []string
	toTagIndex map[string]int

	// list of supported tags
	supported []language.Tag

	// key-value pairs per group
	group map[string]*group

	// statistics
	sizeIndex int // total size of all indexes of headers
	sizeData  int // total size of all data of headers
	totalSize int
}
// group collects the names of one kind (the generator uses "lang", "script",
// "region" and "self") for all locales.
type group struct {
	// Maps from a given language to the Namer data for this language.
	lang map[language.Tag]keyValues
	// headers holds one entry per supported tag; filled in by writeGroup.
	headers []header

	// toTags is the sorted, de-duplicated list of keys that occur in lang;
	// filled in by writeGroup.
	toTags        []string
	threeStart    int
	fourPlusStart int
}
// set records name as the value of key typ for tag t, creating the key-value
// map for t on first use. A non-empty value that is already present is kept.
func (g *group) set(t language.Tag, typ, name string) {
	values := g.lang[t]
	if values == nil {
		values = keyValues{}
		g.lang[t] = values
	}
	if current := values[typ]; current != "" {
		return
	}
	values[typ] = name
}
// keyValues maps a key (the string form of a language tag, script or region)
// to its display name.
type keyValues map[string]string

// header is the generator's view of the names of one supported tag: data is
// the concatenation of all names and index holds the offsets into data.
type header struct {
	tag   language.Tag
	data  string
	index []uint16
}

// versionInfo is the format string for the generated, deprecated Version
// constant.
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v

`

// self is the tag under which the names of languages in their own language
// are stored.
var self = language.MustParse("mul")
// generate builds and writes all tables.
// It first collects and writes the "lang", "script" and "region" groups for
// all supported locales together with the dictionaries, and then builds and
// writes the "self" group, which holds the name of each language in that
// language itself.
func (b *builder) generate() {
	fmt.Fprintf(b.w, versionInfo, cldr.Version)

	// Select the alternative and draft level of the CLDR data to use.
	b.filter()
	// Collect the language names per locale.
	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Languages != nil {
			for _, v := range ldn.Languages.Language {
				lang := v.Type
				if lang == "root" {
					// We prefer the data from "und"
					// TODO: allow both the data for root and und somehow.
					continue
				}
				tag := tagForm.MustParse(lang)
				if tags.contains(tag) {
					g.set(loc, tag.String(), v.Data())
				}
			}
		}
	})
	// Collect the script names per locale.
	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Scripts != nil {
			for _, v := range ldn.Scripts.Script {
				code := language.MustParseScript(v.Type)
				if code.IsPrivateUse() { // Qaaa..Qabx
					// TODO: data currently appears to be very meager.
					// Reconsider if we have data for English.
					if loc == language.English {
						log.Fatal("Consider including data for private use scripts.")
					}
					continue
				}
				g.set(loc, code.String(), v.Data())
			}
		}
	})
	// Collect the region names per locale.
	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Territories != nil {
			for _, v := range ldn.Territories.Territory {
				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
			}
		}
	})

	// The supported list must be known before parents and groups are written,
	// as both are indexed by it.
	b.makeSupported()

	b.writeParents()

	b.writeGroup("lang")
	b.writeGroup("script")
	b.writeGroup("region")

	// Write the supported tags as a single "|"-terminated string constant.
	b.w.WriteConst("numSupported", len(b.supported))
	buf := bytes.Buffer{}
	for _, tag := range b.supported {
		fmt.Fprint(&buf, tag.String(), "|")
	}
	b.w.WriteConst("supported", buf.String())

	b.writeDictionaries()

	// From here on only the "self" group is built; it has a single header,
	// stored under the tag self.
	b.supported = []language.Tag{self}

	// Compute the names of locales in their own language. Some of these names
	// may be specified in their parent locales. The loop below makes four
	// passes (i = 0..3); in pass i each CLDR key is walked up i parents before
	// it is compared with the locale tag, so that successive parents of tags
	// are matched until a possible match is found.
	for i := 0; i < 4; i++ {
		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
			parent := tag
			// After the first pass, a locale with a script but no region is
			// also matched through its bare base language.
			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
				parent, _ = language.Raw.Compose(b)
			}
			if ldn.Languages != nil {
				for _, v := range ldn.Languages.Language {
					key := tagForm.MustParse(v.Type)
					saved := key
					if key == parent {
						g.set(self, tag.String(), v.Data())
					}
					for k := 0; k < i; k++ {
						key = key.Parent()
					}
					if key == tag {
						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
					}
				}
			}
		})
	}

	b.writeGroup("self")
}
// setData resets the per-group builder state, looks up (or creates) the group
// called name and calls f for every CLDR locale that has display name data
// and is selected by the -tags flag.
func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
	b.sizeIndex, b.sizeData = 0, 0
	b.toTags, b.fromLocs = nil, nil
	b.toTagIndex = make(map[string]int)

	g := b.group[name]
	if g == nil {
		g = &group{lang: make(map[language.Tag]keyValues)}
		b.group[name] = g
	}
	for _, loc := range b.data.Locales() {
		// We use RawLDML instead of LDML as we are managing our own inheritance
		// in this implementation.
		ldml := b.data.RawLDML(loc)

		tag, err := tagForm.Parse(loc)
		ldn := ldml.LocaleDisplayNames
		if err != nil {
			// We do not support the POSIX variant (it is not a supported BCP 47
			// variant). This locale also doesn't happen to contain any data, so
			// we'll skip it by checking for this.
			if ldn != nil {
				log.Fatalf("setData: %v", err)
			}
			continue
		}
		if ldn == nil || !tags.contains(tag) {
			continue
		}
		f(g, tag, ldn)
	}
}
// filter reduces the language, script and territory lists of every locale to
// one alternative per group ("short" or "stand-alone", depending on the
// -short flag) at the draft level given by the -draft flag. Lists that end up
// empty are set to nil.
func (b *builder) filter() {
	apply := func(s *cldr.Slice) {
		alt := "stand-alone"
		if *short {
			alt = "short"
		}
		s.SelectOnePerGroup("alt", []string{alt, ""})

		level, err := cldr.ParseDraft(*draft)
		if err != nil {
			log.Fatalf("filter: %v", err)
		}
		s.SelectDraft(level)
	}
	for _, loc := range b.data.Locales() {
		ldn := b.data.RawLDML(loc).LocaleDisplayNames
		if ldn == nil {
			continue
		}
		if ldn.Languages != nil {
			s := cldr.MakeSlice(&ldn.Languages.Language)
			apply(&s)
			if len(ldn.Languages.Language) == 0 {
				ldn.Languages = nil
			}
		}
		if ldn.Scripts != nil {
			s := cldr.MakeSlice(&ldn.Scripts.Script)
			apply(&s)
			if len(ldn.Scripts.Script) == 0 {
				ldn.Scripts = nil
			}
		}
		if ldn.Territories != nil {
			s := cldr.MakeSlice(&ldn.Territories.Territory)
			apply(&s)
			if len(ldn.Territories.Territory) == 0 {
				ldn.Territories = nil
			}
		}
	}
}
// makeSupported creates a list of all supported locales: the sorted,
// de-duplicated union of the locales that have data in any group.
func (b *builder) makeSupported() {
	// tags across groups
	for _, g := range b.group {
		for tag := range g.lang {
			b.supported = append(b.supported, tag)
		}
	}
	n := unique(tagsSorter(b.supported))
	b.supported = b.supported[:n]
}
// tagsSorter implements sort.Interface, ordering tags by their string form.
type tagsSorter []language.Tag

// Len returns the number of tags.
func (a tagsSorter) Len() int {
	return len(a)
}

// Swap exchanges the tags at positions i and j.
func (a tagsSorter) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the string form of tag i sorts before that of tag j.
func (a tagsSorter) Less(i, j int) bool {
	x, y := a[i].String(), a[j].String()
	return x < y
}
// writeGroup builds one header per supported tag for the group called name
// and writes the resulting table.
func (b *builder) writeGroup(name string) {
	g := b.group[name]

	// Collect every key that occurs for any locale, then sort and
	// de-duplicate the list.
	for _, kv := range g.lang {
		for t, _ := range kv {
			g.toTags = append(g.toTags, t)
		}
	}
	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]

	// Allocate header per supported value.
	g.headers = make([]header, len(b.supported))
	for i, sup := range b.supported {
		kv, ok := g.lang[sup]
		if !ok {
			// No data for this tag: leave an empty header that only carries
			// the tag.
			g.headers[i].tag = sup
			continue
		}
		data := []byte{}
		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
		for j, t := range g.toTags {
			// index[j] is the offset at which the name for key j starts; a
			// missing name contributes no bytes.
			index[j] = uint16(len(data))
			data = append(data, kv[t]...)
		}
		// The final element marks the end of the last name.
		index = append(index, uint16(len(data)))

		// Trim the tail of the index.
		// TODO: indexes can be reduced in size quite a bit more.
		n := len(index)
		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
		}
		index = index[:n]

		// Workaround for a bug in CLDR 26.
		// See http://unicode.org/cldr/trac/ticket/8042.
		if cldr.Version == "26" && sup.String() == "hsb" {
			data = bytes.Replace(data, []byte{'"'}, nil, 1)
		}
		g.headers[i] = header{sup, string(data), index}
	}
	g.writeTable(b.w, name)
}
// tagsBySize implements sort.Interface for a list of tag strings. Strings of
// up to four bytes are ordered by length first; everything else is ordered
// alphabetically.
type tagsBySize []string

// Len returns the number of strings.
func (l tagsBySize) Len() int {
	return len(l)
}

// Swap exchanges the strings at positions i and j.
func (l tagsBySize) Swap(i, j int) {
	l[j], l[i] = l[i], l[j]
}

// Less reports whether string i sorts before string j.
func (l tagsBySize) Less(i, j int) bool {
	x, y := l[i], l[j]
	// Sort single-tag entries based on size first. Otherwise alphabetic.
	sameLen := len(x) == len(y)
	bothLong := len(x) > 4 && len(y) > 4
	if sameLen || bothLong {
		return x < y
	}
	return len(x) < len(y)
}
// parentIndices returns slice a of len(tags) where tags[a[i]] is the nearest
// ancestor of tags[i] that is itself in tags. a[i] is -1 if no ancestor below
// language.Und is in tags.
func parentIndices(tags []language.Tag) []int16 {
	position := make(map[language.Tag]int16)
	for i := range tags {
		position[tags[i]] = int16(i)
	}

	// Construct default parents.
	parents := make([]int16, len(tags))
	for i := range tags {
		parents[i] = -1
		for p := tags[i].Parent(); p != language.Und; p = p.Parent() {
			j, found := position[p]
			if !found {
				continue
			}
			parents[i] = j
			break
		}
	}
	return parents
}
// writeParents writes the parents array, which holds for each supported tag
// the index of its parent as computed by parentIndices.
func (b *builder) writeParents() {
	indices := parentIndices(b.supported)

	fmt.Fprintf(b.w, "var parents = ")
	b.w.WriteArray(indices)
}
// writeKeys writes keys to a special index used by the display package.
// tags are assumed to be sorted by length.
// Keys of length 2, 3 and 4 are written as the three concatenated strings of
// <name>Index; any keys left over are written as the slice <name>TagsLong.
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
	w.Size += int(3 * reflect.TypeOf("").Size())
	w.WriteComment("Number of keys: %d", len(keys))
	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
	for size := 2; size <= 4; size++ {
		// Take the leading run of keys that have exactly this size.
		n := 0
		for n < len(keys) && len(keys[n]) == size {
			n++
		}
		w.WriteString(strings.Join(keys[:n], ""))
		fmt.Fprintf(w, ",\n")
		keys = keys[n:]
	}
	fmt.Fprintln(w, "\t}")
	if len(keys) > 0 {
		w.Size += int(reflect.TypeOf([]string{}).Size())
		fmt.Fprintf(w, "\t%sTagsLong = ", name)
		w.WriteSlice(keys)
	}
	fmt.Fprintln(w, ")\n")
}
// identifier creates an identifier from the given tag by dropping every "-"
// from its string form.
func identifier(t language.Tag) string {
	s := t.String()
	return strings.Replace(s, "-", "", -1)
}
// writeEntry writes the element for h in the headers array of a group. Tags
// that get a Dictionary refer to separately written <tag><name>Str and
// <tag><name>Idx identifiers, headers without data are written as empty
// literals, and all others have their data and index written inline.
func (h *header) writeEntry(w *gen.CodeWriter, name string) {
	switch {
	case len(dict) > 0 && dict.contains(h.tag):
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
		fmt.Fprintln(w, "\t},")
	case len(h.data) == 0:
		fmt.Fprintln(w, "\t\t{}, //", h.tag)
	default:
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		w.WriteString(h.data)
		fmt.Fprintln(w, ",")
		w.WriteSlice(h.index)
		fmt.Fprintln(w, ",\n\t},")
	}
}
// writeSingle writes the data for the given header as single entries. The size
// for this data was already accounted for in writeEntry. Nothing is written
// for tags that do not get a Dictionary.
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
	if len(dict) == 0 || !dict.contains(h.tag) {
		return
	}
	prefix := identifier(h.tag) + name
	w.WriteConst(prefix+"Str", h.data)
	// Note that we create a slice instead of an array. If we use an array
	// we need to refer to it as a[:] in other tables, which will cause the
	// array to always be included by the linker. See Issue 7651.
	w.WriteVar(prefix+"Idx", h.index)
}
// writeTable writes the tables for a single Namer: the key index, the array of
// headers (one per supported tag), the separately named data of headers that
// belong to a Dictionary, and a comment with the total size.
func (g *group) writeTable(w *gen.CodeWriter, name string) {
	start := w.Size
	writeKeys(w, name, g.toTags)
	// Take the element size from the type rather than from g.headers[0], so
	// that a group without headers does not cause an index-out-of-range panic.
	w.Size += len(g.headers) * int(reflect.TypeOf(header{}).Size())

	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))

	title := strings.Title(name)
	for _, h := range g.headers {
		h.writeEntry(w, title)
	}
	fmt.Fprintln(w, "}\n")

	for _, h := range g.headers {
		h.writeSingle(w, title)
	}
	n := w.Size - start
	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
}
// writeDictionaries writes a Dictionary variable for every supported tag that
// is selected by the -dict flag. Each Dictionary refers to the Dictionary of
// its parent (or nil) and to the separately written Lang, Script and Region
// data of the tag. The estimated size of the entries is added to b.w.Size.
func (b *builder) writeDictionaries() {
	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
	fmt.Fprintln(b.w, "var (")
	parents := parentIndices(b.supported)

	for i, t := range b.supported {
		if !dict.contains(t) {
			continue
		}
		ident := identifier(t)
		fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
		switch p := parents[i]; p {
		case -1:
			fmt.Fprintln(b.w, "\t\tnil,")
		default:
			fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
		}
		fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
		fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
		fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
		fmt.Fprintln(b.w, "\t}")
	}
	fmt.Fprintln(b.w, ")")

	// Size of one entry: three headers (a string and a []uint16 each) plus
	// one pointer.
	var (
		s string
		a []uint16
	)
	headerSize := reflect.TypeOf(s).Size() + reflect.TypeOf(a).Size()
	entrySize := 3*headerSize + reflect.TypeOf(&a).Size()
	n := int(entrySize) * len(dict)
	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
	b.w.Size += n
}
// unique sorts the given lists and removes duplicate entries by swapping them
// past position k, where k is the number of unique values. It returns k.
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	// last is the position of the most recently kept unique value.
	last := 0
	for i := 1; i < n; i++ {
		if !a.Less(last, i) {
			// Element i equals the last kept value: leave it behind.
			continue
		}
		last++
		if last != i {
			a.Swap(last, i)
		}
	}
	return last + 1
}

File diff suppressed because it is too large Load Diff

View File

@ -1,413 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language_test
import (
"fmt"
"net/http"
"golang.org/x/text/language"
)
// ExampleCanonType prints, for a handful of input tags, the result of Make
// using the package-level default and using the BCP47, Macro and All
// canonicalization types, so the differences between them can be compared.
func ExampleCanonType() {
// p prints the tag produced for id under each canonicalization type.
p := func(id string) {
fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
}
p("en-Latn")
p("sh")
p("zh-cmn")
p("bjd")
p("iw-Latn-fonipa-u-cu-usd")
// Output:
// Default(en-Latn) -> en-Latn
// BCP47(en-Latn) -> en
// Macro(en-Latn) -> en-Latn
// All(en-Latn) -> en
// Default(sh) -> sr-Latn
// BCP47(sh) -> sh
// Macro(sh) -> sh
// All(sh) -> sr-Latn
// Default(zh-cmn) -> cmn
// BCP47(zh-cmn) -> cmn
// Macro(zh-cmn) -> zh
// All(zh-cmn) -> zh
// Default(bjd) -> drl
// BCP47(bjd) -> drl
// Macro(bjd) -> bjd
// All(bjd) -> drl
// Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
// All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
}
// ExampleTag_Base prints the two values returned by Tag.Base (the base
// language and a confidence) for tags whose language is undetermined and has
// to be derived from the region, if any.
func ExampleTag_Base() {
fmt.Println(language.Make("und").Base())
fmt.Println(language.Make("und-US").Base())
fmt.Println(language.Make("und-NL").Base())
fmt.Println(language.Make("und-419").Base()) // Latin America
fmt.Println(language.Make("und-ZZ").Base())
// Output:
// en Low
// en High
// nl High
// es Low
// en Low
}
// ExampleTag_Script prints the script and confidence returned by Tag.Script
// and shows how the confidence can be compared against language.Exact to find
// out whether the script was explicitly part of the tag.
func ExampleTag_Script() {
en := language.Make("en")
sr := language.Make("sr")
sr_Latn := language.Make("sr_Latn")
fmt.Println(en.Script())
fmt.Println(sr.Script())
// Was a script explicitly specified?
_, c := sr.Script()
fmt.Println(c == language.Exact)
_, c = sr_Latn.Script()
fmt.Println(c == language.Exact)
// Output:
// Latn High
// Cyrl Low
// false
// true
}
// ExampleTag_Region prints the region and confidence returned by Tag.Region
// for two tags that do not specify a region themselves.
func ExampleTag_Region() {
ru := language.Make("ru")
en := language.Make("en")
fmt.Println(ru.Region())
fmt.Println(en.Region())
// Output:
// RU Low
// US Low
}
// ExampleRegion_TLD prints the result of Region.TLD (a region and an error)
// for several regions, first as parsed and then after calling Canonicalize on
// them.
func ExampleRegion_TLD() {
us := language.MustParseRegion("US")
gb := language.MustParseRegion("GB")
uk := language.MustParseRegion("UK")
bu := language.MustParseRegion("BU")
// TLD of the regions as parsed.
fmt.Println(us.TLD())
fmt.Println(gb.TLD())
fmt.Println(uk.TLD())
fmt.Println(bu.TLD())
// TLD of the canonicalized regions.
fmt.Println(us.Canonicalize().TLD())
fmt.Println(gb.Canonicalize().TLD())
fmt.Println(uk.Canonicalize().TLD())
fmt.Println(bu.Canonicalize().TLD())
// Output:
// US <nil>
// UK <nil>
// UK <nil>
// ZZ language: region is not a valid ccTLD
// US <nil>
// UK <nil>
// UK <nil>
// MM <nil>
}
// ExampleCompose prints the tag and error returned by Compose for various
// combinations of bases, regions, tags, extensions and errors, including the
// use of empty values and empty slices to remove parts of a tag.
func ExampleCompose() {
nl, _ := language.ParseBase("nl")
us, _ := language.ParseRegion("US")
de := language.Make("de-1901-u-co-phonebk")
jp := language.Make("ja-JP")
fi := language.Make("fi-x-ing")
u, _ := language.ParseExtension("u-nu-arabic")
x, _ := language.ParseExtension("x-piglatin")
// Combine a base language and region.
fmt.Println(language.Compose(nl, us))
// Combine a base language and extension.
fmt.Println(language.Compose(nl, x))
// Replace the region.
fmt.Println(language.Compose(jp, us))
// Combine several tags.
fmt.Println(language.Compose(us, nl, u))
// Replace the base language of a tag.
fmt.Println(language.Compose(de, nl))
fmt.Println(language.Compose(de, nl, u))
// Remove the base language.
fmt.Println(language.Compose(de, language.Base{}))
// Remove all variants.
fmt.Println(language.Compose(de, []language.Variant{}))
// Remove all extensions.
fmt.Println(language.Compose(de, []language.Extension{}))
fmt.Println(language.Compose(fi, []language.Extension{}))
// Remove all variants and extensions.
fmt.Println(language.Compose(de.Raw()))
// An error is gobbled or returned if non-nil.
fmt.Println(language.Compose(language.ParseRegion("ZA")))
fmt.Println(language.Compose(language.ParseRegion("HH")))
// Compose uses the same Default canonicalization as Make.
fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
// Call compose on a different CanonType for different results.
fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))
// Output:
// nl-US <nil>
// nl-x-piglatin <nil>
// ja-US <nil>
// nl-US-u-nu-arabic <nil>
// nl-1901-u-co-phonebk <nil>
// nl-1901-u-nu-arabic <nil>
// und-1901-u-co-phonebk <nil>
// de-u-co-phonebk <nil>
// de-1901 <nil>
// fi <nil>
// de <nil>
// und-ZA <nil>
// und language: subtag "HH" is well-formed but unknown
// en-Latn-GB <nil>
// en-GB <nil>
}
// ExampleParse_errors shows two ways of inspecting the error returned by
// Parse: a type assertion to language.ValueError and a type switch that
// separates value errors, success, and all other errors.
func ExampleParse_errors() {
// For a ValueError print the offending subtag; for any other error print
// the input string itself.
for _, s := range []string{"Foo", "Bar", "Foobar"} {
_, err := language.Parse(s)
if err != nil {
if inv, ok := err.(language.ValueError); ok {
fmt.Println(inv.Subtag())
} else {
fmt.Println(s)
}
}
}
for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
_, err := language.Parse(s)
switch e := err.(type) {
case language.ValueError:
fmt.Printf("%s: culprit %q\n", s, e.Subtag())
case nil:
// No error.
default:
// A syntax error.
fmt.Printf("%s: ill-formed\n", s)
}
}
// Output:
// foo
// Foobar
// aa-Uuuu: culprit "Uuuu"
// AC: culprit "ac"
// ac-u: ill-formed
}
// ExampleParent prints the result of Tag.Parent for several tags to
// illustrate how the parent of a tag is chosen.
func ExampleParent() {
// p prints the parent of the tag made from the given string.
p := func(tag string) {
fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
}
p("zh-CN")
// Australian English inherits from World English.
p("en-AU")
// If the tag has a different maximized script from its parent, a tag with
// this maximized script is inserted. This allows different language tags
// which have the same base language and script in common to inherit from
// a common set of settings.
p("zh-HK")
// If the maximized script of the parent is not identical, CLDR will skip
// inheriting from it, as it means there will not be many entries in common
// and inheriting from it is nonsensical.
p("zh-Hant")
// The parent of a tag with variants and extensions is the tag with all
// variants and extensions removed.
p("de-1994-u-co-phonebk")
// Remove default script.
p("de-Latn-LU")
// Output:
// parent(zh-CN): zh
// parent(en-AU): en-001
// parent(zh-HK): zh-Hant
// parent(zh-Hant): und
// parent(de-1994-u-co-phonebk): de
// parent(de-Latn-LU): de
}
// ExampleMatcher gives some examples of getting the best match of a set of
// tags to any of the tags of a given set. Each printed line is the result of
// Match: the matched tag, its index in the list given to NewMatcher, and a
// confidence.
func ExampleMatcher() {
// This is the set of tags from which we want to pick the best match. These
// can be, for example, the supported languages for some package.
tags := []language.Tag{
language.English,
language.BritishEnglish,
language.French,
language.Afrikaans,
language.BrazilianPortuguese,
language.EuropeanPortuguese,
language.Croatian,
language.SimplifiedChinese,
language.Raw.Make("iw-IL"),
language.Raw.Make("iw"),
language.Raw.Make("he"),
}
m := language.NewMatcher(tags)
// A simple match.
fmt.Println(m.Match(language.Make("fr")))
// Australian English is closer to British than American English.
fmt.Println(m.Match(language.Make("en-AU")))
// Default to the first tag passed to the Matcher if there is no match.
fmt.Println(m.Match(language.Make("ar")))
// Get the default tag.
fmt.Println(m.Match())
fmt.Println("----")
// Someone specifying sr-Latn is probably fine with getting Croatian.
fmt.Println(m.Match(language.Make("sr-Latn")))
// We match SimplifiedChinese, but with Low confidence.
fmt.Println(m.Match(language.TraditionalChinese))
// Serbian in Latin script is a closer match to Croatian than Traditional
// Chinese to Simplified Chinese.
fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn")))
fmt.Println("----")
// In case multiple variants of a language are available, the most spoken
// variant is typically returned.
fmt.Println(m.Match(language.Portuguese))
// Pick the first value passed to Match in case of a tie.
fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))
fmt.Println("----")
// If a Matcher is initialized with a language and its deprecated version,
// it will distinguish between them.
fmt.Println(m.Match(language.Raw.Make("iw")))
// However, for non-exact matches, it will treat deprecated versions as
// equivalent and consider other factors first.
fmt.Println(m.Match(language.Raw.Make("he-IL")))
fmt.Println("----")
// User settings passed to the Unicode extension are ignored for matching
// and preserved in the returned tag.
fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))
// Even if the matching language is different.
fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))
// If there is no matching language, the options of the first preferred tag are used.
fmt.Println(m.Match(language.Make("de-u-co-phonebk")))
// Output:
// fr 2 Exact
// en-GB 1 High
// en 0 No
// en 0 No
// ----
// hr 6 High
// zh-Hans 7 Low
// hr 6 High
// ----
// pt-BR 4 High
// fr 2 High
// af 3 High
// ----
// iw 9 Exact
// he 10 Exact
// ----
// fr-u-cu-frf 2 Exact
// fr-u-cu-frf 2 High
// en-u-co-phonebk 0 No
// TODO: "he" should be "he-u-rg-IL High"
}
// ExampleMatchStrings shows how an HTTP handler can select one of the
// languages supported by a service from the user's "lang" cookie and the
// request's Accept-Language header, in that order of preference.
func ExampleMatchStrings() {
	// languages supported by this service:
	matcher := language.NewMatcher([]language.Tag{
		language.English, language.Dutch, language.German,
	})
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// r.Cookie returns a nil cookie together with an error when the
		// request carries no "lang" cookie, so the cookie must not be
		// dereferenced unconditionally. Use the cookie's value rather than
		// its "name=value" serialization, which is not a language list.
		var cookieLang string
		if c, err := r.Cookie("lang"); err == nil {
			cookieLang = c.Value
		}
		tag, _ := language.MatchStrings(matcher, cookieLang, r.Header.Get("Accept-Language"))
		fmt.Println("User language:", tag)
	})
}
// ExampleComprehends prints the confidence returned by Comprehends for
// several pairs of tags, grouped by the kind of relationship between the two
// tags of each pair.
func ExampleComprehends() {
// Various levels of comprehensibility.
fmt.Println(language.Comprehends(language.English, language.English))
fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))
// An explicit Und results in no match.
fmt.Println(language.Comprehends(language.English, language.Und))
fmt.Println("----")
// There is usually no mutual comprehensibility between different scripts.
fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))
// One exception is for Traditional versus Simplified Chinese, albeit with
// a low confidence.
fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))
fmt.Println("----")
// A Swiss German speaker will often understand High German.
fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))
// The converse is not generally the case.
fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))
// Output:
// Exact
// High
// No
// ----
// No
// Low
// ----
// High
// No
}
// ExampleTag_values shows that the base and region values returned by
// Tag.Raw, as well as whole tags, can be compared with '=='.
func ExampleTag_values() {
us := language.MustParseRegion("US")
en := language.MustParseBase("en")
// Raw returns the base, script and region of a tag; the script is ignored.
lang, _, region := language.AmericanEnglish.Raw()
fmt.Println(lang == en, region == us)
lang, _, region = language.BritishEnglish.Raw()
fmt.Println(lang == en, region == us)
// Tags can be compared for exact equivalence using '=='.
en_us, _ := language.Compose(en, us)
fmt.Println(en_us == language.AmericanEnglish)
// Output:
// true true
// true false
// true
}

View File

@ -1,48 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language_test
import (
"fmt"
"net/http"
"strings"
"golang.org/x/text/language"
)
// matcher is a language.Matcher configured for all supported languages:
// British English, Norwegian and German. It is used by handler to pick a
// language for the tags parsed from a request's Accept-Language header.
var matcher = language.NewMatcher([]language.Tag{
language.BritishEnglish,
language.Norwegian,
language.German,
})
// handler has the signature of an http.HandlerFunc. It parses the request's
// Accept-Language header, matches the resulting tags against matcher, and
// prints the chosen tag together with the parsed tags, their weights and the
// parse error. The ResponseWriter is not used.
func handler(w http.ResponseWriter, r *http.Request) {
	header := r.Header.Get("Accept-Language")
	tags, weights, err := language.ParseAcceptLanguage(header)
	// We ignore the error: the default language will be selected for a nil
	// list of tags.
	best, _, _ := matcher.Match(tags...)
	fmt.Printf("%5v (t: %6v; q: %3v; err: %v)\n", best, tags, weights, err)
}
// ExampleParseAcceptLanguage feeds several Accept-Language header values to
// handler through dummy requests; handler prints the matched tag along with
// the parsed tags, weights and error for each of them.
func ExampleParseAcceptLanguage() {
for _, al := range []string{
"nn;q=0.3, en-us;q=0.8, en,",
"gsw, en;q=0.7, en-US;q=0.8",
"gsw, nl, da",
"invalid",
} {
// Create dummy request with Accept-Language set and pass it to handler.
r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
r.Header.Set("Accept-Language", al)
handler(nil, r)
}
// Output:
// en-GB (t: [ en en-US nn]; q: [ 1 0.8 0.3]; err: <nil>)
// en-GB (t: [ gsw en-US en]; q: [ 1 0.8 0.7]; err: <nil>)
// de (t: [ gsw nl da]; q: [ 1 1 1]; err: <nil>)
// en-GB (t: []; q: []; err: language: tag is not well-formed)
}

View File

@ -1,911 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"reflect"
"testing"
"golang.org/x/text/internal/testtext"
)
// TestTagSize guards against accidental growth of the Tag type: it fails when
// the size reported by reflection exceeds maxSize bytes.
func TestTagSize(t *testing.T) {
	const maxSize = 24
	// The check is an upper bound, so the failure message reports it as one.
	if size := reflect.TypeOf(Tag{}).Size(); size > maxSize {
		t.Errorf("size of Tag was %d; want <= %d", size, maxSize)
	}
}
// TestIsRoot checks that the zero Tag is the root and that, for every entry of
// parseTests, IsRoot is true exactly when the entry expects language "und"
// with no script, region or extension.
func TestIsRoot(t *testing.T) {
	zero := Tag{}
	if !zero.IsRoot() {
		t.Errorf("unspecified should be root.")
	}
	for i, tc := range parseTests() {
		tag, _ := Parse(tc.in)
		wantRoot := tc.lang == "und" && tc.script == "" && tc.region == "" && tc.ext == ""
		if tag.IsRoot() == wantRoot {
			continue
		}
		t.Errorf("%d: was %v; want %v", i, tag.IsRoot(), wantRoot)
	}
}
// TestEquality checks that a tag compares equal (using ==) to the tag made
// from its own string form and to the tag composed from it.
//
// NOTE(review): only the single parseTests entry at index 48 is exercised;
// confirm whether restricting the range is intentional.
func TestEquality(t *testing.T) {
	for i, tc := range parseTests()[48:49] {
		input := tc.in
		want := Make(input)

		fromString := Make(want.String())
		if fromString != want {
			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, input, fromString, want)
		}

		composed, _ := Compose(want)
		if composed != want {
			t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, input, composed, want)
		}
	}
}
// TestMakeString copies the tags of a second parsed tag into a first one via
// setTagsFrom, rebuilds the string with remakeString (twice), and checks both
// the resulting string and the number of allocations per remakeString call.
func TestMakeString(t *testing.T) {
	type testCase struct{ in, out string }
	cases := []testCase{
		{"und", "und"},
		{"und", "und-CW"},
		{"nl", "nl-NL"},
		{"de-1901", "nl-1901"},
		{"de-1901", "de-Arab-1901"},
		{"x-a-b", "de-Arab-x-a-b"},
		{"x-a-b", "x-a-b"},
	}
	const rounds = 2
	for i, tc := range cases {
		id, _ := Parse(tc.in)
		mod, _ := Parse(tc.out)
		id.setTagsFrom(mod)
		for j := 0; j < rounds; j++ {
			id.remakeString()
			if id.String() != tc.out {
				t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tc.out)
			}
		}
		// The bytes to string conversion as used in remakeString
		// occasionally measures as more than one alloc, breaking this test.
		// To alleviate this we set the number of runs to more than 1.
		allocs := testtext.AllocsPerRun(8, id.remakeString)
		if allocs > 1 {
			t.Errorf("%d: # allocs got %.1f; want <= 1", i, allocs)
		}
	}
}
// TestCompactIndex checks both results of CompactIndex (the index and the ok
// flag) for a list of tags parsed without canonicalization.
func TestCompactIndex(t *testing.T) {
	type testCase struct {
		tag   string
		index int
		ok    bool
	}
	cases := []testCase{
		// TODO: these values will change with each CLDR update. This issue
		// will be solved if we decide to fix the indexes.
		{"und", 0, true},
		{"ca-ES-valencia", 1, true},
		{"ca-ES-valencia-u-va-posix", 0, false},
		{"ca-ES-valencia-u-co-phonebk", 1, true},
		{"ca-ES-valencia-u-co-phonebk-va-posix", 0, false},
		{"x-klingon", 0, false},
		{"en-US", 232, true},
		{"en-US-u-va-posix", 2, true},
		{"en", 136, true},
		{"en-u-co-phonebk", 136, true},
		{"en-001", 137, true},
		{"sh", 0, false}, // We don't normalize.
	}
	for _, tc := range cases {
		gotIndex, gotOK := CompactIndex(Raw.MustParse(tc.tag))
		if gotIndex == tc.index && gotOK == tc.ok {
			continue
		}
		t.Errorf("%s: got %d, %v; want %d %v", tc.tag, gotIndex, gotOK, tc.index, tc.ok)
	}
}
// TestMarshal checks that each input string survives a round trip through
// Tag.UnmarshalText followed by Tag.MarshalText unchanged and without errors.
func TestMarshal(t *testing.T) {
	// NOTE(review): the original carried a TODO about values changing with
	// each CLDR update; no index values are checked here, so it looks copied
	// from TestCompactIndex — confirm.
	inputs := []string{
		"und",
		"ca-ES-valencia",
		"ca-ES-valencia-u-va-posix",
		"ca-ES-valencia-u-co-phonebk",
		"ca-ES-valencia-u-co-phonebk-va-posix",
		"x-klingon",
		"en-US",
		"en-US-u-va-posix",
		"en",
		"en-u-co-phonebk",
		"en-001",
		"sh",
	}
	for _, in := range inputs {
		var tag Tag
		if err := tag.UnmarshalText([]byte(in)); err != nil {
			t.Errorf("UnmarshalText(%q): unexpected error: %v", in, err)
		}
		text, err := tag.MarshalText()
		if err != nil {
			t.Errorf("MarshalText(%q): unexpected error: %v", in, err)
		}
		got := string(text)
		if got != in {
			t.Errorf("%s: got %q; want %q", in, got, in)
		}
	}
}
// TestBase checks the base language and the confidence returned by Tag.Base
// for a list of parsed tags.
func TestBase(t *testing.T) {
	type testCase struct {
		loc, lang string
		conf      Confidence
	}
	cases := []testCase{
		{"und", "en", Low},
		{"x-abc", "und", No},
		{"en", "en", Exact},
		{"und-Cyrl", "ru", High},
		// If a region is not included, the official language should be English.
		{"und-US", "en", High},
		// TODO: not-explicitly listed scripts should probably be und, No
		// Modify addTags to return info on how the match was derived.
		// {"und-Aghb", "und", No},
	}
	for i, tc := range cases {
		tag, _ := Parse(tc.loc)
		gotLang, gotConf := tag.Base()
		if gotLang.String() != tc.lang {
			t.Errorf("%d: language was %s; want %s", i, gotLang, tc.lang)
		}
		if gotConf != tc.conf {
			t.Errorf("%d: confidence was %d; want %d", i, gotConf, tc.conf)
		}
	}
}
// TestParseBase checks the string form of the Base returned by ParseBase and
// whether an error was returned, and compares the Base with the base of the
// raw tag made from the expected output.
func TestParseBase(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	cases := []testCase{
		{"en", "en", true},
		{"EN", "en", true},
		{"nld", "nl", true},
		{"dut", "dut", true},  // bibliographic
		{"aaj", "und", false}, // unknown
		{"qaa", "qaa", true},
		{"a", "und", false},
		{"", "und", false},
		{"aaaa", "und", false},
	}
	for i, tc := range cases {
		got, err := ParseBase(tc.in)
		parsedOK := err == nil
		if got.String() != tc.out || parsedOK != tc.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, err == nil, tc.out, tc.ok)
		}
		want, _, _ := Raw.Make(tc.out).Raw()
		if got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
		}
	}
}
// TestScript checks the script and the confidence returned by Tag.Script for
// a list of parsed tags.
func TestScript(t *testing.T) {
	type testCase struct {
		loc, scr string
		conf     Confidence
	}
	cases := []testCase{
		{"und", "Latn", Low},
		{"en-Latn", "Latn", Exact},
		{"en", "Latn", High},
		{"sr", "Cyrl", Low},
		{"kk", "Cyrl", High},
		{"kk-CN", "Arab", Low},
		{"cmn", "Hans", Low},
		{"ru", "Cyrl", High},
		{"ru-RU", "Cyrl", High},
		{"yue", "Hant", Low},
		{"x-abc", "Zzzz", Low},
		{"und-zyyy", "Zyyy", Exact},
	}
	for i, tc := range cases {
		tag, _ := Parse(tc.loc)
		gotScript, gotConf := tag.Script()
		if gotScript.String() != tc.scr {
			t.Errorf("%d:%s: script was %s; want %s", i, tc.loc, gotScript, tc.scr)
		}
		if gotConf != tc.conf {
			t.Errorf("%d:%s: confidence was %d; want %d", i, tc.loc, gotConf, tc.conf)
		}
	}
}
// TestParseScript checks the string form of the Script returned by
// ParseScript and whether an error was returned. On success it also compares
// the Script with the script of the raw tag "und-<out>".
func TestParseScript(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	cases := []testCase{
		{"Latn", "Latn", true},
		{"zzzz", "Zzzz", true},
		{"zyyy", "Zyyy", true},
		{"Latm", "Zzzz", false},
		{"Zzz", "Zzzz", false},
		{"", "Zzzz", false},
		{"Zzzxx", "Zzzz", false},
	}
	for i, tc := range cases {
		got, err := ParseScript(tc.in)
		parsedOK := err == nil
		if got.String() != tc.out || parsedOK != tc.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, err == nil, tc.out, tc.ok)
		}
		if err != nil {
			continue
		}
		_, want, _ := Raw.Make("und-" + tc.out).Raw()
		if got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
		}
	}
}
// TestRegion checks the region and the confidence returned by Tag.Region for
// a list of tags parsed without canonicalization.
func TestRegion(t *testing.T) {
	type testCase struct {
		loc, reg string
		conf     Confidence
	}
	cases := []testCase{
		{"und", "US", Low},
		{"en", "US", Low},
		{"zh-Hant", "TW", Low},
		{"en-US", "US", Exact},
		{"cmn", "CN", Low},
		{"ru", "RU", Low},
		{"yue", "HK", Low},
		{"x-abc", "ZZ", Low},
	}
	for i, tc := range cases {
		tag, _ := Raw.Parse(tc.loc)
		gotRegion, gotConf := tag.Region()
		if gotRegion.String() != tc.reg {
			t.Errorf("%d:%s: region was %s; want %s", i, tc.loc, gotRegion, tc.reg)
		}
		if gotConf != tc.conf {
			t.Errorf("%d:%s: confidence was %d; want %d", i, tc.loc, gotConf, tc.conf)
		}
	}
}
// TestEncodeM49 checks EncodeM49 against a few known codes and then verifies,
// for every value from 1 through 1000, that a successful result never has an
// M49 code of zero.
func TestEncodeM49(t *testing.T) {
	type testCase struct {
		m49  int
		code string
		ok   bool
	}
	cases := []testCase{
		{1, "001", true},
		{840, "US", true},
		{899, "ZZ", false},
	}
	for i, tc := range cases {
		region, err := EncodeM49(tc.m49)
		encodedOK := err == nil
		if region.String() != tc.code || encodedOK != tc.ok {
			t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tc.m49, region, err == nil, tc.code, tc.ok)
		}
	}
	for code := 1; code <= 1000; code++ {
		region, err := EncodeM49(code)
		if err != nil {
			continue
		}
		if region.M49() == 0 {
			t.Errorf("%d has no error, but maps to undefined region", code)
		}
	}
}
// TestParseRegion checks the string form of the Region returned by
// ParseRegion and whether an error was returned. On success it also compares
// the Region with the region of the raw tag "und-<out>".
func TestParseRegion(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	cases := []testCase{
		{"001", "001", true},
		{"840", "US", true},
		{"899", "ZZ", false},
		{"USA", "US", true},
		{"US", "US", true},
		{"BC", "ZZ", false},
		{"C", "ZZ", false},
		{"CCCC", "ZZ", false},
		{"01", "ZZ", false},
	}
	for i, tc := range cases {
		got, err := ParseRegion(tc.in)
		parsedOK := err == nil
		if got.String() != tc.out || parsedOK != tc.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, err == nil, tc.out, tc.ok)
		}
		if err != nil {
			continue
		}
		_, _, want := Raw.Make("und-" + tc.out).Raw()
		if got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
		}
	}
}
// TestIsCountry checks Region.IsCountry for regions built directly from the
// ID returned by getRegionID.
func TestIsCountry(t *testing.T) {
	type testCase struct {
		reg     string
		country bool
	}
	cases := []testCase{
		{"US", true},
		{"001", false},
		{"958", false},
		{"419", false},
		{"203", true},
		{"020", true},
		{"900", false},
		{"999", false},
		{"QO", false},
		{"EU", false},
		{"AA", false},
		{"XK", true},
	}
	for i, tc := range cases {
		id, _ := getRegionID([]byte(tc.reg))
		region := Region{id}
		if region.IsCountry() == tc.country {
			continue
		}
		t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tc.reg, region.IsCountry(), tc.country)
	}
}
// TestIsGroup checks Region.IsGroup for regions built directly from the ID
// returned by getRegionID.
func TestIsGroup(t *testing.T) {
	type testCase struct {
		reg   string
		group bool
	}
	cases := []testCase{
		{"US", false},
		{"001", true},
		{"958", false},
		{"419", true},
		{"203", false},
		{"020", false},
		{"900", false},
		{"999", false},
		{"QO", true},
		{"EU", true},
		{"AA", false},
		{"XK", false},
	}
	for i, tc := range cases {
		id, _ := getRegionID([]byte(tc.reg))
		region := Region{id}
		if region.IsGroup() == tc.group {
			continue
		}
		t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tc.reg, region.IsGroup(), tc.group)
	}
}
// TestContains checks Region.Contains for pairs of regions built directly
// from the IDs returned by getRegionID.
func TestContains(t *testing.T) {
	type testCase struct {
		enclosing, contained string
		contains             bool
	}
	cases := []testCase{
		// A region contains itself.
		{"US", "US", true},
		{"001", "001", true},
		// Direct containment.
		{"001", "002", true},
		{"039", "XK", true},
		{"150", "XK", true},
		{"EU", "AT", true},
		{"QO", "AQ", true},
		// Indirect containment.
		{"001", "US", true},
		{"001", "419", true},
		{"001", "013", true},
		// No containment.
		{"US", "001", false},
		{"155", "EU", false},
	}
	for i, tc := range cases {
		outerID, _ := getRegionID([]byte(tc.enclosing))
		innerID, _ := getRegionID([]byte(tc.contained))
		outer := Region{outerID}
		got := outer.Contains(Region{innerID})
		if got != tc.contains {
			t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tc.enclosing, tc.contained, got, tc.contains)
		}
	}
}
// TestRegionCanonicalize checks that Region.Canonicalize maps each input
// region to the expected region.
func TestRegionCanonicalize(t *testing.T) {
	type testCase struct{ in, out string }
	cases := []testCase{
		{"UK", "GB"},
		{"TP", "TL"},
		{"QU", "EU"},
		{"SU", "SU"},
		{"VD", "VN"},
		{"DD", "DE"},
	}
	for i, tc := range cases {
		region := MustParseRegion(tc.in)
		want := MustParseRegion(tc.out)
		got := region.Canonicalize()
		if got != want {
			t.Errorf("%d: got %v; want %v", i, got, want)
		}
	}
}
// TestRegionTLD checks Region.TLD for a list of regions: whether an error is
// returned and which region is returned. An expected output of "ZZ" stands
// for the zero Region.
func TestRegionTLD(t *testing.T) {
	type testCase struct {
		in, out string
		ok      bool
	}
	cases := []testCase{
		{"EH", "EH", true},
		{"FR", "FR", true},
		{"TL", "TL", true},
		// In ccTLD before in ISO.
		{"GG", "GG", true},
		// Non-standard assignment of ccTLD to ISO code.
		{"GB", "UK", true},
		// Exceptionally reserved in ISO and valid ccTLD.
		{"UK", "UK", true},
		{"AC", "AC", true},
		{"EU", "EU", true},
		{"SU", "SU", true},
		// Exceptionally reserved in ISO and invalid ccTLD.
		{"CP", "ZZ", false},
		{"DG", "ZZ", false},
		{"EA", "ZZ", false},
		{"FX", "ZZ", false},
		{"IC", "ZZ", false},
		{"TA", "ZZ", false},
		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
		// it is still being phased out.
		{"AN", "AN", true},
		{"TP", "TP", true},
		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
		// Defined in package language as it has a mapping in CLDR.
		{"BU", "ZZ", false},
		{"CS", "ZZ", false},
		{"NT", "ZZ", false},
		{"YU", "ZZ", false},
		{"ZR", "ZZ", false},
		// Not defined in package: SF.
		// Indeterminately reserved in ISO.
		// Defined in package language as it has a legacy mapping in CLDR.
		{"DY", "ZZ", false},
		{"RH", "ZZ", false},
		{"VD", "ZZ", false},
		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
		// RN, RP, WG, WL, WV, and YV.
		// Not assigned in ISO, but legacy definitions in CLDR.
		{"DD", "ZZ", false},
		{"YD", "ZZ", false},
		// Normal mappings but somewhat special status in ccTLD.
		{"BL", "BL", true},
		{"MF", "MF", true},
		{"BV", "BV", true},
		{"SJ", "SJ", true},
		// Have values when normalized, but not as is.
		{"QU", "ZZ", false},
		// ISO Private Use.
		{"AA", "ZZ", false},
		{"QM", "ZZ", false},
		{"QO", "ZZ", false},
		{"XA", "ZZ", false},
		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
	}
	for _, tc := range cases {
		if tc.in == "" {
			continue
		}
		region := MustParseRegion(tc.in)
		// The zero Region is expected unless a concrete TLD is listed.
		var want Region
		if tc.out != "ZZ" {
			want = MustParseRegion(tc.out)
		}
		tld, err := region.TLD()
		gotOK := err == nil
		if gotOK != tc.ok {
			t.Errorf("error(%v): got %v; want %v", region, gotOK, tc.ok)
		}
		if tld != want {
			t.Errorf("TLD(%v): got %v; want %v", region, tld, want)
		}
	}
}
// TestCanonicalize checks CanonType.Canonicalize for a list of raw-parsed
// tags and canonicalization options, validates the internal offsets of the
// result, and finally checks that All.Canonicalize is idempotent for every
// supported base language.
func TestCanonicalize(t *testing.T) {
	// TODO: do a full test using CLDR data in a separate regression test.
	type testCase struct {
		in, out string
		option  CanonType
	}
	cases := []testCase{
		{"en-Latn", "en", SuppressScript},
		{"sr-Cyrl", "sr-Cyrl", SuppressScript},
		{"sh", "sr-Latn", Legacy},
		{"sh-HR", "sr-Latn-HR", Legacy},
		{"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
		{"tl", "fil", Legacy},
		{"no", "no", Legacy},
		{"no", "nb", Legacy | CLDR},
		{"cmn", "cmn", Legacy},
		{"cmn", "zh", Macro},
		{"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
		{"yue", "yue", Macro},
		{"nb", "no", Macro},
		{"nb", "nb", Macro | CLDR},
		{"no", "no", Macro},
		{"no", "no", Macro | CLDR},
		{"iw", "he", DeprecatedBase},
		{"iw", "he", Deprecated | CLDR},
		{"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
		{"alb", "sq", Legacy},       // bibliographic
		{"dut", "nl", Legacy},       // bibliographic
		// As of CLDR 25, mo is no longer considered a legacy mapping.
		{"mo", "mo", Legacy | CLDR},
		{"und-AN", "und-AN", Deprecated},
		{"und-YD", "und-YE", DeprecatedRegion},
		{"und-YD", "und-YD", DeprecatedBase},
		{"und-Qaai", "und-Zinh", DeprecatedScript},
		{"und-Qaai", "und-Qaai", DeprecatedBase},
		{"drh", "mn", All}, // drh -> khk -> mn
	}
	for i, tc := range cases {
		tag, _ := Raw.Parse(tc.in)
		tag, _ = tc.option.Canonicalize(tag)
		if got := tag.String(); got != tc.out {
			t.Errorf("%d:%s: was %s; want %s", i, tc.in, tag.String(), tc.out)
		}
		variantStart, extStart := int(tag.pVariant), int(tag.pExt)
		if variantStart > extStart || extStart > len(tag.str) {
			t.Errorf("%d:%s:offsets %d <= %d <= %d must be true", i, tc.in, tag.pVariant, tag.pExt, len(tag.str))
		}
	}
	// Test idempotence.
	for _, base := range Supported.BaseLanguages() {
		tag, _ := Raw.Compose(base)
		once, _ := All.Canonicalize(tag)
		twice, _ := All.Canonicalize(once)
		if once != twice {
			t.Errorf("idem(%s): got %s; want %s", tag, once, twice)
		}
	}
}
// TestTypeForKey checks the value returned by Tag.TypeForKey for a given key
// on a list of tags; an empty expected value means no type is reported.
func TestTypeForKey(t *testing.T) {
	type testCase struct{ key, in, out string }
	cases := []testCase{
		{"co", "en", ""},
		{"co", "en-u-abc", ""},
		{"co", "en-u-co-phonebk", "phonebk"},
		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
		{"co", "x-foo-u-co-phonebk", ""},
		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
		{"kc", "cmn-u-co-stroke", ""},
	}
	for _, tc := range cases {
		got := Make(tc.in).TypeForKey(tc.key)
		if got != tc.out {
			t.Errorf("%q[%q]: was %q; want %q", tc.in, tc.key, got, tc.out)
		}
	}
}
// TestSetTypeForKey checks Tag.SetTypeForKey for a list of key/value pairs
// and input tags. For each case it verifies the string form of the returned
// tag, whether an error was returned and, when there was no error, that
// TypeForKey on the result yields the value that was set.
func TestSetTypeForKey(t *testing.T) {
tests := []struct {
key, value, in, out string
err bool
}{
// replace existing value
{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
// add to existing -u extension
{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
// remove pair
{"co", "", "en-u-co-phonebk", "en", false},
{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
{"co", "", "en", "en", false},
// add -u extension
{"co", "pinyin", "en", "en-u-co-pinyin", false},
{"co", "pinyin", "und", "und-u-co-pinyin", false},
{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
// error on invalid values
{"co", "pinyinxxx", "en", "en", true},
{"co", "piny.n", "en", "en", true},
{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
{"col", "pinyin", "en", "en", true},
{"co", "cu", "en", "en", true},
// error when setting on a private use tag
{"co", "phonebook", "x-foo", "x-foo", true},
}
for i, tt := range tests {
tag := Make(tt.in)
// The three checks are chained: only the first failing one is reported.
if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
} else if (err != nil) != tt.err {
t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
}
if len(tag.String()) <= 3 {
// Simulate a tag for which the string has not been set.
tag.str, tag.pExt, tag.pVariant = "", 0, 0
// Note: tag and err below shadow the outer tag for this scope only.
if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
}
}
}
}
}
// TestFindKeyAndType checks the start and end offsets and the hasExt flag
// returned by findTypeForKey. When start and end differ, the substring of the
// input between them is compared with the expected output; otherwise hasExt
// is checked and the input up to start is compared with the expected output.
func TestFindKeyAndType(t *testing.T) {
// out is either the matched type in case of a match or the original
// string up till the insertion point.
tests := []struct {
key string
hasExt bool
in, out string
}{
// Don't search past a private use extension.
{"co", false, "en-x-foo-u-co-pinyin", "en"},
{"co", false, "x-foo-u-co-pinyin", ""},
{"co", false, "en-s-fff-x-foo", "en-s-fff"},
// Insertion points in absence of -u extension.
{"cu", false, "en", ""}, // t.str is ""
{"cu", false, "en-v-va", "en"},
{"cu", false, "en-a-va", "en-a-va"},
{"cu", false, "en-a-va-v-va", "en-a-va"},
{"cu", false, "en-x-a", "en"},
// Tags with the -u extension.
{"co", true, "en-u-co-standard", "standard"},
{"co", true, "yue-u-co-pinyin", "pinyin"},
{"co", true, "en-u-co-abc", "abc"},
{"co", true, "en-u-co-abc-def", "abc-def"},
{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
{"co", true, "en-u-co-standard-nu-arab", "standard"},
{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
// Insertion points.
{"cu", true, "en-u-co-standard", "en-u-co-standard"},
{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
{"cu", true, "en-u-co-abc", "en-u-co-abc"},
{"cu", true, "en-u-nu-arabic", "en-u"},
{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
}
for i, tt := range tests {
start, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
if start != end {
// A non-empty range was returned: compare it with the expected type.
res := tt.in[start:end]
if res != tt.out {
t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
}
} else {
// An empty range was returned: start is treated as the insertion point.
if hasExt != tt.hasExt {
t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
continue
}
if tt.in[:start] != tt.out {
t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
}
}
}
}
// TestParent checks that Tag.Parent of each raw-parsed input tag equals the
// raw-parsed expected tag.
func TestParent(t *testing.T) {
	type testCase struct{ in, out string }
	cases := []testCase{
		// Strip variants and extensions first
		{"de-u-co-phonebk", "de"},
		{"de-1994", "de"},
		{"de-Latn-1994", "de"}, // remove superfluous script.
		// Ensure the canonical Tag for an entry is in the chain for base-script
		// pairs.
		{"zh-Hans", "zh"},
		// Skip the script if it is the maximized version. CLDR files for the
		// skipped tag are always empty.
		{"zh-Hans-TW", "zh"},
		{"zh-Hans-CN", "zh"},
		// Insert the script if the maximized script is not the same as the
		// maximized script of the base language.
		{"zh-TW", "zh-Hant"},
		{"zh-HK", "zh-Hant"},
		{"zh-Hant-TW", "zh-Hant"},
		{"zh-Hant-HK", "zh-Hant"},
		// Non-default script skips to und.
		// CLDR
		{"az-Cyrl", "und"},
		{"bs-Cyrl", "und"},
		{"en-Dsrt", "und"},
		{"ha-Arab", "und"},
		{"mn-Mong", "und"},
		{"pa-Arab", "und"},
		{"shi-Latn", "und"},
		{"sr-Latn", "und"},
		{"uz-Arab", "und"},
		{"uz-Cyrl", "und"},
		{"vai-Latn", "und"},
		{"zh-Hant", "und"},
		// extra
		{"nl-Cyrl", "und"},
		// World english inherits from en-001.
		{"en-150", "en-001"},
		{"en-AU", "en-001"},
		{"en-BE", "en-001"},
		{"en-GG", "en-001"},
		{"en-GI", "en-001"},
		{"en-HK", "en-001"},
		{"en-IE", "en-001"},
		{"en-IM", "en-001"},
		{"en-IN", "en-001"},
		{"en-JE", "en-001"},
		{"en-MT", "en-001"},
		{"en-NZ", "en-001"},
		{"en-PK", "en-001"},
		{"en-SG", "en-001"},
		// Spanish in Latin-American countries have es-419 as parent.
		{"es-AR", "es-419"},
		{"es-BO", "es-419"},
		{"es-CL", "es-419"},
		{"es-CO", "es-419"},
		{"es-CR", "es-419"},
		{"es-CU", "es-419"},
		{"es-DO", "es-419"},
		{"es-EC", "es-419"},
		{"es-GT", "es-419"},
		{"es-HN", "es-419"},
		{"es-MX", "es-419"},
		{"es-NI", "es-419"},
		{"es-PA", "es-419"},
		{"es-PE", "es-419"},
		{"es-PR", "es-419"},
		{"es-PY", "es-419"},
		{"es-SV", "es-419"},
		{"es-US", "es-419"},
		{"es-UY", "es-419"},
		{"es-VE", "es-419"},
		// exceptions (according to CLDR)
		{"es-CW", "es"},
		// Inherit from pt-PT, instead of pt for these countries.
		{"pt-AO", "pt-PT"},
		{"pt-CV", "pt-PT"},
		{"pt-GW", "pt-PT"},
		{"pt-MO", "pt-PT"},
		{"pt-MZ", "pt-PT"},
		{"pt-ST", "pt-PT"},
		{"pt-TL", "pt-PT"},
	}
	for _, tc := range cases {
		tag := Raw.MustParse(tc.in)
		want := Raw.MustParse(tc.out)
		if want == tag.Parent() {
			continue
		}
		t.Errorf("%s: was %v; want %v", tc.in, tag.Parent(), want)
	}
}
// Tag strings used as input for the Parse benchmarks (see doParse), grouped
// by the kind of work parsing them is expected to involve.
var (
// Tags without error that don't need to be changed.
benchBasic = []string{
"en",
"en-Latn",
"en-GB",
"za",
"zh-Hant",
"zh",
"zh-HK",
"ar-MK",
"en-CA",
"fr-CA",
"fr-CH",
"fr",
"lv",
"he-IT",
"tlh",
"ja",
"ja-Jpan",
"ja-Jpan-JP",
"de-1996",
"de-CH",
"sr",
"sr-Latn",
}
// Tags with extensions, no changes required.
benchExt = []string{
"x-a-b-c-d",
"x-aa-bbbb-cccccccc-d",
"en-x_cc-b-bbb-a-aaa",
"en-c_cc-b-bbb-a-aaa-x-x",
"en-u-co-phonebk",
"en-Cyrl-u-co-phonebk",
"en-US-u-co-phonebk-cu-xau",
"en-nedix-u-co-phonebk",
"en-t-t0-abcd",
"en-t-nl-latn",
"en-t-t0-abcd-x-a",
}
// Change, but no memory allocation required.
benchSimpleChange = []string{
"EN",
"i-klingon",
"en-latn",
"zh-cmn-Hans-CN",
"iw-NL",
}
// Change and memory allocation required.
benchChangeAlloc = []string{
"en-c_cc-b-bbb-a-aaa",
"en-u-cu-xua-co-phonebk",
"en-u-cu-xua-co-phonebk-a-cd",
"en-u-def-abc-cu-xua-co-phonebk",
"en-t-en-Cyrl-NL-1994",
"en-t-en-Cyrl-NL-1994-t0-abc-def",
}
// Tags that result in errors.
benchErr = []string{
// IllFormed
"x_A.-B-C_D",
"en-u-cu-co-phonebk",
"en-u-cu-xau-co",
"en-t-nl-abcd",
// Invalid
"xx",
"nl-Uuuu",
"nl-QB",
}
// benchChange is benchSimpleChange followed by benchChangeAlloc.
benchChange = append(benchSimpleChange, benchChangeAlloc...)
// benchAll concatenates benchBasic, benchExt, benchChange and benchErr, in
// that order.
benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
)
// doParse is the shared body of the Parse benchmarks: it calls Parse b.N
// times, cycling through tag. The results of Parse are discarded.
func doParse(b *testing.B, tag []string) {
	for i := 0; i < b.N; i++ {
		// Pick one tag per iteration (round-robin) rather than parsing the
		// whole list each time, so that ns/op stays somewhat meaningful.
		idx := i % len(tag)
		Parse(tag[idx])
	}
}
// BenchmarkParse runs doParse over benchAll, the concatenation of all
// benchmark tag lists.
func BenchmarkParse(b *testing.B) {
doParse(b, benchAll)
}
// BenchmarkParseBasic runs doParse over benchBasic.
func BenchmarkParseBasic(b *testing.B) {
doParse(b, benchBasic)
}
// BenchmarkParseError runs doParse over benchErr.
func BenchmarkParseError(b *testing.B) {
doParse(b, benchErr)
}
// BenchmarkParseSimpleChange runs doParse over benchSimpleChange.
func BenchmarkParseSimpleChange(b *testing.B) {
doParse(b, benchSimpleChange)
}
// BenchmarkParseChangeAlloc runs doParse over benchChangeAlloc.
func BenchmarkParseChangeAlloc(b *testing.B) {
doParse(b, benchChangeAlloc)
}

View File

@ -1,457 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"testing"
"golang.org/x/text/internal/tag"
)
// b returns the bytes of s as a new slice. It is a shorthand used by the
// tests in this file to pass string literals to functions taking []byte.
func b(s string) []byte {
	raw := []byte(s)
	return raw
}
// TestLangID exercises the language ID lookup helpers (getLangID,
// getLangISO2, getLangISO3), normLang, and the String and ISO3 methods of the
// resulting ID.
//
// Table fields:
//   id    - the input passed to getLangID
//   bcp47 - the expected result of String()
//   iso3  - the expected result of ISO3(); only its first three characters
//           are compared, and the reverse ISO3 lookups are run only when it
//           is exactly three characters long
//   norm  - the input whose ID normLang is expected to return; when empty,
//           normLang is expected to return the ID unchanged
//   err   - the error expected from getLangID
func TestLangID(t *testing.T) {
tests := []struct {
id, bcp47, iso3, norm string
err error
}{
{id: "", bcp47: "und", iso3: "und", err: errSyntax},
{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
{id: "xxx", bcp47: "und", iso3: "und", err: mkErrInvalid([]byte("xxx"))},
{id: "und", bcp47: "und", iso3: "und"},
{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
{id: "jrb", bcp47: "jrb", iso3: "jrb"},
{id: "es", bcp47: "es", iso3: "spa"},
{id: "spa", bcp47: "es", iso3: "spa"},
{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
{id: "ar", bcp47: "ar", iso3: "ara"},
{id: "kw", bcp47: "kw", iso3: "cor"},
{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
{id: "ar", bcp47: "ar", iso3: "ara"},
{id: "kur", bcp47: "ku", iso3: "kur"},
{id: "nl", bcp47: "nl", iso3: "nld"},
{id: "NL", bcp47: "nl", iso3: "nld"},
{id: "gsw", bcp47: "gsw", iso3: "gsw"},
{id: "gSW", bcp47: "gsw", iso3: "gsw"},
{id: "und", bcp47: "und", iso3: "und"},
{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
}
for i, tt := range tests {
// want is the ID that every other lookup below must agree with.
want, err := getLangID(b(tt.id))
if err != tt.err {
t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
}
// Entries for which the lookup failed have no ID to check further.
if err != nil {
continue
}
// The two-letter lookup is only compared when bcp47 has two letters.
if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
}
// Entries whose iso3 carries a trailing "-" (length 4) skip the reverse
// three-letter lookups.
if len(tt.iso3) == 3 {
if id, _ := getLangISO3(b(tt.iso3)); want != id {
t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
}
if id, _ := getLangID(b(tt.iso3)); want != id {
t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
}
}
// norm is the ID normLang is expected to return: the ID of tt.norm when
// set, otherwise want itself.
norm := want
if tt.norm != "" {
norm, _ = getLangID(b(tt.norm))
}
id, _ := normLang(want)
if id != norm {
t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
}
if id := want.String(); tt.bcp47 != id {
t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
}
// Only the first three characters of tt.iso3 are significant here.
if id := want.ISO3(); tt.iso3[:3] != id {
t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
}
}
}
// TestGrandfathered checks that Raw.Make applied to each legacy input yields
// the same tag as Raw.MustParse applied to the expected modern form.
func TestGrandfathered(t *testing.T) {
	cases := []struct{ in, out string }{
		{"art-lojban", "jbo"},
		{"i-ami", "ami"},
		{"i-bnn", "bnn"},
		{"i-hak", "hak"},
		{"i-klingon", "tlh"},
		{"i-lux", "lb"},
		{"i-navajo", "nv"},
		{"i-pwn", "pwn"},
		{"i-tao", "tao"},
		{"i-tay", "tay"},
		{"i-tsu", "tsu"},
		{"no-bok", "nb"},
		{"no-nyn", "nn"},
		{"sgn-BE-FR", "sfb"},
		{"sgn-BE-NL", "vgt"},
		{"sgn-CH-DE", "sgg"},
		{"sgn-ch-de", "sgg"},
		{"zh-guoyu", "cmn"},
		{"zh-hakka", "hak"},
		{"zh-min-nan", "nan"},
		{"zh-xiang", "hsn"},

		// Grandfathered tags with no modern replacement are converted as
		// follows:
		{"cel-gaulish", "xtg-x-cel-gaulish"},
		{"en-GB-oed", "en-GB-oxendict"},
		{"en-gb-oed", "en-GB-oxendict"},
		{"i-default", "en-x-i-default"},
		{"i-enochian", "und-x-i-enochian"},
		{"i-mingo", "see-x-i-mingo"},
		{"zh-min", "nan-x-zh-min"},

		{"root", "und"},
		{"en_US_POSIX", "en-US-u-va-posix"},
		{"en_us_posix", "en-US-u-va-posix"},
		{"en-us-posix", "en-US-u-va-posix"},
	}
	for _, tc := range cases {
		got, want := Raw.Make(tc.in), Raw.MustParse(tc.out)
		if got == want {
			continue
		}
		t.Errorf("%s: got %q; want %q", tc.in, got, want)
	}
}
// TestRegionID checks getRegionID and, for two-character inputs,
// getRegionISO2 by comparing the String form of the returned ID with the
// expected output. Inputs starting with '_' are negative cases: the rest of
// the string must be rejected by getRegionID with an error.
func TestRegionID(t *testing.T) {
	cases := []struct {
		in, out string
	}{
		{"_ ", ""},
		{"_000", ""},
		{"419", "419"},
		{"AA", "AA"},
		{"ATF", "TF"},
		{"HV", "HV"},
		{"CT", "CT"},
		{"DY", "DY"},
		{"IC", "IC"},
		{"FQ", "FQ"},
		{"JT", "JT"},
		{"ZZ", "ZZ"},
		{"EU", "EU"},
		{"QO", "QO"},
		{"FX", "FX"},
	}
	for i, tc := range cases {
		if tc.in[0] == '_' {
			// Negative case: strip the marker and expect an error.
			bad := tc.in[1:]
			_, err := getRegionID(b(bad))
			if err == nil {
				t.Errorf("%d:err(%s): found nil; want error", i, bad)
			}
			continue
		}

		region, _ := getRegionID(b(tc.in))
		if got := region.String(); got != tc.out {
			t.Errorf("%d:%s: found %q; want %q", i, tc.in, got, tc.out)
		}

		if len(tc.in) != 2 {
			continue
		}
		iso2, _ := getRegionISO2(b(tc.in))
		if got := iso2.String(); got != tc.out {
			t.Errorf("%d:getISO2(%s): found %q; want %q", i, tc.in, got, tc.out)
		}
	}
}
// TestRegionType checks that typ() of the region parsed from each code equals
// the expected combination of type flags.
func TestRegionType(t *testing.T) {
	cases := []struct {
		r string
		t byte
	}{
		{"NL", bcp47Region | ccTLD},
		{"EU", bcp47Region | ccTLD}, // exceptionally reserved
		{"AN", bcp47Region | ccTLD}, // transitionally reserved

		{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
		{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47

		{"XA", iso3166UserAssigned | bcp47Region},
		{"ZZ", iso3166UserAssigned | bcp47Region},
		{"AA", iso3166UserAssigned | bcp47Region},
		{"QO", iso3166UserAssigned | bcp47Region},
		{"QM", iso3166UserAssigned | bcp47Region},
		{"XK", iso3166UserAssigned | bcp47Region},

		{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
	}
	for _, tc := range cases {
		got := MustParseRegion(tc.r).typ()
		if got == tc.t {
			continue
		}
		t.Errorf("Type(%s): got %x; want %x", tc.r, got, tc.t)
	}
}
// TestRegionISO3 checks the mapping between region IDs and three-letter
// codes in both directions.
//
// For each entry, ISO3() of the region looked up from "from" must equal
// "iso3". Looking "iso3" up with getRegionISO3 must then give the region for
// "to", or for "from" when "to" is empty.
func TestRegionISO3(t *testing.T) {
	cases := []struct {
		from, iso3, to string
	}{
		{" ", "ZZZ", "ZZ"},
		{"000", "ZZZ", "ZZ"},
		{"AA", "AAA", ""},
		{"CT", "CTE", ""},
		{"DY", "DHY", ""},
		{"EU", "QUU", ""},
		{"HV", "HVO", ""},
		{"IC", "ZZZ", "ZZ"},
		{"JT", "JTN", ""},
		{"PZ", "PCZ", ""},
		{"QU", "QUU", "EU"},
		{"QO", "QOO", ""},
		{"YD", "YMD", ""},
		{"FQ", "ATF", "TF"},
		{"TF", "ATF", ""},
		{"FX", "FXX", ""},
		{"ZZ", "ZZZ", ""},
		{"419", "ZZZ", "ZZ"},
	}
	for _, tc := range cases {
		src, _ := getRegionID(b(tc.from))
		if got := src.ISO3(); got != tc.iso3 {
			t.Errorf("iso3(%q): found %q; want %q", tc.from, got, tc.iso3)
		}
		if tc.iso3 == "" {
			continue
		}

		// The reverse lookup targets "to" when given, else "from".
		want := tc.from
		if tc.to != "" {
			want = tc.to
		}
		wantID, _ := getRegionID(b(want))
		gotID, _ := getRegionISO3(b(tc.iso3))
		if gotID != wantID {
			t.Errorf("%s: found %q; want %q", tc.iso3, gotID, want)
		}
	}
}
// TestRegionM49 checks the conversion between region IDs and UN M.49 numeric
// codes in both directions.
//
// fromTests: getRegionM49(m49) must return an error exactly when id is empty;
// otherwise it must return the same region as getRegionID(id).
//
// toTests: M49() of the region looked up from id must equal m49.
func TestRegionM49(t *testing.T) {
	fromTests := []struct {
		m49 int
		id  string
	}{
		{0, ""},
		{-1, ""},
		{1000, ""},
		{10000, ""},

		{001, "001"},
		{104, "MM"},
		{180, "CD"},
		{230, "ET"},
		{231, "ET"},
		{249, "FX"},
		{250, "FR"},
		{276, "DE"},
		{278, "DD"},
		{280, "DE"},
		{419, "419"},
		{626, "TL"},
		{736, "SD"},
		{840, "US"},
		{854, "BF"},
		{891, "CS"},
		{899, ""},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{999, "ZZ"},
	}
	for _, tt := range fromTests {
		id, err := getRegionM49(tt.m49)
		// have is what actually happened (an error was returned); want is
		// what the table expects (an empty id means an error is expected).
		if have, want := err != nil, tt.id == ""; have != want {
			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
			continue
		}
		r, _ := getRegionID(b(tt.id))
		if r != id {
			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
		}
	}

	toTests := []struct {
		m49 int
		id  string
	}{
		{0, "000"},
		{0, "IC"}, // Some codes don't have an ID

		{001, "001"},
		{104, "MM"},
		{104, "BU"},
		{180, "CD"},
		{180, "ZR"},
		{231, "ET"},
		{250, "FR"},
		{249, "FX"},
		{276, "DE"},
		{278, "DD"},
		{419, "419"},
		{626, "TL"},
		{626, "TP"},
		{729, "SD"},
		{826, "GB"},
		{840, "US"},
		{854, "BF"},
		{891, "YU"},
		{891, "CS"},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{967, "QU"},
		{999, "ZZ"},
		// For codes that don't have an M49 code use the replacement value,
		// if available.
		{854, "HV"}, // maps to Burkina Faso
	}
	for _, tt := range toTests {
		r, _ := getRegionID(b(tt.id))
		if r.M49() != tt.m49 {
			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
		}
	}
}
// TestRegionDeprecation checks normRegion against a table of region codes
// and their expected replacements.
//
// When the input and the expected output resolve to the same region,
// normRegion must return 0; otherwise it must return the expected output
// region.
func TestRegionDeprecation(t *testing.T) {
	cases := []struct{ in, out string }{
		{"BU", "MM"},
		{"BUR", "MM"},
		{"CT", "KI"},
		{"DD", "DE"},
		{"DDR", "DE"},
		{"DY", "BJ"},
		{"FX", "FR"},
		{"HV", "BF"},
		{"JT", "UM"},
		{"MI", "UM"},
		{"NH", "VU"},
		{"NQ", "AQ"},
		{"PU", "UM"},
		{"PZ", "PA"},
		{"QU", "EU"},
		{"RH", "ZW"},
		{"TP", "TL"},
		{"UK", "GB"},
		{"VD", "VN"},
		{"WK", "UM"},
		{"YD", "YE"},
		{"NL", "NL"},
	}
	for _, tc := range cases {
		src, _ := getRegionID([]byte(tc.in))
		dst, _ := getRegionISO2([]byte(tc.out))
		got := normRegion(src)
		if dst == src {
			// No replacement expected: normRegion must report 0.
			if got != 0 {
				t.Errorf("%s: was %q; want %q", tc.in, got, tc.in)
			}
		} else if got != dst {
			t.Errorf("%s: was %q; want %q", tc.in, got, tc.out)
		}
	}
}
// TestGetScriptID looks each input up in a small hand-built script index and
// compares the returned ID with the expected one. Whenever the expected and
// returned ID is 0, getScriptID must also have returned an error.
func TestGetScriptID(t *testing.T) {
	idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
	cases := []struct {
		in  string
		out scriptID
	}{
		{" ", 0},
		{" ", 0},
		{" ", 0},
		{"", 0},
		{"Aaaa", 0},
		{"Bbbb", 1},
		{"Dddd", 2},
		{"dddd", 2},
		{"dDDD", 2},
		{"Eeee", 3},
		{"Zzzz", 4},
	}
	for i, tc := range cases {
		got, err := getScriptID(idx, b(tc.in))
		if got != tc.out {
			t.Errorf("%d:%s: found %d; want %d", i, tc.in, got, tc.out)
			continue
		}
		if got == 0 && err == nil {
			t.Errorf("%d:%s: no error; expected one", i, tc.in)
		}
	}
}
// TestIsPrivateUse checks the IsPrivateUse method of language, region and
// script IDs. Each table maps a code to the expected result; lookup errors
// are ignored, so codes that fail to resolve are tested via the ID the
// lookup returns alongside the error.
func TestIsPrivateUse(t *testing.T) {
	type privTest struct {
		s       string
		private bool
	}

	langTests := []privTest{
		{"en", false},
		{"und", false},
		{"pzn", false},
		{"qaa", true},
		{"qtz", true},
		{"qua", false},
	}
	for i, tc := range langTests {
		id, _ := getLangID([]byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}

	regionTests := []privTest{
		{"001", false},
		{"419", false},
		{"899", false},
		{"900", false},
		{"957", false},
		{"958", true},
		{"AA", true},
		{"AC", false},
		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
		{"QO", true},  // CLDR grouping, User-assigned in ISO.
		{"QA", false},
		{"QM", true},
		{"QZ", true},
		{"XA", true},
		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
		{"XZ", true},
		{"ZW", false},
		{"ZZ", true},
	}
	for i, tc := range regionTests {
		id, _ := getRegionID([]byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}

	scriptTests := []privTest{
		{"Latn", false},
		{"Laaa", false}, // invalid
		{"Qaaa", true},
		{"Qabx", true},
		{"Qaby", false},
		{"Zyyy", false},
		{"Zzzz", false},
	}
	for i, tc := range scriptTests {
		id, _ := getScriptID(script, []byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}
}

View File

@ -1,505 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"flag"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// verbose is the value of the -verbose command-line flag (default false).
// Per its help text it is meant to enable printing of the matchers' internal
// tables.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
// TestCompliance runs the matcher against every data file below "testdata".
//
// Each file is read with ucd.Parse. For every record, field 0 is the list of
// supported tags, field 1 the list of desired tags, field 2 the expected
// matched tag and field 3 the expected combined tag. Records whose name
// (fields 0 and 1 joined as a path, with spaces removed) is listed in skip
// are not run. Every remaining record becomes a subtest named
// "<file name>/<record name>".
func TestCompliance(t *testing.T) {
	walkErr := filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk reports problems through err, and info may be nil in
			// that case, so bail out before touching info.
			return err
		}
		if info.IsDir() {
			return nil
		}
		r, err := os.Open(file)
		if err != nil {
			t.Fatal(err)
		}
		// Subtests below run synchronously inside ucd.Parse, so the file
		// can be closed as soon as this callback returns.
		defer r.Close()

		ucd.Parse(r, func(p *ucd.Parser) {
			name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
			if skip[name] {
				return
			}
			t.Run(info.Name()+"/"+name, func(t *testing.T) {
				supported := makeTagList(p.String(0))
				desired := makeTagList(p.String(1))
				gotCombined, index, conf := NewMatcher(supported).Match(desired...)

				gotMatch := supported[index]
				wantMatch := mk(p.String(2))
				if gotMatch != wantMatch {
					t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
				}
				// The combined tag is only compared when the expected
				// value parses without error.
				wantCombined, err := Raw.Parse(p.String(3))
				if err == nil && gotCombined != wantCombined {
					t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
				}
			})
		})
		return nil
	})
	if walkErr != nil {
		t.Fatal(walkErr)
	}
}
// skip is the set of compliance test records that TestCompliance does not
// run. Keys are record names as computed there: the supported and desired
// fields joined as a path, with all spaces removed.
var skip = map[string]bool{
// TODO: bugs
// Honor the wildcard match. This may only be useful to select non-exact
// stuff.
"mul,af/nl": true, // match: got "af"; want "mul"
// TODO: include other extensions.
// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
// Inconsistencies with Mark Davis' implementation where it is not clear
// which is better.
// Inconsistencies in combined. I think the Go approach is more appropriate.
// We could use -u-rg- and -u-va- as alternative.
"und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa"
"und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa"
"und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
"und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa"
"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
// The initial number is a threshold. As we don't use scoring, we will not
// implement this.
"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
// match: got "und"; want "fr-Cyrl-CA-fonupa"
// combined: got "und"; want "fr-Cyrl-BE-fonipa"
// Other interesting cases to test:
// - Should same language or same script have the preference if there is
// usually no understanding of the other script?
// - More specific region in desired may replace enclosing supported.
}
// makeTagList splits s at commas, trims surrounding white space from each
// piece and converts every piece to a Tag with mk. The tags are returned in
// input order.
func makeTagList(s string) (tags []Tag) {
	parts := strings.Split(s, ",")
	for i := range parts {
		trimmed := strings.TrimSpace(parts[i])
		tags = append(tags, mk(trimmed))
	}
	return tags
}
// TestMatchStrings checks MatchStrings. For every case a matcher is built
// from the comma-separated supported list, the desired field is split at "|"
// into the individual strings passed to MatchStrings, and the returned tag
// and index are compared with the expected ones. Each case runs as a subtest
// named after its supported and desired fields.
func TestMatchStrings(t *testing.T) {
	testCases := []struct {
		supported string
		desired   string // strings separated by |
		tag       string
		index     int
	}{{
		supported: "en",
		desired:   "",
		tag:       "en",
		index:     0,
	}, {
		supported: "en",
		desired:   "nl",
		tag:       "en",
		index:     0,
	}, {
		supported: "en,nl",
		desired:   "nl",
		tag:       "nl",
		index:     1,
	}, {
		supported: "en,nl",
		desired:   "nl|en",
		tag:       "nl",
		index:     1,
	}, {
		supported: "en-GB,nl",
		desired:   "en ; q=0.1,nl",
		tag:       "nl",
		index:     1,
	}, {
		supported: "en-GB,nl",
		desired:   "en;q=0.005 | dk; q=0.1,nl ",
		tag:       "en-GB",
		index:     0,
	}, {
		// do not match faulty tags with und
		supported: "en,und",
		desired:   "|en",
		tag:       "en",
		index:     0,
	}}
	for _, tc := range testCases {
		name := path.Join(tc.supported, tc.desired)
		t.Run(name, func(t *testing.T) {
			matcher := NewMatcher(makeTagList(tc.supported))
			desired := strings.Split(tc.desired, "|")
			got, idx := MatchStrings(matcher, desired...)
			if got.String() != tc.tag || idx != tc.index {
				t.Errorf("got %v, %d; want %v, %d", got, idx, tc.tag, tc.index)
			}
		})
	}
}
// TestAddLikelySubtags checks addLikelySubtags: the String form of the
// expanded input tag must equal the String form of the parsed expected tag.
// Parse errors on either side are ignored.
func TestAddLikelySubtags(t *testing.T) {
	tests := []struct{ in, out string }{
		{"aa", "aa-Latn-ET"},
		{"aa-Latn", "aa-Latn-ET"},
		{"aa-Arab", "aa-Arab-ET"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"kk", "kk-Cyrl-KZ"},
		{"kk-CN", "kk-Arab-CN"},
		{"cmn", "cmn"},
		{"zh-AU", "zh-Hant-AU"},
		{"zh-VN", "zh-Hant-VN"},
		{"zh-SG", "zh-Hans-SG"},
		{"zh-Hant", "zh-Hant-TW"},
		{"zh-Hani", "zh-Hani-CN"},
		{"und-Hani", "zh-Hani-CN"},
		{"und", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"und-CW", "pap-Latn-CW"},
		{"und-YT", "fr-Latn-YT"},
		{"und-Arab", "ar-Arab-EG"},
		{"und-AM", "hy-Armn-AM"},
		{"und-TW", "zh-Hant-TW"},
		{"und-002", "en-Latn-NG"},
		{"und-Latn-002", "en-Latn-NG"},
		{"en-Latn-002", "en-Latn-NG"},
		{"en-002", "en-Latn-NG"},
		{"en-001", "en-Latn-US"},
		{"und-003", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"Latn-001", "en-Latn-US"},
		{"en-001", "en-Latn-US"},
		{"es-419", "es-Latn-419"},
		{"he-145", "he-Hebr-IL"},
		{"ky-145", "ky-Latn-TR"},
		{"kk", "kk-Cyrl-KZ"},
		// Don't specialize duplicate and ambiguous matches.
		{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
		{"ku-145", "ku-Latn-TR"},  // Matches IQ, TR, and LB, but kk -> TR.
		{"und-Arab-CC", "ms-Arab-CC"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"und-Hans-CC", "zh-Hans-CC"},
		{"und-CC", "en-Latn-CC"},
		{"sr", "sr-Cyrl-RS"},
		{"sr-151", "sr-Latn-151"}, // Matches RO and RU.

		// We would like addLikelySubtags to generate the same results if the
		// input only changes by adding tags that would otherwise have been
		// added by the expansion.
		// In other words:
		//   und-AA   -> xx-Scrp-AA  implies  und-Scrp-AA -> xx-Scrp-AA
		//   und-AA   -> xx-Scrp-AA  implies  xx-AA       -> xx-Scrp-AA
		//   und-Scrp -> xx-Scrp-AA  implies  und-Scrp-AA -> xx-Scrp-AA
		//   und-Scrp -> xx-Scrp-AA  implies  xx-Scrp     -> xx-Scrp-AA
		//   xx       -> xx-Scrp-AA  implies  xx-Scrp     -> xx-Scrp-AA
		//   xx       -> xx-Scrp-AA  implies  xx-AA       -> xx-Scrp-AA
		//
		// The algorithm specified in
		//   http://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
		// Section C.10, does not handle the first case. For example,
		// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but
		// there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ
		// would expand to en-Latn-BJ, violating the aforementioned principle.
		// We deviate from the spec by letting und-Scrp-AA expand to
		// xx-Scrp-AA if a rule of the form und-AA -> xx-Scrp-AA is defined.
		// Note that as of version 23, CLDR has some explicitly specified
		// entries that do not conform to these rules. The implementation
		// will not correct these explicit inconsistencies. A later version
		// of CLDR is supposed to fix this.
		{"und-Latn-BJ", "fr-Latn-BJ"},
		{"und-Bugi-ID", "bug-Bugi-ID"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "ar-Arab-AA"},
		{"und-Afak-RE", "fr-Afak-RE"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"abp-Arab-GB", "abp-Arab-GB"},
		// script has preference over region
		{"und-Arab-NL", "ar-Arab-NL"},
		{"zza", "zza-Latn-TR"},
		// preserve variants and extensions
		{"de-1901", "de-Latn-DE-1901"},
		{"de-x-abc", "de-Latn-DE-x-abc"},
		{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tc := range tests {
		src, _ := Parse(tc.in)
		want, _ := Parse(tc.out)
		got, _ := src.addLikelySubtags()
		if got.String() == want.String() {
			continue
		}
		t.Errorf("%d: add(%s) was %s; want %s", i, tc.in, got, tc.out)
	}
}
// TestMinimize checks minimize in two ways for every table entry:
//
//  1. the String form of the minimized input must equal that of the parsed
//     expected tag, and
//  2. applying addLikelySubtags to the minimized tag must give the same
//     String form as applying it to the original input.
//
// Parse errors and the second results of minimize and addLikelySubtags are
// ignored.
func TestMinimize(t *testing.T) {
	tests := []struct{ in, out string }{
		{"aa", "aa"},
		{"aa-Latn", "aa"},
		{"aa-Latn-ET", "aa"},
		{"aa-ET", "aa"},
		{"aa-Arab", "aa-Arab"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"aa-Arab-ET", "aa-Arab"},
		{"und", "und"},
		{"und-Latn", "und"},
		{"und-Latn-US", "und"},
		{"en-Latn-US", "en"},
		{"cmn", "cmn"},
		{"cmn-Hans", "cmn-Hans"},
		{"cmn-Hant", "cmn-Hant"},
		{"zh-AU", "zh-AU"},
		{"zh-VN", "zh-VN"},
		{"zh-SG", "zh-SG"},
		{"zh-Hant", "zh-Hant"},
		{"zh-Hant-TW", "zh-TW"},
		{"zh-Hans", "zh"},
		{"zh-Hani", "zh-Hani"},
		{"und-Hans", "und-Hans"},
		{"und-Hani", "und-Hani"},

		{"und-CW", "und-CW"},
		{"und-YT", "und-YT"},
		{"und-Arab", "und-Arab"},
		{"und-AM", "und-AM"},
		{"und-Arab-CC", "und-Arab-CC"},
		{"und-CC", "und-CC"},
		{"und-Latn-BJ", "und-BJ"},
		{"und-Bugi-ID", "und-Bugi"},
		{"bug-Bugi-ID", "bug-Bugi"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "und-Arab-AA"},
		// preserve variants and extensions
		{"de-Latn-1901", "de-1901"},
		{"de-Latn-x-abc", "de-x-abc"},
		{"de-DE-1901-x-abc", "de-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tc := range tests {
		src, _ := Parse(tc.in)
		want, _ := Parse(tc.out)

		minimized, _ := src.minimize()
		if minimized.String() != want.String() {
			t.Errorf("%d: min(%s) was %s; want %s", i, tc.in, minimized, tc.out)
		}

		// Round trip: maximizing the minimized tag must match maximizing
		// the input directly.
		roundTrip, _ := minimized.addLikelySubtags()
		direct, _ := src.addLikelySubtags()
		if direct.String() != roundTrip.String() {
			t.Errorf("%d: max(min(%s)) = %s; want %s", i, tc.in, roundTrip, direct)
		}
	}
}
// TestRegionGroups checks the distance that regionGroupDist reports for
// pairs of tags. The two tags of a pair must have the same script; pairs for
// which Script() differs are reported as errors and skipped. The second
// result of regionGroupDist is not checked.
func TestRegionGroups(t *testing.T) {
	testCases := []struct {
		a, b     string
		distance uint8
	}{
		{"zh-TW", "zh-HK", 5},
		{"zh-MO", "zh-HK", 4},
		{"es-ES", "es-AR", 5},
		{"es-ES", "es", 4},
		{"es-419", "es-MX", 4},
		{"es-AR", "es-MX", 4},
		{"es-ES", "es-MX", 5},
		{"es-PT", "es-MX", 5},
	}
	for _, tc := range testCases {
		a := MustParse(tc.a)
		aScript, _ := a.Script()
		b := MustParse(tc.b)
		bScript, _ := b.Script()

		if aScript != bScript {
			t.Errorf("scripts differ: %q vs %q", aScript, bScript)
			continue
		}
		d, _ := regionGroupDist(a.region, b.region, aScript.scriptID, a.lang)
		if d != tc.distance {
			// The distance is a uint8: print it as a number (%q would
			// render it as a quoted character) and name the failing pair.
			t.Errorf("regionGroupDist(%s, %s) = %d; want %d", tc.a, tc.b, d, tc.distance)
		}
	}
}
// TestIsParadigmLocale checks isParadigmLocale for the language and region of
// each listed tag against the expected result.
func TestIsParadigmLocale(t *testing.T) {
	expected := map[string]bool{
		"en-US":  true,
		"en-GB":  true,
		"en-VI":  false,
		"es-GB":  false,
		"es-ES":  true,
		"es-419": true,
	}
	for str, want := range expected {
		tag := Make(str)
		if got := isParadigmLocale(tag.lang, tag.region); got != want {
			t.Errorf("isPL(%q) = %v; want %v", str, got, want)
		}
	}
}
// Implementation of String methods for various types for debugging purposes.

// String returns a multi-line dump of m: a "Default:" line with m.default_,
// followed by one line for each entry of m.index. Entries appear in map
// iteration order.
func (m *matcher) String() string {
	var buf bytes.Buffer
	fmt.Fprintln(&buf, "Default:", m.default_)
	for key, header := range m.index {
		fmt.Fprintf(&buf, " %s: %v\n", key, header)
	}
	return buf.String()
}
// String returns "haveTag: " followed by each element of h.haveTags
// formatted with %v and terminated by ", ".
func (h *matchHeader) String() string {
	var buf bytes.Buffer
	fmt.Fprint(&buf, "haveTag: ")
	for _, have := range h.haveTags {
		fmt.Fprintf(&buf, "%v, ", have)
	}
	return buf.String()
}
// String formats t for debugging as
// "tag:index:conf:maxRegion-maxScript|altScript".
func (t haveTag) String() string {
return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
}
// TestBestMatchAlloc checks that Match does not allocate when matching a
// single desired tag against a matcher built for en, sr and nl.
func TestBestMatchAlloc(t *testing.T) {
	// makeTagList splits its argument at commas, so the supported tags must
	// be comma-separated; a space-separated list would be treated as one
	// malformed tag.
	m := NewMatcher(makeTagList("en,sr,nl"))
	// Go allocates when creating a list of tags from a single tag! Build
	// the slice once, outside the measured function.
	list := []Tag{English}
	avg := testtext.AllocsPerRun(1, func() {
		m.Match(list...)
	})
	if avg > 0 {
		t.Errorf("got %f; want 0", avg)
	}
}
// benchHave is the list of supported tags from which the Match benchmarks
// build their matcher.
var benchHave = []Tag{
mk("en"),
mk("en-GB"),
mk("za"),
mk("zh-Hant"),
mk("zh-Hans-CN"),
mk("zh"),
mk("zh-HK"),
mk("ar-MK"),
mk("en-CA"),
mk("fr-CA"),
mk("fr-US"),
mk("fr-CH"),
mk("fr"),
mk("lt"),
mk("lv"),
mk("iw"),
mk("iw-NL"),
mk("he"),
mk("he-IT"),
mk("tlh"),
mk("ja"),
mk("ja-Jpan"),
mk("ja-Jpan-JP"),
mk("de"),
mk("de-CH"),
mk("de-AT"),
mk("de-DE"),
mk("sr"),
mk("sr-Latn"),
mk("sr-Cyrl"),
mk("sr-ME"),
}
// benchWant holds the lists of desired tags that BenchmarkMatch passes to the
// matcher, one list per getBest call.
var benchWant = [][]Tag{
	{
		mk("en"),
	},
	{
		mk("en-AU"),
		mk("de-HK"),
		mk("nl"),
		mk("fy"),
		mk("lv"),
	},
	{
		mk("en-AU"),
		mk("de-HK"),
		mk("nl"),
		mk("fy"),
	},
	{
		mk("ja-Hant"),
		mk("da-HK"),
		mk("nl"),
		mk("zh-TW"),
	},
	{
		mk("ja-Hant"),
		mk("da-HK"),
		mk("nl"),
		mk("hr"),
	},
}
// BenchmarkMatch measures getBest over every desired list in benchWant, using
// a matcher built from benchHave. One benchmark iteration covers all lists.
func BenchmarkMatch(b *testing.B) {
	m := newMatcher(benchHave, nil)
	for i := 0; i < b.N; i++ {
		for j := range benchWant {
			m.getBest(benchWant[j]...)
		}
	}
}
// BenchmarkMatchExact measures getBest for the desired tag "en" against a
// matcher built from benchHave, which lists "en" itself.
func BenchmarkMatchExact(b *testing.B) {
	desired := mk("en")
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired)
	}
}
// BenchmarkMatchAltLanguagePresent measures getBest for the desired tag "hr"
// against a matcher built from benchHave.
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
	desired := mk("hr")
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired)
	}
}
// BenchmarkMatchAltLanguageNotPresent measures getBest for the desired tag
// "nn" against a matcher built from benchHave.
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
	desired := mk("nn")
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired)
	}
}
// BenchmarkMatchAltScriptPresent measures getBest for the desired tag
// "zh-Hant-CN" against a matcher built from benchHave.
func BenchmarkMatchAltScriptPresent(b *testing.B) {
	desired := mk("zh-Hant-CN")
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired)
	}
}
// BenchmarkMatchAltScriptNotPresent measures getBest for the desired tag
// "fr-Cyrl" against a matcher built from benchHave.
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
	desired := mk("fr-Cyrl")
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired)
	}
}
// BenchmarkMatchLimitedExact measures getBest for the desired list
// "he-NL", "iw-NL" against a matcher built from benchHave.
func BenchmarkMatchLimitedExact(b *testing.B) {
	desired := []Tag{mk("he-NL"), mk("iw-NL")}
	matcher := newMatcher(benchHave, nil)
	for n := 0; n < b.N; n++ {
		matcher.getBest(desired...)
	}
}

View File

@ -1,517 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"strings"
"testing"
"golang.org/x/text/internal/tag"
)
// scanTest is one scanner test case: an input string, whether scanning it is
// expected to succeed, and the tokens the scanner is expected to produce.
type scanTest struct {
ok bool // true if scanning does not result in an error
in string
tok []string // the expected tokens
}
// tests is the scanner test table shared by TestScan and TestAcceptMinSize.
var tests = []scanTest{
{true, "", []string{}},
{true, "1", []string{"1"}},
{true, "en", []string{"en"}},
{true, "root", []string{"root"}},
{true, "maxchars", []string{"maxchars"}},
{false, "bad/", []string{}},
{false, "morethan8", []string{}},
{false, "-", []string{}},
{false, "----", []string{}},
{false, "_", []string{}},
{true, "en-US", []string{"en", "US"}},
{true, "en_US", []string{"en", "US"}},
{false, "en-US-", []string{"en", "US"}},
{false, "en-US--", []string{"en", "US"}},
{false, "en-US---", []string{"en", "US"}},
{false, "en--US", []string{"en", "US"}},
{false, "-en-US", []string{"en", "US"}},
{false, "-en--US-", []string{"en", "US"}},
{false, "-en--US-", []string{"en", "US"}},
{false, "en-.-US", []string{"en", "US"}},
{false, ".-en--US-.", []string{"en", "US"}},
{false, "en-u.-US", []string{"en", "US"}},
{true, "en-u1-US", []string{"en", "u1", "US"}},
{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
{false, "moreThan8-moreThan8-e", []string{"e"}},
}
// TestScan runs the scanner over every entry of tests and checks three
// things: the sequence of tokens it yields, the scanner's buffer after
// scanning (compared, with "_" replaced by "-", against the expected tokens
// joined by "-"), and whether an error was recorded.
func TestScan(t *testing.T) {
	for i, tc := range tests {
		sc := makeScannerString(tc.in)
		for j := 0; !sc.done; j++ {
			if j >= len(tc.tok) {
				t.Errorf("%d: extra token %q", i, sc.token)
			} else if want := tc.tok[j]; tag.Compare(want, sc.token) != 0 {
				t.Errorf("%d: token %d: found %q; want %q", i, j, sc.token, want)
				// Stop scanning this input at the first mismatching token.
				break
			}
			sc.scan()
		}

		joined := strings.Join(tc.tok, "-")
		normalized := bytes.Replace(sc.b, b("_"), b("-"), -1)
		if tag.Compare(joined, normalized) != 0 {
			t.Errorf("%d: input: found %q; want %q", i, sc.b, joined)
		}

		if ok := sc.err == nil; ok != tc.ok {
			t.Errorf("%d: ok: found %v; want %v", i, ok, tc.ok)
		}
	}
}
// TestAcceptMinSize checks acceptMinSize for every entry of tests and every
// minimum size from 1 to 8. The expected end offset is the combined length
// of the leading run of expected tokens that are at least sz long, plus one
// separator between each pair of consecutive tokens in that run.
func TestAcceptMinSize(t *testing.T) {
	for i, tc := range tests {
		for sz := 1; sz <= 8; sz++ {
			sc := makeScannerString(tc.in)
			// Rewind the scanner so acceptMinSize starts at the beginning
			// of the input.
			sc.end, sc.next = 0, 0
			got := sc.acceptMinSize(sz)

			// Count the length of the successive tokens with a minimum
			// size of sz.
			want := 0
			for k, tok := range tc.tok {
				if len(tok) < sz {
					break
				}
				if k > 0 {
					want++ // separator before every token but the first
				}
				want += len(tok)
			}
			if got != want {
				t.Errorf("%d:%d: found len %d; want %d", i, sz, got, want)
			}
		}
	}
}
// parseTest is one tag-parsing test case: the input string, the expected
// parts of the parsed tag, and flags describing how the input is expected to
// be handled.
type parseTest struct {
i int // the index of this test
in string
lang, script, region string
variants, ext string
extList []string // only used when more than one extension is present
invalid bool
rewrite bool // special rewrite not handled by parseTag
changed bool // string needed to be reformatted
}
func parseTests() []parseTest {
tests := []parseTest{
{in: "root", lang: "und"},
{in: "und", lang: "und"},
{in: "en", lang: "en"},
{in: "xy", lang: "und", invalid: true},
{in: "en-ZY", lang: "en", invalid: true},
{in: "gsw", lang: "gsw"},
{in: "sr_Latn", lang: "sr", script: "Latn"},
{in: "af-Arab", lang: "af", script: "Arab"},
{in: "nl-BE", lang: "nl", region: "BE"},
{in: "es-419", lang: "es", region: "419"},
{in: "und-001", lang: "und", region: "001"},
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
// Variants
{in: "de-1901", lang: "de", variants: "1901"},
// Accept with unsuppressed script.
{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
// Specialized.
{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
// Maximum number of variants while adhering to prefix rules.
{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
// Sorting.
{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
// Duplicates variants are removed, but not an error.
{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
// Variants that do not have correct prefixes. We still accept these.
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
// Invalid variant.
{in: "de-1902", lang: "de", variants: "", invalid: true},
{in: "EN_CYRL", lang: "en", script: "Cyrl"},
// private use and extensions
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
{in: "en-v-c", lang: "en", ext: "", invalid: true},
{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
{in: "en-u-c", lang: "en", ext: "", invalid: true},
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
// Invalid "u" extension. Drop invalid parts.
{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
// TODO: Consider eliminating duplicates and returning an error.
{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau-cu-xau", changed: true},
{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
// Not necessary to have changed here.
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
// invalid
{in: "", lang: "und", invalid: true},
{in: "-", lang: "und", invalid: true},
{in: "x", lang: "und", invalid: true},
{in: "x-", lang: "und", invalid: true},
{in: "x--", lang: "und", invalid: true},
{in: "a-a-b-c-d", lang: "und", invalid: true},
{in: "en-", lang: "en", invalid: true},
{in: "enne-", lang: "und", invalid: true},
{in: "en.", lang: "und", invalid: true},
{in: "en.-latn", lang: "und", invalid: true},
{in: "en.-en", lang: "en", invalid: true},
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
// TODO: check key-value validity
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
{in: "en-t-abcd", lang: "en", invalid: true},
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
// rewrites (more tests in TestGrandfathered)
{in: "zh-min-nan", lang: "nan"},
{in: "zh-yue", lang: "yue"},
{in: "zh-xiang", lang: "hsn", rewrite: true},
{in: "zh-guoyu", lang: "cmn", rewrite: true},
{in: "iw", lang: "iw"},
{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
{in: "i-klingon", lang: "tlh", rewrite: true},
}
for i, tt := range tests {
tests[i].i = i
if tt.extList != nil {
tests[i].ext = strings.Join(tt.extList, "-")
}
if tt.ext != "" && tt.extList == nil {
tests[i].extList = []string{tt.ext}
}
}
return tests
}
// TestParseExtensions runs parseExtensions over every parseTests entry that
// expects a non-empty extension and is not flagged as a rewrite. It compares
// the extension text left in the scanner buffer, and whether that text differs
// from the input, against the entry's ext and changed fields.
func TestParseExtensions(t *testing.T) {
	for idx, tc := range parseTests() {
		if tc.rewrite || tc.ext == "" {
			continue
		}
		sc := makeScannerString(tc.in)
		// Unless the second byte is already a '-', advance the scanner
		// using the offset reported by nextExtension.
		needsAdvance := len(sc.b) > 1 && sc.b[1] != '-'
		if needsAdvance {
			sc.end = nextExtension(string(sc.b), 0)
			sc.next = sc.end + 1
			sc.scan()
		}
		// Remember the start offset before parseExtensions runs.
		begin := sc.start
		sc.toLower(begin, len(sc.b))
		parseExtensions(&sc)

		got := string(sc.b[begin:])
		if got != tc.ext {
			t.Errorf("%d(%s): ext was %v; want %v", idx, tc.in, got, tc.ext)
		}
		// The input counts as changed when the parsed extension text is no
		// longer a prefix of the corresponding part of the input.
		changed := !strings.HasPrefix(tc.in[begin:], got)
		if changed != tc.changed {
			t.Errorf("%d(%s): changed was %v; want %v", idx, tc.in, changed, tc.changed)
		}
	}
}
// partChecks calls f for every entry returned by parseTests. Unless f asks to
// skip the entry, the Tag it returns is checked against the entry's expected
// language, script and region and, when the Tag carries a string form, its
// variants and extensions.
func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
	cases := parseTests()
	for idx := range cases {
		// Hand f a copy: callers may adjust the expectations of an entry.
		tc := cases[idx]
		got, skip := f(&tc)
		if skip {
			continue
		}

		wantLang, _ := getLangID(b(tc.lang))
		if wantLang != got.lang {
			t.Errorf("%d: lang was %q; want %q", idx, got.lang, wantLang)
		}
		wantScript, _ := getScriptID(script, b(tc.script))
		if wantScript != got.script {
			t.Errorf("%d: script was %q; want %q", idx, got.script, wantScript)
		}
		wantRegion, _ := getRegionID(b(tc.region))
		if wantRegion != got.region {
			t.Errorf("%d: region was %q; want %q", idx, got.region, wantRegion)
		}

		// Variants and extensions can only be checked on the string form.
		if got.str == "" {
			continue
		}

		variantStart, extStart := int(got.pVariant), int(got.pExt)
		if variantStart < extStart {
			// Presumably steps over the '-' in front of the variants.
			variantStart++
		}
		if have := got.str[variantStart:extStart]; have != tc.variants {
			t.Errorf("%d: variants was %q; want %q", idx, have, tc.variants)
		}

		if extStart > 0 && extStart < len(got.str) {
			// Presumably steps over the '-' in front of the extensions.
			extStart++
		}
		if have := got.str[extStart:]; have != tc.ext {
			t.Errorf("%d: extensions were %q; want %q", idx, have, tc.ext)
		}
	}
}
func TestParseTag(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
if strings.HasPrefix(tt.in, "x-") || tt.rewrite {
return Tag{}, true
}
scan := makeScannerString(tt.in)
id, end := parseTag(&scan)
id.str = string(scan.b[:end])
tt.ext = ""
tt.extList = []string{}
return id, false
})
}
// TestParse parses every parseTests input with Raw.Parse and, in addition to
// the per-part checks done by partChecks, verifies that:
//   - re-parsing the tag's String form yields the same string,
//   - the extension part of the tag matches the expected ext,
//   - the "changed" status (the tag's string is not a prefix of the input)
//     matches the expectation, and
//   - an error is returned exactly when the entry is marked invalid.
func TestParse(t *testing.T) {
	partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
		id, err := Raw.Parse(tt.in)

		// Extract the extension portion of the parsed tag: a tag starting
		// with "x-" counts as extension in its entirety, otherwise the
		// extensions are whatever follows the separator at pExt.
		ext := ""
		if id.str != "" {
			if strings.HasPrefix(id.str, "x-") {
				ext = id.str
			} else if int(id.pExt) < len(id.str) && id.pExt > 0 {
				ext = id.str[id.pExt+1:]
			}
		}

		// Round trip: the re-parsed tag is the observed value and the
		// original tag's string is the expectation.
		if tag, _ := Raw.Parse(id.String()); tag.String() != id.String() {
			t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, tag.String(), id.String())
		}
		if ext != tt.ext {
			t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext)
		}
		changed := id.str != "" && !strings.HasPrefix(tt.in, id.str)
		if changed != tt.changed {
			t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed)
		}
		if (err != nil) != tt.invalid {
			t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err)
		}
		return id, false
	})
}
// TestErrors checks which error Parse returns for a set of bad inputs: either
// the value built by mkErrInvalid for the offending subtag, or errSyntax.
func TestErrors(t *testing.T) {
	cases := []struct {
		input string
		want  error
	}{
		// invalid subtags.
		{"ac", mkErrInvalid([]byte("ac"))},
		{"AC", mkErrInvalid([]byte("ac"))},
		{"aa-Uuuu", mkErrInvalid([]byte("Uuuu"))},
		{"aa-AB", mkErrInvalid([]byte("AB"))},
		// ill-formed wins over invalid.
		{"ac-u", errSyntax},
		{"ac-u-ca", errSyntax},
		{"ac-u-ca-co-pinyin", errSyntax},
		{"noob", errSyntax},
	}
	for _, tc := range cases {
		if _, got := Parse(tc.input); got != tc.want {
			t.Errorf("%s: was %q; want %q", tc.input, got, tc.want)
		}
	}
}
func TestCompose1(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
l, _ := ParseBase(tt.lang)
s, _ := ParseScript(tt.script)
r, _ := ParseRegion(tt.region)
v := []Variant{}
for _, x := range strings.Split(tt.variants, "-") {
p, _ := ParseVariant(x)
v = append(v, p)
}
e := []Extension{}
for _, x := range tt.extList {
p, _ := ParseExtension(x)
e = append(e, p)
}
id, _ = Raw.Compose(l, s, r, v, e)
return id, false
})
}
func TestCompose2(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
l, _ := ParseBase(tt.lang)
s, _ := ParseScript(tt.script)
r, _ := ParseRegion(tt.region)
p := []interface{}{l, s, r, s, r, l}
for _, x := range strings.Split(tt.variants, "-") {
v, _ := ParseVariant(x)
p = append(p, v)
}
for _, x := range tt.extList {
e, _ := ParseExtension(x)
p = append(p, e)
}
id, _ = Raw.Compose(p...)
return id, false
})
}
func TestCompose3(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
id, _ = Raw.Parse(tt.in)
id, _ = Raw.Compose(id)
return id, false
})
}
// mk is a test shorthand that builds a Tag from s via Raw.Make.
func mk(s string) Tag {
	tag := Raw.Make(s)
	return tag
}
// TestParseAcceptLanguage feeds Accept-Language style strings to
// ParseAcceptLanguage and checks the error status as well as the returned
// tags and quality weights, including their number and order.
func TestParseAcceptLanguage(t *testing.T) {
	// res is one expected (tag, weight) pair.
	type res struct {
		t Tag
		q float32
	}
	en := []res{{mk("en"), 1.0}}
	tests := []struct {
		out []res
		in  string
		ok  bool
	}{
		{en, "en", true},
		{en, "   en", true},
		{en, "en   ", true},
		{en, "  en  ", true},
		{en, "en,", true},
		{en, ",en", true},
		{en, ",,,en,,,", true},
		{en, ",en;q=1", true},
		// We allow an empty input, contrary to spec.
		{nil, "", true},
		{[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
		// errors
		{nil, ";", false},
		{nil, "$", false},
		{nil, "e;", false},
		{nil, "x;", false},
		{nil, "x", false},
		{nil, "ac", false}, // non-existing language
		{nil, "aa;q", false},
		{nil, "aa;q=", false},
		{nil, "aa;q=.", false},
		// odd fallbacks
		{
			[]res{{mk("en"), 0.1}},
			" english ;q=.1",
			true,
		},
		{
			[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
			" italian, deutsch, french",
			true,
		},
		// lists
		{
			[]res{{mk("en"), 0.1}},
			"en;q=.1",
			true,
		},
		{
			[]res{{mk("mul"), 1.0}},
			"*",
			true,
		},
		{
			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
			"en,de",
			true,
		},
		{
			[]res{{mk("en"), 1.0}, {mk("de"), .5}},
			"en,de;q=0.5",
			true,
		},
		{
			[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
			"  en ;   q    =   0.5    ,  , de;q=0.8",
			true,
		},
		{
			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
			"en,de,fr,i-klingon",
			true,
		},
		// sorting
		{
			[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
			"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
			true,
		},
		// dropping
		{
			[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
			"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
			true,
		},
	}
	for i, tt := range tests {
		tags, qs, e := ParseAcceptLanguage(tt.in)
		if e == nil != tt.ok {
			t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
		}
		// Compare the result counts first: indexing tt.out and qs below
		// would otherwise panic on extra results, and missing results
		// would go unnoticed.
		if len(tags) != len(tt.out) || len(qs) != len(tt.out) {
			t.Errorf("%d:%s: got %d tags and %d weights; want %d of each", i, tt.in, len(tags), len(qs), len(tt.out))
			continue
		}
		for j, tag := range tags {
			if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
				t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
				break
			}
		}
	}
}

View File

@ -1,389 +0,0 @@
# TODO: this file has not yet been included in the main CLDR release.
# The intent is to verify this file against the Go implementation and then
# correct the cases and merge in other interesting test cases.
# See TestCLDRCompliance in match_test.go, as well as the list of exceptions
# defined in the map skip below it, for the work in progress.
# Data-driven test for the XLocaleMatcher.
# Format
# • Everything after "#" is a comment
# • Arguments are separated by ";". They are:
# supported ; desired ; expected
# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
# The test code also supports reformatting this file, by setting the REFORMAT flag.
##################################################
# testParentLocales
# es-419, es-AR, and es-MX are in a cluster; es is in a different one
es-419, es-ES ; es-AR ; es-419
es-ES, es-419 ; es-AR ; es-419
es-419, es ; es-AR ; es-419
es, es-419 ; es-AR ; es-419
es-MX, es ; es-AR ; es-MX
es, es-MX ; es-AR ; es-MX
# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
en-GB, en-US ; en-AU ; en-GB
en-US, en-GB ; en-AU ; en-GB
en-GB, en ; en-AU ; en-GB
en, en-GB ; en-AU ; en-GB
en-NZ, en-US ; en-AU ; en-NZ
en-US, en-NZ ; en-AU ; en-NZ
en-NZ, en ; en-AU ; en-NZ
en, en-NZ ; en-AU ; en-NZ
# pt-AO and pt-PT in one cluster; pt-BR in another
pt-PT, pt-BR ; pt-AO ; pt-PT
pt-BR, pt-PT ; pt-AO ; pt-PT
pt-PT, pt ; pt-AO ; pt-PT
pt, pt-PT ; pt-AO ; pt-PT
zh-MO, zh-TW ; zh-HK ; zh-MO
zh-TW, zh-MO ; zh-HK ; zh-MO
zh-MO, zh-TW ; zh-HK ; zh-MO
zh-TW, zh-MO ; zh-HK ; zh-MO
zh-MO, zh-CN ; zh-HK ; zh-MO
zh-CN, zh-MO ; zh-HK ; zh-MO
zh-MO, zh ; zh-HK ; zh-MO
zh, zh-MO ; zh-HK ; zh-MO
##################################################
# testChinese
zh-CN, zh-TW, iw ; zh-Hant-TW ; zh-TW
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
zh-CN, zh-TW, iw ; zh-TW ; zh-TW
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
zh-CN, zh-TW, iw ; zh-CN ; zh-CN
zh-CN, zh-TW, iw ; zh ; zh-CN
##################################################
# testenGB
fr, en, en-GB, es-419, es-MX, es ; en-NZ ; en-GB
fr, en, en-GB, es-419, es-MX, es ; es-ES ; es
fr, en, en-GB, es-419, es-MX, es ; es-AR ; es-419
fr, en, en-GB, es-419, es-MX, es ; es-MX ; es-MX
##################################################
# testFallbacks
91, en, hi ; sa ; hi
##################################################
# testBasics
fr, en-GB, en ; en-GB ; en-GB
fr, en-GB, en ; en ; en
fr, en-GB, en ; fr ; fr
fr, en-GB, en ; ja ; fr # return first if no match
##################################################
# testFallback
# check that script fallbacks are handled right
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
zh-CN, zh-TW, iw ; zh ; zh-CN
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
zh-CN, zh-TW, iw ; he-IT ; iw
##################################################
# testSpecials
# check that nearby languages are handled
en, fil, ro, nn ; tl ; fil
en, fil, ro, nn ; mo ; ro
en, fil, ro, nn ; nb ; nn
# make sure default works
en, fil, ro, nn ; ja ; en
##################################################
# testRegionalSpecials
# verify that en-AU is closer to en-GB than to en (which is en-US)
en, en-GB, es, es-419 ; es-MX ; es-419
en, en-GB, es, es-419 ; en-AU ; en-GB
en, en-GB, es, es-419 ; es-ES ; es
##################################################
# testHK
# HK and MO are closer to each other for Hant than to TW
zh, zh-TW, zh-MO ; zh-HK ; zh-MO
zh, zh-TW, zh-HK ; zh-MO ; zh-HK
##################################################
# testMatch-exact
# see localeDistance.txt
##################################################
# testMatch-none
# see localeDistance.txt
##################################################
# testMatch-matchOnMaximized
zh, zh-Hant ; und-TW ; zh-Hant # und-TW should be closer to zh-Hant than to zh
en-Hant-TW, und-TW ; zh-Hant ; und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
en-Hant-TW, und-TW ; zh ; und-TW # zh should be closer to und-TW than to en-Hant-TW
##################################################
# testMatchGrandfatheredCode
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
##################################################
# testGetBestMatchForList-exactMatch
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
##################################################
# testGetBestMatchForList-simpleVariantMatch
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
# Fallback.
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
##################################################
# testGetBestMatchForList-matchOnMaximized
# Check that if the preference is maximized already, it works as well.
en, ja ; ja-Jpan-JP, en-AU ; ja # Match for ja-Jpan-JP (maximized already)
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
en, ja ; ja-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
# Check that if the preference is maximized already, it works as well.
en, ja ; ja-Jpan-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
##################################################
# testGetBestMatchForList-noMatchOnMaximized
# Regression test for http://b/5714572 .
# de maximizes to de-DE. Pick the exact match for the secondary language instead.
en, de, fr, ja ; de-CH, fr ; de
##################################################
# testBestMatchForTraditionalChinese
# Scenario: An application that only supports Simplified Chinese (and some other languages),
# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
# zh-Hans, it wouldn't make much of a difference.
# The script distance (simplified vs. traditional Han) is considered small enough
# to be an acceptable match. The regional difference is considered almost insignificant.
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
# change your call to getBestMatch to include a 2nd language preference.
# "en" is a better match since its distance to "en-US" is closer than the distance
# from "zh-TW" to "zh-CN" (script distance).
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
##################################################
# testUndefined
# When the undefined language doesn't match anything in the list,
# getBestMatch returns the default, as usual.
it, fr ; und ; it
# When it *does* occur in the list, bestMatch returns it, as expected.
it, und ; und ; und
# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
# tags, the undefined language would normally match English. But that would produce the
# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
# To avoid that, we change the matcher's definitions of max
# so that max("und")="und". That produces the following, more desirable
# results:
it, en ; und ; it
it, und ; en ; it
##################################################
# testGetBestMatch-regionDistance
es-AR, es ; es-MX ; es-AR
fr, en, en-GB ; en-CA ; en-GB
de-AT, de-DE, de-CH ; de ; de-DE
##################################################
# testAsymmetry
mul, nl ; af ; nl # af => nl
mul, af ; nl ; mul # but nl !=> af
##################################################
# testGetBestMatchForList-matchOnMaximized2
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja # Match for ja-JP, with likely region subtag
# Check that if the preference is maximized already, it works as well.
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja # Match for ja-Jpan-JP (maximized already)
##################################################
# testGetBestMatchForList-closeEnoughMatchOnMaximized
en-GB, en, de, fr, ja ; de-CH, fr ; de
en-GB, en, de, fr, ja ; en-US, ar, nl, de, ja ; en
##################################################
# testGetBestMatchForPortuguese
# pt might be supported and not pt-PT
# European user who prefers Spanish over Brazilian Portuguese as a fallback.
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT # pt implicit
# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
# matchers as "pt-BR" is a much more common language.
pt-PT, pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT
pt-PT, pt, es, es-419 ; pt, es-419, pt-PT ; pt
pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
# Code that adds the user's country can get "pt-US" for a user's language.
# That should fall back to "pt-BR".
pt-PT, pt-BR, es, es-419 ; pt-US, pt-PT ; pt-BR
pt-PT, pt, es, es-419 ; pt-US, pt-PT, pt ; pt # pt-BR implicit
##################################################
# testVariantWithScriptMatch 1 and 2
fr, en, sv ; en-GB ; en
fr, en, sv ; en-GB ; en
en, sv ; en-GB, sv ; en
##################################################
# testLongLists
en, sv ; sv ; sv
af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; sv ; sv
af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, 
fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, 
sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; sv ; sv
##################################################
# test8288
it, en ; und ; it
it, en ; und, en ; en
# examples from
# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
##################################################
# testUnHack
en-NZ, en-IT ; en-US ; en-NZ
##################################################
# testEmptySupported => null
; en ; null
##################################################
# testVariantsAndExtensions
##################################################
# tests the .combine() method
und, fr ; fr-BE-fonipa ; fr ; fr-BE-fonipa
und, fr-CA ; fr-BE-fonipa ; fr-CA ; fr-BE-fonipa
und, fr-fonupa ; fr-BE-fonipa ; fr-fonupa ; fr-BE-fonipa
und, no ; nn-BE-fonipa ; no ; no-BE-fonipa
und, en-GB-u-sd-gbsct ; en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; en-GB-u-sd-gbsct ; en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr-PSCRACK ; fr-PSCRACK
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr ; fr-PSCRACK
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; de-CH ; de-PSCRACK
##################################################
# testClusters
# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
und, es, es-MA, es-MX, es-419 ; es-AR ; es-419
und, es-MA, es, es-419, es-MX ; es-AR ; es-419
und, es, es-MA, es-MX, es-419 ; es-EA ; es
und, es-MA, es, es-419, es-MX ; es-EA ; es
# of course, fall back to within cluster
und, es, es-MA, es-MX ; es-AR ; es-MX
und, es-MA, es, es-MX ; es-AR ; es-MX
und, es-MA, es-MX, es-419 ; es-EA ; es-MA
und, es-MA, es-419, es-MX ; es-EA ; es-MA
# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB
und, en-GU, en, en-GB, en-IN ; en-ZA ; en-GB
und, en, en-GU, en-IN, en-GB ; en-VI ; en
und, en-GU, en, en-GB, en-IN ; en-VI ; en
# of course, fall back to within cluster
und, en, en-GU, en-IN ; en-ZA ; en-IN
und, en-GU, en, en-IN ; en-ZA ; en-IN
und, en-GU, en-IN, en-GB ; en-VI ; en-GU
und, en-GU, en-GB, en-IN ; en-VI ; en-GU
##################################################
# testThreshold
@Threshold=60
50, und, fr-CA-fonupa ; fr-BE-fonipa ; fr-CA-fonupa ; fr-BE-fonipa
50, und, fr-Cyrl-CA-fonupa ; fr-BE-fonipa ; fr-Cyrl-CA-fonupa ; fr-Cyrl-BE-fonipa
@Threshold=-1 # restore
##################################################
# testScriptFirst
@DistanceOption=SCRIPT_FIRST
@debug
ru, fr ; zh, pl ; fr
ru, fr ; zh-Cyrl, pl ; ru
hr, en-Cyrl; sr ; en-Cyrl
da, ru, hr; sr ; ru

View File

@ -1,226 +0,0 @@
# basics
fr, en-GB, en ; en-GB ; en-GB
fr, en-GB, en ; en-US ; en
fr, en-GB, en ; fr-FR ; fr
fr, en-GB, en ; ja-JP ; fr
# script fallbacks
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
zh-CN, zh-TW, iw ; zh ; zh-CN
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
zh-CN, zh-TW, iw ; he-IT ; iw ; iw
# language-specific script fallbacks 1
en, sr, nl ; sr-Latn ; sr
en, sr, nl ; sh ; sr # different script, but seems okay and is as CLDR suggests
en, sr, nl ; hr ; en
en, sr, nl ; bs ; en
en, sr, nl ; nl-Cyrl ; sr
# language-specific script fallbacks 2
en, sh ; sr ; sh
en, sh ; sr-Cyrl ; sh
en, sh ; hr ; sh
# don't match hr to sr-Latn
en, sr-Latn ; hr ; en
# both deprecated and not
fil, tl, iw, he ; he-IT ; he
fil, tl, iw, he ; he ; he
fil, tl, iw, he ; iw ; iw
fil, tl, iw, he ; fil-IT ; fil
fil, tl, iw, he ; fil ; fil
fil, tl, iw, he ; tl ; tl
# nearby languages
en, fil, ro, nn ; tl ; fil
en, fil, ro, nn ; mo ; ro
en, fil, ro, nn ; nb ; nn
en, fil, ro, nn ; ja ; en
# nearby languages: Nynorsk to Bokmål
en, nb ; nn ; nb
# nearby languages: Danish does not match nn
en, nn ; da ; en
# nearby languages: Danish matches no
en, no ; da ; no
# nearby languages: Danish matches nb
en, nb ; da ; nb
# prefer matching languages over language variants.
nn, en-GB ; no, en-US ; en-GB
nn, en-GB ; nb, en-US ; en-GB
# deprecated version is closer than same language with other differences
nl, he, en-GB ; iw, en-US ; he
# macro equivalent is closer than same language with other differences
nl, zh, en-GB, no ; cmn, en-US ; zh
nl, zh, en-GB, no ; nb, en-US ; no
# legacy equivalent is closer than same language with other differences
nl, fil, en-GB ; tl, en-US ; fil
# distinguish near equivalents
en, ro, mo, ro-MD ; ro ; ro
en, ro, mo, ro-MD ; mo ; mo
en, ro, mo, ro-MD ; ro-MD ; ro-MD
# maximization of legacy
sr-Cyrl, sr-Latn, ro, ro-MD ; sh ; sr-Latn
sr-Cyrl, sr-Latn, ro, ro-MD ; mo ; ro-MD
# empty
; fr ; und
; en ; und
# private use subtags
fr, en-GB, x-bork, es-ES, es-419 ; x-piglatin ; fr
fr, en-GB, x-bork, es-ES, es-419 ; x-bork ; x-bork
# grandfathered codes
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
fr, i-klingon, en-Latn-US ; i-klingon ; tlh
# simple variant match
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
# best match for traditional Chinese
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en ; en-US
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
# more specific script should win in case regions are identical
af, af-Latn, af-Arab ; af ; af
af, af-Latn, af-Arab ; af-ZA ; af
af, af-Latn, af-Arab ; af-Latn-ZA ; af-Latn
af, af-Latn, af-Arab ; af-Latn ; af-Latn
# more specific region should win
nl, nl-NL, nl-BE ; nl ; nl
nl, nl-NL, nl-BE ; nl-Latn ; nl
nl, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
nl, nl-NL, nl-BE ; nl-NL ; nl-NL
# region may replace matched if matched is enclosing
es-419,es ; es-MX ; es-419 ; es-MX
es-419,es ; es-SG ; es
# more specific region wins over more specific script
nl, nl-Latn, nl-NL, nl-BE ; nl ; nl
nl, nl-Latn, nl-NL, nl-BE ; nl-Latn ; nl-Latn
nl, nl-Latn, nl-NL, nl-BE ; nl-NL ; nl-NL
nl, nl-Latn, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
# region distance Portuguese
pt, pt-PT ; pt-ES ; pt-PT
# if no preferred locale specified, pick top language, not regional
en, fr, fr-CA, fr-CH ; fr-US ; fr #TODO: ; fr-u-rg-US
# region distance German
de-AT, de-DE, de-CH ; de ; de-DE
# en-AU is closer to en-GB than to en (which is en-US)
en, en-GB, es-ES, es-419 ; en-AU ; en-GB
en, en-GB, es-ES, es-419 ; es-MX ; es-419 ; es-MX
en, en-GB, es-ES, es-419 ; es-PT ; es-ES
# undefined
it, fr ; und ; it
# und does not match en
it, en ; und ; it
# undefined in priority list
it, und ; und ; und
it, und ; en ; it
# undefined language with an explicit region or script
it, fr, zh ; und-FR ; fr
it, fr, zh ; und-CN ; zh
it, fr, zh ; und-Hans ; zh
it, fr, zh ; und-Hant ; zh
it, fr, zh ; und-Latn ; it
# match on maximized tag
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja
# pick best maximized tag
ja, ja-Jpan-US, ja-JP, en, ru ; ja-Jpan, ru ; ja
ja, ja-Jpan-US, ja-JP, en, ru ; ja-JP, ru ; ja-JP
ja, ja-Jpan-US, ja-JP, en, ru ; ja-US, ru ; ja-Jpan-US
# termination: pick best maximized match
ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan-JP, ru ; ja-JP
ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan, ru ; ja-Jpan
# same language over exact, but distinguish when user is explicit
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
en, de, fr, ja ; de-CH, fr ; de # TODO: ; de-u-rg-CH
en-GB, nl ; en, nl ; en-GB
en-GB, nl ; en, nl, en-GB ; nl
# parent relation preserved
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-150 ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-AU ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-BE ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GG ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GI ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-HK ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IE ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IM ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IN ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-JE ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-NZ ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-PK ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-SG ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-DE ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-AR ; es-419 ; es-AR
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-BO ; es-419 ; es-BO
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CL ; es-419 ; es-CL
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CO ; es-419 ; es-CO
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CR ; es-419 ; es-CR
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CU ; es-419 ; es-CU
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-DO ; es-419 ; es-DO
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-EC ; es-419 ; es-EC
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-GT ; es-419 ; es-GT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-HN ; es-419 ; es-HN
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-MX ; es-419 ; es-MX
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-NI ; es-419 ; es-NI
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PA ; es-419 ; es-PA
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PE ; es-419 ; es-PE
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PR ; es-419 ; es-PR
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PT ; es
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PY ; es-419 ; es-PY
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-SV ; es-419 ; es-SV
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-US ; es-419
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-UY ; es-419 ; es-UY
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-VE ; es-419 ; es-VE
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-AO ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-CV ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-GW ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MO ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MZ ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-ST ; pt-PT
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-TL ; pt-PT
# preserve extensions
en, de, sl-nedis ; de-FR-u-co-phonebk ; de ; de-u-co-phonebk
en, de, sl-nedis ; sl-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
en, de, sl-nedis ; sl-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
en, de, sl-nedis ; sl-HR-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
en, de, sl-nedis ; de-t-m0-iso-i0-pinyin ; de ; de-t-m0-iso-i0-pinyin