summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text
diff options
context:
space:
mode:
authorDawid Dziurla <dawidd0811@gmail.com>2019-10-31 12:47:53 +0100
committerJesse Duffield <jessedduffield@gmail.com>2019-11-05 19:22:01 +1100
commit66eaaf9cbb2b7d06790faac1f1c8f4cc35495b5b (patch)
treea7dd35f3bbfe0488e75a4ef15f1a1b7d31222076 /vendor/golang.org/x/text
parent87ac193b5e3b146b5689aea85a0fd0797bccee3d (diff)
go mod vendor
Diffstat (limited to 'vendor/golang.org/x/text')
-rw-r--r--vendor/golang.org/x/text/internal/language/compact/gen.go64
-rw-r--r--vendor/golang.org/x/text/internal/language/compact/gen_index.go113
-rw-r--r--vendor/golang.org/x/text/internal/language/compact/gen_parents.go54
-rw-r--r--vendor/golang.org/x/text/internal/language/gen.go1520
-rw-r--r--vendor/golang.org/x/text/internal/language/gen_common.go20
-rw-r--r--vendor/golang.org/x/text/language/gen.go305
-rw-r--r--vendor/golang.org/x/text/unicode/norm/maketables.go986
-rw-r--r--vendor/golang.org/x/text/unicode/norm/triegen.go117
8 files changed, 0 insertions, 3179 deletions
diff --git a/vendor/golang.org/x/text/internal/language/compact/gen.go b/vendor/golang.org/x/text/internal/language/compact/gen.go
deleted file mode 100644
index 0c36a052f..000000000
--- a/vendor/golang.org/x/text/internal/language/compact/gen.go
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-// Language tag table generator.
-// Data read from the web.
-
-package main
-
-import (
- "flag"
- "fmt"
- "log"
-
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/unicode/cldr"
-)
-
-var (
- test = flag.Bool("test",
- false,
- "test existing tables; can be used to compare web data with package data.")
- outputFile = flag.String("output",
- "tables.go",
- "output file for generated tables")
-)
-
-func main() {
- gen.Init()
-
- w := gen.NewCodeWriter()
- defer w.WriteGoFile("tables.go", "compact")
-
- fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
-
- b := newBuilder(w)
- gen.WriteCLDRVersion(w)
-
- b.writeCompactIndex()
-}
-
-type builder struct {
- w *gen.CodeWriter
- data *cldr.CLDR
- supp *cldr.SupplementalData
-}
-
-func newBuilder(w *gen.CodeWriter) *builder {
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
- d := &cldr.Decoder{}
- data, err := d.DecodeZip(r)
- if err != nil {
- log.Fatal(err)
- }
- b := builder{
- w: w,
- data: data,
- supp: data.Supplemental(),
- }
- return &b
-}
diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_index.go b/vendor/golang.org/x/text/internal/language/compact/gen_index.go
deleted file mode 100644
index 136cefaf0..000000000
--- a/vendor/golang.org/x/text/internal/language/compact/gen_index.go
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This file generates derivative tables based on the language package itself.
-
-import (
- "fmt"
- "log"
- "sort"
- "strings"
-
- "golang.org/x/text/internal/language"
-)
-
-// Compact indices:
-// Note -va-X variants only apply to localization variants.
-// BCP variants only ever apply to language.
-// The only ambiguity between tags is with regions.
-
-func (b *builder) writeCompactIndex() {
- // Collect all language tags for which we have any data in CLDR.
- m := map[language.Tag]bool{}
- for _, lang := range b.data.Locales() {
- // We include all locales unconditionally to be consistent with en_US.
- // We want en_US, even though it has no data associated with it.
-
- // TODO: put any of the languages for which no data exists at the end
- // of the index. This allows all components based on ICU to use that
- // as the cutoff point.
- // if x := data.RawLDML(lang); false ||
- // x.LocaleDisplayNames != nil ||
- // x.Characters != nil ||
- // x.Delimiters != nil ||
- // x.Measurement != nil ||
- // x.Dates != nil ||
- // x.Numbers != nil ||
- // x.Units != nil ||
- // x.ListPatterns != nil ||
- // x.Collations != nil ||
- // x.Segmentations != nil ||
- // x.Rbnf != nil ||
- // x.Annotations != nil ||
- // x.Metadata != nil {
-
- // TODO: support POSIX natively, albeit non-standard.
- tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
- m[tag] = true
- // }
- }
-
- // TODO: plural rules are also defined for the deprecated tags:
- // iw mo sh tl
- // Consider removing these as compact tags.
-
- // Include locales for plural rules, which uses a different structure.
- for _, plurals := range b.supp.Plurals {
- for _, rules := range plurals.PluralRules {
- for _, lang := range strings.Split(rules.Locales, " ") {
- m[language.Make(lang)] = true
- }
- }
- }
-
- var coreTags []language.CompactCoreInfo
- var special []string
-
- for t := range m {
- if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
- log.Fatalf("Unexpected extension %v in %v", x, t)
- }
- if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
- cci, ok := language.GetCompactCore(t)
- if !ok {
- log.Fatalf("Locale for non-basic language %q", t)
- }
- coreTags = append(coreTags, cci)
- } else {
- special = append(special, t.String())
- }
- }
-
- w := b.w
-
- sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
- sort.Strings(special)
-
- w.WriteComment(`
- NumCompactTags is the number of common tags. The maximum tag is
- NumCompactTags-1.`)
- w.WriteConst("NumCompactTags", len(m))
-
- fmt.Fprintln(w, "const (")
- for i, t := range coreTags {
- fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
- }
- for i, t := range special {
- fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
- }
- fmt.Fprintln(w, ")")
-
- w.WriteVar("coreTags", coreTags)
-
- w.WriteConst("specialTagsStr", strings.Join(special, " "))
-}
-
-func ident(s string) string {
- return strings.Replace(s, "-", "", -1) + "Index"
-}
diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go b/vendor/golang.org/x/text/internal/language/compact/gen_parents.go
deleted file mode 100644
index 9543d5832..000000000
--- a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
- "log"
-
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/internal/language"
- "golang.org/x/text/internal/language/compact"
- "golang.org/x/text/unicode/cldr"
-)
-
-func main() {
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
-
- d := &cldr.Decoder{}
- data, err := d.DecodeZip(r)
- if err != nil {
- log.Fatalf("DecodeZip: %v", err)
- }
-
- w := gen.NewCodeWriter()
- defer w.WriteGoFile("parents.go", "compact")
-
- // Create parents table.
- type ID uint16
- parents := make([]ID, compact.NumCompactTags)
- for _, loc := range data.Locales() {
- tag := language.MustParse(loc)
- index, ok := compact.FromTag(tag)
- if !ok {
- continue
- }
- parentIndex := compact.ID(0) // und
- for p := tag.Parent(); p != language.Und; p = p.Parent() {
- if x, ok := compact.FromTag(p); ok {
- parentIndex = x
- break
- }
- }
- parents[index] = ID(parentIndex)
- }
-
- w.WriteComment(`
- parents maps a compact index of a tag to the compact index of the parent of
- this tag.`)
- w.WriteVar("parents", parents)
-}
diff --git a/vendor/golang.org/x/text/internal/language/gen.go b/vendor/golang.org/x/text/internal/language/gen.go
deleted file mode 100644
index cdcc7febc..000000000
--- a/vendor/golang.org/x/text/internal/language/gen.go
+++ /dev/null
@@ -1,1520 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-// Language tag table generator.
-// Data read from the web.
-
-package main
-
-import (
- "bufio"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "math"
- "reflect"
- "regexp"
- "sort"
- "strconv"
- "strings"
-
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/internal/tag"
- "golang.org/x/text/unicode/cldr"
-)
-
-var (
- test = flag.Bool("test",
- false,
- "test existing tables; can be used to compare web data with package data.")
- outputFile = flag.String("output",
- "tables.go",
- "output file for generated tables")
-)
-
-var comment = []string{
- `
-lang holds an alphabetically sorted list of ISO-639 language identifiers.
-All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag.
-For 2-byte language identifiers, the two successive bytes have the following meaning:
- - if the first letter of the 2- and 3-letter ISO codes are the same:
- the second and third letter of the 3-letter ISO code.
- - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
-For 3-byte language identifiers the 4th byte is 0.`,
- `
-langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
-in lookup tables. The language ids for these language codes are derived directly
-from the letters and are not consecutive.`,
- `
-altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
-to 2-letter language codes that cannot be derived using the method described above.
-Each 3-letter code is followed by its 1-byte langID.`,
- `
-altLangIndex is used to convert indexes in altLangISO3 to langIDs.`,
- `
-AliasMap maps langIDs to their suggested replacements.`,
- `
-script is an alphabetically sorted list of ISO 15924 codes. The index
-of the script in the string, divided by 4, is the internal scriptID.`,
- `
-isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
-for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
-the UN.M49 codes used for groups.)`,
- `
-regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
-Each 2-letter codes is followed by two bytes with the following meaning:
- - [A-Z}{2}: the first letter of the 2-letter code plus these two
- letters form the 3-letter ISO code.
- - 0, n: index into altRegionISO3.`,
- `
-regionTypes defines the status of a region for various standards.`,
- `
-m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
-codes indicating collections of regions.`,
- `
-m49Index gives indexes into fromM49 based on the three most significant bits
-of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in
- fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]]
-for an entry where the first 7 bits match the 7 lsb of the UN.M49 code.
-The region code is stored in the 9 lsb of the indexed value.`,
- `
-fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`,
- `
-altRegionISO3 holds a list of 3-letter region codes that cannot be
-mapped to 2-letter codes using the default algorithm. This is a short list.`,
- `
-altRegionIDs holds a list of regionIDs the positions of which match those
-of the 3-letter ISO codes in altRegionISO3.`,
- `
-variantNumSpecialized is the number of specialized variants in variants.`,
- `
-suppressScript is an index from langID to the dominant script for that language,
-if it exists. If a script is given, it should be suppressed from the language tag.`,
- `
-likelyLang is a lookup table, indexed by langID, for the most likely
-scripts and regions given incomplete information. If more entries exist for a
-given language, region and script are the index and size respectively
-of the list in likelyLangList.`,
- `
-likelyLangList holds lists info associated with likelyLang.`,
- `
-likelyRegion is a lookup table, indexed by regionID, for the most likely
-languages and scripts given incomplete information. If more entries exist
-for a given regionID, lang and script are the index and size respectively
-of the list in likelyRegionList.
-TODO: exclude containers and user-definable regions from the list.`,
- `
-likelyRegionList holds lists info associated with likelyRegion.`,
- `
-likelyScript is a lookup table, indexed by scriptID, for the most likely
-languages and regions given a script.`,
- `
-nRegionGroups is the number of region groups.`,
- `
-regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
-where each set holds all groupings that are directly connected in a region
-containment graph.`,
- `
-regionInclusionBits is an array of bit vectors where every vector represents
-a set of region groupings. These sets are used to compute the distance
-between two regions for the purpose of language matching.`,
- `
-regionInclusionNext marks, for each entry in regionInclusionBits, the set of
-all groups that are reachable from the groups set in the respective entry.`,
-}
-
-// TODO: consider changing some of these structures to tries. This can reduce
-// memory, but may increase the need for memory allocations. This could be
-// mitigated if we can piggyback on language tags for common cases.
-
-func failOnError(e error) {
- if e != nil {
- log.Panic(e)
- }
-}
-
-type setType int
-
-const (
- Indexed setType = 1 + iota // all elements must be of same size
- Linear
-)
-
-type stringSet struct {
- s []string
- sorted, frozen bool
-
- // We often need to update values after the creation of an index is completed.
- // We include a convenience map for keeping track of this.
- update map[string]string
- typ setType // used for checking.
-}
-
-func (ss *stringSet) clone() stringSet {
- c := *ss
- c.s = append([]string(nil), c.s...)
- return c
-}
-
-func (ss *stringSet) setType(t setType) {
- if ss.typ != t && ss.typ != 0 {
- log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
- }
-}
-
-// parse parses a whitespace-separated string and initializes ss with its
-// components.
-func (ss *stringSet) parse(s string) {
- scan := bufio.NewScanner(strings.NewReader(s))
- scan.Split(bufio.ScanWords)
- for scan.Scan() {
- ss.add(scan.Text())
- }
-}
-
-func (ss *stringSet) assertChangeable() {
- if ss.frozen {
- log.Panic("attempt to modify a frozen stringSet")
- }
-}
-
-func (ss *stringSet) add(s string) {
- ss.assertChangeable()
- ss.s = append(ss.s, s)
- ss.sorted = ss.frozen
-}
-
-func (ss *stringSet) freeze() {
- ss.compact()
- ss.frozen = true
-}
-
-func (ss *stringSet) compact() {
- if ss.sorted {
- return
- }
- a := ss.s
- sort.Strings(a)
- k := 0
- for i := 1; i < len(a); i++ {
- if a[k] != a[i] {
- a[k+1] = a[i]
- k++
- }
- }
- ss.s = a[:k+1]
- ss.sorted = ss.frozen
-}
-
-type funcSorter struct {
- fn func(a, b string) bool
- sort.StringSlice
-}
-
-func (s funcSorter) Less(i, j int) bool {
- return s.fn(s.StringSlice[i], s.StringSlice[j])
-}
-
-func (ss *stringSet) sortFunc(f func(a, b string) bool) {
- ss.compact()
- sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
-}
-
-func (ss *stringSet) remove(s string) {
- ss.assertChangeable()
- if i, ok := ss.find(s); ok {
- copy(ss.s[i:], ss.s[i+1:])
- ss.s = ss.s[:len(ss.s)-1]
- }
-}
-
-func (ss *stringSet) replace(ol, nu string) {
- ss.s[ss.index(ol)] = nu
- ss.sorted = ss.frozen
-}
-
-func (ss *stringSet) index(s string) int {
- ss.setType(Indexed)
- i, ok := ss.find(s)
- if !ok {
- if i < len(ss.s) {
- log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i])
- }
- log.Panicf("find: item %q is not in list", s)
-
- }
- return i
-}
-
-func (ss *stringSet) find(s string) (int, bool) {
- ss.compact()
- i := sort.SearchStrings(ss.s, s)
- return i, i != len(ss.s) && ss.s[i] == s
-}
-
-func (ss *stringSet) slice() []string {
- ss.compact()
- return ss.s
-}
-
-func (ss *stringSet) updateLater(v, key string) {
- if ss.update == nil {
- ss.update = map[string]string{}
- }
- ss.update[v] = key
-}
-
-// join joins the string and ensures that all entries are of the same length.
-func (ss *stringSet) join() string {
- ss.setType(Indexed)
- n := len(ss.s[0])
- for _, s := range ss.s {
- if len(s) != n {
- log.Panicf("join: not all entries are of the same length: %q", s)
- }
- }
- ss.s = append(ss.s, strings.Repeat("\xff", n))
- return strings.Join(ss.s, "")
-}
-
-// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
-// All types use the same entry.
-// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
-// fields.
-type ianaEntry struct {
- typ string
- description []string
- scope string
- added string
- preferred string
- deprecated string
- suppressScript string
- macro string
- prefix []string
-}
-
-type builder struct {
- w *gen.CodeWriter
- hw io.Writer // MultiWriter for w and w.Hash
- data *cldr.CLDR
- supp *cldr.SupplementalData
-
- // indices
- locale stringSet // common locales
- lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data
- langNoIndex stringSet // 3-letter ISO codes with no associated data
- script stringSet // 4-letter ISO codes
- region stringSet // 2-letter ISO or 3-digit UN M49 codes
- variant stringSet // 4-8-alphanumeric variant code.
-
- // Region codes that are groups with their corresponding group IDs.
- groups map[int]index
-
- // langInfo
- registry map[string]*ianaEntry
-}
-
-type index uint
-
-func newBuilder(w *gen.CodeWriter) *builder {
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
- d := &cldr.Decoder{}
- data, err := d.DecodeZip(r)
- failOnError(err)
- b := builder{
- w: w,
- hw: io.MultiWriter(w, w.Hash),
- data: data,
- supp: data.Supplemental(),
- }
- b.parseRegistry()
- return &b
-}
-
-func (b *builder) parseRegistry() {
- r := gen.OpenIANAFile("assignments/language-subtag-registry")
- defer r.Close()
- b.registry = make(map[string]*ianaEntry)
-
- scan := bufio.NewScanner(r)
- scan.Split(bufio.ScanWords)
- var record *ianaEntry
- for more := scan.Scan(); more; {
- key := scan.Text()
- more = scan.Scan()
- value := scan.Text()
- switch key {
- case "Type:":
- record = &ianaEntry{typ: value}
- case "Subtag:", "Tag:":
- if s := strings.SplitN(value, "..", 2); len(s) > 1 {
- for a := s[0]; a <= s[1]; a = inc(a) {
- b.addToRegistry(a, record)
- }
- } else {
- b.addToRegistry(value, record)
- }
- case "Suppress-Script:":
- record.suppressScript = value
- case "Added:":
- record.added = value
- case "Deprecated:":
- record.deprecated = value
- case "Macrolanguage:":
- record.macro = value
- case "Preferred-Value:":
- record.preferred = value
- case "Prefix:":
- record.prefix = append(record.prefix, value)
- case "Scope:":
- record.scope = value
- case "Description:":
- buf := []byte(value)
- for more = scan.Scan(); more; more = scan.Scan() {
- b := scan.Bytes()
- if b[0] == '%' || b[len(b)-1] == ':' {
- break
- }
- buf = append(buf, ' ')
- buf = append(buf, b...)
- }
- record.description = append(record.description, string(buf))
- continue
- default:
- continue
- }
- more = scan.Scan()
- }
- if scan.Err() != nil {
- log.Panic(scan.Err())
- }
-}
-
-func (b *builder) addToRegistry(key string, entry *ianaEntry) {
- if info, ok := b.registry[key]; ok {
- if info.typ != "language" || entry.typ != "extlang" {
- log.Fatalf("parseRegistry: tag %q already exists", key)
- }
- } else {
- b.registry[key] = entry
- }
-}
-
-var commentIndex = make(map[string]string)
-
-func init() {
- for _, s := range comment {
- key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
- commentIndex[key] = s
- }
-}
-
-func (b *builder) comment(name string) {
- if s := commentIndex[name]; len(s) > 0 {
- b.w.WriteComment(s)
- } else {
- fmt.Fprintln(b.w)
- }
-}
-
-func (b *builder) pf(f string, x ...interface{}) {
- fmt.Fprintf(b.hw, f, x...)
- fmt.Fprint(b.hw, "\n")
-}
-
-func (b *builder) p(x ...interface{}) {
- fmt.Fprintln(b.hw, x...)
-}
-
-func (b *builder) addSize(s int) {
- b.w.Size += s
- b.pf("// Size: %d bytes", s)
-}
-
-func (b *builder) writeConst(name string, x interface{}) {
- b.comment(name)
- b.w.WriteConst(name, x)
-}
-
-// writeConsts computes f(v) for all v in values and writes the results
-// as constants named _v to a single constant block.
-func (b *builder) writeConsts(f func(string) int, values ...string) {
- b.pf("const (")
- for _, v := range values {
- b.pf("\t_%s = %v", v, f(v))
- }
- b.pf(")")
-}
-
-// writeType writes the type of the given value, which must be a struct.
-func (b *builder) writeType(value interface{}) {
- b.comment(reflect.TypeOf(value).Name())
- b.w.WriteType(value)
-}
-
-func (b *builder) writeSlice(name string, ss interface{}) {
- b.writeSliceAddSize(name, 0, ss)
-}
-
-func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
- b.comment(name)
- b.w.Size += extraSize
- v := reflect.ValueOf(ss)
- t := v.Type().Elem()
- b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
-
- fmt.Fprintf(b.w, "var %s = ", name)
- b.w.WriteArray(ss)
- b.p()
-}
-
-type FromTo struct {
- From, To uint16
-}
-
-func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) {
- ss.sortFunc(func(a, b string) bool {
- return index(a) < index(b)
- })
- m := []FromTo{}
- for _, s := range ss.s {
- m = append(m, FromTo{index(s), index(ss.update[s])})
- }
- b.writeSlice(name, m)
-}
-
-const base = 'z' - 'a' + 1
-
-func strToInt(s string) uint {
- v := uint(0)
- for i := 0; i < len(s); i++ {
- v *= base
- v += uint(s[i] - 'a')
- }
- return v
-}
-
-// converts the given integer to the original ASCII string passed to strToInt.
-// len(s) must match the number of characters obtained.
-func intToStr(v uint, s []byte) {
- for i := len(s) - 1; i >= 0; i-- {
- s[i] = byte(v%base) + 'a'
- v /= base
- }
-}
-
-func (b *builder) writeBitVector(name string, ss []string) {
- vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
- for _, s := range ss {
- v := strToInt(s)
- vec[v/8] |= 1 << (v % 8)
- }
- b.writeSlice(name, vec)
-}
-
-// TODO: convert this type into a list or two-stage trie.
-func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
- b.comment(name)
- v := reflect.ValueOf(m)
- sz := v.Len() * (2 + int(v.Type().Key().Size()))
- for _, k := range m {
- sz += len(k)
- }
- b.addSize(sz)
- keys := []string{}
- b.pf(`var %s = map[string]uint16{`, name)
- for k := range m {
- keys = append(keys, k)
- }
- sort.Strings(keys)
- for _, k := range keys {
- b.pf("\t%q: %v,", k, f(m[k]))
- }
- b.p("}")
-}
-
-func (b *builder) writeMap(name string, m interface{}) {
- b.comment(name)
- v := reflect.ValueOf(m)
- sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size()))
- b.addSize(sz)
- f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool {
- return strings.IndexRune("{}, ", r) != -1
- })
- sort.Strings(f[1:])
- b.pf(`var %s = %s{`, name, f[0])
- for _, kv := range f[1:] {
- b.pf("\t%s,", kv)
- }
- b.p("}")
-}
-
-func (b *builder) langIndex(s string) uint16 {
- if s == "und" {
- return 0
- }
- if i, ok := b.lang.find(s); ok {
- return uint16(i)
- }
- return uint16(strToInt(s)) + uint16(len(b.lang.s))
-}
-
-// inc advances the string to its lexicographical successor.
-func inc(s string) string {
- const maxTagLength = 4
- var buf [maxTagLength]byte
- intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)])
- for i := 0; i < len(s); i++ {
- if s[i] <= 'Z' {
- buf[i] -= 'a' - 'A'
- }
- }
- return string(buf[:len(s)])
-}
-
-func (b *builder) parseIndices() {
- meta := b.supp.Metadata
-
- for k, v := range b.registry {
- var ss *stringSet
- switch v.typ {
- case "language":
- if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
- b.lang.add(k)
- continue
- } else {
- ss = &b.langNoIndex
- }
- case "region":
- ss = &b.region
- case "script":
- ss = &b.script
- case "variant":
- ss = &b.variant
- default:
- continue
- }
- ss.add(k)
- }
- // Include any language for which there is data.
- for _, lang := range b.data.Locales() {
- if x := b.data.RawLDML(lang); false ||
- x.LocaleDisplayNames != nil ||
- x.Characters != nil ||
- x.Delimiters != nil ||
- x.Measurement != nil ||
- x.Dates != nil ||
- x.Numbers != nil ||
- x.Units != nil ||
- x.ListPatterns != nil ||
- x.Collations != nil ||
- x.Segmentations != nil ||
- x.Rbnf != nil ||
- x.Annotations != nil ||
- x.Metadata != nil {
-
- from := strings.Split(lang, "_")
- if lang := from[0]; lang != "root" {
- b.lang.add(lang)
- }
- }
- }
- // Include locales for plural rules, which uses a different structure.
- for _, plurals := range b.data.Supplemental().Plurals {
- for _, rules := range plurals.PluralRules {
- for _, lang := range strings.Split(rules.Locales, " ") {
- if lang = strings.Split(lang, "_")[0]; lang != "root" {
- b.lang.add(lang)
- }
- }
- }
- }
- // Include languages in likely subtags.
- for _, m := range b.supp.LikelySubtags.LikelySubtag {
- from := strings.Split(m.From, "_")
- b.lang.add(from[0])
- }
- // Include ISO-639 alpha-3 bibliographic entries.
- for _, a := range meta.Alias.LanguageAlias {
- if a.Reason == "bibliographic" {
- b.langNoIndex.add(a.Type)
- }
- }
- // Include regions in territoryAlias (not all are in the IANA registry!)
- for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
- if len(reg.Type) == 2 {
- b.region.add(reg.Type)
- }
- }
-
- for _, s := range b.lang.s {
- if len(s) == 3 {
- b.langNoIndex.remove(s)
- }
- }
- b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
- b.writeConst("NumScripts", len(b.script.slice()))
- b.writeConst("NumRegions", len(b.region.slice()))
-
- // Add dummy codes at the start of each list to represent "unspecified".
- b.lang.add("---")
- b.script.add("----")
- b.region.add("---")
-
- // common locales
- b.locale.parse(meta.DefaultContent.Locales)
-}
-
-// TODO: region inclusion data will probably not be use used in future matchers.
-
-func (b *builder) computeRegionGroups() {
- b.groups = make(map[int]index)
-
- // Create group indices.
- for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
- b.groups[i] = index(len(b.groups))
- }
- for _, g := range b.supp.TerritoryContainment.Group {
- // Skip UN and EURO zone as they are flattening the containment
- // relationship.
- if g.Type == "EZ" || g.Type == "UN" {
- continue
- }
- group := b.region.index(g.Type)
- if _, ok := b.groups[group]; !ok {
- b.groups[group] = index(len(b.groups))
- }
- }
- if len(b.groups) > 64 {
- log.Fatalf("only 64 groups supported, found %d", len(b.groups))
- }
- b.writeConst("nRegionGroups", len(b.groups))
-}
-
-var langConsts = []string{
- "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
- "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is",
- "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml",
- "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt",
- "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th",
- "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu",
-
- // constants for grandfathered tags (if not already defined)
- "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu",
- "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn",
-}
-
-// writeLanguage generates all tables needed for language canonicalization.
-func (b *builder) writeLanguage() {
- meta := b.supp.Metadata
-
- b.writeConst("nonCanonicalUnd", b.lang.index("und"))
- b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
- b.writeConst("langPrivateStart", b.langIndex("qaa"))
- b.writeConst("langPrivateEnd", b.langIndex("qtz"))
-
- // Get language codes that need to be mapped (overlong 3-letter codes,
- // deprecated 2-letter codes, legacy and grandfathered tags.)
- langAliasMap := stringSet{}
- aliasTypeMap := map[string]AliasType{}
-
- // altLangISO3 get the alternative ISO3 names that need to be mapped.
- altLangISO3 := stringSet{}
- // Add dummy start to avoid the use of index 0.
- altLangISO3.add("---")
- altLangISO3.updateLater("---", "aa")
-
- lang := b.lang.clone()
- for