gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

parse.go (7695B)


      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package language
      6 
      7 import (
      8 	"errors"
      9 	"sort"
     10 	"strconv"
     11 	"strings"
     12 
     13 	"golang.org/x/text/internal/language"
     14 )
     15 
     16 // ValueError is returned by any of the parsing functions when the
     17 // input is well-formed but the respective subtag is not recognized
     18 // as a valid value.
     19 type ValueError interface {
     20 	error
     21 
     22 	// Subtag returns the subtag for which the error occurred.
     23 	Subtag() string
     24 }
     25 
     26 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
     27 // failed it returns an error and any part of the tag that could be parsed.
     28 // If parsing succeeded but an unknown value was found, it returns
     29 // ValueError. The Tag returned in this case is just stripped of the unknown
     30 // value. All other values are preserved. It accepts tags in the BCP 47 format
     31 // and extensions to this standard defined in
     32 // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
     33 // The resulting tag is canonicalized using the default canonicalization type.
     34 func Parse(s string) (t Tag, err error) {
     35 	return Default.Parse(s)
     36 }
     37 
     38 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
     39 // failed it returns an error and any part of the tag that could be parsed.
     40 // If parsing succeeded but an unknown value was found, it returns
     41 // ValueError. The Tag returned in this case is just stripped of the unknown
     42 // value. All other values are preserved. It accepts tags in the BCP 47 format
     43 // and extensions to this standard defined in
     44 // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
     45 // The resulting tag is canonicalized using the canonicalization type c.
     46 func (c CanonType) Parse(s string) (t Tag, err error) {
     47 	defer func() {
     48 		if recover() != nil {
     49 			t = Tag{}
     50 			err = language.ErrSyntax
     51 		}
     52 	}()
     53 
     54 	tt, err := language.Parse(s)
     55 	if err != nil {
     56 		return makeTag(tt), err
     57 	}
     58 	tt, changed := canonicalize(c, tt)
     59 	if changed {
     60 		tt.RemakeString()
     61 	}
     62 	return makeTag(tt), err
     63 }
     64 
     65 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
     66 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
     67 // Base, Script or Region or slice of type Variant or Extension is passed more
     68 // than once, the latter will overwrite the former. Variants and Extensions are
     69 // accumulated, but if two extensions of the same type are passed, the latter
     70 // will replace the former. For -u extensions, though, the key-type pairs are
     71 // added, where later values overwrite older ones. A Tag overwrites all former
     72 // values and typically only makes sense as the first argument. The resulting
     73 // tag is returned after canonicalizing using the Default CanonType. If one or
     74 // more errors are encountered, one of the errors is returned.
     75 func Compose(part ...interface{}) (t Tag, err error) {
     76 	return Default.Compose(part...)
     77 }
     78 
     79 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
     80 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
     81 // Base, Script or Region or slice of type Variant or Extension is passed more
     82 // than once, the latter will overwrite the former. Variants and Extensions are
     83 // accumulated, but if two extensions of the same type are passed, the latter
     84 // will replace the former. For -u extensions, though, the key-type pairs are
     85 // added, where later values overwrite older ones. A Tag overwrites all former
     86 // values and typically only makes sense as the first argument. The resulting
     87 // tag is returned after canonicalizing using CanonType c. If one or more errors
     88 // are encountered, one of the errors is returned.
     89 func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
     90 	defer func() {
     91 		if recover() != nil {
     92 			t = Tag{}
     93 			err = language.ErrSyntax
     94 		}
     95 	}()
     96 
     97 	var b language.Builder
     98 	if err = update(&b, part...); err != nil {
     99 		return und, err
    100 	}
    101 	b.Tag, _ = canonicalize(c, b.Tag)
    102 	return makeTag(b.Make()), err
    103 }
    104 
    105 var errInvalidArgument = errors.New("invalid Extension or Variant")
    106 
    107 func update(b *language.Builder, part ...interface{}) (err error) {
    108 	for _, x := range part {
    109 		switch v := x.(type) {
    110 		case Tag:
    111 			b.SetTag(v.tag())
    112 		case Base:
    113 			b.Tag.LangID = v.langID
    114 		case Script:
    115 			b.Tag.ScriptID = v.scriptID
    116 		case Region:
    117 			b.Tag.RegionID = v.regionID
    118 		case Variant:
    119 			if v.variant == "" {
    120 				err = errInvalidArgument
    121 				break
    122 			}
    123 			b.AddVariant(v.variant)
    124 		case Extension:
    125 			if v.s == "" {
    126 				err = errInvalidArgument
    127 				break
    128 			}
    129 			b.SetExt(v.s)
    130 		case []Variant:
    131 			b.ClearVariants()
    132 			for _, v := range v {
    133 				b.AddVariant(v.variant)
    134 			}
    135 		case []Extension:
    136 			b.ClearExtensions()
    137 			for _, e := range v {
    138 				b.SetExt(e.s)
    139 			}
    140 		// TODO: support parsing of raw strings based on morphology or just extensions?
    141 		case error:
    142 			if v != nil {
    143 				err = v
    144 			}
    145 		}
    146 	}
    147 	return
    148 }
    149 
    150 var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
    151 var errTagListTooLarge = errors.New("tag list exceeds max length")
    152 
    153 // ParseAcceptLanguage parses the contents of an Accept-Language header as
    154 // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
    155 // a list of corresponding quality weights. It is more permissive than RFC 2616
    156 // and may return non-nil slices even if the input is not valid.
    157 // The Tags will be sorted by highest weight first and then by first occurrence.
    158 // Tags with a weight of zero will be dropped. An error will be returned if the
    159 // input could not be parsed.
    160 func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
    161 	defer func() {
    162 		if recover() != nil {
    163 			tag = nil
    164 			q = nil
    165 			err = language.ErrSyntax
    166 		}
    167 	}()
    168 
    169 	if strings.Count(s, "-") > 1000 {
    170 		return nil, nil, errTagListTooLarge
    171 	}
    172 
    173 	var entry string
    174 	for s != "" {
    175 		if entry, s = split(s, ','); entry == "" {
    176 			continue
    177 		}
    178 
    179 		entry, weight := split(entry, ';')
    180 
    181 		// Scan the language.
    182 		t, err := Parse(entry)
    183 		if err != nil {
    184 			id, ok := acceptFallback[entry]
    185 			if !ok {
    186 				return nil, nil, err
    187 			}
    188 			t = makeTag(language.Tag{LangID: id})
    189 		}
    190 
    191 		// Scan the optional weight.
    192 		w := 1.0
    193 		if weight != "" {
    194 			weight = consume(weight, 'q')
    195 			weight = consume(weight, '=')
    196 			// consume returns the empty string when a token could not be
    197 			// consumed, resulting in an error for ParseFloat.
    198 			if w, err = strconv.ParseFloat(weight, 32); err != nil {
    199 				return nil, nil, errInvalidWeight
    200 			}
    201 			// Drop tags with a quality weight of 0.
    202 			if w <= 0 {
    203 				continue
    204 			}
    205 		}
    206 
    207 		tag = append(tag, t)
    208 		q = append(q, float32(w))
    209 	}
    210 	sort.Stable(&tagSort{tag, q})
    211 	return tag, q, nil
    212 }
    213 
    214 // consume removes a leading token c from s and returns the result or the empty
    215 // string if there is no such token.
    216 func consume(s string, c byte) string {
    217 	if s == "" || s[0] != c {
    218 		return ""
    219 	}
    220 	return strings.TrimSpace(s[1:])
    221 }
    222 
    223 func split(s string, c byte) (head, tail string) {
    224 	if i := strings.IndexByte(s, c); i >= 0 {
    225 		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
    226 	}
    227 	return strings.TrimSpace(s), ""
    228 }
    229 
    230 // Add hack mapping to deal with a small number of cases that occur
    231 // in Accept-Language (with reasonable frequency).
    232 var acceptFallback = map[string]language.Language{
    233 	"english": _en,
    234 	"deutsch": _de,
    235 	"italian": _it,
    236 	"french":  _fr,
    237 	"*":       _mul, // defined in the spec to match all languages.
    238 }
    239 
    240 type tagSort struct {
    241 	tag []Tag
    242 	q   []float32
    243 }
    244 
    245 func (s *tagSort) Len() int {
    246 	return len(s.q)
    247 }
    248 
    249 func (s *tagSort) Less(i, j int) bool {
    250 	return s.q[i] > s.q[j]
    251 }
    252 
    253 func (s *tagSort) Swap(i, j int) {
    254 	s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
    255 	s.q[i], s.q[j] = s.q[j], s.q[i]
    256 }