gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

match.go (5824B)


      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package language
      6 
      7 import "errors"
      8 
      9 type scriptRegionFlags uint8
     10 
     11 const (
     12 	isList = 1 << iota
     13 	scriptInFrom
     14 	regionInFrom
     15 )
     16 
     17 func (t *Tag) setUndefinedLang(id Language) {
     18 	if t.LangID == 0 {
     19 		t.LangID = id
     20 	}
     21 }
     22 
     23 func (t *Tag) setUndefinedScript(id Script) {
     24 	if t.ScriptID == 0 {
     25 		t.ScriptID = id
     26 	}
     27 }
     28 
     29 func (t *Tag) setUndefinedRegion(id Region) {
     30 	if t.RegionID == 0 || t.RegionID.Contains(id) {
     31 		t.RegionID = id
     32 	}
     33 }
     34 
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. It is a sentinel value:
// addTags returns it when none of its table lookups apply to the tag.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
     38 
     39 // addLikelySubtags sets subtags to their most likely value, given the locale.
     40 // In most cases this means setting fields for unknown values, but in some
     41 // cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
     42 // if the given locale cannot be expanded.
     43 func (t Tag) addLikelySubtags() (Tag, error) {
     44 	id, err := addTags(t)
     45 	if err != nil {
     46 		return t, err
     47 	} else if id.equalTags(t) {
     48 		return t, nil
     49 	}
     50 	id.RemakeString()
     51 	return id, nil
     52 }
     53 
     54 // specializeRegion attempts to specialize a group region.
     55 func specializeRegion(t *Tag) bool {
     56 	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
     57 		x := likelyRegionGroup[i]
     58 		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
     59 			t.RegionID = Region(x.region)
     60 		}
     61 		return true
     62 	}
     63 	return false
     64 }
     65 
     66 // Maximize returns a new tag with missing tags filled in.
     67 func (t Tag) Maximize() (Tag, error) {
     68 	return addTags(t)
     69 }
     70 
// addTags fills in the unspecified language, script and region of t from the
// likely-subtags tables and returns the resulting tag. t is passed by value,
// so the caller's tag is not modified.
//
// The lookups run from most to least specific and the first one that matches
// decides the result:
//
//  1. Private-use tags are returned unchanged.
//  2. Script and region both set: if the language is also set, only
//     specializeRegion is applied; otherwise likelyRegion is searched for an
//     entry with the same script to supply the language.
//  3. Language set: likelyLang/likelyLangList are searched using the script
//     or, failing that, the region.
//  4. Language unset: likelyScript is tried for the script, then
//     likelyRegionGroup/likelyRegion for the region.
//  5. Fallback: lookup by language alone; an unset language defaults to _en.
//
// ErrMissingLikelyTagsData is returned when none of the above applies, which
// happens only for languages at or above langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.IsPrivateUse() {
		return t, nil
	}
	if t.ScriptID != 0 && t.RegionID != 0 {
		if t.LangID != 0 {
			// already fully specified
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		list := likelyRegion[t.RegionID : t.RegionID+1]
		if x := list[0]; x.flags&isList != 0 {
			// For list entries, lang holds the start offset into
			// likelyRegionList and script holds the number of entries.
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if Script(x.script) == t.ScriptID {
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
	}
	if t.LangID != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.LangID < langNoIndexOffset {
			x := likelyLang[t.LangID]
			if x.flags&isList != 0 {
				// For list entries, region holds the start offset into
				// likelyLangList and script holds the number of entries.
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.ScriptID != 0 {
					for _, x := range list {
						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
							t.setUndefinedRegion(Region(x.region))
							return t, nil
						}
					}
				} else if t.RegionID != 0 {
					// count is the number of candidate entries seen;
					// goodScript stays true while every candidate agrees
					// with the first script picked up in tt.
					count := 0
					goodScript := true
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
							tt.RegionID = Region(x.region)
							tt.setUndefinedScript(Script(x.script))
							goodScript = goodScript && tt.ScriptID == Script(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.ScriptID = tt.ScriptID
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.ScriptID != 0 {
			x := likelyScript[t.ScriptID]
			if x.region != 0 {
				t.setUndefinedRegion(Region(x.region))
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.RegionID != 0 {
			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					t.RegionID = Region(x.region)
				}
				// No return here: control continues with the language
				// lookup below.
			} else {
				x := likelyRegion[t.RegionID]
				if x.flags&isList != 0 {
					// Only the first entry of the list is considered.
					x = likelyRegionList[x.lang]
				}
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					return t, nil
				}
			}
		}
	}

	// Search matches for lang.
	if t.LangID < langNoIndexOffset {
		x := likelyLang[t.LangID]
		if x.flags&isList != 0 {
			// Only the first entry of the list is considered.
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(Script(x.script))
			t.setUndefinedRegion(Region(x.region))
		}
		specializeRegion(&t)
		if t.LangID == 0 {
			t.LangID = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}
    188 
    189 func (t *Tag) setTagsFrom(id Tag) {
    190 	t.LangID = id.LangID
    191 	t.ScriptID = id.ScriptID
    192 	t.RegionID = id.RegionID
    193 }
    194 
    195 // minimize removes the region or script subtags from t such that
    196 // t.addLikelySubtags() == t.minimize().addLikelySubtags().
    197 func (t Tag) minimize() (Tag, error) {
    198 	t, err := minimizeTags(t)
    199 	if err != nil {
    200 		return t, err
    201 	}
    202 	t.RemakeString()
    203 	return t, nil
    204 }
    205 
    206 // minimizeTags mimics the behavior of the ICU 51 C implementation.
    207 func minimizeTags(t Tag) (Tag, error) {
    208 	if t.equalTags(Und) {
    209 		return t, nil
    210 	}
    211 	max, err := addTags(t)
    212 	if err != nil {
    213 		return t, err
    214 	}
    215 	for _, id := range [...]Tag{
    216 		{LangID: t.LangID},
    217 		{LangID: t.LangID, RegionID: t.RegionID},
    218 		{LangID: t.LangID, ScriptID: t.ScriptID},
    219 	} {
    220 		if x, err := addTags(id); err == nil && max.equalTags(x) {
    221 			t.setTagsFrom(id)
    222 			break
    223 		}
    224 	}
    225 	return t, nil
    226 }