match.go (5824B)
1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import "errors" 8 9 type scriptRegionFlags uint8 10 11 const ( 12 isList = 1 << iota 13 scriptInFrom 14 regionInFrom 15 ) 16 17 func (t *Tag) setUndefinedLang(id Language) { 18 if t.LangID == 0 { 19 t.LangID = id 20 } 21 } 22 23 func (t *Tag) setUndefinedScript(id Script) { 24 if t.ScriptID == 0 { 25 t.ScriptID = id 26 } 27 } 28 29 func (t *Tag) setUndefinedRegion(id Region) { 30 if t.RegionID == 0 || t.RegionID.Contains(id) { 31 t.RegionID = id 32 } 33 } 34 35 // ErrMissingLikelyTagsData indicates no information was available 36 // to compute likely values of missing tags. 37 var ErrMissingLikelyTagsData = errors.New("missing likely tags data") 38 39 // addLikelySubtags sets subtags to their most likely value, given the locale. 40 // In most cases this means setting fields for unknown values, but in some 41 // cases it may alter a value. It returns an ErrMissingLikelyTagsData error 42 // if the given locale cannot be expanded. 43 func (t Tag) addLikelySubtags() (Tag, error) { 44 id, err := addTags(t) 45 if err != nil { 46 return t, err 47 } else if id.equalTags(t) { 48 return t, nil 49 } 50 id.RemakeString() 51 return id, nil 52 } 53 54 // specializeRegion attempts to specialize a group region. 55 func specializeRegion(t *Tag) bool { 56 if i := regionInclusion[t.RegionID]; i < nRegionGroups { 57 x := likelyRegionGroup[i] 58 if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID { 59 t.RegionID = Region(x.region) 60 } 61 return true 62 } 63 return false 64 } 65 66 // Maximize returns a new tag with missing tags filled in. 67 func (t Tag) Maximize() (Tag, error) { 68 return addTags(t) 69 } 70 71 func addTags(t Tag) (Tag, error) { 72 // We leave private use identifiers alone. 73 if t.IsPrivateUse() { 74 return t, nil 75 } 76 if t.ScriptID != 0 && t.RegionID != 0 { 77 if t.LangID != 0 { 78 // already fully specified 79 specializeRegion(&t) 80 return t, nil 81 } 82 // Search matches for und-script-region. Note that for these cases 83 // region will never be a group so there is no need to check for this. 84 list := likelyRegion[t.RegionID : t.RegionID+1] 85 if x := list[0]; x.flags&isList != 0 { 86 list = likelyRegionList[x.lang : x.lang+uint16(x.script)] 87 } 88 for _, x := range list { 89 // Deviating from the spec. See match_test.go for details. 90 if Script(x.script) == t.ScriptID { 91 t.setUndefinedLang(Language(x.lang)) 92 return t, nil 93 } 94 } 95 } 96 if t.LangID != 0 { 97 // Search matches for lang-script and lang-region, where lang != und. 98 if t.LangID < langNoIndexOffset { 99 x := likelyLang[t.LangID] 100 if x.flags&isList != 0 { 101 list := likelyLangList[x.region : x.region+uint16(x.script)] 102 if t.ScriptID != 0 { 103 for _, x := range list { 104 if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 { 105 t.setUndefinedRegion(Region(x.region)) 106 return t, nil 107 } 108 } 109 } else if t.RegionID != 0 { 110 count := 0 111 goodScript := true 112 tt := t 113 for _, x := range list { 114 // We visit all entries for which the script was not 115 // defined, including the ones where the region was not 116 // defined. This allows for proper disambiguation within 117 // regions. 118 if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) { 119 tt.RegionID = Region(x.region) 120 tt.setUndefinedScript(Script(x.script)) 121 goodScript = goodScript && tt.ScriptID == Script(x.script) 122 count++ 123 } 124 } 125 if count == 1 { 126 return tt, nil 127 } 128 // Even if we fail to find a unique Region, we might have 129 // an unambiguous script. 130 if goodScript { 131 t.ScriptID = tt.ScriptID 132 } 133 } 134 } 135 } 136 } else { 137 // Search matches for und-script. 138 if t.ScriptID != 0 { 139 x := likelyScript[t.ScriptID] 140 if x.region != 0 { 141 t.setUndefinedRegion(Region(x.region)) 142 t.setUndefinedLang(Language(x.lang)) 143 return t, nil 144 } 145 } 146 // Search matches for und-region. If und-script-region exists, it would 147 // have been found earlier. 148 if t.RegionID != 0 { 149 if i := regionInclusion[t.RegionID]; i < nRegionGroups { 150 x := likelyRegionGroup[i] 151 if x.region != 0 { 152 t.setUndefinedLang(Language(x.lang)) 153 t.setUndefinedScript(Script(x.script)) 154 t.RegionID = Region(x.region) 155 } 156 } else { 157 x := likelyRegion[t.RegionID] 158 if x.flags&isList != 0 { 159 x = likelyRegionList[x.lang] 160 } 161 if x.script != 0 && x.flags != scriptInFrom { 162 t.setUndefinedLang(Language(x.lang)) 163 t.setUndefinedScript(Script(x.script)) 164 return t, nil 165 } 166 } 167 } 168 } 169 170 // Search matches for lang. 171 if t.LangID < langNoIndexOffset { 172 x := likelyLang[t.LangID] 173 if x.flags&isList != 0 { 174 x = likelyLangList[x.region] 175 } 176 if x.region != 0 { 177 t.setUndefinedScript(Script(x.script)) 178 t.setUndefinedRegion(Region(x.region)) 179 } 180 specializeRegion(&t) 181 if t.LangID == 0 { 182 t.LangID = _en // default language 183 } 184 return t, nil 185 } 186 return t, ErrMissingLikelyTagsData 187 } 188 189 func (t *Tag) setTagsFrom(id Tag) { 190 t.LangID = id.LangID 191 t.ScriptID = id.ScriptID 192 t.RegionID = id.RegionID 193 } 194 195 // minimize removes the region or script subtags from t such that 196 // t.addLikelySubtags() == t.minimize().addLikelySubtags(). 197 func (t Tag) minimize() (Tag, error) { 198 t, err := minimizeTags(t) 199 if err != nil { 200 return t, err 201 } 202 t.RemakeString() 203 return t, nil 204 } 205 206 // minimizeTags mimics the behavior of the ICU 51 C implementation. 207 func minimizeTags(t Tag) (Tag, error) { 208 if t.equalTags(Und) { 209 return t, nil 210 } 211 max, err := addTags(t) 212 if err != nil { 213 return t, err 214 } 215 for _, id := range [...]Tag{ 216 {LangID: t.LangID}, 217 {LangID: t.LangID, RegionID: t.RegionID}, 218 {LangID: t.LangID, ScriptID: t.ScriptID}, 219 } { 220 if x, err := addTags(id); err == nil && max.equalTags(x) { 221 t.setTagsFrom(id) 222 break 223 } 224 } 225 return t, nil 226 }