context.go (9701B)
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cases 6 7 import "golang.org/x/text/transform" 8 9 // A context is used for iterating over source bytes, fetching case info and 10 // writing to a destination buffer. 11 // 12 // Casing operations may need more than one rune of context to decide how a rune 13 // should be cased. Casing implementations should call checkpoint on context 14 // whenever it is known to be safe to return the runes processed so far. 15 // 16 // It is recommended for implementations to not allow for more than 30 case 17 // ignorables as lookahead (analogous to the limit in norm) and to use state if 18 // unbounded lookahead is needed for cased runes. 19 type context struct { 20 dst, src []byte 21 atEOF bool 22 23 pDst int // pDst points past the last written rune in dst. 24 pSrc int // pSrc points to the start of the currently scanned rune. 25 26 // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. 27 nDst, nSrc int 28 err error 29 30 sz int // size of current rune 31 info info // case information of currently scanned rune 32 33 // State preserved across calls to Transform. 34 isMidWord bool // false if next cased letter needs to be title-cased. 35 } 36 37 func (c *context) Reset() { 38 c.isMidWord = false 39 } 40 41 // ret returns the return values for the Transform method. It checks whether 42 // there were insufficient bytes in src to complete and introduces an error 43 // accordingly, if necessary. 44 func (c *context) ret() (nDst, nSrc int, err error) { 45 if c.err != nil || c.nSrc == len(c.src) { 46 return c.nDst, c.nSrc, c.err 47 } 48 // This point is only reached by mappers if there was no short destination 49 // buffer. This means that the source buffer was exhausted and that c.sz was 50 // set to 0 by next. 51 if c.atEOF && c.pSrc == len(c.src) { 52 return c.pDst, c.pSrc, nil 53 } 54 return c.nDst, c.nSrc, transform.ErrShortSrc 55 } 56 57 // retSpan returns the return values for the Span method. It checks whether 58 // there were insufficient bytes in src to complete and introduces an error 59 // accordingly, if necessary. 60 func (c *context) retSpan() (n int, err error) { 61 _, nSrc, err := c.ret() 62 return nSrc, err 63 } 64 65 // checkpoint sets the return value buffer points for Transform to the current 66 // positions. 67 func (c *context) checkpoint() { 68 if c.err == nil { 69 c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz 70 } 71 } 72 73 // unreadRune causes the last rune read by next to be reread on the next 74 // invocation of next. Only one unreadRune may be called after a call to next. 75 func (c *context) unreadRune() { 76 c.sz = 0 77 } 78 79 func (c *context) next() bool { 80 c.pSrc += c.sz 81 if c.pSrc == len(c.src) || c.err != nil { 82 c.info, c.sz = 0, 0 83 return false 84 } 85 v, sz := trie.lookup(c.src[c.pSrc:]) 86 c.info, c.sz = info(v), sz 87 if c.sz == 0 { 88 if c.atEOF { 89 // A zero size means we have an incomplete rune. If we are atEOF, 90 // this means it is an illegal rune, which we will consume one 91 // byte at a time. 92 c.sz = 1 93 } else { 94 c.err = transform.ErrShortSrc 95 return false 96 } 97 } 98 return true 99 } 100 101 // writeBytes adds bytes to dst. 102 func (c *context) writeBytes(b []byte) bool { 103 if len(c.dst)-c.pDst < len(b) { 104 c.err = transform.ErrShortDst 105 return false 106 } 107 // This loop is faster than using copy. 108 for _, ch := range b { 109 c.dst[c.pDst] = ch 110 c.pDst++ 111 } 112 return true 113 } 114 115 // writeString writes the given string to dst. 116 func (c *context) writeString(s string) bool { 117 if len(c.dst)-c.pDst < len(s) { 118 c.err = transform.ErrShortDst 119 return false 120 } 121 // This loop is faster than using copy. 122 for i := 0; i < len(s); i++ { 123 c.dst[c.pDst] = s[i] 124 c.pDst++ 125 } 126 return true 127 } 128 129 // copy writes the current rune to dst. 130 func (c *context) copy() bool { 131 return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) 132 } 133 134 // copyXOR copies the current rune to dst and modifies it by applying the XOR 135 // pattern of the case info. It is the responsibility of the caller to ensure 136 // that this is a rune with a XOR pattern defined. 137 func (c *context) copyXOR() bool { 138 if !c.copy() { 139 return false 140 } 141 if c.info&xorIndexBit == 0 { 142 // Fast path for 6-bit XOR pattern, which covers most cases. 143 c.dst[c.pDst-1] ^= byte(c.info >> xorShift) 144 } else { 145 // Interpret XOR bits as an index. 146 // TODO: test performance for unrolling this loop. Verify that we have 147 // at least two bytes and at most three. 148 idx := c.info >> xorShift 149 for p := c.pDst - 1; ; p-- { 150 c.dst[p] ^= xorData[idx] 151 idx-- 152 if xorData[idx] == 0 { 153 break 154 } 155 } 156 } 157 return true 158 } 159 160 // hasPrefix returns true if src[pSrc:] starts with the given string. 161 func (c *context) hasPrefix(s string) bool { 162 b := c.src[c.pSrc:] 163 if len(b) < len(s) { 164 return false 165 } 166 for i, c := range b[:len(s)] { 167 if c != s[i] { 168 return false 169 } 170 } 171 return true 172 } 173 174 // caseType returns an info with only the case bits, normalized to either 175 // cLower, cUpper, cTitle or cUncased. 176 func (c *context) caseType() info { 177 cm := c.info & 0x7 178 if cm < 4 { 179 return cm 180 } 181 if cm >= cXORCase { 182 // xor the last bit of the rune with the case type bits. 183 b := c.src[c.pSrc+c.sz-1] 184 return info(b&1) ^ cm&0x3 185 } 186 if cm == cIgnorableCased { 187 return cLower 188 } 189 return cUncased 190 } 191 192 // lower writes the lowercase version of the current rune to dst. 193 func lower(c *context) bool { 194 ct := c.caseType() 195 if c.info&hasMappingMask == 0 || ct == cLower { 196 return c.copy() 197 } 198 if c.info&exceptionBit == 0 { 199 return c.copyXOR() 200 } 201 e := exceptions[c.info>>exceptionShift:] 202 offset := 2 + e[0]&lengthMask // size of header + fold string 203 if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { 204 return c.writeString(e[offset : offset+nLower]) 205 } 206 return c.copy() 207 } 208 209 func isLower(c *context) bool { 210 ct := c.caseType() 211 if c.info&hasMappingMask == 0 || ct == cLower { 212 return true 213 } 214 if c.info&exceptionBit == 0 { 215 c.err = transform.ErrEndOfSpan 216 return false 217 } 218 e := exceptions[c.info>>exceptionShift:] 219 if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { 220 c.err = transform.ErrEndOfSpan 221 return false 222 } 223 return true 224 } 225 226 // upper writes the uppercase version of the current rune to dst. 227 func upper(c *context) bool { 228 ct := c.caseType() 229 if c.info&hasMappingMask == 0 || ct == cUpper { 230 return c.copy() 231 } 232 if c.info&exceptionBit == 0 { 233 return c.copyXOR() 234 } 235 e := exceptions[c.info>>exceptionShift:] 236 offset := 2 + e[0]&lengthMask // size of header + fold string 237 // Get length of first special case mapping. 238 n := (e[1] >> lengthBits) & lengthMask 239 if ct == cTitle { 240 // The first special case mapping is for lower. Set n to the second. 241 if n == noChange { 242 n = 0 243 } 244 n, e = e[1]&lengthMask, e[n:] 245 } 246 if n != noChange { 247 return c.writeString(e[offset : offset+n]) 248 } 249 return c.copy() 250 } 251 252 // isUpper writes the isUppercase version of the current rune to dst. 253 func isUpper(c *context) bool { 254 ct := c.caseType() 255 if c.info&hasMappingMask == 0 || ct == cUpper { 256 return true 257 } 258 if c.info&exceptionBit == 0 { 259 c.err = transform.ErrEndOfSpan 260 return false 261 } 262 e := exceptions[c.info>>exceptionShift:] 263 // Get length of first special case mapping. 264 n := (e[1] >> lengthBits) & lengthMask 265 if ct == cTitle { 266 n = e[1] & lengthMask 267 } 268 if n != noChange { 269 c.err = transform.ErrEndOfSpan 270 return false 271 } 272 return true 273 } 274 275 // title writes the title case version of the current rune to dst. 276 func title(c *context) bool { 277 ct := c.caseType() 278 if c.info&hasMappingMask == 0 || ct == cTitle { 279 return c.copy() 280 } 281 if c.info&exceptionBit == 0 { 282 if ct == cLower { 283 return c.copyXOR() 284 } 285 return c.copy() 286 } 287 // Get the exception data. 288 e := exceptions[c.info>>exceptionShift:] 289 offset := 2 + e[0]&lengthMask // size of header + fold string 290 291 nFirst := (e[1] >> lengthBits) & lengthMask 292 if nTitle := e[1] & lengthMask; nTitle != noChange { 293 if nFirst != noChange { 294 e = e[nFirst:] 295 } 296 return c.writeString(e[offset : offset+nTitle]) 297 } 298 if ct == cLower && nFirst != noChange { 299 // Use the uppercase version instead. 300 return c.writeString(e[offset : offset+nFirst]) 301 } 302 // Already in correct case. 303 return c.copy() 304 } 305 306 // isTitle reports whether the current rune is in title case. 307 func isTitle(c *context) bool { 308 ct := c.caseType() 309 if c.info&hasMappingMask == 0 || ct == cTitle { 310 return true 311 } 312 if c.info&exceptionBit == 0 { 313 if ct == cLower { 314 c.err = transform.ErrEndOfSpan 315 return false 316 } 317 return true 318 } 319 // Get the exception data. 320 e := exceptions[c.info>>exceptionShift:] 321 if nTitle := e[1] & lengthMask; nTitle != noChange { 322 c.err = transform.ErrEndOfSpan 323 return false 324 } 325 nFirst := (e[1] >> lengthBits) & lengthMask 326 if ct == cLower && nFirst != noChange { 327 c.err = transform.ErrEndOfSpan 328 return false 329 } 330 return true 331 } 332 333 // foldFull writes the foldFull version of the current rune to dst. 334 func foldFull(c *context) bool { 335 if c.info&hasMappingMask == 0 { 336 return c.copy() 337 } 338 ct := c.caseType() 339 if c.info&exceptionBit == 0 { 340 if ct != cLower || c.info&inverseFoldBit != 0 { 341 return c.copyXOR() 342 } 343 return c.copy() 344 } 345 e := exceptions[c.info>>exceptionShift:] 346 n := e[0] & lengthMask 347 if n == 0 { 348 if ct == cLower { 349 return c.copy() 350 } 351 n = (e[1] >> lengthBits) & lengthMask 352 } 353 return c.writeString(e[2 : 2+n]) 354 } 355 356 // isFoldFull reports whether the current run is mapped to foldFull 357 func isFoldFull(c *context) bool { 358 if c.info&hasMappingMask == 0 { 359 return true 360 } 361 ct := c.caseType() 362 if c.info&exceptionBit == 0 { 363 if ct != cLower || c.info&inverseFoldBit != 0 { 364 c.err = transform.ErrEndOfSpan 365 return false 366 } 367 return true 368 } 369 e := exceptions[c.info>>exceptionShift:] 370 n := e[0] & lengthMask 371 if n == 0 && ct == cLower { 372 return true 373 } 374 c.err = transform.ErrEndOfSpan 375 return false 376 }