gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

context.go (9701B)


      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package cases
      6 
      7 import "golang.org/x/text/transform"
      8 
      9 // A context is used for iterating over source bytes, fetching case info and
     10 // writing to a destination buffer.
     11 //
     12 // Casing operations may need more than one rune of context to decide how a rune
     13 // should be cased. Casing implementations should call checkpoint on context
     14 // whenever it is known to be safe to return the runes processed so far.
     15 //
     16 // It is recommended for implementations to not allow for more than 30 case
     17 // ignorables as lookahead (analogous to the limit in norm) and to use state if
     18 // unbounded lookahead is needed for cased runes.
     19 type context struct {
     20 	dst, src []byte
     21 	atEOF    bool
     22 
     23 	pDst int // pDst points past the last written rune in dst.
     24 	pSrc int // pSrc points to the start of the currently scanned rune.
     25 
     26 	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
     27 	nDst, nSrc int
     28 	err        error
     29 
     30 	sz   int  // size of current rune
     31 	info info // case information of currently scanned rune
     32 
     33 	// State preserved across calls to Transform.
     34 	isMidWord bool // false if next cased letter needs to be title-cased.
     35 }
     36 
// Reset clears the state that is preserved across calls to Transform. After
// Reset, isMidWord is false, meaning the next cased letter needs to be
// title-cased.
func (c *context) Reset() {
	c.isMidWord = false
}
     40 
     41 // ret returns the return values for the Transform method. It checks whether
     42 // there were insufficient bytes in src to complete and introduces an error
     43 // accordingly, if necessary.
     44 func (c *context) ret() (nDst, nSrc int, err error) {
     45 	if c.err != nil || c.nSrc == len(c.src) {
     46 		return c.nDst, c.nSrc, c.err
     47 	}
     48 	// This point is only reached by mappers if there was no short destination
     49 	// buffer. This means that the source buffer was exhausted and that c.sz was
     50 	// set to 0 by next.
     51 	if c.atEOF && c.pSrc == len(c.src) {
     52 		return c.pDst, c.pSrc, nil
     53 	}
     54 	return c.nDst, c.nSrc, transform.ErrShortSrc
     55 }
     56 
     57 // retSpan returns the return values for the Span method. It checks whether
     58 // there were insufficient bytes in src to complete and introduces an error
     59 // accordingly, if necessary.
     60 func (c *context) retSpan() (n int, err error) {
     61 	_, nSrc, err := c.ret()
     62 	return nSrc, err
     63 }
     64 
     65 // checkpoint sets the return value buffer points for Transform to the current
     66 // positions.
     67 func (c *context) checkpoint() {
     68 	if c.err == nil {
     69 		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
     70 	}
     71 }
     72 
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
//
// It works by zeroing sz: next advances pSrc by sz before scanning, so a zero
// size leaves pSrc on the same rune.
func (c *context) unreadRune() {
	c.sz = 0
}
     78 
     79 func (c *context) next() bool {
     80 	c.pSrc += c.sz
     81 	if c.pSrc == len(c.src) || c.err != nil {
     82 		c.info, c.sz = 0, 0
     83 		return false
     84 	}
     85 	v, sz := trie.lookup(c.src[c.pSrc:])
     86 	c.info, c.sz = info(v), sz
     87 	if c.sz == 0 {
     88 		if c.atEOF {
     89 			// A zero size means we have an incomplete rune. If we are atEOF,
     90 			// this means it is an illegal rune, which we will consume one
     91 			// byte at a time.
     92 			c.sz = 1
     93 		} else {
     94 			c.err = transform.ErrShortSrc
     95 			return false
     96 		}
     97 	}
     98 	return true
     99 }
    100 
    101 // writeBytes adds bytes to dst.
    102 func (c *context) writeBytes(b []byte) bool {
    103 	if len(c.dst)-c.pDst < len(b) {
    104 		c.err = transform.ErrShortDst
    105 		return false
    106 	}
    107 	// This loop is faster than using copy.
    108 	for _, ch := range b {
    109 		c.dst[c.pDst] = ch
    110 		c.pDst++
    111 	}
    112 	return true
    113 }
    114 
    115 // writeString writes the given string to dst.
    116 func (c *context) writeString(s string) bool {
    117 	if len(c.dst)-c.pDst < len(s) {
    118 		c.err = transform.ErrShortDst
    119 		return false
    120 	}
    121 	// This loop is faster than using copy.
    122 	for i := 0; i < len(s); i++ {
    123 		c.dst[c.pDst] = s[i]
    124 		c.pDst++
    125 	}
    126 	return true
    127 }
    128 
    129 // copy writes the current rune to dst.
    130 func (c *context) copy() bool {
    131 	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
    132 }
    133 
    134 // copyXOR copies the current rune to dst and modifies it by applying the XOR
    135 // pattern of the case info. It is the responsibility of the caller to ensure
    136 // that this is a rune with a XOR pattern defined.
    137 func (c *context) copyXOR() bool {
    138 	if !c.copy() {
    139 		return false
    140 	}
    141 	if c.info&xorIndexBit == 0 {
    142 		// Fast path for 6-bit XOR pattern, which covers most cases.
    143 		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
    144 	} else {
    145 		// Interpret XOR bits as an index.
    146 		// TODO: test performance for unrolling this loop. Verify that we have
    147 		// at least two bytes and at most three.
    148 		idx := c.info >> xorShift
    149 		for p := c.pDst - 1; ; p-- {
    150 			c.dst[p] ^= xorData[idx]
    151 			idx--
    152 			if xorData[idx] == 0 {
    153 				break
    154 			}
    155 		}
    156 	}
    157 	return true
    158 }
    159 
    160 // hasPrefix returns true if src[pSrc:] starts with the given string.
    161 func (c *context) hasPrefix(s string) bool {
    162 	b := c.src[c.pSrc:]
    163 	if len(b) < len(s) {
    164 		return false
    165 	}
    166 	for i, c := range b[:len(s)] {
    167 		if c != s[i] {
    168 			return false
    169 		}
    170 	}
    171 	return true
    172 }
    173 
    174 // caseType returns an info with only the case bits, normalized to either
    175 // cLower, cUpper, cTitle or cUncased.
    176 func (c *context) caseType() info {
    177 	cm := c.info & 0x7
    178 	if cm < 4 {
    179 		return cm
    180 	}
    181 	if cm >= cXORCase {
    182 		// xor the last bit of the rune with the case type bits.
    183 		b := c.src[c.pSrc+c.sz-1]
    184 		return info(b&1) ^ cm&0x3
    185 	}
    186 	if cm == cIgnorableCased {
    187 		return cLower
    188 	}
    189 	return cUncased
    190 }
    191 
    192 // lower writes the lowercase version of the current rune to dst.
    193 func lower(c *context) bool {
    194 	ct := c.caseType()
    195 	if c.info&hasMappingMask == 0 || ct == cLower {
    196 		return c.copy()
    197 	}
    198 	if c.info&exceptionBit == 0 {
    199 		return c.copyXOR()
    200 	}
    201 	e := exceptions[c.info>>exceptionShift:]
    202 	offset := 2 + e[0]&lengthMask // size of header + fold string
    203 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
    204 		return c.writeString(e[offset : offset+nLower])
    205 	}
    206 	return c.copy()
    207 }
    208 
    209 func isLower(c *context) bool {
    210 	ct := c.caseType()
    211 	if c.info&hasMappingMask == 0 || ct == cLower {
    212 		return true
    213 	}
    214 	if c.info&exceptionBit == 0 {
    215 		c.err = transform.ErrEndOfSpan
    216 		return false
    217 	}
    218 	e := exceptions[c.info>>exceptionShift:]
    219 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
    220 		c.err = transform.ErrEndOfSpan
    221 		return false
    222 	}
    223 	return true
    224 }
    225 
    226 // upper writes the uppercase version of the current rune to dst.
    227 func upper(c *context) bool {
    228 	ct := c.caseType()
    229 	if c.info&hasMappingMask == 0 || ct == cUpper {
    230 		return c.copy()
    231 	}
    232 	if c.info&exceptionBit == 0 {
    233 		return c.copyXOR()
    234 	}
    235 	e := exceptions[c.info>>exceptionShift:]
    236 	offset := 2 + e[0]&lengthMask // size of header + fold string
    237 	// Get length of first special case mapping.
    238 	n := (e[1] >> lengthBits) & lengthMask
    239 	if ct == cTitle {
    240 		// The first special case mapping is for lower. Set n to the second.
    241 		if n == noChange {
    242 			n = 0
    243 		}
    244 		n, e = e[1]&lengthMask, e[n:]
    245 	}
    246 	if n != noChange {
    247 		return c.writeString(e[offset : offset+n])
    248 	}
    249 	return c.copy()
    250 }
    251 
    252 // isUpper writes the isUppercase version of the current rune to dst.
    253 func isUpper(c *context) bool {
    254 	ct := c.caseType()
    255 	if c.info&hasMappingMask == 0 || ct == cUpper {
    256 		return true
    257 	}
    258 	if c.info&exceptionBit == 0 {
    259 		c.err = transform.ErrEndOfSpan
    260 		return false
    261 	}
    262 	e := exceptions[c.info>>exceptionShift:]
    263 	// Get length of first special case mapping.
    264 	n := (e[1] >> lengthBits) & lengthMask
    265 	if ct == cTitle {
    266 		n = e[1] & lengthMask
    267 	}
    268 	if n != noChange {
    269 		c.err = transform.ErrEndOfSpan
    270 		return false
    271 	}
    272 	return true
    273 }
    274 
    275 // title writes the title case version of the current rune to dst.
    276 func title(c *context) bool {
    277 	ct := c.caseType()
    278 	if c.info&hasMappingMask == 0 || ct == cTitle {
    279 		return c.copy()
    280 	}
    281 	if c.info&exceptionBit == 0 {
    282 		if ct == cLower {
    283 			return c.copyXOR()
    284 		}
    285 		return c.copy()
    286 	}
    287 	// Get the exception data.
    288 	e := exceptions[c.info>>exceptionShift:]
    289 	offset := 2 + e[0]&lengthMask // size of header + fold string
    290 
    291 	nFirst := (e[1] >> lengthBits) & lengthMask
    292 	if nTitle := e[1] & lengthMask; nTitle != noChange {
    293 		if nFirst != noChange {
    294 			e = e[nFirst:]
    295 		}
    296 		return c.writeString(e[offset : offset+nTitle])
    297 	}
    298 	if ct == cLower && nFirst != noChange {
    299 		// Use the uppercase version instead.
    300 		return c.writeString(e[offset : offset+nFirst])
    301 	}
    302 	// Already in correct case.
    303 	return c.copy()
    304 }
    305 
    306 // isTitle reports whether the current rune is in title case.
    307 func isTitle(c *context) bool {
    308 	ct := c.caseType()
    309 	if c.info&hasMappingMask == 0 || ct == cTitle {
    310 		return true
    311 	}
    312 	if c.info&exceptionBit == 0 {
    313 		if ct == cLower {
    314 			c.err = transform.ErrEndOfSpan
    315 			return false
    316 		}
    317 		return true
    318 	}
    319 	// Get the exception data.
    320 	e := exceptions[c.info>>exceptionShift:]
    321 	if nTitle := e[1] & lengthMask; nTitle != noChange {
    322 		c.err = transform.ErrEndOfSpan
    323 		return false
    324 	}
    325 	nFirst := (e[1] >> lengthBits) & lengthMask
    326 	if ct == cLower && nFirst != noChange {
    327 		c.err = transform.ErrEndOfSpan
    328 		return false
    329 	}
    330 	return true
    331 }
    332 
    333 // foldFull writes the foldFull version of the current rune to dst.
    334 func foldFull(c *context) bool {
    335 	if c.info&hasMappingMask == 0 {
    336 		return c.copy()
    337 	}
    338 	ct := c.caseType()
    339 	if c.info&exceptionBit == 0 {
    340 		if ct != cLower || c.info&inverseFoldBit != 0 {
    341 			return c.copyXOR()
    342 		}
    343 		return c.copy()
    344 	}
    345 	e := exceptions[c.info>>exceptionShift:]
    346 	n := e[0] & lengthMask
    347 	if n == 0 {
    348 		if ct == cLower {
    349 			return c.copy()
    350 		}
    351 		n = (e[1] >> lengthBits) & lengthMask
    352 	}
    353 	return c.writeString(e[2 : 2+n])
    354 }
    355 
    356 // isFoldFull reports whether the current run is mapped to foldFull
    357 func isFoldFull(c *context) bool {
    358 	if c.info&hasMappingMask == 0 {
    359 		return true
    360 	}
    361 	ct := c.caseType()
    362 	if c.info&exceptionBit == 0 {
    363 		if ct != cLower || c.info&inverseFoldBit != 0 {
    364 			c.err = transform.ErrEndOfSpan
    365 			return false
    366 		}
    367 		return true
    368 	}
    369 	e := exceptions[c.info>>exceptionShift:]
    370 	n := e[0] & lengthMask
    371 	if n == 0 && ct == cLower {
    372 		return true
    373 	}
    374 	c.err = transform.ErrEndOfSpan
    375 	return false
    376 }