gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

prop.go (5862B)


      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package bidi
      6 
      7 import "unicode/utf8"
      8 
      9 // Properties provides access to BiDi properties of runes.
     10 type Properties struct {
     11 	entry uint8
     12 	last  uint8
     13 }
     14 
// trie is the shared lookup structure that Lookup and LookupString query
// through lookupValue for multi-byte encodings.
// NOTE(review): the meaning of the 0 argument is defined by newBidiTrie, which
// is not visible in this file — confirm before changing it.
var trie = newBidiTrie(0)
     16 
     17 // TODO: using this for bidirule reduces the running time by about 5%. Consider
     18 // if this is worth exposing or if we can find a way to speed up the Class
     19 // method.
     20 //
     21 // // CompactClass is like Class, but maps all of the BiDi control classes
     22 // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
     23 // func (p Properties) CompactClass() Class {
     24 // 	return Class(p.entry & 0x0F)
     25 // }
     26 
     27 // Class returns the Bidi class for p.
     28 func (p Properties) Class() Class {
     29 	c := Class(p.entry & 0x0F)
     30 	if c == Control {
     31 		c = controlByteToClass[p.last&0xF]
     32 	}
     33 	return c
     34 }
     35 
     36 // IsBracket reports whether the rune is a bracket.
     37 func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
     38 
     39 // IsOpeningBracket reports whether the rune is an opening bracket.
     40 // IsBracket must return true.
     41 func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
     42 
     43 // TODO: find a better API and expose.
     44 func (p Properties) reverseBracket(r rune) rune {
     45 	return xorMasks[p.entry>>xorMaskShift] ^ r
     46 }
     47 
     48 var controlByteToClass = [16]Class{
     49 	0xD: LRO, // U+202D LeftToRightOverride,
     50 	0xE: RLO, // U+202E RightToLeftOverride,
     51 	0xA: LRE, // U+202A LeftToRightEmbedding,
     52 	0xB: RLE, // U+202B RightToLeftEmbedding,
     53 	0xC: PDF, // U+202C PopDirectionalFormat,
     54 	0x6: LRI, // U+2066 LeftToRightIsolate,
     55 	0x7: RLI, // U+2067 RightToLeftIsolate,
     56 	0x8: FSI, // U+2068 FirstStrongIsolate,
     57 	0x9: PDI, // U+2069 PopDirectionalIsolate,
     58 }
     59 
     60 // LookupRune returns properties for r.
     61 func LookupRune(r rune) (p Properties, size int) {
     62 	var buf [4]byte
     63 	n := utf8.EncodeRune(buf[:], r)
     64 	return Lookup(buf[:n])
     65 }
     66 
     67 // TODO: these lookup methods are based on the generated trie code. The returned
     68 // sizes have slightly different semantics from the generated code, in that it
     69 // always returns size==1 for an illegal UTF-8 byte (instead of the length
     70 // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
     71 // leave invalid UTF-8 untouched, in which case it has performance benefits to
     72 // do so (without changing the semantics). Bidi requires the semantics used here
     73 // for the bidirule implementation to be compatible with the Go semantics.
     74 //  They ultimately should perhaps be adopted by all trie implementations, for
     75 // convenience sake.
     76 // This unrolled code also boosts performance of the secure/bidirule package by
     77 // about 30%.
     78 // So, to remove this code:
     79 //   - add option to trie generator to define return type.
     80 //   - always return 1 byte size for ill-formed UTF-8 runes.
     81 
     82 // Lookup returns properties for the first rune in s and the width in bytes of
     83 // its encoding. The size will be 0 if s does not hold enough bytes to complete
     84 // the encoding.
     85 func Lookup(s []byte) (p Properties, sz int) {
     86 	c0 := s[0]
     87 	switch {
     88 	case c0 < 0x80: // is ASCII
     89 		return Properties{entry: bidiValues[c0]}, 1
     90 	case c0 < 0xC2:
     91 		return Properties{}, 1
     92 	case c0 < 0xE0: // 2-byte UTF-8
     93 		if len(s) < 2 {
     94 			return Properties{}, 0
     95 		}
     96 		i := bidiIndex[c0]
     97 		c1 := s[1]
     98 		if c1 < 0x80 || 0xC0 <= c1 {
     99 			return Properties{}, 1
    100 		}
    101 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
    102 	case c0 < 0xF0: // 3-byte UTF-8
    103 		if len(s) < 3 {
    104 			return Properties{}, 0
    105 		}
    106 		i := bidiIndex[c0]
    107 		c1 := s[1]
    108 		if c1 < 0x80 || 0xC0 <= c1 {
    109 			return Properties{}, 1
    110 		}
    111 		o := uint32(i)<<6 + uint32(c1)
    112 		i = bidiIndex[o]
    113 		c2 := s[2]
    114 		if c2 < 0x80 || 0xC0 <= c2 {
    115 			return Properties{}, 1
    116 		}
    117 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
    118 	case c0 < 0xF8: // 4-byte UTF-8
    119 		if len(s) < 4 {
    120 			return Properties{}, 0
    121 		}
    122 		i := bidiIndex[c0]
    123 		c1 := s[1]
    124 		if c1 < 0x80 || 0xC0 <= c1 {
    125 			return Properties{}, 1
    126 		}
    127 		o := uint32(i)<<6 + uint32(c1)
    128 		i = bidiIndex[o]
    129 		c2 := s[2]
    130 		if c2 < 0x80 || 0xC0 <= c2 {
    131 			return Properties{}, 1
    132 		}
    133 		o = uint32(i)<<6 + uint32(c2)
    134 		i = bidiIndex[o]
    135 		c3 := s[3]
    136 		if c3 < 0x80 || 0xC0 <= c3 {
    137 			return Properties{}, 1
    138 		}
    139 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
    140 	}
    141 	// Illegal rune
    142 	return Properties{}, 1
    143 }
    144 
    145 // LookupString returns properties for the first rune in s and the width in
    146 // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
    147 // complete the encoding.
    148 func LookupString(s string) (p Properties, sz int) {
    149 	c0 := s[0]
    150 	switch {
    151 	case c0 < 0x80: // is ASCII
    152 		return Properties{entry: bidiValues[c0]}, 1
    153 	case c0 < 0xC2:
    154 		return Properties{}, 1
    155 	case c0 < 0xE0: // 2-byte UTF-8
    156 		if len(s) < 2 {
    157 			return Properties{}, 0
    158 		}
    159 		i := bidiIndex[c0]
    160 		c1 := s[1]
    161 		if c1 < 0x80 || 0xC0 <= c1 {
    162 			return Properties{}, 1
    163 		}
    164 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
    165 	case c0 < 0xF0: // 3-byte UTF-8
    166 		if len(s) < 3 {
    167 			return Properties{}, 0
    168 		}
    169 		i := bidiIndex[c0]
    170 		c1 := s[1]
    171 		if c1 < 0x80 || 0xC0 <= c1 {
    172 			return Properties{}, 1
    173 		}
    174 		o := uint32(i)<<6 + uint32(c1)
    175 		i = bidiIndex[o]
    176 		c2 := s[2]
    177 		if c2 < 0x80 || 0xC0 <= c2 {
    178 			return Properties{}, 1
    179 		}
    180 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
    181 	case c0 < 0xF8: // 4-byte UTF-8
    182 		if len(s) < 4 {
    183 			return Properties{}, 0
    184 		}
    185 		i := bidiIndex[c0]
    186 		c1 := s[1]
    187 		if c1 < 0x80 || 0xC0 <= c1 {
    188 			return Properties{}, 1
    189 		}
    190 		o := uint32(i)<<6 + uint32(c1)
    191 		i = bidiIndex[o]
    192 		c2 := s[2]
    193 		if c2 < 0x80 || 0xC0 <= c2 {
    194 			return Properties{}, 1
    195 		}
    196 		o = uint32(i)<<6 + uint32(c2)
    197 		i = bidiIndex[o]
    198 		c3 := s[3]
    199 		if c3 < 0x80 || 0xC0 <= c3 {
    200 			return Properties{}, 1
    201 		}
    202 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
    203 	}
    204 	// Illegal rune
    205 	return Properties{}, 1
    206 }