transform.go (5809B)
1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package width 6 7 import ( 8 "unicode/utf8" 9 10 "golang.org/x/text/transform" 11 ) 12 13 type foldTransform struct { 14 transform.NopResetter 15 } 16 17 func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) { 18 for n < len(src) { 19 if src[n] < utf8.RuneSelf { 20 // ASCII fast path. 21 for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ { 22 } 23 continue 24 } 25 v, size := trie.lookup(src[n:]) 26 if size == 0 { // incomplete UTF-8 encoding 27 if !atEOF { 28 err = transform.ErrShortSrc 29 } else { 30 n = len(src) 31 } 32 break 33 } 34 if elem(v)&tagNeedsFold != 0 { 35 err = transform.ErrEndOfSpan 36 break 37 } 38 n += size 39 } 40 return n, err 41 } 42 43 func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 44 for nSrc < len(src) { 45 if src[nSrc] < utf8.RuneSelf { 46 // ASCII fast path. 47 start, end := nSrc, len(src) 48 if d := len(dst) - nDst; d < end-start { 49 end = nSrc + d 50 } 51 for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ { 52 } 53 n := copy(dst[nDst:], src[start:nSrc]) 54 if nDst += n; nDst == len(dst) { 55 nSrc = start + n 56 if nSrc == len(src) { 57 return nDst, nSrc, nil 58 } 59 if src[nSrc] < utf8.RuneSelf { 60 return nDst, nSrc, transform.ErrShortDst 61 } 62 } 63 continue 64 } 65 v, size := trie.lookup(src[nSrc:]) 66 if size == 0 { // incomplete UTF-8 encoding 67 if !atEOF { 68 return nDst, nSrc, transform.ErrShortSrc 69 } 70 size = 1 // gobble 1 byte 71 } 72 if elem(v)&tagNeedsFold == 0 { 73 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { 74 return nDst, nSrc, transform.ErrShortDst 75 } 76 nDst += size 77 } else { 78 data := inverseData[byte(v)] 79 if len(dst)-nDst < int(data[0]) { 80 return nDst, nSrc, transform.ErrShortDst 81 } 82 i := 1 83 for end := int(data[0]); i < end; i++ { 84 dst[nDst] = data[i] 85 nDst++ 86 } 87 dst[nDst] = data[i] ^ src[nSrc+size-1] 88 nDst++ 89 } 90 nSrc += size 91 } 92 return nDst, nSrc, nil 93 } 94 95 type narrowTransform struct { 96 transform.NopResetter 97 } 98 99 func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) { 100 for n < len(src) { 101 if src[n] < utf8.RuneSelf { 102 // ASCII fast path. 103 for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ { 104 } 105 continue 106 } 107 v, size := trie.lookup(src[n:]) 108 if size == 0 { // incomplete UTF-8 encoding 109 if !atEOF { 110 err = transform.ErrShortSrc 111 } else { 112 n = len(src) 113 } 114 break 115 } 116 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous { 117 } else { 118 err = transform.ErrEndOfSpan 119 break 120 } 121 n += size 122 } 123 return n, err 124 } 125 126 func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 127 for nSrc < len(src) { 128 if src[nSrc] < utf8.RuneSelf { 129 // ASCII fast path. 130 start, end := nSrc, len(src) 131 if d := len(dst) - nDst; d < end-start { 132 end = nSrc + d 133 } 134 for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ { 135 } 136 n := copy(dst[nDst:], src[start:nSrc]) 137 if nDst += n; nDst == len(dst) { 138 nSrc = start + n 139 if nSrc == len(src) { 140 return nDst, nSrc, nil 141 } 142 if src[nSrc] < utf8.RuneSelf { 143 return nDst, nSrc, transform.ErrShortDst 144 } 145 } 146 continue 147 } 148 v, size := trie.lookup(src[nSrc:]) 149 if size == 0 { // incomplete UTF-8 encoding 150 if !atEOF { 151 return nDst, nSrc, transform.ErrShortSrc 152 } 153 size = 1 // gobble 1 byte 154 } 155 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous { 156 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { 157 return nDst, nSrc, transform.ErrShortDst 158 } 159 nDst += size 160 } else { 161 data := inverseData[byte(v)] 162 if len(dst)-nDst < int(data[0]) { 163 return nDst, nSrc, transform.ErrShortDst 164 } 165 i := 1 166 for end := int(data[0]); i < end; i++ { 167 dst[nDst] = data[i] 168 nDst++ 169 } 170 dst[nDst] = data[i] ^ src[nSrc+size-1] 171 nDst++ 172 } 173 nSrc += size 174 } 175 return nDst, nSrc, nil 176 } 177 178 type wideTransform struct { 179 transform.NopResetter 180 } 181 182 func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) { 183 for n < len(src) { 184 // TODO: Consider ASCII fast path. Special-casing ASCII handling can 185 // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably 186 // not enough to warrant the extra code and complexity. 187 v, size := trie.lookup(src[n:]) 188 if size == 0 { // incomplete UTF-8 encoding 189 if !atEOF { 190 err = transform.ErrShortSrc 191 } else { 192 n = len(src) 193 } 194 break 195 } 196 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow { 197 } else { 198 err = transform.ErrEndOfSpan 199 break 200 } 201 n += size 202 } 203 return n, err 204 } 205 206 func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 207 for nSrc < len(src) { 208 // TODO: Consider ASCII fast path. Special-casing ASCII handling can 209 // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably 210 // not enough to warrant the extra code and complexity. 211 v, size := trie.lookup(src[nSrc:]) 212 if size == 0 { // incomplete UTF-8 encoding 213 if !atEOF { 214 return nDst, nSrc, transform.ErrShortSrc 215 } 216 size = 1 // gobble 1 byte 217 } 218 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow { 219 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { 220 return nDst, nSrc, transform.ErrShortDst 221 } 222 nDst += size 223 } else { 224 data := inverseData[byte(v)] 225 if len(dst)-nDst < int(data[0]) { 226 return nDst, nSrc, transform.ErrShortDst 227 } 228 i := 1 229 for end := int(data[0]); i < end; i++ { 230 dst[nDst] = data[i] 231 nDst++ 232 } 233 dst[nDst] = data[i] ^ src[nSrc+size-1] 234 nDst++ 235 } 236 nSrc += size 237 } 238 return nDst, nSrc, nil 239 }