profile.go (10582B)
1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package precis 6 7 import ( 8 "bytes" 9 "errors" 10 "unicode/utf8" 11 12 "golang.org/x/text/cases" 13 "golang.org/x/text/language" 14 "golang.org/x/text/runes" 15 "golang.org/x/text/secure/bidirule" 16 "golang.org/x/text/transform" 17 "golang.org/x/text/width" 18 ) 19 20 var ( 21 errDisallowedRune = errors.New("precis: disallowed rune encountered") 22 ) 23 24 var dpTrie = newDerivedPropertiesTrie(0) 25 26 // A Profile represents a set of rules for normalizing and validating strings in 27 // the PRECIS framework. 28 type Profile struct { 29 options 30 class *class 31 } 32 33 // NewIdentifier creates a new PRECIS profile based on the Identifier string 34 // class. Profiles created from this class are suitable for use where safety is 35 // prioritized over expressiveness like network identifiers, user accounts, chat 36 // rooms, and file names. 37 func NewIdentifier(opts ...Option) *Profile { 38 return &Profile{ 39 options: getOpts(opts...), 40 class: identifier, 41 } 42 } 43 44 // NewFreeform creates a new PRECIS profile based on the Freeform string class. 45 // Profiles created from this class are suitable for use where expressiveness is 46 // prioritized over safety like passwords, and display-elements such as 47 // nicknames in a chat room. 48 func NewFreeform(opts ...Option) *Profile { 49 return &Profile{ 50 options: getOpts(opts...), 51 class: freeform, 52 } 53 } 54 55 // NewRestrictedProfile creates a new PRECIS profile based on an existing 56 // profile. 57 // If the parent profile already had the Disallow option set, the new rule 58 // overrides the parents rule. 
59 func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile { 60 p := *parent 61 Disallow(disallow)(&p.options) 62 return &p 63 } 64 65 // NewTransformer creates a new transform.Transformer that performs the PRECIS 66 // preparation and enforcement steps on the given UTF-8 encoded bytes. 67 func (p *Profile) NewTransformer() *Transformer { 68 var ts []transform.Transformer 69 70 // These transforms are applied in the order defined in 71 // https://tools.ietf.org/html/rfc7564#section-7 72 73 // RFC 8266 ยง2.1: 74 // 75 // Implementation experience has shown that applying the rules for the 76 // Nickname profile is not an idempotent procedure for all code points. 77 // Therefore, an implementation SHOULD apply the rules repeatedly until 78 // the output string is stable; if the output string does not stabilize 79 // after reapplying the rules three (3) additional times after the first 80 // application, the implementation SHOULD terminate application of the 81 // rules and reject the input string as invalid. 82 // 83 // There is no known string that will change indefinitely, so repeat 4 times 84 // and rely on the Span method to keep things relatively performant. 85 r := 1 86 if p.options.repeat { 87 r = 4 88 } 89 for ; r > 0; r-- { 90 if p.options.foldWidth { 91 ts = append(ts, width.Fold) 92 } 93 94 for _, f := range p.options.additional { 95 ts = append(ts, f()) 96 } 97 98 if p.options.cases != nil { 99 ts = append(ts, p.options.cases) 100 } 101 102 ts = append(ts, p.options.norm) 103 104 if p.options.bidiRule { 105 ts = append(ts, bidirule.New()) 106 } 107 108 ts = append(ts, &checker{p: p, allowed: p.Allowed()}) 109 } 110 111 // TODO: Add the disallow empty rule with a dummy transformer? 
112 113 return &Transformer{transform.Chain(ts...)} 114 } 115 116 var errEmptyString = errors.New("precis: transformation resulted in empty string") 117 118 type buffers struct { 119 src []byte 120 buf [2][]byte 121 next int 122 } 123 124 func (b *buffers) apply(t transform.SpanningTransformer) (err error) { 125 n, err := t.Span(b.src, true) 126 if err != transform.ErrEndOfSpan { 127 return err 128 } 129 x := b.next & 1 130 if b.buf[x] == nil { 131 b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2) 132 } 133 span := append(b.buf[x][:0], b.src[:n]...) 134 b.src, _, err = transform.Append(t, span, b.src[n:]) 135 b.buf[x] = b.src 136 b.next++ 137 return err 138 } 139 140 // Pre-allocate transformers when possible. In some cases this avoids allocation. 141 var ( 142 foldWidthT transform.SpanningTransformer = width.Fold 143 lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false)) 144 ) 145 146 // TODO: make this a method on profile. 147 148 func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) { 149 b.src = src 150 151 ascii := true 152 for _, c := range src { 153 if c >= utf8.RuneSelf { 154 ascii = false 155 break 156 } 157 } 158 // ASCII fast path. 
159 if ascii { 160 for _, f := range p.options.additional { 161 if err = b.apply(f()); err != nil { 162 return nil, err 163 } 164 } 165 switch { 166 case p.options.asciiLower || (comparing && p.options.ignorecase): 167 for i, c := range b.src { 168 if 'A' <= c && c <= 'Z' { 169 b.src[i] = c ^ 1<<5 170 } 171 } 172 case p.options.cases != nil: 173 b.apply(p.options.cases) 174 } 175 c := checker{p: p} 176 if _, err := c.span(b.src, true); err != nil { 177 return nil, err 178 } 179 if p.disallow != nil { 180 for _, c := range b.src { 181 if p.disallow.Contains(rune(c)) { 182 return nil, errDisallowedRune 183 } 184 } 185 } 186 if p.options.disallowEmpty && len(b.src) == 0 { 187 return nil, errEmptyString 188 } 189 return b.src, nil 190 } 191 192 // These transforms are applied in the order defined in 193 // https://tools.ietf.org/html/rfc8264#section-7 194 195 r := 1 196 if p.options.repeat { 197 r = 4 198 } 199 for ; r > 0; r-- { 200 // TODO: allow different width transforms options. 201 if p.options.foldWidth || (p.options.ignorecase && comparing) { 202 b.apply(foldWidthT) 203 } 204 for _, f := range p.options.additional { 205 if err = b.apply(f()); err != nil { 206 return nil, err 207 } 208 } 209 if p.options.cases != nil { 210 b.apply(p.options.cases) 211 } 212 if comparing && p.options.ignorecase { 213 b.apply(lowerCaseT) 214 } 215 b.apply(p.norm) 216 if p.options.bidiRule && !bidirule.Valid(b.src) { 217 return nil, bidirule.ErrInvalid 218 } 219 c := checker{p: p} 220 if _, err := c.span(b.src, true); err != nil { 221 return nil, err 222 } 223 if p.disallow != nil { 224 for i := 0; i < len(b.src); { 225 r, size := utf8.DecodeRune(b.src[i:]) 226 if p.disallow.Contains(r) { 227 return nil, errDisallowedRune 228 } 229 i += size 230 } 231 } 232 if p.options.disallowEmpty && len(b.src) == 0 { 233 return nil, errEmptyString 234 } 235 } 236 return b.src, nil 237 } 238 239 // Append appends the result of applying p to src writing the result to dst. 
240 // It returns an error if the input string is invalid. 241 func (p *Profile) Append(dst, src []byte) ([]byte, error) { 242 var buf buffers 243 b, err := buf.enforce(p, src, false) 244 if err != nil { 245 return nil, err 246 } 247 return append(dst, b...), nil 248 } 249 250 func processBytes(p *Profile, b []byte, key bool) ([]byte, error) { 251 var buf buffers 252 b, err := buf.enforce(p, b, key) 253 if err != nil { 254 return nil, err 255 } 256 if buf.next == 0 { 257 c := make([]byte, len(b)) 258 copy(c, b) 259 return c, nil 260 } 261 return b, nil 262 } 263 264 // Bytes returns a new byte slice with the result of applying the profile to b. 265 func (p *Profile) Bytes(b []byte) ([]byte, error) { 266 return processBytes(p, b, false) 267 } 268 269 // AppendCompareKey appends the result of applying p to src (including any 270 // optional rules to make strings comparable or useful in a map key such as 271 // applying lowercasing) writing the result to dst. It returns an error if the 272 // input string is invalid. 273 func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) { 274 var buf buffers 275 b, err := buf.enforce(p, src, true) 276 if err != nil { 277 return nil, err 278 } 279 return append(dst, b...), nil 280 } 281 282 func processString(p *Profile, s string, key bool) (string, error) { 283 var buf buffers 284 b, err := buf.enforce(p, []byte(s), key) 285 if err != nil { 286 return "", err 287 } 288 return string(b), nil 289 } 290 291 // String returns a string with the result of applying the profile to s. 292 func (p *Profile) String(s string) (string, error) { 293 return processString(p, s, false) 294 } 295 296 // CompareKey returns a string that can be used for comparison, hashing, or 297 // collation. 298 func (p *Profile) CompareKey(s string) (string, error) { 299 return processString(p, s, true) 300 } 301 302 // Compare enforces both strings, and then compares them for bit-string identity 303 // (byte-for-byte equality). 
// If either string cannot be enforced, the comparison
// is false.
func (p *Profile) Compare(a, b string) bool {
	var buf buffers

	akey, err := buf.enforce(p, []byte(a), true)
	if err != nil {
		return false
	}

	// Start over with fresh buffers: akey may point into the buffers used
	// while enforcing a, so they must not be reused for b.
	buf = buffers{}
	bkey, err := buf.enforce(p, []byte(b), true)
	if err != nil {
		return false
	}

	return bytes.Equal(akey, bkey)
}

// Allowed returns a runes.Set containing every rune that is a member of the
// underlying profile's string class and not disallowed by any profile specific
// rules.
func (p *Profile) Allowed() runes.Set {
	if p.options.disallow != nil {
		// Membership is evaluated lazily on each query: the rune has to be
		// in the class and must not be in the disallow set.
		return runes.Predicate(func(r rune) bool {
			return p.class.Contains(r) && !p.options.disallow.Contains(r)
		})
	}
	// Without a disallow set the class itself is the answer.
	return p.class
}

// checker validates a sequence of runes against the string class of a
// profile, including rules that depend on neighboring runes. The three
// bitmaps carry its state from one rune to the next.
type checker struct {
	// p is the profile whose class decides which runes are valid.
	p *Profile
	// allowed is set by NewTransformer; it is not read by the methods below.
	allowed runes.Set

	// beforeBits accumulates category bits of the runes processed so far.
	beforeBits catBitmap
	// termBits is non-zero while a lookahead is pending; it holds the bits
	// that terminate that lookahead.
	termBits catBitmap
	// acceptBits holds the bits that may occur while a lookahead is pending.
	acceptBits catBitmap
}

// Reset clears all state carried between runes.
func (c *checker) Reset() {
	c.beforeBits = 0
	c.termBits = 0
	c.acceptBits = 0
}

// span checks src rune by rune and returns the number of bytes that were
// accepted, together with the error that stopped the scan, if any:
//
//   - transform.ErrShortSrc if the trie lookup reports size 0 and atEOF is
//     false;
//   - errDisallowedRune if the lookup reports size 0 at EOF, or if a rune
//     below the class's validFrom property has no context rule;
//   - the error of the rune's context rule, or errContext when a lookahead
//     sequence is continued or ended incorrectly.
//
// The checker's state is updated as runes are consumed. The final-state check
// after the loop runs whenever the end of src is reached, whether or not
// atEOF is set.
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		e, sz := dpTrie.lookup(src[n:])
		d := categoryTransitions[category(e&catMask)]
		if sz == 0 {
			// The lookup could not produce an entry for the bytes at n.
			if !atEOF {
				return n, transform.ErrShortSrc
			}
			return n, errDisallowedRune
		}
		doLookAhead := false
		if property(e) < c.p.class.validFrom {
			// The rune is not valid in this class on its own; it is only
			// acceptable if a rule approves it given what came before.
			if d.rule == nil {
				return n, errDisallowedRune
			}
			doLookAhead, err = d.rule(c.beforeBits)
			if err != nil {
				return n, err
			}
		}
		// Fold this rune's category into the running state.
		c.beforeBits &= d.keep
		c.beforeBits |= d.set
		if c.termBits != 0 {
			// We are currently in an unterminated lookahead.
			if c.beforeBits&c.termBits != 0 {
				// The terminating category was seen: the lookahead is done.
				c.termBits = 0
				c.acceptBits = 0
			} else if c.beforeBits&c.acceptBits == 0 {
				// Invalid continuation of the unterminated lookahead sequence.
				return n, errContext
			}
		}
		if doLookAhead {
			if c.termBits != 0 {
				// A previous lookahead run has not been terminated yet.
				return n, errContext
			}
			// Start a lookahead that the following runes have to satisfy.
			c.termBits = d.term
			c.acceptBits = d.accept
		}
		n += sz
	}
	// Every bit in beforeBits>>finalShift must also be set in beforeBits, and
	// no lookahead may be left pending; otherwise the context is invalid.
	if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
		err = errContext
	}
	return n, err
}

// TODO: we may get rid of this transform if transform.Chain understands
// something like a Spanner interface.

// Transform implements the Transform method of transform.Transformer. It
// copies the bytes of src that span accepts to dst unchanged and returns
// span's error. If dst is shorter than src, only the part of src that fits is
// checked, and transform.ErrShortDst is returned when that part was accepted
// (span returned nil or transform.ErrShortSrc).
//
// NOTE(review): the receiver is a value, so the state updates made by span
// happen on a copy and are not kept between calls — confirm this is intended.
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	short := false
	if len(dst) < len(src) {
		// Only look at what fits in dst; more input follows, so this cannot
		// be the end of the input.
		src = src[:len(dst)]
		atEOF = false
		short = true
	}
	nSrc, err = c.span(src, atEOF)
	nDst = copy(dst, src[:nSrc])
	if short && (err == transform.ErrShortSrc || err == nil) {
		err = transform.ErrShortDst
	}
	return nDst, nSrc, err
}