gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

profile.go (10582B)


      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package precis
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"unicode/utf8"
     11 
     12 	"golang.org/x/text/cases"
     13 	"golang.org/x/text/language"
     14 	"golang.org/x/text/runes"
     15 	"golang.org/x/text/secure/bidirule"
     16 	"golang.org/x/text/transform"
     17 	"golang.org/x/text/width"
     18 )
     19 
     20 var (
     21 	errDisallowedRune = errors.New("precis: disallowed rune encountered")
     22 )
     23 
// dpTrie is the table that checker.span consults to obtain the property
// entry and encoded size of the rune at the start of a byte slice.
// NOTE(review): the meaning of the 0 argument is defined by
// newDerivedPropertiesTrie, which is not visible here.
var dpTrie = newDerivedPropertiesTrie(0)
     25 
// A Profile represents a set of rules for normalizing and validating strings in
// the PRECIS framework.
type Profile struct {
	// options holds the settings built from the Option values passed to the
	// constructor (see getOpts). It is embedded, so its fields are reachable
	// directly on the Profile.
	options
	// class is the string class the profile is based on; the constructors in
	// this file set it to identifier or freeform.
	class *class
}
     32 
     33 // NewIdentifier creates a new PRECIS profile based on the Identifier string
     34 // class. Profiles created from this class are suitable for use where safety is
     35 // prioritized over expressiveness like network identifiers, user accounts, chat
     36 // rooms, and file names.
     37 func NewIdentifier(opts ...Option) *Profile {
     38 	return &Profile{
     39 		options: getOpts(opts...),
     40 		class:   identifier,
     41 	}
     42 }
     43 
     44 // NewFreeform creates a new PRECIS profile based on the Freeform string class.
     45 // Profiles created from this class are suitable for use where expressiveness is
     46 // prioritized over safety like passwords, and display-elements such as
     47 // nicknames in a chat room.
     48 func NewFreeform(opts ...Option) *Profile {
     49 	return &Profile{
     50 		options: getOpts(opts...),
     51 		class:   freeform,
     52 	}
     53 }
     54 
     55 // NewRestrictedProfile creates a new PRECIS profile based on an existing
     56 // profile.
     57 // If the parent profile already had the Disallow option set, the new rule
     58 // overrides the parents rule.
     59 func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile {
     60 	p := *parent
     61 	Disallow(disallow)(&p.options)
     62 	return &p
     63 }
     64 
     65 // NewTransformer creates a new transform.Transformer that performs the PRECIS
     66 // preparation and enforcement steps on the given UTF-8 encoded bytes.
     67 func (p *Profile) NewTransformer() *Transformer {
     68 	var ts []transform.Transformer
     69 
     70 	// These transforms are applied in the order defined in
     71 	// https://tools.ietf.org/html/rfc7564#section-7
     72 
     73 	// RFC 8266 ยง2.1:
     74 	//
     75 	//     Implementation experience has shown that applying the rules for the
     76 	//     Nickname profile is not an idempotent procedure for all code points.
     77 	//     Therefore, an implementation SHOULD apply the rules repeatedly until
     78 	//     the output string is stable; if the output string does not stabilize
     79 	//     after reapplying the rules three (3) additional times after the first
     80 	//     application, the implementation SHOULD terminate application of the
     81 	//     rules and reject the input string as invalid.
     82 	//
     83 	// There is no known string that will change indefinitely, so repeat 4 times
     84 	// and rely on the Span method to keep things relatively performant.
     85 	r := 1
     86 	if p.options.repeat {
     87 		r = 4
     88 	}
     89 	for ; r > 0; r-- {
     90 		if p.options.foldWidth {
     91 			ts = append(ts, width.Fold)
     92 		}
     93 
     94 		for _, f := range p.options.additional {
     95 			ts = append(ts, f())
     96 		}
     97 
     98 		if p.options.cases != nil {
     99 			ts = append(ts, p.options.cases)
    100 		}
    101 
    102 		ts = append(ts, p.options.norm)
    103 
    104 		if p.options.bidiRule {
    105 			ts = append(ts, bidirule.New())
    106 		}
    107 
    108 		ts = append(ts, &checker{p: p, allowed: p.Allowed()})
    109 	}
    110 
    111 	// TODO: Add the disallow empty rule with a dummy transformer?
    112 
    113 	return &Transformer{transform.Chain(ts...)}
    114 }
    115 
    116 var errEmptyString = errors.New("precis: transformation resulted in empty string")
    117 
// buffers holds the working state used while enforcing a profile on a single
// input.
type buffers struct {
	// src is the current intermediate result. enforce initializes it to the
	// caller's input and apply replaces it whenever a transformer changes
	// the data.
	src []byte
	// buf holds two scratch buffers that apply writes into alternately, so
	// the output of one step never overlaps the input of the next.
	buf [2][]byte
	// next counts the apply calls that rewrote src; its low bit selects the
	// scratch buffer to use next. Zero means src has not been replaced yet.
	next int
}
    123 
    124 func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
    125 	n, err := t.Span(b.src, true)
    126 	if err != transform.ErrEndOfSpan {
    127 		return err
    128 	}
    129 	x := b.next & 1
    130 	if b.buf[x] == nil {
    131 		b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
    132 	}
    133 	span := append(b.buf[x][:0], b.src[:n]...)
    134 	b.src, _, err = transform.Append(t, span, b.src[n:])
    135 	b.buf[x] = b.src
    136 	b.next++
    137 	return err
    138 }
    139 
    140 // Pre-allocate transformers when possible. In some cases this avoids allocation.
    141 var (
    142 	foldWidthT transform.SpanningTransformer = width.Fold
    143 	lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
    144 )
    145 
    146 // TODO: make this a method on profile.
    147 
    148 func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
    149 	b.src = src
    150 
    151 	ascii := true
    152 	for _, c := range src {
    153 		if c >= utf8.RuneSelf {
    154 			ascii = false
    155 			break
    156 		}
    157 	}
    158 	// ASCII fast path.
    159 	if ascii {
    160 		for _, f := range p.options.additional {
    161 			if err = b.apply(f()); err != nil {
    162 				return nil, err
    163 			}
    164 		}
    165 		switch {
    166 		case p.options.asciiLower || (comparing && p.options.ignorecase):
    167 			for i, c := range b.src {
    168 				if 'A' <= c && c <= 'Z' {
    169 					b.src[i] = c ^ 1<<5
    170 				}
    171 			}
    172 		case p.options.cases != nil:
    173 			b.apply(p.options.cases)
    174 		}
    175 		c := checker{p: p}
    176 		if _, err := c.span(b.src, true); err != nil {
    177 			return nil, err
    178 		}
    179 		if p.disallow != nil {
    180 			for _, c := range b.src {
    181 				if p.disallow.Contains(rune(c)) {
    182 					return nil, errDisallowedRune
    183 				}
    184 			}
    185 		}
    186 		if p.options.disallowEmpty && len(b.src) == 0 {
    187 			return nil, errEmptyString
    188 		}
    189 		return b.src, nil
    190 	}
    191 
    192 	// These transforms are applied in the order defined in
    193 	// https://tools.ietf.org/html/rfc8264#section-7
    194 
    195 	r := 1
    196 	if p.options.repeat {
    197 		r = 4
    198 	}
    199 	for ; r > 0; r-- {
    200 		// TODO: allow different width transforms options.
    201 		if p.options.foldWidth || (p.options.ignorecase && comparing) {
    202 			b.apply(foldWidthT)
    203 		}
    204 		for _, f := range p.options.additional {
    205 			if err = b.apply(f()); err != nil {
    206 				return nil, err
    207 			}
    208 		}
    209 		if p.options.cases != nil {
    210 			b.apply(p.options.cases)
    211 		}
    212 		if comparing && p.options.ignorecase {
    213 			b.apply(lowerCaseT)
    214 		}
    215 		b.apply(p.norm)
    216 		if p.options.bidiRule && !bidirule.Valid(b.src) {
    217 			return nil, bidirule.ErrInvalid
    218 		}
    219 		c := checker{p: p}
    220 		if _, err := c.span(b.src, true); err != nil {
    221 			return nil, err
    222 		}
    223 		if p.disallow != nil {
    224 			for i := 0; i < len(b.src); {
    225 				r, size := utf8.DecodeRune(b.src[i:])
    226 				if p.disallow.Contains(r) {
    227 					return nil, errDisallowedRune
    228 				}
    229 				i += size
    230 			}
    231 		}
    232 		if p.options.disallowEmpty && len(b.src) == 0 {
    233 			return nil, errEmptyString
    234 		}
    235 	}
    236 	return b.src, nil
    237 }
    238 
    239 // Append appends the result of applying p to src writing the result to dst.
    240 // It returns an error if the input string is invalid.
    241 func (p *Profile) Append(dst, src []byte) ([]byte, error) {
    242 	var buf buffers
    243 	b, err := buf.enforce(p, src, false)
    244 	if err != nil {
    245 		return nil, err
    246 	}
    247 	return append(dst, b...), nil
    248 }
    249 
    250 func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
    251 	var buf buffers
    252 	b, err := buf.enforce(p, b, key)
    253 	if err != nil {
    254 		return nil, err
    255 	}
    256 	if buf.next == 0 {
    257 		c := make([]byte, len(b))
    258 		copy(c, b)
    259 		return c, nil
    260 	}
    261 	return b, nil
    262 }
    263 
    264 // Bytes returns a new byte slice with the result of applying the profile to b.
    265 func (p *Profile) Bytes(b []byte) ([]byte, error) {
    266 	return processBytes(p, b, false)
    267 }
    268 
    269 // AppendCompareKey appends the result of applying p to src (including any
    270 // optional rules to make strings comparable or useful in a map key such as
    271 // applying lowercasing) writing the result to dst. It returns an error if the
    272 // input string is invalid.
    273 func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
    274 	var buf buffers
    275 	b, err := buf.enforce(p, src, true)
    276 	if err != nil {
    277 		return nil, err
    278 	}
    279 	return append(dst, b...), nil
    280 }
    281 
    282 func processString(p *Profile, s string, key bool) (string, error) {
    283 	var buf buffers
    284 	b, err := buf.enforce(p, []byte(s), key)
    285 	if err != nil {
    286 		return "", err
    287 	}
    288 	return string(b), nil
    289 }
    290 
    291 // String returns a string with the result of applying the profile to s.
    292 func (p *Profile) String(s string) (string, error) {
    293 	return processString(p, s, false)
    294 }
    295 
    296 // CompareKey returns a string that can be used for comparison, hashing, or
    297 // collation.
    298 func (p *Profile) CompareKey(s string) (string, error) {
    299 	return processString(p, s, true)
    300 }
    301 
    302 // Compare enforces both strings, and then compares them for bit-string identity
    303 // (byte-for-byte equality). If either string cannot be enforced, the comparison
    304 // is false.
    305 func (p *Profile) Compare(a, b string) bool {
    306 	var buf buffers
    307 
    308 	akey, err := buf.enforce(p, []byte(a), true)
    309 	if err != nil {
    310 		return false
    311 	}
    312 
    313 	buf = buffers{}
    314 	bkey, err := buf.enforce(p, []byte(b), true)
    315 	if err != nil {
    316 		return false
    317 	}
    318 
    319 	return bytes.Equal(akey, bkey)
    320 }
    321 
    322 // Allowed returns a runes.Set containing every rune that is a member of the
    323 // underlying profile's string class and not disallowed by any profile specific
    324 // rules.
    325 func (p *Profile) Allowed() runes.Set {
    326 	if p.options.disallow != nil {
    327 		return runes.Predicate(func(r rune) bool {
    328 			return p.class.Contains(r) && !p.options.disallow.Contains(r)
    329 		})
    330 	}
    331 	return p.class
    332 }
    333 
// checker validates a byte sequence against a profile's string class and the
// per-category context rules. It keeps state between runes in the three
// bitmaps below; Reset clears that state.
type checker struct {
	// p is the profile whose class decides which runes are valid.
	p *Profile
	// allowed is populated by NewTransformer. NOTE(review): neither span nor
	// Transform in this file reads it.
	allowed runes.Set

	// beforeBits accumulates the category bits of the runes seen so far, as
	// updated by each category's keep and set masks.
	beforeBits catBitmap
	// termBits is non-zero while a lookahead sequence is pending; a later
	// rune whose bits intersect it terminates the sequence.
	termBits catBitmap
	// acceptBits lists the bits a rune may carry to continue a pending
	// lookahead sequence without terminating it.
	acceptBits catBitmap
}
    342 
    343 func (c *checker) Reset() {
    344 	c.beforeBits = 0
    345 	c.termBits = 0
    346 	c.acceptBits = 0
    347 }
    348 
// span checks src rune by rune and returns the number of leading bytes that
// were accepted, together with the first error encountered:
//
//   - transform.ErrShortSrc if the trie lookup reports size 0 and atEOF is
//     false;
//   - errDisallowedRune if the lookup reports size 0 at EOF, or if the rune's
//     property is below the class's validFrom and its category has no rule;
//   - the error returned by the category's rule, if any;
//   - errContext if a pending lookahead sequence is continued by a rune that
//     neither terminates nor is accepted by it, if a new lookahead starts
//     while another is pending, or if the final check after the loop fails.
//
// The state in c (beforeBits, termBits, acceptBits) is updated as runes are
// consumed and is not cleared on entry. The final check runs whenever the
// loop completes, regardless of atEOF.
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		// e is the trie entry for the rune at src[n:], sz its encoded size.
		e, sz := dpTrie.lookup(src[n:])
		d := categoryTransitions[category(e&catMask)]
		if sz == 0 {
			// The lookup could not determine a rune size: ask for more input
			// unless this is the end, in which case reject.
			if !atEOF {
				return n, transform.ErrShortSrc
			}
			return n, errDisallowedRune
		}
		doLookAhead := false
		if property(e) < c.p.class.validFrom {
			// The rune is not valid in this class on its own; it is only
			// acceptable if its category has a rule that approves it given
			// the preceding context.
			if d.rule == nil {
				return n, errDisallowedRune
			}
			doLookAhead, err = d.rule(c.beforeBits)
			if err != nil {
				return n, err
			}
		}
		// Fold this rune's category into the running context.
		c.beforeBits &= d.keep
		c.beforeBits |= d.set
		if c.termBits != 0 {
			// We are currently in an unterminated lookahead.
			if c.beforeBits&c.termBits != 0 {
				c.termBits = 0
				c.acceptBits = 0
			} else if c.beforeBits&c.acceptBits == 0 {
				// Invalid continuation of the unterminated lookahead sequence.
				return n, errContext
			}
		}
		if doLookAhead {
			if c.termBits != 0 {
				// A previous lookahead run has not been terminated yet.
				return n, errContext
			}
			// Record what is needed to terminate or continue the lookahead
			// this rune has just started.
			c.termBits = d.term
			c.acceptBits = d.accept
		}
		n += sz
	}
	// Every bit set in beforeBits>>finalShift must also be set in beforeBits
	// itself, and no lookahead may still be pending.
	if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
		err = errContext
	}
	return n, err
}
    396 
    397 // TODO: we may get rid of this transform if transform.Chain understands
    398 // something like a Spanner interface.
    399 func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    400 	short := false
    401 	if len(dst) < len(src) {
    402 		src = src[:len(dst)]
    403 		atEOF = false
    404 		short = true
    405 	}
    406 	nSrc, err = c.span(src, atEOF)
    407 	nDst = copy(dst, src[:nSrc])
    408 	if short && (err == transform.ErrShortSrc || err == nil) {
    409 		err = transform.ErrShortDst
    410 	}
    411 	return nDst, nSrc, err
    412 }