gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

parse.go (8351B)


      1 package httprule
      2 
      3 import (
      4 	"fmt"
      5 	"strings"
      6 )
      7 
      8 // InvalidTemplateError indicates that the path template is not valid.
      9 type InvalidTemplateError struct {
     10 	tmpl string
     11 	msg  string
     12 }
     13 
     14 func (e InvalidTemplateError) Error() string {
     15 	return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
     16 }
     17 
     18 // Parse parses the string representation of path template
     19 func Parse(tmpl string) (Compiler, error) {
     20 	if !strings.HasPrefix(tmpl, "/") {
     21 		return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
     22 	}
     23 	tokens, verb := tokenize(tmpl[1:])
     24 
     25 	p := parser{tokens: tokens}
     26 	segs, err := p.topLevelSegments()
     27 	if err != nil {
     28 		return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
     29 	}
     30 
     31 	return template{
     32 		segments: segs,
     33 		verb:     verb,
     34 		template: tmpl,
     35 	}, nil
     36 }
     37 
     38 func tokenize(path string) (tokens []string, verb string) {
     39 	if path == "" {
     40 		return []string{eof}, ""
     41 	}
     42 
     43 	const (
     44 		init = iota
     45 		field
     46 		nested
     47 	)
     48 	st := init
     49 	for path != "" {
     50 		var idx int
     51 		switch st {
     52 		case init:
     53 			idx = strings.IndexAny(path, "/{")
     54 		case field:
     55 			idx = strings.IndexAny(path, ".=}")
     56 		case nested:
     57 			idx = strings.IndexAny(path, "/}")
     58 		}
     59 		if idx < 0 {
     60 			tokens = append(tokens, path)
     61 			break
     62 		}
     63 		switch r := path[idx]; r {
     64 		case '/', '.':
     65 		case '{':
     66 			st = field
     67 		case '=':
     68 			st = nested
     69 		case '}':
     70 			st = init
     71 		}
     72 		if idx == 0 {
     73 			tokens = append(tokens, path[idx:idx+1])
     74 		} else {
     75 			tokens = append(tokens, path[:idx], path[idx:idx+1])
     76 		}
     77 		path = path[idx+1:]
     78 	}
     79 
     80 	l := len(tokens)
     81 	// See
     82 	// https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
     83 	// although normal and backwards-compat logic here is to use the last index
     84 	// of a colon, if the final segment is a variable followed by a colon, the
     85 	// part following the colon must be a verb. Hence if the previous token is
     86 	// an end var marker, we switch the index we're looking for to Index instead
     87 	// of LastIndex, so that we correctly grab the remaining part of the path as
     88 	// the verb.
     89 	var penultimateTokenIsEndVar bool
     90 	switch l {
     91 	case 0, 1:
     92 		// Not enough to be variable so skip this logic and don't result in an
     93 		// invalid index
     94 	default:
     95 		penultimateTokenIsEndVar = tokens[l-2] == "}"
     96 	}
     97 	t := tokens[l-1]
     98 	var idx int
     99 	if penultimateTokenIsEndVar {
    100 		idx = strings.Index(t, ":")
    101 	} else {
    102 		idx = strings.LastIndex(t, ":")
    103 	}
    104 	if idx == 0 {
    105 		tokens, verb = tokens[:l-1], t[1:]
    106 	} else if idx > 0 {
    107 		tokens[l-1], verb = t[:idx], t[idx+1:]
    108 	}
    109 	tokens = append(tokens, eof)
    110 	return tokens, verb
    111 }
    112 
    113 // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
    114 type parser struct {
    115 	tokens   []string
    116 	accepted []string
    117 }
    118 
    119 // topLevelSegments is the target of this parser.
    120 func (p *parser) topLevelSegments() ([]segment, error) {
    121 	if _, err := p.accept(typeEOF); err == nil {
    122 		p.tokens = p.tokens[:0]
    123 		return []segment{literal(eof)}, nil
    124 	}
    125 	segs, err := p.segments()
    126 	if err != nil {
    127 		return nil, err
    128 	}
    129 	if _, err := p.accept(typeEOF); err != nil {
    130 		return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
    131 	}
    132 	return segs, nil
    133 }
    134 
    135 func (p *parser) segments() ([]segment, error) {
    136 	s, err := p.segment()
    137 	if err != nil {
    138 		return nil, err
    139 	}
    140 
    141 	segs := []segment{s}
    142 	for {
    143 		if _, err := p.accept("/"); err != nil {
    144 			return segs, nil
    145 		}
    146 		s, err := p.segment()
    147 		if err != nil {
    148 			return segs, err
    149 		}
    150 		segs = append(segs, s)
    151 	}
    152 }
    153 
    154 func (p *parser) segment() (segment, error) {
    155 	if _, err := p.accept("*"); err == nil {
    156 		return wildcard{}, nil
    157 	}
    158 	if _, err := p.accept("**"); err == nil {
    159 		return deepWildcard{}, nil
    160 	}
    161 	if l, err := p.literal(); err == nil {
    162 		return l, nil
    163 	}
    164 
    165 	v, err := p.variable()
    166 	if err != nil {
    167 		return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err)
    168 	}
    169 	return v, err
    170 }
    171 
    172 func (p *parser) literal() (segment, error) {
    173 	lit, err := p.accept(typeLiteral)
    174 	if err != nil {
    175 		return nil, err
    176 	}
    177 	return literal(lit), nil
    178 }
    179 
    180 func (p *parser) variable() (segment, error) {
    181 	if _, err := p.accept("{"); err != nil {
    182 		return nil, err
    183 	}
    184 
    185 	path, err := p.fieldPath()
    186 	if err != nil {
    187 		return nil, err
    188 	}
    189 
    190 	var segs []segment
    191 	if _, err := p.accept("="); err == nil {
    192 		segs, err = p.segments()
    193 		if err != nil {
    194 			return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err)
    195 		}
    196 	} else {
    197 		segs = []segment{wildcard{}}
    198 	}
    199 
    200 	if _, err := p.accept("}"); err != nil {
    201 		return nil, fmt.Errorf("unterminated variable segment: %s", path)
    202 	}
    203 	return variable{
    204 		path:     path,
    205 		segments: segs,
    206 	}, nil
    207 }
    208 
    209 func (p *parser) fieldPath() (string, error) {
    210 	c, err := p.accept(typeIdent)
    211 	if err != nil {
    212 		return "", err
    213 	}
    214 	components := []string{c}
    215 	for {
    216 		if _, err = p.accept("."); err != nil {
    217 			return strings.Join(components, "."), nil
    218 		}
    219 		c, err := p.accept(typeIdent)
    220 		if err != nil {
    221 			return "", fmt.Errorf("invalid field path component: %v", err)
    222 		}
    223 		components = append(components, c)
    224 	}
    225 }
    226 
    227 // A termType is a type of terminal symbols.
    228 type termType string
    229 
    230 // These constants define some of valid values of termType.
    231 // They improve readability of parse functions.
    232 //
    233 // You can also use "/", "*", "**", "." or "=" as valid values.
    234 const (
    235 	typeIdent   = termType("ident")
    236 	typeLiteral = termType("literal")
    237 	typeEOF     = termType("$")
    238 )
    239 
    240 const (
    241 	// eof is the terminal symbol which always appears at the end of token sequence.
    242 	eof = "\u0000"
    243 )
    244 
    245 // accept tries to accept a token in "p".
    246 // This function consumes a token and returns it if it matches to the specified "term".
    247 // If it doesn't match, the function does not consume any tokens and return an error.
    248 func (p *parser) accept(term termType) (string, error) {
    249 	t := p.tokens[0]
    250 	switch term {
    251 	case "/", "*", "**", ".", "=", "{", "}":
    252 		if t != string(term) && t != "/" {
    253 			return "", fmt.Errorf("expected %q but got %q", term, t)
    254 		}
    255 	case typeEOF:
    256 		if t != eof {
    257 			return "", fmt.Errorf("expected EOF but got %q", t)
    258 		}
    259 	case typeIdent:
    260 		if err := expectIdent(t); err != nil {
    261 			return "", err
    262 		}
    263 	case typeLiteral:
    264 		if err := expectPChars(t); err != nil {
    265 			return "", err
    266 		}
    267 	default:
    268 		return "", fmt.Errorf("unknown termType %q", term)
    269 	}
    270 	p.tokens = p.tokens[1:]
    271 	p.accepted = append(p.accepted, t)
    272 	return t, nil
    273 }
    274 
    275 // expectPChars determines if "t" consists of only pchars defined in RFC3986.
    276 //
    277 // https://www.ietf.org/rfc/rfc3986.txt, P.49
    278 //   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    279 //   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    280 //   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    281 //                 / "*" / "+" / "," / ";" / "="
    282 //   pct-encoded   = "%" HEXDIG HEXDIG
    283 func expectPChars(t string) error {
    284 	const (
    285 		init = iota
    286 		pct1
    287 		pct2
    288 	)
    289 	st := init
    290 	for _, r := range t {
    291 		if st != init {
    292 			if !isHexDigit(r) {
    293 				return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
    294 			}
    295 			switch st {
    296 			case pct1:
    297 				st = pct2
    298 			case pct2:
    299 				st = init
    300 			}
    301 			continue
    302 		}
    303 
    304 		// unreserved
    305 		switch {
    306 		case 'A' <= r && r <= 'Z':
    307 			continue
    308 		case 'a' <= r && r <= 'z':
    309 			continue
    310 		case '0' <= r && r <= '9':
    311 			continue
    312 		}
    313 		switch r {
    314 		case '-', '.', '_', '~':
    315 			// unreserved
    316 		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
    317 			// sub-delims
    318 		case ':', '@':
    319 			// rest of pchar
    320 		case '%':
    321 			// pct-encoded
    322 			st = pct1
    323 		default:
    324 			return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
    325 		}
    326 	}
    327 	if st != init {
    328 		return fmt.Errorf("invalid percent-encoding in %q", t)
    329 	}
    330 	return nil
    331 }
    332 
    333 // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
    334 func expectIdent(ident string) error {
    335 	if ident == "" {
    336 		return fmt.Errorf("empty identifier")
    337 	}
    338 	for pos, r := range ident {
    339 		switch {
    340 		case '0' <= r && r <= '9':
    341 			if pos == 0 {
    342 				return fmt.Errorf("identifier starting with digit: %s", ident)
    343 			}
    344 			continue
    345 		case 'A' <= r && r <= 'Z':
    346 			continue
    347 		case 'a' <= r && r <= 'z':
    348 			continue
    349 		case r == '_':
    350 			continue
    351 		default:
    352 			return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
    353 		}
    354 	}
    355 	return nil
    356 }
    357 
    358 func isHexDigit(r rune) bool {
    359 	switch {
    360 	case '0' <= r && r <= '9':
    361 		return true
    362 	case 'A' <= r && r <= 'F':
    363 		return true
    364 	case 'a' <= r && r <= 'f':
    365 		return true
    366 	}
    367 	return false
    368 }