parse.go (8351B)
1 package httprule 2 3 import ( 4 "fmt" 5 "strings" 6 ) 7 8 // InvalidTemplateError indicates that the path template is not valid. 9 type InvalidTemplateError struct { 10 tmpl string 11 msg string 12 } 13 14 func (e InvalidTemplateError) Error() string { 15 return fmt.Sprintf("%s: %s", e.msg, e.tmpl) 16 } 17 18 // Parse parses the string representation of path template 19 func Parse(tmpl string) (Compiler, error) { 20 if !strings.HasPrefix(tmpl, "/") { 21 return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"} 22 } 23 tokens, verb := tokenize(tmpl[1:]) 24 25 p := parser{tokens: tokens} 26 segs, err := p.topLevelSegments() 27 if err != nil { 28 return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()} 29 } 30 31 return template{ 32 segments: segs, 33 verb: verb, 34 template: tmpl, 35 }, nil 36 } 37 38 func tokenize(path string) (tokens []string, verb string) { 39 if path == "" { 40 return []string{eof}, "" 41 } 42 43 const ( 44 init = iota 45 field 46 nested 47 ) 48 st := init 49 for path != "" { 50 var idx int 51 switch st { 52 case init: 53 idx = strings.IndexAny(path, "/{") 54 case field: 55 idx = strings.IndexAny(path, ".=}") 56 case nested: 57 idx = strings.IndexAny(path, "/}") 58 } 59 if idx < 0 { 60 tokens = append(tokens, path) 61 break 62 } 63 switch r := path[idx]; r { 64 case '/', '.': 65 case '{': 66 st = field 67 case '=': 68 st = nested 69 case '}': 70 st = init 71 } 72 if idx == 0 { 73 tokens = append(tokens, path[idx:idx+1]) 74 } else { 75 tokens = append(tokens, path[:idx], path[idx:idx+1]) 76 } 77 path = path[idx+1:] 78 } 79 80 l := len(tokens) 81 // See 82 // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ; 83 // although normal and backwards-compat logic here is to use the last index 84 // of a colon, if the final segment is a variable followed by a colon, the 85 // part following the colon must be a verb. Hence if the previous token is 86 // an end var marker, we switch the index we're looking for to Index instead 87 // of LastIndex, so that we correctly grab the remaining part of the path as 88 // the verb. 89 var penultimateTokenIsEndVar bool 90 switch l { 91 case 0, 1: 92 // Not enough to be variable so skip this logic and don't result in an 93 // invalid index 94 default: 95 penultimateTokenIsEndVar = tokens[l-2] == "}" 96 } 97 t := tokens[l-1] 98 var idx int 99 if penultimateTokenIsEndVar { 100 idx = strings.Index(t, ":") 101 } else { 102 idx = strings.LastIndex(t, ":") 103 } 104 if idx == 0 { 105 tokens, verb = tokens[:l-1], t[1:] 106 } else if idx > 0 { 107 tokens[l-1], verb = t[:idx], t[idx+1:] 108 } 109 tokens = append(tokens, eof) 110 return tokens, verb 111 } 112 113 // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto. 114 type parser struct { 115 tokens []string 116 accepted []string 117 } 118 119 // topLevelSegments is the target of this parser. 120 func (p *parser) topLevelSegments() ([]segment, error) { 121 if _, err := p.accept(typeEOF); err == nil { 122 p.tokens = p.tokens[:0] 123 return []segment{literal(eof)}, nil 124 } 125 segs, err := p.segments() 126 if err != nil { 127 return nil, err 128 } 129 if _, err := p.accept(typeEOF); err != nil { 130 return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, "")) 131 } 132 return segs, nil 133 } 134 135 func (p *parser) segments() ([]segment, error) { 136 s, err := p.segment() 137 if err != nil { 138 return nil, err 139 } 140 141 segs := []segment{s} 142 for { 143 if _, err := p.accept("/"); err != nil { 144 return segs, nil 145 } 146 s, err := p.segment() 147 if err != nil { 148 return segs, err 149 } 150 segs = append(segs, s) 151 } 152 } 153 154 func (p *parser) segment() (segment, error) { 155 if _, err := p.accept("*"); err == nil { 156 return wildcard{}, nil 157 } 158 if _, err := p.accept("**"); err == nil { 159 return deepWildcard{}, nil 160 } 161 if l, err := p.literal(); err == nil { 162 return l, nil 163 } 164 165 v, err := p.variable() 166 if err != nil { 167 return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err) 168 } 169 return v, err 170 } 171 172 func (p *parser) literal() (segment, error) { 173 lit, err := p.accept(typeLiteral) 174 if err != nil { 175 return nil, err 176 } 177 return literal(lit), nil 178 } 179 180 func (p *parser) variable() (segment, error) { 181 if _, err := p.accept("{"); err != nil { 182 return nil, err 183 } 184 185 path, err := p.fieldPath() 186 if err != nil { 187 return nil, err 188 } 189 190 var segs []segment 191 if _, err := p.accept("="); err == nil { 192 segs, err = p.segments() 193 if err != nil { 194 return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err) 195 } 196 } else { 197 segs = []segment{wildcard{}} 198 } 199 200 if _, err := p.accept("}"); err != nil { 201 return nil, fmt.Errorf("unterminated variable segment: %s", path) 202 } 203 return variable{ 204 path: path, 205 segments: segs, 206 }, nil 207 } 208 209 func (p *parser) fieldPath() (string, error) { 210 c, err := p.accept(typeIdent) 211 if err != nil { 212 return "", err 213 } 214 components := []string{c} 215 for { 216 if _, err = p.accept("."); err != nil { 217 return strings.Join(components, "."), nil 218 } 219 c, err := p.accept(typeIdent) 220 if err != nil { 221 return "", fmt.Errorf("invalid field path component: %v", err) 222 } 223 components = append(components, c) 224 } 225 } 226 227 // A termType is a type of terminal symbols. 228 type termType string 229 230 // These constants define some of valid values of termType. 231 // They improve readability of parse functions. 232 // 233 // You can also use "/", "*", "**", "." or "=" as valid values. 234 const ( 235 typeIdent = termType("ident") 236 typeLiteral = termType("literal") 237 typeEOF = termType("$") 238 ) 239 240 const ( 241 // eof is the terminal symbol which always appears at the end of token sequence. 242 eof = "\u0000" 243 ) 244 245 // accept tries to accept a token in "p". 246 // This function consumes a token and returns it if it matches to the specified "term". 247 // If it doesn't match, the function does not consume any tokens and return an error. 248 func (p *parser) accept(term termType) (string, error) { 249 t := p.tokens[0] 250 switch term { 251 case "/", "*", "**", ".", "=", "{", "}": 252 if t != string(term) && t != "/" { 253 return "", fmt.Errorf("expected %q but got %q", term, t) 254 } 255 case typeEOF: 256 if t != eof { 257 return "", fmt.Errorf("expected EOF but got %q", t) 258 } 259 case typeIdent: 260 if err := expectIdent(t); err != nil { 261 return "", err 262 } 263 case typeLiteral: 264 if err := expectPChars(t); err != nil { 265 return "", err 266 } 267 default: 268 return "", fmt.Errorf("unknown termType %q", term) 269 } 270 p.tokens = p.tokens[1:] 271 p.accepted = append(p.accepted, t) 272 return t, nil 273 } 274 275 // expectPChars determines if "t" consists of only pchars defined in RFC3986. 276 // 277 // https://www.ietf.org/rfc/rfc3986.txt, P.49 278 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 279 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 280 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 281 // / "*" / "+" / "," / ";" / "=" 282 // pct-encoded = "%" HEXDIG HEXDIG 283 func expectPChars(t string) error { 284 const ( 285 init = iota 286 pct1 287 pct2 288 ) 289 st := init 290 for _, r := range t { 291 if st != init { 292 if !isHexDigit(r) { 293 return fmt.Errorf("invalid hexdigit: %c(%U)", r, r) 294 } 295 switch st { 296 case pct1: 297 st = pct2 298 case pct2: 299 st = init 300 } 301 continue 302 } 303 304 // unreserved 305 switch { 306 case 'A' <= r && r <= 'Z': 307 continue 308 case 'a' <= r && r <= 'z': 309 continue 310 case '0' <= r && r <= '9': 311 continue 312 } 313 switch r { 314 case '-', '.', '_', '~': 315 // unreserved 316 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': 317 // sub-delims 318 case ':', '@': 319 // rest of pchar 320 case '%': 321 // pct-encoded 322 st = pct1 323 default: 324 return fmt.Errorf("invalid character in path segment: %q(%U)", r, r) 325 } 326 } 327 if st != init { 328 return fmt.Errorf("invalid percent-encoding in %q", t) 329 } 330 return nil 331 } 332 333 // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*). 334 func expectIdent(ident string) error { 335 if ident == "" { 336 return fmt.Errorf("empty identifier") 337 } 338 for pos, r := range ident { 339 switch { 340 case '0' <= r && r <= '9': 341 if pos == 0 { 342 return fmt.Errorf("identifier starting with digit: %s", ident) 343 } 344 continue 345 case 'A' <= r && r <= 'Z': 346 continue 347 case 'a' <= r && r <= 'z': 348 continue 349 case r == '_': 350 continue 351 default: 352 return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident) 353 } 354 } 355 return nil 356 } 357 358 func isHexDigit(r rune) bool { 359 switch { 360 case '0' <= r && r <= '9': 361 return true 362 case 'A' <= r && r <= 'F': 363 return true 364 case 'a' <= r && r <= 'f': 365 return true 366 } 367 return false 368 }