123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- package httprule
- import (
- "errors"
- "fmt"
- "strings"
- )
- // InvalidTemplateError indicates that the path template is not valid.
- type InvalidTemplateError struct {
- tmpl string
- msg string
- }
- func (e InvalidTemplateError) Error() string {
- return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
- }
- // Parse parses the string representation of path template
- func Parse(tmpl string) (Compiler, error) {
- if !strings.HasPrefix(tmpl, "/") {
- return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
- }
- tokens, verb := tokenize(tmpl[1:])
- p := parser{tokens: tokens}
- segs, err := p.topLevelSegments()
- if err != nil {
- return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
- }
- return template{
- segments: segs,
- verb: verb,
- template: tmpl,
- }, nil
- }
- func tokenize(path string) (tokens []string, verb string) {
- if path == "" {
- return []string{eof}, ""
- }
- const (
- init = iota
- field
- nested
- )
- st := init
- for path != "" {
- var idx int
- switch st {
- case init:
- idx = strings.IndexAny(path, "/{")
- case field:
- idx = strings.IndexAny(path, ".=}")
- case nested:
- idx = strings.IndexAny(path, "/}")
- }
- if idx < 0 {
- tokens = append(tokens, path)
- break
- }
- switch r := path[idx]; r {
- case '/', '.':
- case '{':
- st = field
- case '=':
- st = nested
- case '}':
- st = init
- }
- if idx == 0 {
- tokens = append(tokens, path[idx:idx+1])
- } else {
- tokens = append(tokens, path[:idx], path[idx:idx+1])
- }
- path = path[idx+1:]
- }
- l := len(tokens)
- // See
- // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
- // although normal and backwards-compat logic here is to use the last index
- // of a colon, if the final segment is a variable followed by a colon, the
- // part following the colon must be a verb. Hence if the previous token is
- // an end var marker, we switch the index we're looking for to Index instead
- // of LastIndex, so that we correctly grab the remaining part of the path as
- // the verb.
- var penultimateTokenIsEndVar bool
- switch l {
- case 0, 1:
- // Not enough to be variable so skip this logic and don't result in an
- // invalid index
- default:
- penultimateTokenIsEndVar = tokens[l-2] == "}"
- }
- t := tokens[l-1]
- var idx int
- if penultimateTokenIsEndVar {
- idx = strings.Index(t, ":")
- } else {
- idx = strings.LastIndex(t, ":")
- }
- if idx == 0 {
- tokens, verb = tokens[:l-1], t[1:]
- } else if idx > 0 {
- tokens[l-1], verb = t[:idx], t[idx+1:]
- }
- tokens = append(tokens, eof)
- return tokens, verb
- }
- // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
- type parser struct {
- tokens []string
- accepted []string
- }
- // topLevelSegments is the target of this parser.
- func (p *parser) topLevelSegments() ([]segment, error) {
- if _, err := p.accept(typeEOF); err == nil {
- p.tokens = p.tokens[:0]
- return []segment{literal(eof)}, nil
- }
- segs, err := p.segments()
- if err != nil {
- return nil, err
- }
- if _, err := p.accept(typeEOF); err != nil {
- return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
- }
- return segs, nil
- }
- func (p *parser) segments() ([]segment, error) {
- s, err := p.segment()
- if err != nil {
- return nil, err
- }
- segs := []segment{s}
- for {
- if _, err := p.accept("/"); err != nil {
- return segs, nil
- }
- s, err := p.segment()
- if err != nil {
- return segs, err
- }
- segs = append(segs, s)
- }
- }
- func (p *parser) segment() (segment, error) {
- if _, err := p.accept("*"); err == nil {
- return wildcard{}, nil
- }
- if _, err := p.accept("**"); err == nil {
- return deepWildcard{}, nil
- }
- if l, err := p.literal(); err == nil {
- return l, nil
- }
- v, err := p.variable()
- if err != nil {
- return nil, fmt.Errorf("segment neither wildcards, literal or variable: %w", err)
- }
- return v, nil
- }
- func (p *parser) literal() (segment, error) {
- lit, err := p.accept(typeLiteral)
- if err != nil {
- return nil, err
- }
- return literal(lit), nil
- }
- func (p *parser) variable() (segment, error) {
- if _, err := p.accept("{"); err != nil {
- return nil, err
- }
- path, err := p.fieldPath()
- if err != nil {
- return nil, err
- }
- var segs []segment
- if _, err := p.accept("="); err == nil {
- segs, err = p.segments()
- if err != nil {
- return nil, fmt.Errorf("invalid segment in variable %q: %w", path, err)
- }
- } else {
- segs = []segment{wildcard{}}
- }
- if _, err := p.accept("}"); err != nil {
- return nil, fmt.Errorf("unterminated variable segment: %s", path)
- }
- return variable{
- path: path,
- segments: segs,
- }, nil
- }
- func (p *parser) fieldPath() (string, error) {
- c, err := p.accept(typeIdent)
- if err != nil {
- return "", err
- }
- components := []string{c}
- for {
- if _, err := p.accept("."); err != nil {
- return strings.Join(components, "."), nil
- }
- c, err := p.accept(typeIdent)
- if err != nil {
- return "", fmt.Errorf("invalid field path component: %w", err)
- }
- components = append(components, c)
- }
- }
- // A termType is a type of terminal symbols.
- type termType string
- // These constants define some of valid values of termType.
- // They improve readability of parse functions.
- //
- // You can also use "/", "*", "**", "." or "=" as valid values.
- const (
- typeIdent = termType("ident")
- typeLiteral = termType("literal")
- typeEOF = termType("$")
- )
- // eof is the terminal symbol which always appears at the end of token sequence.
- const eof = "\u0000"
- // accept tries to accept a token in "p".
- // This function consumes a token and returns it if it matches to the specified "term".
- // If it doesn't match, the function does not consume any tokens and return an error.
- func (p *parser) accept(term termType) (string, error) {
- t := p.tokens[0]
- switch term {
- case "/", "*", "**", ".", "=", "{", "}":
- if t != string(term) && t != "/" {
- return "", fmt.Errorf("expected %q but got %q", term, t)
- }
- case typeEOF:
- if t != eof {
- return "", fmt.Errorf("expected EOF but got %q", t)
- }
- case typeIdent:
- if err := expectIdent(t); err != nil {
- return "", err
- }
- case typeLiteral:
- if err := expectPChars(t); err != nil {
- return "", err
- }
- default:
- return "", fmt.Errorf("unknown termType %q", term)
- }
- p.tokens = p.tokens[1:]
- p.accepted = append(p.accepted, t)
- return t, nil
- }
- // expectPChars determines if "t" consists of only pchars defined in RFC3986.
- //
- // https://www.ietf.org/rfc/rfc3986.txt, P.49
- //
- // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
- // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- // / "*" / "+" / "," / ";" / "="
- // pct-encoded = "%" HEXDIG HEXDIG
- func expectPChars(t string) error {
- const (
- init = iota
- pct1
- pct2
- )
- st := init
- for _, r := range t {
- if st != init {
- if !isHexDigit(r) {
- return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
- }
- switch st {
- case pct1:
- st = pct2
- case pct2:
- st = init
- }
- continue
- }
- // unreserved
- switch {
- case 'A' <= r && r <= 'Z':
- continue
- case 'a' <= r && r <= 'z':
- continue
- case '0' <= r && r <= '9':
- continue
- }
- switch r {
- case '-', '.', '_', '~':
- // unreserved
- case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
- // sub-delims
- case ':', '@':
- // rest of pchar
- case '%':
- // pct-encoded
- st = pct1
- default:
- return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
- }
- }
- if st != init {
- return fmt.Errorf("invalid percent-encoding in %q", t)
- }
- return nil
- }
- // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
- func expectIdent(ident string) error {
- if ident == "" {
- return errors.New("empty identifier")
- }
- for pos, r := range ident {
- switch {
- case '0' <= r && r <= '9':
- if pos == 0 {
- return fmt.Errorf("identifier starting with digit: %s", ident)
- }
- continue
- case 'A' <= r && r <= 'Z':
- continue
- case 'a' <= r && r <= 'z':
- continue
- case r == '_':
- continue
- default:
- return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
- }
- }
- return nil
- }
- func isHexDigit(r rune) bool {
- switch {
- case '0' <= r && r <= '9':
- return true
- case 'A' <= r && r <= 'F':
- return true
- case 'a' <= r && r <= 'f':
- return true
- }
- return false
- }
|