parse.go 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. package httprule
  2. import (
  3. "errors"
  4. "fmt"
  5. "strings"
  6. )
  7. // InvalidTemplateError indicates that the path template is not valid.
  8. type InvalidTemplateError struct {
  9. tmpl string
  10. msg string
  11. }
  12. func (e InvalidTemplateError) Error() string {
  13. return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
  14. }
  15. // Parse parses the string representation of path template
  16. func Parse(tmpl string) (Compiler, error) {
  17. if !strings.HasPrefix(tmpl, "/") {
  18. return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
  19. }
  20. tokens, verb := tokenize(tmpl[1:])
  21. p := parser{tokens: tokens}
  22. segs, err := p.topLevelSegments()
  23. if err != nil {
  24. return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
  25. }
  26. return template{
  27. segments: segs,
  28. verb: verb,
  29. template: tmpl,
  30. }, nil
  31. }
  32. func tokenize(path string) (tokens []string, verb string) {
  33. if path == "" {
  34. return []string{eof}, ""
  35. }
  36. const (
  37. init = iota
  38. field
  39. nested
  40. )
  41. st := init
  42. for path != "" {
  43. var idx int
  44. switch st {
  45. case init:
  46. idx = strings.IndexAny(path, "/{")
  47. case field:
  48. idx = strings.IndexAny(path, ".=}")
  49. case nested:
  50. idx = strings.IndexAny(path, "/}")
  51. }
  52. if idx < 0 {
  53. tokens = append(tokens, path)
  54. break
  55. }
  56. switch r := path[idx]; r {
  57. case '/', '.':
  58. case '{':
  59. st = field
  60. case '=':
  61. st = nested
  62. case '}':
  63. st = init
  64. }
  65. if idx == 0 {
  66. tokens = append(tokens, path[idx:idx+1])
  67. } else {
  68. tokens = append(tokens, path[:idx], path[idx:idx+1])
  69. }
  70. path = path[idx+1:]
  71. }
  72. l := len(tokens)
  73. // See
  74. // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
  75. // although normal and backwards-compat logic here is to use the last index
  76. // of a colon, if the final segment is a variable followed by a colon, the
  77. // part following the colon must be a verb. Hence if the previous token is
  78. // an end var marker, we switch the index we're looking for to Index instead
  79. // of LastIndex, so that we correctly grab the remaining part of the path as
  80. // the verb.
  81. var penultimateTokenIsEndVar bool
  82. switch l {
  83. case 0, 1:
  84. // Not enough to be variable so skip this logic and don't result in an
  85. // invalid index
  86. default:
  87. penultimateTokenIsEndVar = tokens[l-2] == "}"
  88. }
  89. t := tokens[l-1]
  90. var idx int
  91. if penultimateTokenIsEndVar {
  92. idx = strings.Index(t, ":")
  93. } else {
  94. idx = strings.LastIndex(t, ":")
  95. }
  96. if idx == 0 {
  97. tokens, verb = tokens[:l-1], t[1:]
  98. } else if idx > 0 {
  99. tokens[l-1], verb = t[:idx], t[idx+1:]
  100. }
  101. tokens = append(tokens, eof)
  102. return tokens, verb
  103. }
  104. // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
  105. type parser struct {
  106. tokens []string
  107. accepted []string
  108. }
  109. // topLevelSegments is the target of this parser.
  110. func (p *parser) topLevelSegments() ([]segment, error) {
  111. if _, err := p.accept(typeEOF); err == nil {
  112. p.tokens = p.tokens[:0]
  113. return []segment{literal(eof)}, nil
  114. }
  115. segs, err := p.segments()
  116. if err != nil {
  117. return nil, err
  118. }
  119. if _, err := p.accept(typeEOF); err != nil {
  120. return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
  121. }
  122. return segs, nil
  123. }
  124. func (p *parser) segments() ([]segment, error) {
  125. s, err := p.segment()
  126. if err != nil {
  127. return nil, err
  128. }
  129. segs := []segment{s}
  130. for {
  131. if _, err := p.accept("/"); err != nil {
  132. return segs, nil
  133. }
  134. s, err := p.segment()
  135. if err != nil {
  136. return segs, err
  137. }
  138. segs = append(segs, s)
  139. }
  140. }
  141. func (p *parser) segment() (segment, error) {
  142. if _, err := p.accept("*"); err == nil {
  143. return wildcard{}, nil
  144. }
  145. if _, err := p.accept("**"); err == nil {
  146. return deepWildcard{}, nil
  147. }
  148. if l, err := p.literal(); err == nil {
  149. return l, nil
  150. }
  151. v, err := p.variable()
  152. if err != nil {
  153. return nil, fmt.Errorf("segment neither wildcards, literal or variable: %w", err)
  154. }
  155. return v, nil
  156. }
  157. func (p *parser) literal() (segment, error) {
  158. lit, err := p.accept(typeLiteral)
  159. if err != nil {
  160. return nil, err
  161. }
  162. return literal(lit), nil
  163. }
  164. func (p *parser) variable() (segment, error) {
  165. if _, err := p.accept("{"); err != nil {
  166. return nil, err
  167. }
  168. path, err := p.fieldPath()
  169. if err != nil {
  170. return nil, err
  171. }
  172. var segs []segment
  173. if _, err := p.accept("="); err == nil {
  174. segs, err = p.segments()
  175. if err != nil {
  176. return nil, fmt.Errorf("invalid segment in variable %q: %w", path, err)
  177. }
  178. } else {
  179. segs = []segment{wildcard{}}
  180. }
  181. if _, err := p.accept("}"); err != nil {
  182. return nil, fmt.Errorf("unterminated variable segment: %s", path)
  183. }
  184. return variable{
  185. path: path,
  186. segments: segs,
  187. }, nil
  188. }
  189. func (p *parser) fieldPath() (string, error) {
  190. c, err := p.accept(typeIdent)
  191. if err != nil {
  192. return "", err
  193. }
  194. components := []string{c}
  195. for {
  196. if _, err := p.accept("."); err != nil {
  197. return strings.Join(components, "."), nil
  198. }
  199. c, err := p.accept(typeIdent)
  200. if err != nil {
  201. return "", fmt.Errorf("invalid field path component: %w", err)
  202. }
  203. components = append(components, c)
  204. }
  205. }
  206. // A termType is a type of terminal symbols.
  207. type termType string
  208. // These constants define some of valid values of termType.
  209. // They improve readability of parse functions.
  210. //
  211. // You can also use "/", "*", "**", "." or "=" as valid values.
  212. const (
  213. typeIdent = termType("ident")
  214. typeLiteral = termType("literal")
  215. typeEOF = termType("$")
  216. )
  217. // eof is the terminal symbol which always appears at the end of token sequence.
  218. const eof = "\u0000"
  219. // accept tries to accept a token in "p".
  220. // This function consumes a token and returns it if it matches to the specified "term".
  221. // If it doesn't match, the function does not consume any tokens and return an error.
  222. func (p *parser) accept(term termType) (string, error) {
  223. t := p.tokens[0]
  224. switch term {
  225. case "/", "*", "**", ".", "=", "{", "}":
  226. if t != string(term) && t != "/" {
  227. return "", fmt.Errorf("expected %q but got %q", term, t)
  228. }
  229. case typeEOF:
  230. if t != eof {
  231. return "", fmt.Errorf("expected EOF but got %q", t)
  232. }
  233. case typeIdent:
  234. if err := expectIdent(t); err != nil {
  235. return "", err
  236. }
  237. case typeLiteral:
  238. if err := expectPChars(t); err != nil {
  239. return "", err
  240. }
  241. default:
  242. return "", fmt.Errorf("unknown termType %q", term)
  243. }
  244. p.tokens = p.tokens[1:]
  245. p.accepted = append(p.accepted, t)
  246. return t, nil
  247. }
  248. // expectPChars determines if "t" consists of only pchars defined in RFC3986.
  249. //
  250. // https://www.ietf.org/rfc/rfc3986.txt, P.49
  251. //
  252. // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  253. // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  254. // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
  255. // / "*" / "+" / "," / ";" / "="
  256. // pct-encoded = "%" HEXDIG HEXDIG
  257. func expectPChars(t string) error {
  258. const (
  259. init = iota
  260. pct1
  261. pct2
  262. )
  263. st := init
  264. for _, r := range t {
  265. if st != init {
  266. if !isHexDigit(r) {
  267. return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
  268. }
  269. switch st {
  270. case pct1:
  271. st = pct2
  272. case pct2:
  273. st = init
  274. }
  275. continue
  276. }
  277. // unreserved
  278. switch {
  279. case 'A' <= r && r <= 'Z':
  280. continue
  281. case 'a' <= r && r <= 'z':
  282. continue
  283. case '0' <= r && r <= '9':
  284. continue
  285. }
  286. switch r {
  287. case '-', '.', '_', '~':
  288. // unreserved
  289. case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
  290. // sub-delims
  291. case ':', '@':
  292. // rest of pchar
  293. case '%':
  294. // pct-encoded
  295. st = pct1
  296. default:
  297. return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
  298. }
  299. }
  300. if st != init {
  301. return fmt.Errorf("invalid percent-encoding in %q", t)
  302. }
  303. return nil
  304. }
  305. // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
  306. func expectIdent(ident string) error {
  307. if ident == "" {
  308. return errors.New("empty identifier")
  309. }
  310. for pos, r := range ident {
  311. switch {
  312. case '0' <= r && r <= '9':
  313. if pos == 0 {
  314. return fmt.Errorf("identifier starting with digit: %s", ident)
  315. }
  316. continue
  317. case 'A' <= r && r <= 'Z':
  318. continue
  319. case 'a' <= r && r <= 'z':
  320. continue
  321. case r == '_':
  322. continue
  323. default:
  324. return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
  325. }
  326. }
  327. return nil
  328. }
  329. func isHexDigit(r rune) bool {
  330. switch {
  331. case '0' <= r && r <= '9':
  332. return true
  333. case 'A' <= r && r <= 'F':
  334. return true
  335. case 'a' <= r && r <= 'f':
  336. return true
  337. }
  338. return false
  339. }