decoder.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package yaml
  14. import (
  15. "bufio"
  16. "bytes"
  17. "encoding/json"
  18. "fmt"
  19. "io"
  20. "strings"
  21. "unicode"
  22. jsonutil "k8s.io/apimachinery/pkg/util/json"
  23. "sigs.k8s.io/yaml"
  24. )
  25. // Unmarshal unmarshals the given data
  26. // If v is a *map[string]interface{}, *[]interface{}, or *interface{} numbers
  27. // are converted to int64 or float64
  28. func Unmarshal(data []byte, v interface{}) error {
  29. preserveIntFloat := func(d *json.Decoder) *json.Decoder {
  30. d.UseNumber()
  31. return d
  32. }
  33. switch v := v.(type) {
  34. case *map[string]interface{}:
  35. if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
  36. return err
  37. }
  38. return jsonutil.ConvertMapNumbers(*v, 0)
  39. case *[]interface{}:
  40. if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
  41. return err
  42. }
  43. return jsonutil.ConvertSliceNumbers(*v, 0)
  44. case *interface{}:
  45. if err := yaml.Unmarshal(data, v, preserveIntFloat); err != nil {
  46. return err
  47. }
  48. return jsonutil.ConvertInterfaceNumbers(v, 0)
  49. default:
  50. return yaml.Unmarshal(data, v)
  51. }
  52. }
  53. // UnmarshalStrict unmarshals the given data
  54. // strictly (erroring when there are duplicate fields).
  55. func UnmarshalStrict(data []byte, v interface{}) error {
  56. preserveIntFloat := func(d *json.Decoder) *json.Decoder {
  57. d.UseNumber()
  58. return d
  59. }
  60. switch v := v.(type) {
  61. case *map[string]interface{}:
  62. if err := yaml.UnmarshalStrict(data, v, preserveIntFloat); err != nil {
  63. return err
  64. }
  65. return jsonutil.ConvertMapNumbers(*v, 0)
  66. case *[]interface{}:
  67. if err := yaml.UnmarshalStrict(data, v, preserveIntFloat); err != nil {
  68. return err
  69. }
  70. return jsonutil.ConvertSliceNumbers(*v, 0)
  71. case *interface{}:
  72. if err := yaml.UnmarshalStrict(data, v, preserveIntFloat); err != nil {
  73. return err
  74. }
  75. return jsonutil.ConvertInterfaceNumbers(v, 0)
  76. default:
  77. return yaml.UnmarshalStrict(data, v)
  78. }
  79. }
  80. // ToJSON converts a single YAML document into a JSON document
  81. // or returns an error. If the document appears to be JSON the
  82. // YAML decoding path is not used (so that error messages are
  83. // JSON specific).
  84. func ToJSON(data []byte) ([]byte, error) {
  85. if hasJSONPrefix(data) {
  86. return data, nil
  87. }
  88. return yaml.YAMLToJSON(data)
  89. }
  90. // YAMLToJSONDecoder decodes YAML documents from an io.Reader by
  91. // separating individual documents. It first converts the YAML
  92. // body to JSON, then unmarshals the JSON.
  93. type YAMLToJSONDecoder struct {
  94. reader Reader
  95. }
  96. // NewYAMLToJSONDecoder decodes YAML documents from the provided
  97. // stream in chunks by converting each document (as defined by
  98. // the YAML spec) into its own chunk, converting it to JSON via
  99. // yaml.YAMLToJSON, and then passing it to json.Decoder.
  100. func NewYAMLToJSONDecoder(r io.Reader) *YAMLToJSONDecoder {
  101. reader := bufio.NewReader(r)
  102. return &YAMLToJSONDecoder{
  103. reader: NewYAMLReader(reader),
  104. }
  105. }
  106. // Decode reads a YAML document as JSON from the stream or returns
  107. // an error. The decoding rules match json.Unmarshal, not
  108. // yaml.Unmarshal.
  109. func (d *YAMLToJSONDecoder) Decode(into interface{}) error {
  110. bytes, err := d.reader.Read()
  111. if err != nil && err != io.EOF {
  112. return err
  113. }
  114. if len(bytes) != 0 {
  115. err := yaml.Unmarshal(bytes, into)
  116. if err != nil {
  117. return YAMLSyntaxError{err}
  118. }
  119. }
  120. return err
  121. }
  122. // YAMLDecoder reads chunks of objects and returns ErrShortBuffer if
  123. // the data is not sufficient.
  124. type YAMLDecoder struct {
  125. r io.ReadCloser
  126. scanner *bufio.Scanner
  127. remaining []byte
  128. }
  129. // NewDocumentDecoder decodes YAML documents from the provided
  130. // stream in chunks by converting each document (as defined by
  131. // the YAML spec) into its own chunk. io.ErrShortBuffer will be
  132. // returned if the entire buffer could not be read to assist
  133. // the caller in framing the chunk.
  134. func NewDocumentDecoder(r io.ReadCloser) io.ReadCloser {
  135. scanner := bufio.NewScanner(r)
  136. // the size of initial allocation for buffer 4k
  137. buf := make([]byte, 4*1024)
  138. // the maximum size used to buffer a token 5M
  139. scanner.Buffer(buf, 5*1024*1024)
  140. scanner.Split(splitYAMLDocument)
  141. return &YAMLDecoder{
  142. r: r,
  143. scanner: scanner,
  144. }
  145. }
  146. // Read reads the previous slice into the buffer, or attempts to read
  147. // the next chunk.
  148. // TODO: switch to readline approach.
  149. func (d *YAMLDecoder) Read(data []byte) (n int, err error) {
  150. left := len(d.remaining)
  151. if left == 0 {
  152. // return the next chunk from the stream
  153. if !d.scanner.Scan() {
  154. err := d.scanner.Err()
  155. if err == nil {
  156. err = io.EOF
  157. }
  158. return 0, err
  159. }
  160. out := d.scanner.Bytes()
  161. d.remaining = out
  162. left = len(out)
  163. }
  164. // fits within data
  165. if left <= len(data) {
  166. copy(data, d.remaining)
  167. d.remaining = nil
  168. return left, nil
  169. }
  170. // caller will need to reread
  171. copy(data, d.remaining[:len(data)])
  172. d.remaining = d.remaining[len(data):]
  173. return len(data), io.ErrShortBuffer
  174. }
  175. func (d *YAMLDecoder) Close() error {
  176. return d.r.Close()
  177. }
  // yamlSeparator is a document separator preceded by the newline that ends
  // the previous line; splitYAMLDocument searches for it inside a buffer.
  const yamlSeparator = "\n---"

  // separator is the bare YAML document separator as it appears at the start
  // of a line.
  const separator = "---"

  // splitYAMLDocument is a bufio.SplitFunc for splitting YAML streams into
  // individual documents.
  //
  // A document ends at the first occurrence of yamlSeparator. The token is
  // everything before that occurrence; the separator and the remainder of its
  // line (up to and including the next newline) are consumed and discarded.
  // When no separator is present, the function asks for more data, or at EOF
  // returns all remaining bytes as the final document.
  //
  // At EOF a separator line is treated as complete even without a trailing
  // newline. Without this, input such as "a: 1\n--- # end" yields no token and
  // bufio.Scanner stops, silently dropping the preceding document.
  func splitYAMLDocument(data []byte, atEOF bool) (advance int, token []byte, err error) {
  	if atEOF && len(data) == 0 {
  		return 0, nil, nil
  	}

  	start := bytes.Index(data, []byte(yamlSeparator))
  	if start < 0 {
  		// If we're at EOF, we have a final, non-terminated document. Return it.
  		if atEOF {
  			return len(data), data, nil
  		}
  		// Request more data.
  		return 0, nil, nil
  	}

  	// We have a potential document terminator; look for the end of its line.
  	afterSep := start + len(yamlSeparator)
  	if eol := bytes.IndexByte(data[afterSep:], '\n'); eol >= 0 {
  		return afterSep + eol + 1, data[:start], nil
  	}

  	// The separator line is not newline-terminated. At EOF nothing more will
  	// arrive, so emit the document and consume the rest of the input.
  	if atEOF {
  		return len(data), data[:start], nil
  	}

  	// Request more data so the whole separator line can be consumed.
  	return 0, nil, nil
  }
  // decoder is the minimal decoding contract: anything with a Decode method
  // that fills the supplied value.
  type decoder interface {
  	Decode(into interface{}) error
  }

  // YAMLOrJSONDecoder attempts to decode a stream of JSON documents or YAML
  // documents by sniffing for a leading { character.
  type YAMLOrJSONDecoder struct {
  	// r is the source stream.
  	r io.Reader
  	// bufferSize is how many bytes are inspected when sniffing the format.
  	bufferSize int

  	// decoder is nil until the first Decode call picks a JSON or YAML decoder.
  	decoder decoder
  }
  // JSONSyntaxError reports a JSON syntax problem together with the offset at
  // which it was detected.
  type JSONSyntaxError struct {
  	// Offset is the position in the input associated with the error.
  	Offset int64
  	// Err is the underlying error.
  	Err error
  }

  // Error implements the error interface, formatting the offset followed by
  // the underlying error's message.
  func (e JSONSyntaxError) Error() string {
  	cause := e.Err.Error()
  	return fmt.Sprintf("json: offset %d: %s", e.Offset, cause)
  }
  // YAMLSyntaxError wraps an error encountered while reading or decoding a
  // YAML document.
  type YAMLSyntaxError struct {
  	// err is the wrapped error; its message is reported verbatim.
  	err error
  }

  // Error implements the error interface by returning the wrapped error's
  // message unchanged.
  func (e YAMLSyntaxError) Error() string {
  	msg := e.err.Error()
  	return msg
  }
  233. // NewYAMLOrJSONDecoder returns a decoder that will process YAML documents
  234. // or JSON documents from the given reader as a stream. bufferSize determines
  235. // how far into the stream the decoder will look to figure out whether this
  236. // is a JSON stream (has whitespace followed by an open brace).
  237. func NewYAMLOrJSONDecoder(r io.Reader, bufferSize int) *YAMLOrJSONDecoder {
  238. return &YAMLOrJSONDecoder{
  239. r: r,
  240. bufferSize: bufferSize,
  241. }
  242. }
  243. // Decode unmarshals the next object from the underlying stream into the
  244. // provide object, or returns an error.
  245. func (d *YAMLOrJSONDecoder) Decode(into interface{}) error {
  246. if d.decoder == nil {
  247. buffer, _, isJSON := GuessJSONStream(d.r, d.bufferSize)
  248. if isJSON {
  249. d.decoder = json.NewDecoder(buffer)
  250. } else {
  251. d.decoder = NewYAMLToJSONDecoder(buffer)
  252. }
  253. }
  254. err := d.decoder.Decode(into)
  255. if syntax, ok := err.(*json.SyntaxError); ok {
  256. return JSONSyntaxError{
  257. Offset: syntax.Offset,
  258. Err: syntax,
  259. }
  260. }
  261. return err
  262. }
  // Reader is a source of byte chunks: each call to Read returns the next
  // chunk or an error.
  type Reader interface {
  	// Read returns the next chunk of bytes, or an error.
  	Read() ([]byte, error)
  }
  266. type YAMLReader struct {
  267. reader Reader
  268. }
  269. func NewYAMLReader(r *bufio.Reader) *YAMLReader {
  270. return &YAMLReader{
  271. reader: &LineReader{reader: r},
  272. }
  273. }
  274. // Read returns a full YAML document.
  275. func (r *YAMLReader) Read() ([]byte, error) {
  276. var buffer bytes.Buffer
  277. for {
  278. line, err := r.reader.Read()
  279. if err != nil && err != io.EOF {
  280. return nil, err
  281. }
  282. sep := len([]byte(separator))
  283. if i := bytes.Index(line, []byte(separator)); i == 0 {
  284. // We have a potential document terminator
  285. i += sep
  286. trimmed := strings.TrimSpace(string(line[i:]))
  287. // We only allow comments and spaces following the yaml doc separator, otherwise we'll return an error
  288. if len(trimmed) > 0 && string(trimmed[0]) != "#" {
  289. return nil, YAMLSyntaxError{
  290. err: fmt.Errorf("invalid Yaml document separator: %s", trimmed),
  291. }
  292. }
  293. if buffer.Len() != 0 {
  294. return buffer.Bytes(), nil
  295. }
  296. if err == io.EOF {
  297. return nil, err
  298. }
  299. }
  300. if err == io.EOF {
  301. if buffer.Len() != 0 {
  302. // If we're at EOF, we have a final, non-terminated line. Return it.
  303. return buffer.Bytes(), nil
  304. }
  305. return nil, err
  306. }
  307. buffer.Write(line)
  308. }
  309. }
  // LineReader reads a bufio.Reader one line at a time, reassembling lines
  // that are longer than the reader's internal buffer.
  type LineReader struct {
  	reader *bufio.Reader
  }

  // Read returns a single line (with '\n' ended) from the underlying reader.
  // A line that bufio delivers in several pieces is stitched back together. An
  // error is returned iff there is an error with the underlying reader; the
  // returned bytes still end in '\n' in that case.
  func (r *LineReader) Read() ([]byte, error) {
  	var out bytes.Buffer
  	for {
  		chunk, more, err := r.reader.ReadLine()
  		out.Write(chunk)
  		// Stop once the line is complete or the reader failed.
  		if err != nil || !more {
  			out.WriteByte('\n')
  			return out.Bytes(), err
  		}
  	}
  }
  329. // GuessJSONStream scans the provided reader up to size, looking
  330. // for an open brace indicating this is JSON. It will return the
  331. // bufio.Reader it creates for the consumer.
  332. func GuessJSONStream(r io.Reader, size int) (io.Reader, []byte, bool) {
  333. buffer := bufio.NewReaderSize(r, size)
  334. b, _ := buffer.Peek(size)
  335. return buffer, b, hasJSONPrefix(b)
  336. }
  337. // IsJSONBuffer scans the provided buffer, looking
  338. // for an open brace indicating this is JSON.
  339. func IsJSONBuffer(buf []byte) bool {
  340. return hasJSONPrefix(buf)
  341. }
  342. var jsonPrefix = []byte("{")
  343. // hasJSONPrefix returns true if the provided buffer appears to start with
  344. // a JSON open brace.
  345. func hasJSONPrefix(buf []byte) bool {
  346. return hasPrefix(buf, jsonPrefix)
  347. }
  // hasPrefix returns true if the first non-whitespace bytes in buf are
  // prefix. Leading whitespace, as defined by unicode.IsSpace, is skipped
  // before the comparison.
  func hasPrefix(buf []byte, prefix []byte) bool {
  	return bytes.HasPrefix(bytes.TrimLeftFunc(buf, unicode.IsSpace), prefix)
  }
  353. }