visitor.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package ast
  17. import (
  18. `encoding/json`
  19. `errors`
  20. `github.com/bytedance/sonic/internal/native/types`
  21. )
// Visitor handles the callbacks during preorder traversal of a JSON AST.
//
// According to the JSON RFC8259, a JSON AST can be defined by
// the following rules without separator / whitespace tokens.
//
//	JSON-AST = value
//	value    = false / null / true / object / array / number / string
//	object   = begin-object [ member *( member ) ] end-object
//	member   = string value
//	array    = begin-array [ value *( value ) ] end-array
//
// Every method returns an error. A non-nil error stops the traversal and is
// returned to the caller of Preorder; the only exception is VisitOPSkip
// returned from OnObjectBegin / OnArrayBegin (see those methods).
type Visitor interface {
	// OnNull handles a JSON null value.
	OnNull() error

	// OnBool handles a JSON true / false value.
	OnBool(v bool) error

	// OnString handles a JSON string value. Escape sequences, if any, have
	// already been unquoted when v is delivered.
	OnString(v string) error

	// OnInt64 handles a JSON number value with int64 type. n is the text of
	// the number as sliced from the input string.
	OnInt64(v int64, n json.Number) error

	// OnFloat64 handles a JSON number value with float64 type. n is the text
	// of the number as sliced from the input string.
	OnFloat64(v float64, n json.Number) error

	// OnObjectBegin handles the beginning of a JSON object value with a
	// suggested capacity that can be used to make your custom object container.
	//
	// After this point the visitor will receive a sequence of callbacks like
	// [OnObjectKey, value, OnObjectKey, value, ......, OnObjectEnd].
	//
	// Returning VisitOPSkip makes the traverser skip the whole object without
	// visiting its members and then call OnObjectEnd directly.
	//
	// Note:
	// 1. This is a recursive definition which means the value can
	//    also be a JSON object / array described by a sequence of callbacks.
	// 2. The traverser in this file passes the same fixed capacity (kept in
	//    sync with ast.Node) for every object, including an empty one, so
	//    treat it purely as a hint. It might not be very suitable and may be
	//    improved in a future version.
	OnObjectBegin(capacity int) error

	// OnObjectKey handles a JSON object key string in member. Escape
	// sequences, if any, have already been unquoted when key is delivered.
	OnObjectKey(key string) error

	// OnObjectEnd handles the ending of a JSON object value.
	OnObjectEnd() error

	// OnArrayBegin handles the beginning of a JSON array value with a
	// suggested capacity that can be used to make your custom array container.
	//
	// After this point the visitor will receive a sequence of callbacks like
	// [value, value, value, ......, OnArrayEnd].
	//
	// Returning VisitOPSkip makes the traverser skip the whole array without
	// visiting its elements and then call OnArrayEnd directly.
	//
	// Note:
	// 1. This is a recursive definition which means the value can
	//    also be a JSON object / array described by a sequence of callbacks.
	// 2. The traverser in this file passes the same fixed capacity (kept in
	//    sync with ast.Node) for every array, including an empty one, so
	//    treat it purely as a hint. It might not be very suitable and may be
	//    improved in a future version.
	OnArrayBegin(capacity int) error

	// OnArrayEnd handles the ending of a JSON array value.
	OnArrayEnd() error
}
// VisitorOptions contains all of the options accepted by Preorder. The
// default value is an empty VisitorOptions{}, which is also what Preorder
// uses when it is given a nil options pointer.
type VisitorOptions struct {
	// OnlyNumber tells the parser to hand numbers over without conversion:
	// Preorder then does not enable number decoding on its parser, and the
	// first argument of OnInt64 / OnFloat64 will always be zero, leaving the
	// json.Number argument as the only carrier of the value.
	OnlyNumber bool
}
// defaultVisitorOptions is the zero-valued option set Preorder falls back to
// when the caller passes nil opts.
var defaultVisitorOptions = &VisitorOptions{}
  88. // Preorder decodes the whole JSON string and callbacks each AST node to visitor
  89. // during preorder traversal. Any visitor method with an error returned will
  90. // break the traversal and the given error will be directly returned. The opts
  91. // argument can be reused after every call.
  92. func Preorder(str string, visitor Visitor, opts *VisitorOptions) error {
  93. if opts == nil {
  94. opts = defaultVisitorOptions
  95. }
  96. // process VisitorOptions first to guarantee that all options will be
  97. // constant during decoding and make options more readable.
  98. var (
  99. optDecodeNumber = !opts.OnlyNumber
  100. )
  101. tv := &traverser{
  102. parser: Parser{
  103. s: str,
  104. noLazy: true,
  105. skipValue: false,
  106. },
  107. visitor: visitor,
  108. }
  109. if optDecodeNumber {
  110. tv.parser.decodeNumber(true)
  111. }
  112. err := tv.decodeValue()
  113. if optDecodeNumber {
  114. tv.parser.decodeNumber(false)
  115. }
  116. return err
  117. }
// traverser drives one Preorder run: it pulls values from parser and forwards
// them to visitor as callbacks.
type traverser struct {
	parser  Parser  // tokenizer over the input string; its position p advances as values are decoded
	visitor Visitor // receiver of the callbacks
}
  122. // NOTE: keep in sync with (*Parser).Parse method.
  123. func (self *traverser) decodeValue() error {
  124. switch val := self.parser.decodeValue(); val.Vt {
  125. case types.V_EOF:
  126. return types.ERR_EOF
  127. case types.V_NULL:
  128. return self.visitor.OnNull()
  129. case types.V_TRUE:
  130. return self.visitor.OnBool(true)
  131. case types.V_FALSE:
  132. return self.visitor.OnBool(false)
  133. case types.V_STRING:
  134. return self.decodeString(val.Iv, val.Ep)
  135. case types.V_DOUBLE:
  136. return self.visitor.OnFloat64(val.Dv,
  137. json.Number(self.parser.s[val.Ep:self.parser.p]))
  138. case types.V_INTEGER:
  139. return self.visitor.OnInt64(val.Iv,
  140. json.Number(self.parser.s[val.Ep:self.parser.p]))
  141. case types.V_ARRAY:
  142. return self.decodeArray()
  143. case types.V_OBJECT:
  144. return self.decodeObject()
  145. default:
  146. return types.ParsingError(-val.Vt)
  147. }
  148. }
  149. // NOTE: keep in sync with (*Parser).decodeArray method.
  150. func (self *traverser) decodeArray() error {
  151. sp := self.parser.p
  152. ns := len(self.parser.s)
  153. /* allocate array space and parse every element */
  154. if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil {
  155. if err == VisitOPSkip {
  156. // NOTICE: for user needs to skip entiry object
  157. self.parser.p -= 1
  158. if _, e := self.parser.skipFast(); e != 0 {
  159. return e
  160. }
  161. return self.visitor.OnArrayEnd()
  162. }
  163. return err
  164. }
  165. /* check for EOF */
  166. self.parser.p = self.parser.lspace(sp)
  167. if self.parser.p >= ns {
  168. return types.ERR_EOF
  169. }
  170. /* check for empty array */
  171. if self.parser.s[self.parser.p] == ']' {
  172. self.parser.p++
  173. return self.visitor.OnArrayEnd()
  174. }
  175. for {
  176. /* decode the value */
  177. if err := self.decodeValue(); err != nil {
  178. return err
  179. }
  180. self.parser.p = self.parser.lspace(self.parser.p)
  181. /* check for EOF */
  182. if self.parser.p >= ns {
  183. return types.ERR_EOF
  184. }
  185. /* check for the next character */
  186. switch self.parser.s[self.parser.p] {
  187. case ',':
  188. self.parser.p++
  189. case ']':
  190. self.parser.p++
  191. return self.visitor.OnArrayEnd()
  192. default:
  193. return types.ERR_INVALID_CHAR
  194. }
  195. }
  196. }
  197. // NOTE: keep in sync with (*Parser).decodeObject method.
  198. func (self *traverser) decodeObject() error {
  199. sp := self.parser.p
  200. ns := len(self.parser.s)
  201. /* allocate object space and decode each pair */
  202. if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil {
  203. if err == VisitOPSkip {
  204. // NOTICE: for user needs to skip entiry object
  205. self.parser.p -= 1
  206. if _, e := self.parser.skipFast(); e != 0 {
  207. return e
  208. }
  209. return self.visitor.OnObjectEnd()
  210. }
  211. return err
  212. }
  213. /* check for EOF */
  214. self.parser.p = self.parser.lspace(sp)
  215. if self.parser.p >= ns {
  216. return types.ERR_EOF
  217. }
  218. /* check for empty object */
  219. if self.parser.s[self.parser.p] == '}' {
  220. self.parser.p++
  221. return self.visitor.OnObjectEnd()
  222. }
  223. for {
  224. var njs types.JsonState
  225. var err types.ParsingError
  226. /* decode the key */
  227. if njs = self.parser.decodeValue(); njs.Vt != types.V_STRING {
  228. return types.ERR_INVALID_CHAR
  229. }
  230. /* extract the key */
  231. idx := self.parser.p - 1
  232. key := self.parser.s[njs.Iv:idx]
  233. /* check for escape sequence */
  234. if njs.Ep != -1 {
  235. if key, err = unquote(key); err != 0 {
  236. return err
  237. }
  238. }
  239. if err := self.visitor.OnObjectKey(key); err != nil {
  240. return err
  241. }
  242. /* expect a ':' delimiter */
  243. if err = self.parser.delim(); err != 0 {
  244. return err
  245. }
  246. /* decode the value */
  247. if err := self.decodeValue(); err != nil {
  248. return err
  249. }
  250. self.parser.p = self.parser.lspace(self.parser.p)
  251. /* check for EOF */
  252. if self.parser.p >= ns {
  253. return types.ERR_EOF
  254. }
  255. /* check for the next character */
  256. switch self.parser.s[self.parser.p] {
  257. case ',':
  258. self.parser.p++
  259. case '}':
  260. self.parser.p++
  261. return self.visitor.OnObjectEnd()
  262. default:
  263. return types.ERR_INVALID_CHAR
  264. }
  265. }
  266. }
  267. // NOTE: keep in sync with (*Parser).decodeString method.
  268. func (self *traverser) decodeString(iv int64, ep int) error {
  269. p := self.parser.p - 1
  270. s := self.parser.s[iv:p]
  271. /* fast path: no escape sequence */
  272. if ep == -1 {
  273. return self.visitor.OnString(s)
  274. }
  275. /* unquote the string */
  276. out, err := unquote(s)
  277. if err != 0 {
  278. return err
  279. }
  280. return self.visitor.OnString(out)
  281. }
  282. // If visitor return this error on `OnObjectBegin()` or `OnArrayBegin()`,
  283. // the transverer will skip entiry object or array
  284. var VisitOPSkip = errors.New("")