decode.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. /*
  2. * Copyright 2022 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package ast
  17. import (
  18. "encoding/base64"
  19. "runtime"
  20. "strconv"
  21. "unsafe"
  22. "github.com/bytedance/sonic/internal/native/types"
  23. "github.com/bytedance/sonic/internal/rt"
  24. "github.com/bytedance/sonic/internal/utils"
  25. )
// Hack: this constant serves two purposes. It is the bitmask of the JSON
// whitespace bytes (' ', '\t', '\r', '\n') that isSpace tests against, and,
// because 1 << ' ' is 1 << 32, it does not fit into a 32-bit int, so using it
// as an int fails to compile on 32-bit architectures with the constant's
// (self-explanatory) name in the error message.
const _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

// bytesNull is the JSON literal "null" as a byte slice.
var bytesNull = []byte("null")

// Textual forms of JSON literals and of the empty containers.
// NOTE: despite the "bytes" prefix, these are untyped string constants.
const (
	strNull     = "null"
	bytesTrue   = "true"
	bytesFalse  = "false"
	bytesObject = "{}"
	bytesArray  = "[]"
)
  36. func isSpace(c byte) bool {
  37. return (int(1<<c) & _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here) != 0
  38. }
  39. //go:nocheckptr
  40. func skipBlank(src string, pos int) int {
  41. se := uintptr(rt.IndexChar(src, len(src)))
  42. sp := uintptr(rt.IndexChar(src, pos))
  43. for sp < se {
  44. if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
  45. break
  46. }
  47. sp += 1
  48. }
  49. if sp >= se {
  50. return -int(types.ERR_EOF)
  51. }
  52. runtime.KeepAlive(src)
  53. return int(sp - uintptr(rt.IndexChar(src, 0)))
  54. }
  55. func decodeNull(src string, pos int) (ret int) {
  56. ret = pos + 4
  57. if ret > len(src) {
  58. return -int(types.ERR_EOF)
  59. }
  60. if src[pos:ret] == strNull {
  61. return ret
  62. } else {
  63. return -int(types.ERR_INVALID_CHAR)
  64. }
  65. }
  66. func decodeTrue(src string, pos int) (ret int) {
  67. ret = pos + 4
  68. if ret > len(src) {
  69. return -int(types.ERR_EOF)
  70. }
  71. if src[pos:ret] == bytesTrue {
  72. return ret
  73. } else {
  74. return -int(types.ERR_INVALID_CHAR)
  75. }
  76. }
  77. func decodeFalse(src string, pos int) (ret int) {
  78. ret = pos + 5
  79. if ret > len(src) {
  80. return -int(types.ERR_EOF)
  81. }
  82. if src[pos:ret] == bytesFalse {
  83. return ret
  84. }
  85. return -int(types.ERR_INVALID_CHAR)
  86. }
  87. //go:nocheckptr
  88. func decodeString(src string, pos int) (ret int, v string) {
  89. ret, ep := skipString(src, pos)
  90. if ep == -1 {
  91. (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
  92. (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
  93. return ret, v
  94. }
  95. vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
  96. if !ok {
  97. return -int(types.ERR_INVALID_CHAR), ""
  98. }
  99. runtime.KeepAlive(src)
  100. return ret, rt.Mem2Str(vv)
  101. }
  102. func decodeBinary(src string, pos int) (ret int, v []byte) {
  103. var vv string
  104. ret, vv = decodeString(src, pos)
  105. if ret < 0 {
  106. return ret, nil
  107. }
  108. var err error
  109. v, err = base64.StdEncoding.DecodeString(vv)
  110. if err != nil {
  111. return -int(types.ERR_INVALID_CHAR), nil
  112. }
  113. return ret, v
  114. }
  115. func isDigit(c byte) bool {
  116. return c >= '0' && c <= '9'
  117. }
  118. //go:nocheckptr
  119. func decodeInt64(src string, pos int) (ret int, v int64, err error) {
  120. sp := uintptr(rt.IndexChar(src, pos))
  121. ss := uintptr(sp)
  122. se := uintptr(rt.IndexChar(src, len(src)))
  123. if uintptr(sp) >= se {
  124. return -int(types.ERR_EOF), 0, nil
  125. }
  126. if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
  127. sp += 1
  128. }
  129. if sp == se {
  130. return -int(types.ERR_EOF), 0, nil
  131. }
  132. for ; sp < se; sp += uintptr(1) {
  133. if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
  134. break
  135. }
  136. }
  137. if sp < se {
  138. if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
  139. return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
  140. }
  141. }
  142. var vv string
  143. ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  144. (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
  145. (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
  146. v, err = strconv.ParseInt(vv, 10, 64)
  147. if err != nil {
  148. //NOTICE: allow overflow here
  149. if err.(*strconv.NumError).Err == strconv.ErrRange {
  150. return ret, 0, err
  151. }
  152. return -int(types.ERR_INVALID_CHAR), 0, err
  153. }
  154. runtime.KeepAlive(src)
  155. return ret, v, nil
  156. }
  157. func isNumberChars(c byte) bool {
  158. return (c >= '0' && c <= '9') || c == '+' || c == '-' || c == 'e' || c == 'E' || c == '.'
  159. }
  160. //go:nocheckptr
  161. func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
  162. sp := uintptr(rt.IndexChar(src, pos))
  163. ss := uintptr(sp)
  164. se := uintptr(rt.IndexChar(src, len(src)))
  165. if uintptr(sp) >= se {
  166. return -int(types.ERR_EOF), 0, nil
  167. }
  168. if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
  169. sp += 1
  170. }
  171. if sp == se {
  172. return -int(types.ERR_EOF), 0, nil
  173. }
  174. for ; sp < se; sp += uintptr(1) {
  175. if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
  176. break
  177. }
  178. }
  179. var vv string
  180. ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  181. (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
  182. (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
  183. v, err = strconv.ParseFloat(vv, 64)
  184. if err != nil {
  185. //NOTICE: allow overflow here
  186. if err.(*strconv.NumError).Err == strconv.ErrRange {
  187. return ret, 0, err
  188. }
  189. return -int(types.ERR_INVALID_CHAR), 0, err
  190. }
  191. runtime.KeepAlive(src)
  192. return ret, v, nil
  193. }
  194. func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
  195. pos = skipBlank(src, pos)
  196. if pos < 0 {
  197. return pos, types.JsonState{Vt: types.ValueType(pos)}
  198. }
  199. switch c := src[pos]; c {
  200. case 'n':
  201. ret = decodeNull(src, pos)
  202. if ret < 0 {
  203. return ret, types.JsonState{Vt: types.ValueType(ret)}
  204. }
  205. return ret, types.JsonState{Vt: types.V_NULL}
  206. case '"':
  207. var ep int
  208. ret, ep = skipString(src, pos)
  209. if ret < 0 {
  210. return ret, types.JsonState{Vt: types.ValueType(ret)}
  211. }
  212. return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
  213. case '{':
  214. return pos + 1, types.JsonState{Vt: types.V_OBJECT}
  215. case '[':
  216. return pos + 1, types.JsonState{Vt: types.V_ARRAY}
  217. case 't':
  218. ret = decodeTrue(src, pos)
  219. if ret < 0 {
  220. return ret, types.JsonState{Vt: types.ValueType(ret)}
  221. }
  222. return ret, types.JsonState{Vt: types.V_TRUE}
  223. case 'f':
  224. ret = decodeFalse(src, pos)
  225. if ret < 0 {
  226. return ret, types.JsonState{Vt: types.ValueType(ret)}
  227. }
  228. return ret, types.JsonState{Vt: types.V_FALSE}
  229. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  230. if skipnum {
  231. ret = skipNumber(src, pos)
  232. if ret >= 0 {
  233. return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
  234. } else {
  235. return ret, types.JsonState{Vt: types.ValueType(ret)}
  236. }
  237. } else {
  238. var iv int64
  239. ret, iv, _ = decodeInt64(src, pos)
  240. if ret >= 0 {
  241. return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
  242. } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
  243. return ret, types.JsonState{Vt: types.ValueType(ret)}
  244. }
  245. var fv float64
  246. ret, fv, _ = decodeFloat64(src, pos)
  247. if ret >= 0 {
  248. return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
  249. } else {
  250. return ret, types.JsonState{Vt: types.ValueType(ret)}
  251. }
  252. }
  253. default:
  254. return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
  255. }
  256. }
// skipNumber skips the numeric literal starting at src[pos] by delegating to
// utils.SkipNumber and returns its result unchanged. Callers in this file
// treat a negative result as an error code and a non-negative one as the
// index following the number.
//
//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
	return utils.SkipNumber(src, pos)
}
// skipString skips the JSON string whose opening quote is at src[pos].
//
// It returns:
//   - ret: the index just past the closing quote, or a negative error code;
//   - ep: the index of the first backslash inside the string, or -1 if the
//     string contains none (ep is always -1 when an error is returned).
//
// Errors:
//   - -int(types.ERR_EOF) if fewer than two bytes remain at pos, or if no
//     closing quote is found before the end of src;
//   - -int(types.ERR_INVALID_CHAR) if src[pos] is not '"'.
//
// Escape sequences are not validated here: a backslash simply causes the
// following byte to be skipped.
//
//go:nocheckptr
func skipString(src string, pos int) (ret int, ep int) {
	if pos+1 >= len(src) {
		return -int(types.ERR_EOF), -1
	}

	// sp walks the raw bytes; se is one past the last byte of src.
	sp := uintptr(rt.IndexChar(src, pos))
	se := uintptr(rt.IndexChar(src, len(src)))

	// not start with quote
	if *(*byte)(unsafe.Pointer(sp)) != '"' {
		return -int(types.ERR_INVALID_CHAR), -1
	}
	sp += 1

	ep = -1
	for sp < se {
		c := *(*byte)(unsafe.Pointer(sp))
		if c == '\\' {
			// Remember only the first escape position.
			if ep == -1 {
				ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
			}
			// Skip the backslash and the escaped byte; this may step past se,
			// in which case the loop ends and EOF is reported.
			sp += 2
			continue
		}
		sp += 1
		if c == '"' {
			// sp already points past the closing quote.
			return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
		}
	}

	runtime.KeepAlive(src)
	// not found the closed quote until EOF
	return -int(types.ERR_EOF), -1
}
// skipPair skips a balanced lchar ... rchar region (for example '{' ... '}')
// that starts at src[pos], without validating what lies in between.
//
// Nesting is tracked with a counter, and delimiters that appear while inside
// a double-quoted string are ignored. A backslash causes the following byte
// to be skipped, whether or not the scan is currently inside a string.
//
// It returns the index just past the matching rchar, or:
//   - -int(types.ERR_EOF) if fewer than two bytes remain at pos;
//   - -int(types.ERR_INVALID_CHAR) if src[pos] is not lchar, or if the end of
//     src is reached before the region is balanced.
//
//go:nocheckptr
func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
	if pos+1 >= len(src) {
		return -int(types.ERR_EOF)
	}

	// sp walks the raw bytes; se is one past the last byte of src.
	sp := uintptr(rt.IndexChar(src, pos))
	se := uintptr(rt.IndexChar(src, len(src)))

	if *(*byte)(unsafe.Pointer(sp)) != lchar {
		return -int(types.ERR_INVALID_CHAR)
	}

	sp += 1
	nbrace := 1 // the opening lchar has already been consumed
	inquote := false

	for sp < se {
		c := *(*byte)(unsafe.Pointer(sp))
		if c == '\\' {
			// Skip the backslash and the escaped byte.
			sp += 2
			continue
		} else if c == '"' {
			inquote = !inquote
		} else if c == lchar {
			if !inquote {
				nbrace += 1
			}
		} else if c == rchar {
			if !inquote {
				nbrace -= 1
				if nbrace == 0 {
					// Step past the matching rchar before leaving the loop.
					sp += 1
					break
				}
			}
		}
		sp += 1
	}

	if nbrace != 0 {
		return -int(types.ERR_INVALID_CHAR)
	}

	runtime.KeepAlive(src)
	return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
}
  333. func skipValueFast(src string, pos int) (ret int, start int) {
  334. pos = skipBlank(src, pos)
  335. if pos < 0 {
  336. return pos, -1
  337. }
  338. switch c := src[pos]; c {
  339. case 'n':
  340. ret = decodeNull(src, pos)
  341. case '"':
  342. ret, _ = skipString(src, pos)
  343. case '{':
  344. ret = skipPair(src, pos, '{', '}')
  345. case '[':
  346. ret = skipPair(src, pos, '[', ']')
  347. case 't':
  348. ret = decodeTrue(src, pos)
  349. case 'f':
  350. ret = decodeFalse(src, pos)
  351. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  352. ret = skipNumber(src, pos)
  353. default:
  354. ret = -int(types.ERR_INVALID_CHAR)
  355. }
  356. return ret, pos
  357. }
  358. func skipValue(src string, pos int) (ret int, start int) {
  359. pos = skipBlank(src, pos)
  360. if pos < 0 {
  361. return pos, -1
  362. }
  363. switch c := src[pos]; c {
  364. case 'n':
  365. ret = decodeNull(src, pos)
  366. case '"':
  367. ret, _ = skipString(src, pos)
  368. case '{':
  369. ret, _ = skipObject(src, pos)
  370. case '[':
  371. ret, _ = skipArray(src, pos)
  372. case 't':
  373. ret = decodeTrue(src, pos)
  374. case 'f':
  375. ret = decodeFalse(src, pos)
  376. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  377. ret = skipNumber(src, pos)
  378. default:
  379. ret = -int(types.ERR_INVALID_CHAR)
  380. }
  381. return ret, pos
  382. }
  383. func skipObject(src string, pos int) (ret int, start int) {
  384. start = skipBlank(src, pos)
  385. if start < 0 {
  386. return start, -1
  387. }
  388. if src[start] != '{' {
  389. return -int(types.ERR_INVALID_CHAR), -1
  390. }
  391. pos = start + 1
  392. pos = skipBlank(src, pos)
  393. if pos < 0 {
  394. return pos, -1
  395. }
  396. if src[pos] == '}' {
  397. return pos + 1, start
  398. }
  399. for {
  400. pos, _ = skipString(src, pos)
  401. if pos < 0 {
  402. return pos, -1
  403. }
  404. pos = skipBlank(src, pos)
  405. if pos < 0 {
  406. return pos, -1
  407. }
  408. if src[pos] != ':' {
  409. return -int(types.ERR_INVALID_CHAR), -1
  410. }
  411. pos++
  412. pos, _ = skipValue(src, pos)
  413. if pos < 0 {
  414. return pos, -1
  415. }
  416. pos = skipBlank(src, pos)
  417. if pos < 0 {
  418. return pos, -1
  419. }
  420. if src[pos] == '}' {
  421. return pos + 1, start
  422. }
  423. if src[pos] != ',' {
  424. return -int(types.ERR_INVALID_CHAR), -1
  425. }
  426. pos++
  427. pos = skipBlank(src, pos)
  428. if pos < 0 {
  429. return pos, -1
  430. }
  431. }
  432. }
  433. func skipArray(src string, pos int) (ret int, start int) {
  434. start = skipBlank(src, pos)
  435. if start < 0 {
  436. return start, -1
  437. }
  438. if src[start] != '[' {
  439. return -int(types.ERR_INVALID_CHAR), -1
  440. }
  441. pos = start + 1
  442. pos = skipBlank(src, pos)
  443. if pos < 0 {
  444. return pos, -1
  445. }
  446. if src[pos] == ']' {
  447. return pos + 1, start
  448. }
  449. for {
  450. pos, _ = skipValue(src, pos)
  451. if pos < 0 {
  452. return pos, -1
  453. }
  454. pos = skipBlank(src, pos)
  455. if pos < 0 {
  456. return pos, -1
  457. }
  458. if src[pos] == ']' {
  459. return pos + 1, start
  460. }
  461. if src[pos] != ',' {
  462. return -int(types.ERR_INVALID_CHAR), -1
  463. }
  464. pos++
  465. }
  466. }
  467. // DecodeString decodes a JSON string from pos and return golang string.
  468. // - needEsc indicates if to unescaped escaping chars
  469. // - hasEsc tells if the returned string has escaping chars
  470. // - validStr enables validating UTF8 charset
  471. //
  472. func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
  473. p := NewParserObj(src)
  474. p.p = pos
  475. switch val := p.decodeValue(); val.Vt {
  476. case types.V_STRING:
  477. str := p.s[val.Iv : p.p-1]
  478. if validStr && !validate_utf8(str) {
  479. return "", -int(types.ERR_INVALID_UTF8), false
  480. }
  481. /* fast path: no escape sequence */
  482. if val.Ep == -1 {
  483. return str, p.p, false
  484. } else if !needEsc {
  485. return str, p.p, true
  486. }
  487. /* unquote the string */
  488. out, err := unquote(str)
  489. /* check for errors */
  490. if err != 0 {
  491. return "", -int(err), true
  492. } else {
  493. return out, p.p, true
  494. }
  495. default:
  496. return "", -int(_ERR_UNSUPPORT_TYPE), false
  497. }
  498. }