native.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. package optdec
  2. import (
  3. "fmt"
  4. "reflect"
  5. "unsafe"
  6. "sync"
  7. "github.com/bytedance/sonic/internal/native"
  8. "github.com/bytedance/sonic/internal/native/types"
  9. "github.com/bytedance/sonic/internal/rt"
  10. "github.com/bytedance/sonic/utf8"
  11. )
  12. type ErrorCode int
  13. const (
  14. SONIC_OK = 0;
  15. SONIC_CONTROL_CHAR = 1;
  16. SONIC_INVALID_ESCAPED = 2;
  17. SONIC_INVALID_NUM = 3;
  18. SONIC_FLOAT_INF = 4;
  19. SONIC_EOF = 5;
  20. SONIC_INVALID_CHAR = 6;
  21. SONIC_EXPECT_KEY = 7;
  22. SONIC_EXPECT_COLON = 8;
  23. SONIC_EXPECT_OBJ_COMMA_OR_END = 9;
  24. SONIC_EXPECT_ARR_COMMA_OR_END = 10;
  25. SONIC_VISIT_FAILED = 11;
  26. SONIC_INVALID_ESCAPED_UTF = 12;
  27. SONIC_INVALID_LITERAL = 13;
  28. SONIC_STACK_OVERFLOW = 14;
  29. )
  30. var ParsingErrors = []string{
  31. SONIC_OK : "ok",
  32. SONIC_CONTROL_CHAR : "control chars in string",
  33. SONIC_INVALID_ESCAPED : "invalid escaped chars in string",
  34. SONIC_INVALID_NUM : "invalid number",
  35. SONIC_FLOAT_INF : "float infinity",
  36. SONIC_EOF : "eof",
  37. SONIC_INVALID_CHAR : "invalid chars",
  38. SONIC_EXPECT_KEY : "expect a json key",
  39. SONIC_EXPECT_COLON : "expect a `:`",
  40. SONIC_EXPECT_OBJ_COMMA_OR_END : "expect a `,` or `}`",
  41. SONIC_EXPECT_ARR_COMMA_OR_END : "expect a `,` or `]`",
  42. SONIC_VISIT_FAILED : "failed in json visitor",
  43. SONIC_INVALID_ESCAPED_UTF : "invalid escaped unicodes",
  44. SONIC_INVALID_LITERAL : "invalid literal(true/false/null)",
  45. SONIC_STACK_OVERFLOW : "json is exceeded max depth 4096, cause stack overflow",
  46. }
  47. func (code ErrorCode) Error() string {
  48. return ParsingErrors[code]
  49. }
  50. type node struct {
  51. typ uint64
  52. val uint64
  53. }
// _nospaceBlock is 16 bytes of storage declared as two blank 8-byte arrays, so
// it cannot be accessed by field name from Go. Its layout should be kept
// consistent with native/parser.c.
type _nospaceBlock struct {
	_ [8]byte
	_ [8]byte
}
// nodeBuf holds the cursor into the parser's node buffer. The cursor fields
// are raw addresses stored as uintptr. The layout should be kept consistent
// with native/parser.c.
type nodeBuf struct {
	// ncur is the address of the current position in the node buffer; init
	// sets it to the address of the first node.
	ncur uintptr
	// parent is set to -1 by init.
	parent int64
	depth uint64
	// nstart is the address of the first node of the buffer.
	nstart uintptr
	// nend is nstart plus the capacity of the buffer in bytes.
	nend uintptr
	iskey bool
	// stat holds the jsonStat counters.
	stat jsonStat
}
  69. func (self *nodeBuf) init(nodes []node) {
  70. self.ncur = uintptr(unsafe.Pointer(&nodes[0]))
  71. self.nstart = self.ncur
  72. self.nend = self.ncur + uintptr(cap(nodes)) * unsafe.Sizeof(node{})
  73. self.parent = -1
  74. }
// Parser carries the state of one parse. A pointer to it is handed to
// native.ParseWithPadding, so the layout should be kept consistent with
// native/parser.c.
type Parser struct {
	// Json is the source text: the caller's input, or a UTF-8 corrected copy
	// of it when Utf8Inv is set (see newParser).
	Json string
	// padded is the reusable buffer into which newParser copies the input
	// followed by the padding bytes.
	padded []byte
	// nodes is the node buffer the cursor in nbuf points into.
	nodes []node
	// dbuf is a scratch buffer allocated with types.MaxDigitNums bytes by the
	// pool; presumably used for number conversion - confirm against the
	// native code.
	dbuf []byte
	// backup keeps the pooled node buffer while parse has swapped in a larger
	// one; reset restores it.
	backup []node
	// options are the option bits passed to newParser.
	options uint64
	// JSON cursor
	start uintptr
	cur uintptr
	end uintptr
	_nbk _nospaceBlock
	// node buffer cursor
	nbuf nodeBuf
	// Utf8Inv is set by newParser when validation was requested and the
	// input was not valid UTF-8.
	Utf8Inv bool
	// isEface, when false, makes parse clear the _F_use_number option for
	// the duration of the parse.
	isEface bool
}
// jsonStat is a set of 32-bit counters embedded in nodeBuf. According to the
// original note they are only needed when parsing a non-empty object or
// array; the code that updates them is not in this file.
type jsonStat struct {
	object      uint32
	array       uint32
	str         uint32
	number      uint32
	array_elems uint32
	object_keys uint32
	max_depth   uint32
}
// Initial buffer sizes used when the pool creates a new Parser.
var (
	// defaultJsonPaddedCap is the initial capacity, in bytes, of Parser.padded.
	defaultJsonPaddedCap uintptr = 1 << 20 // 1 MiB
	// defaultNodesCap is the initial number of nodes in Parser.nodes: as many
	// nodes as fit in 1 MiB.
	defaultNodesCap uintptr = (1 << 20) / unsafe.Sizeof(node{}) // 1 MiB worth of nodes
)
  107. var parsePool sync.Pool = sync.Pool {
  108. New: func () interface{} {
  109. return &Parser{
  110. options: 0,
  111. padded: make([]byte, 0, defaultJsonPaddedCap),
  112. nodes: make([]node, defaultNodesCap, defaultNodesCap),
  113. dbuf: make([]byte, types.MaxDigitNums, types.MaxDigitNums),
  114. }
  115. },
  116. }
  117. var padding string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
// newParser takes a Parser from parsePool and prepares it to parse data
// starting at byte offset pos, using the option bits in opt.
//
// When the _F_validate_string bit is set and data is not valid UTF-8, the
// parser works on a freshly allocated, corrected copy of data[pos:] and
// Utf8Inv is set. Otherwise data[pos:] is copied into the pooled padded
// buffer. In both cases the padding bytes are appended after the JSON text.
//
// The returned parser must be handed back with free once the caller is done.
func newParser(data string, pos int, opt uint64) *Parser {
	p := parsePool.Get().(*Parser)
	/* validate json if needed */
	if (opt & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(data){
		// Validation looks at the whole of data, while the corrected copy is
		// built from data[pos:] only.
		dbuf := utf8.CorrectWith(nil, rt.Str2Mem(data[pos:]), "\ufffd")
		dbuf = append(dbuf, padding...)
		// Json views the corrected bytes without the padding tail; the
		// padding still follows it in the same backing array.
		p.Json = rt.Mem2Str(dbuf[:len(dbuf) - len(padding)])
		p.Utf8Inv = true
		p.start = uintptr((*rt.GoString)(unsafe.Pointer(&p.Json)).Ptr)
	} else {
		// NOTE(review): Json keeps the full input while padded only holds
		// data[pos:], so for pos > 0 the end computed below lies pos bytes
		// past the copied JSON, and Pos() is relative to data[pos:] rather
		// than to Json - confirm whether callers ever pass a non-zero pos.
		p.Json = data
		// TODO: prevent too large JSON
		p.padded = append(p.padded, data[pos:]...)
		p.padded = append(p.padded, padding...)
		// The address is taken after both appends so that it refers to the
		// final backing array of padded.
		p.start = uintptr((*rt.GoSlice)(unsafe.Pointer(&p.padded)).Ptr)
	}
	p.cur = p.start
	p.end = p.cur + uintptr(len(p.Json))
	p.options = opt
	p.nbuf.init(p.nodes)
	return p
}
  140. func (p *Parser) Pos() int {
  141. return int(p.cur - p.start)
  142. }
  143. func (p *Parser) JsonBytes() []byte {
  144. if p.Utf8Inv {
  145. return (rt.Str2Mem(p.Json))
  146. } else {
  147. return p.padded
  148. }
  149. }
// nodeType is the runtime type descriptor of node, obtained through
// rt.UnpackType. parse passes it (and its Size) to rt.Mallocgc when it
// allocates a larger node buffer.
var nodeType = rt.UnpackType(reflect.TypeOf(node{}))
  151. //go:inline
  152. func calMaxNodeCap(jsonSize int) int {
  153. return jsonSize / 2 + 2
  154. }
  155. func (p *Parser) parse() ErrorCode {
  156. // when decode into struct, we should decode number as possible
  157. old := p.options
  158. if !p.isEface {
  159. p.options &^= 1 << _F_use_number
  160. }
  161. // fast path with limited node buffer
  162. err := ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
  163. if err != SONIC_VISIT_FAILED {
  164. p.options = old
  165. return err
  166. }
  167. // check OoB here
  168. offset := p.nbuf.ncur - p.nbuf.nstart
  169. curLen := int(offset / unsafe.Sizeof(node{}))
  170. if curLen != len(p.nodes) {
  171. panic(fmt.Sprintf("current len: %d, real len: %d cap: %d", curLen, len(p.nodes), cap(p.nodes)))
  172. }
  173. // node buf is not enough, continue parse
  174. // the maxCap is always meet all valid JSON
  175. maxCap := curLen + calMaxNodeCap(len(p.Json) - int(p.cur - p.start))
  176. slice := rt.GoSlice{
  177. Ptr: rt.Mallocgc(uintptr(maxCap) * nodeType.Size, nodeType, false),
  178. Len: maxCap,
  179. Cap: maxCap,
  180. }
  181. rt.Memmove(unsafe.Pointer(slice.Ptr), unsafe.Pointer(&p.nodes[0]), offset)
  182. p.backup = p.nodes
  183. p.nodes = *(*[]node)(unsafe.Pointer(&slice))
  184. // update node cursor
  185. p.nbuf.nstart = uintptr(unsafe.Pointer(&p.nodes[0]))
  186. p.nbuf.nend = p.nbuf.nstart + uintptr(cap(p.nodes)) * unsafe.Sizeof(node{})
  187. p.nbuf.ncur = p.nbuf.nstart + offset
  188. // continue parse json
  189. err = ErrorCode(native.ParseWithPadding(unsafe.Pointer(p)))
  190. p.options = old
  191. return err
  192. }
  193. func (p *Parser) reset() {
  194. p.options = 0
  195. p.padded = p.padded[:0]
  196. // nodes is too large here, we will not reset it and use small backup nodes buffer
  197. if p.backup != nil {
  198. p.nodes = p.backup
  199. p.backup = nil
  200. }
  201. p.start = 0
  202. p.cur = 0
  203. p.end = 0
  204. p.Json = ""
  205. p.nbuf = nodeBuf{}
  206. p._nbk = _nospaceBlock{}
  207. p.Utf8Inv = false
  208. p.isEface = false
  209. }
// free resets the parser and puts it back into parsePool. The parser must not
// be used afterwards, since a later newParser call may hand it out again.
func (p *Parser) free() {
	p.reset()
	parsePool.Put(p)
}
  214. //go:noinline
  215. func (p *Parser) fixError(code ErrorCode) error {
  216. if code == SONIC_OK {
  217. return nil
  218. }
  219. if p.Pos() == 0 {
  220. code = SONIC_EOF;
  221. }
  222. pos := p.Pos() - 1
  223. return error_syntax(pos, p.Json, ParsingErrors[code])
  224. }
  225. func Parse(data string, opt uint64) error {
  226. p := newParser(data, 0, opt)
  227. err := p.parse()
  228. p.free()
  229. return err
  230. }