stream.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package decoder
  17. import (
  18. `bytes`
  19. `io`
  20. `sync`
  21. `github.com/bytedance/sonic/internal/native`
  22. `github.com/bytedance/sonic/internal/native/types`
  23. `github.com/bytedance/sonic/internal/rt`
  24. `github.com/bytedance/sonic/option`
  25. )
  26. var (
  27. minLeftBufferShift uint = 1
  28. )
  29. // StreamDecoder is the decoder context object for streaming input.
  30. type StreamDecoder struct {
  31. r io.Reader
  32. buf []byte
  33. scanp int
  34. scanned int64
  35. err error
  36. Decoder
  37. }
  38. var bufPool = sync.Pool{
  39. New: func () interface{} {
  40. return make([]byte, 0, option.DefaultDecoderBufferSize)
  41. },
  42. }
  43. // NewStreamDecoder adapts to encoding/json.NewDecoder API.
  44. //
  45. // NewStreamDecoder returns a new decoder that reads from r.
  46. func NewStreamDecoder(r io.Reader) *StreamDecoder {
  47. return &StreamDecoder{r : r}
  48. }
  49. // Decode decodes input stream into val with corresponding data.
  50. // Redundantly bytes may be read and left in its buffer, and can be used at next call.
  51. // Either io error from underlying io.Reader (except io.EOF)
  52. // or syntax error from data will be recorded and stop subsequently decoding.
  53. func (self *StreamDecoder) Decode(val interface{}) (err error) {
  54. // read more data into buf
  55. if self.More() {
  56. // println(string(self.buf))
  57. var s = self.scanp
  58. try_skip:
  59. var e = len(self.buf)
  60. // println("s:", s, "e:", e, "scanned:",self.scanned, "scanp:",self.scanp, self.buf)
  61. var src = rt.Mem2Str(self.buf[s:e])
  62. // if len(src) > 5 {
  63. // println(src[:5], src[len(src)-5:])
  64. // } else {
  65. // println(src)
  66. // }
  67. // try skip
  68. var x = 0;
  69. if y := native.SkipOneFast(&src, &x); y < 0 {
  70. if self.readMore() {
  71. // println("more")
  72. goto try_skip
  73. } else {
  74. // println("no more")
  75. err = SyntaxError{e, self.s, types.ParsingError(-s), ""}
  76. self.setErr(err)
  77. return
  78. }
  79. } else {
  80. s = y + s
  81. e = x + s
  82. }
  83. // println("decode: ", s, e)
  84. // must copy string here for safety
  85. self.Decoder.Reset(string(self.buf[s:e]))
  86. err = self.Decoder.Decode(val)
  87. if err != nil {
  88. self.setErr(err)
  89. return
  90. }
  91. self.scanp = e
  92. _, empty := self.scan()
  93. if empty {
  94. // println("recycle")
  95. // no remain valid bytes, thus we just recycle buffer
  96. mem := self.buf
  97. self.buf = nil
  98. bufPool.Put(mem[:0])
  99. } else {
  100. // println("keep")
  101. // remain undecoded bytes, move them onto head
  102. n := copy(self.buf, self.buf[self.scanp:])
  103. self.buf = self.buf[:n]
  104. }
  105. self.scanned += int64(self.scanp)
  106. self.scanp = 0
  107. }
  108. return self.err
  109. }
  110. // InputOffset returns the input stream byte offset of the current decoder position.
  111. // The offset gives the location of the end of the most recently returned token and the beginning of the next token.
  112. func (self *StreamDecoder) InputOffset() int64 {
  113. // println("input offset",self.scanned, self.scanp)
  114. return self.scanned + int64(self.scanp)
  115. }
  116. // Buffered returns a reader of the data remaining in the Decoder's buffer.
  117. // The reader is valid until the next call to Decode.
  118. func (self *StreamDecoder) Buffered() io.Reader {
  119. return bytes.NewReader(self.buf[self.scanp:])
  120. }
  121. // More reports whether there is another element in the
  122. // current array or object being parsed.
  123. func (self *StreamDecoder) More() bool {
  124. if self.err != nil {
  125. return false
  126. }
  127. c, err := self.peek()
  128. return err == nil && c != ']' && c != '}'
  129. }
  130. // More reports whether there is another element in the
  131. // current array or object being parsed.
  132. func (self *StreamDecoder) readMore() bool {
  133. if self.err != nil {
  134. return false
  135. }
  136. var err error
  137. var n int
  138. for {
  139. // Grow buffer if not large enough.
  140. l := len(self.buf)
  141. realloc(&self.buf)
  142. n, err = self.r.Read(self.buf[l:cap(self.buf)])
  143. self.buf = self.buf[: l+n]
  144. self.scanp = l
  145. _, empty := self.scan()
  146. if !empty {
  147. return true
  148. }
  149. // buffer has been scanned, now report any error
  150. if err != nil {
  151. self.setErr(err)
  152. return false
  153. }
  154. }
  155. }
  156. func (self *StreamDecoder) setErr(err error) {
  157. self.err = err
  158. mem := self.buf[:0]
  159. self.buf = nil
  160. bufPool.Put(mem)
  161. }
  162. func (self *StreamDecoder) peek() (byte, error) {
  163. var err error
  164. for {
  165. c, empty := self.scan()
  166. if !empty {
  167. return byte(c), nil
  168. }
  169. // buffer has been scanned, now report any error
  170. if err != nil {
  171. self.setErr(err)
  172. return 0, err
  173. }
  174. err = self.refill()
  175. }
  176. }
  177. func (self *StreamDecoder) scan() (byte, bool) {
  178. for i := self.scanp; i < len(self.buf); i++ {
  179. c := self.buf[i]
  180. if isSpace(c) {
  181. continue
  182. }
  183. self.scanp = i
  184. return c, false
  185. }
  186. return 0, true
  187. }
  188. func isSpace(c byte) bool {
  189. return types.SPACE_MASK & (1 << c) != 0
  190. }
  191. func (self *StreamDecoder) refill() error {
  192. // Make room to read more into the buffer.
  193. // First slide down data already consumed.
  194. if self.scanp > 0 {
  195. self.scanned += int64(self.scanp)
  196. n := copy(self.buf, self.buf[self.scanp:])
  197. self.buf = self.buf[:n]
  198. self.scanp = 0
  199. }
  200. // Grow buffer if not large enough.
  201. realloc(&self.buf)
  202. // Read. Delay error for next iteration (after scan).
  203. n, err := self.r.Read(self.buf[len(self.buf):cap(self.buf)])
  204. self.buf = self.buf[0 : len(self.buf)+n]
  205. return err
  206. }
  207. func realloc(buf *[]byte) bool {
  208. l := uint(len(*buf))
  209. c := uint(cap(*buf))
  210. if c == 0 {
  211. // println("use pool!")
  212. *buf = bufPool.Get().([]byte)
  213. return true
  214. }
  215. if c - l <= c >> minLeftBufferShift {
  216. // println("realloc!")
  217. e := l+(l>>minLeftBufferShift)
  218. if e <= c {
  219. e = c*2
  220. }
  221. tmp := make([]byte, l, e)
  222. copy(tmp, *buf)
  223. *buf = tmp
  224. return true
  225. }
  226. return false
  227. }