unmarshal_text.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. package decoder
  2. import (
  3. "bytes"
  4. "encoding"
  5. "fmt"
  6. "unicode"
  7. "unicode/utf16"
  8. "unicode/utf8"
  9. "unsafe"
  10. "github.com/goccy/go-json/internal/errors"
  11. "github.com/goccy/go-json/internal/runtime"
  12. )
  13. type unmarshalTextDecoder struct {
  14. typ *runtime.Type
  15. structName string
  16. fieldName string
  17. }
  18. func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
  19. return &unmarshalTextDecoder{
  20. typ: typ,
  21. structName: structName,
  22. fieldName: fieldName,
  23. }
  24. }
  25. func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
  26. switch e := err.(type) {
  27. case *errors.UnmarshalTypeError:
  28. e.Struct = d.structName
  29. e.Field = d.fieldName
  30. case *errors.SyntaxError:
  31. e.Offset = cursor
  32. }
  33. }
  // nullbytes is the JSON null literal; decoded values are compared against it
  // to detect null.
  var nullbytes = []byte("null")
  37. func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
  38. s.skipWhiteSpace()
  39. start := s.cursor
  40. if err := s.skipValue(depth); err != nil {
  41. return err
  42. }
  43. src := s.buf[start:s.cursor]
  44. if len(src) > 0 {
  45. switch src[0] {
  46. case '[':
  47. return &errors.UnmarshalTypeError{
  48. Value: "array",
  49. Type: runtime.RType2Type(d.typ),
  50. Offset: s.totalOffset(),
  51. }
  52. case '{':
  53. return &errors.UnmarshalTypeError{
  54. Value: "object",
  55. Type: runtime.RType2Type(d.typ),
  56. Offset: s.totalOffset(),
  57. }
  58. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  59. return &errors.UnmarshalTypeError{
  60. Value: "number",
  61. Type: runtime.RType2Type(d.typ),
  62. Offset: s.totalOffset(),
  63. }
  64. case 'n':
  65. if bytes.Equal(src, nullbytes) {
  66. *(*unsafe.Pointer)(p) = nil
  67. return nil
  68. }
  69. }
  70. }
  71. dst := make([]byte, len(src))
  72. copy(dst, src)
  73. if b, ok := unquoteBytes(dst); ok {
  74. dst = b
  75. }
  76. v := *(*interface{})(unsafe.Pointer(&emptyInterface{
  77. typ: d.typ,
  78. ptr: p,
  79. }))
  80. if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
  81. d.annotateError(s.cursor, err)
  82. return err
  83. }
  84. return nil
  85. }
  86. func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
  87. buf := ctx.Buf
  88. cursor = skipWhiteSpace(buf, cursor)
  89. start := cursor
  90. end, err := skipValue(buf, cursor, depth)
  91. if err != nil {
  92. return 0, err
  93. }
  94. src := buf[start:end]
  95. if len(src) > 0 {
  96. switch src[0] {
  97. case '[':
  98. return 0, &errors.UnmarshalTypeError{
  99. Value: "array",
  100. Type: runtime.RType2Type(d.typ),
  101. Offset: start,
  102. }
  103. case '{':
  104. return 0, &errors.UnmarshalTypeError{
  105. Value: "object",
  106. Type: runtime.RType2Type(d.typ),
  107. Offset: start,
  108. }
  109. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  110. return 0, &errors.UnmarshalTypeError{
  111. Value: "number",
  112. Type: runtime.RType2Type(d.typ),
  113. Offset: start,
  114. }
  115. case 'n':
  116. if bytes.Equal(src, nullbytes) {
  117. *(*unsafe.Pointer)(p) = nil
  118. return end, nil
  119. }
  120. }
  121. }
  122. if s, ok := unquoteBytes(src); ok {
  123. src = s
  124. }
  125. v := *(*interface{})(unsafe.Pointer(&emptyInterface{
  126. typ: d.typ,
  127. ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
  128. }))
  129. if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
  130. d.annotateError(cursor, err)
  131. return 0, err
  132. }
  133. return end, nil
  134. }
  135. func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
  136. return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path")
  137. }
  138. func unquoteBytes(s []byte) (t []byte, ok bool) { //nolint: nonamedreturns
  139. length := len(s)
  140. if length < 2 || s[0] != '"' || s[length-1] != '"' {
  141. return
  142. }
  143. s = s[1 : length-1]
  144. length -= 2
  145. // Check for unusual characters. If there are none,
  146. // then no unquoting is needed, so return a slice of the
  147. // original bytes.
  148. r := 0
  149. for r < length {
  150. c := s[r]
  151. if c == '\\' || c == '"' || c < ' ' {
  152. break
  153. }
  154. if c < utf8.RuneSelf {
  155. r++
  156. continue
  157. }
  158. rr, size := utf8.DecodeRune(s[r:])
  159. if rr == utf8.RuneError && size == 1 {
  160. break
  161. }
  162. r += size
  163. }
  164. if r == length {
  165. return s, true
  166. }
  167. b := make([]byte, length+2*utf8.UTFMax)
  168. w := copy(b, s[0:r])
  169. for r < length {
  170. // Out of room? Can only happen if s is full of
  171. // malformed UTF-8 and we're replacing each
  172. // byte with RuneError.
  173. if w >= len(b)-2*utf8.UTFMax {
  174. nb := make([]byte, (len(b)+utf8.UTFMax)*2)
  175. copy(nb, b[0:w])
  176. b = nb
  177. }
  178. switch c := s[r]; {
  179. case c == '\\':
  180. r++
  181. if r >= length {
  182. return
  183. }
  184. switch s[r] {
  185. default:
  186. return
  187. case '"', '\\', '/', '\'':
  188. b[w] = s[r]
  189. r++
  190. w++
  191. case 'b':
  192. b[w] = '\b'
  193. r++
  194. w++
  195. case 'f':
  196. b[w] = '\f'
  197. r++
  198. w++
  199. case 'n':
  200. b[w] = '\n'
  201. r++
  202. w++
  203. case 'r':
  204. b[w] = '\r'
  205. r++
  206. w++
  207. case 't':
  208. b[w] = '\t'
  209. r++
  210. w++
  211. case 'u':
  212. r--
  213. rr := getu4(s[r:])
  214. if rr < 0 {
  215. return
  216. }
  217. r += 6
  218. if utf16.IsSurrogate(rr) {
  219. rr1 := getu4(s[r:])
  220. if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
  221. // A valid pair; consume.
  222. r += 6
  223. w += utf8.EncodeRune(b[w:], dec)
  224. break
  225. }
  226. // Invalid surrogate; fall back to replacement rune.
  227. rr = unicode.ReplacementChar
  228. }
  229. w += utf8.EncodeRune(b[w:], rr)
  230. }
  231. // Quote, control characters are invalid.
  232. case c == '"', c < ' ':
  233. return
  234. // ASCII
  235. case c < utf8.RuneSelf:
  236. b[w] = c
  237. r++
  238. w++
  239. // Coerce to well-formed UTF-8.
  240. default:
  241. rr, size := utf8.DecodeRune(s[r:])
  242. r += size
  243. w += utf8.EncodeRune(b[w:], rr)
  244. }
  245. }
  246. return b[0:w], true
  247. }
  // getu4 decodes the \uXXXX escape at the start of s and returns the 16-bit
  // value of its four hexadecimal digits (either case). It returns -1 when s is
  // shorter than six bytes, does not begin with `\u`, or contains a non-hex
  // digit. Bytes after the sixth are ignored.
  func getu4(s []byte) rune {
  	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
  		return -1
  	}
  	var code rune
  	for i := 2; i < 6; i++ {
  		digit := s[i]
  		var nibble rune
  		switch {
  		case digit >= '0' && digit <= '9':
  			nibble = rune(digit - '0')
  		case digit >= 'a' && digit <= 'f':
  			nibble = rune(digit-'a') + 10
  		case digit >= 'A' && digit <= 'F':
  			nibble = rune(digit-'A') + 10
  		default:
  			return -1
  		}
  		code = code<<4 | nibble
  	}
  	return code
  }