encoder.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package encoder
  17. import (
  18. `bytes`
  19. `encoding/json`
  20. `reflect`
  21. `runtime`
  22. `unsafe`
  23. `github.com/bytedance/sonic/internal/native`
  24. `github.com/bytedance/sonic/internal/native/types`
  25. `github.com/bytedance/sonic/internal/rt`
  26. `github.com/bytedance/sonic/utf8`
  27. `github.com/bytedance/sonic/option`
  28. )
  29. // Options is a set of encoding options.
  30. type Options uint64
  31. const (
  32. bitSortMapKeys = iota
  33. bitEscapeHTML
  34. bitCompactMarshaler
  35. bitNoQuoteTextMarshaler
  36. bitNoNullSliceOrMap
  37. bitValidateString
  38. bitNoValidateJSONMarshaler
  39. bitNoEncoderNewline
  40. // used for recursive compile
  41. bitPointerValue = 63
  42. )
  43. const (
  44. // SortMapKeys indicates that the keys of a map needs to be sorted
  45. // before serializing into JSON.
  46. // WARNING: This hurts performance A LOT, USE WITH CARE.
  47. SortMapKeys Options = 1 << bitSortMapKeys
  48. // EscapeHTML indicates encoder to escape all HTML characters
  49. // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
  50. // WARNING: This hurts performance A LOT, USE WITH CARE.
  51. EscapeHTML Options = 1 << bitEscapeHTML
  52. // CompactMarshaler indicates that the output JSON from json.Marshaler
  53. // is always compact and needs no validation
  54. CompactMarshaler Options = 1 << bitCompactMarshaler
  55. // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
  56. // is always escaped string and needs no quoting
  57. NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler
  58. // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
  59. // instead of 'null'
  60. NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap
  61. // ValidateString indicates that encoder should validate the input string
  62. // before encoding it into JSON.
  63. ValidateString Options = 1 << bitValidateString
  64. // NoValidateJSONMarshaler indicates that the encoder should not validate the output string
  65. // after encoding the JSONMarshaler to JSON.
  66. NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler
  67. // NoEncoderNewline indicates that the encoder should not add a newline after every message
  68. NoEncoderNewline Options = 1 << bitNoEncoderNewline
  69. // CompatibleWithStd is used to be compatible with std encoder.
  70. CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
  71. )
  72. // Encoder represents a specific set of encoder configurations.
  73. type Encoder struct {
  74. Opts Options
  75. prefix string
  76. indent string
  77. }
  78. // Encode returns the JSON encoding of v.
  79. func (self *Encoder) Encode(v interface{}) ([]byte, error) {
  80. if self.indent != "" || self.prefix != "" {
  81. return EncodeIndented(v, self.prefix, self.indent, self.Opts)
  82. }
  83. return Encode(v, self.Opts)
  84. }
  85. // SortKeys enables the SortMapKeys option.
  86. func (self *Encoder) SortKeys() *Encoder {
  87. self.Opts |= SortMapKeys
  88. return self
  89. }
  90. // SetEscapeHTML specifies if option EscapeHTML opens
  91. func (self *Encoder) SetEscapeHTML(f bool) {
  92. if f {
  93. self.Opts |= EscapeHTML
  94. } else {
  95. self.Opts &= ^EscapeHTML
  96. }
  97. }
  98. // SetValidateString specifies if option ValidateString opens
  99. func (self *Encoder) SetValidateString(f bool) {
  100. if f {
  101. self.Opts |= ValidateString
  102. } else {
  103. self.Opts &= ^ValidateString
  104. }
  105. }
  106. // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
  107. func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
  108. if f {
  109. self.Opts |= NoValidateJSONMarshaler
  110. } else {
  111. self.Opts &= ^NoValidateJSONMarshaler
  112. }
  113. }
  114. // SetNoEncoderNewline specifies if option NoEncoderNewline opens
  115. func (self *Encoder) SetNoEncoderNewline(f bool) {
  116. if f {
  117. self.Opts |= NoEncoderNewline
  118. } else {
  119. self.Opts &= ^NoEncoderNewline
  120. }
  121. }
  122. // SetCompactMarshaler specifies if option CompactMarshaler opens
  123. func (self *Encoder) SetCompactMarshaler(f bool) {
  124. if f {
  125. self.Opts |= CompactMarshaler
  126. } else {
  127. self.Opts &= ^CompactMarshaler
  128. }
  129. }
  130. // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
  131. func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
  132. if f {
  133. self.Opts |= NoQuoteTextMarshaler
  134. } else {
  135. self.Opts &= ^NoQuoteTextMarshaler
  136. }
  137. }
  138. // SetIndent instructs the encoder to format each subsequent encoded
  139. // value as if indented by the package-level function EncodeIndent().
  140. // Calling SetIndent("", "") disables indentation.
  141. func (enc *Encoder) SetIndent(prefix, indent string) {
  142. enc.prefix = prefix
  143. enc.indent = indent
  144. }
  145. // Quote returns the JSON-quoted version of s.
  146. func Quote(s string) string {
  147. var n int
  148. var p []byte
  149. /* check for empty string */
  150. if s == "" {
  151. return `""`
  152. }
  153. /* allocate space for result */
  154. n = len(s) + 2
  155. p = make([]byte, 0, n)
  156. /* call the encoder */
  157. _ = encodeString(&p, s)
  158. return rt.Mem2Str(p)
  159. }
  160. // Encode returns the JSON encoding of val, encoded with opts.
  161. func Encode(val interface{}, opts Options) ([]byte, error) {
  162. var ret []byte
  163. buf := newBytes()
  164. err := encodeInto(&buf, val, opts)
  165. /* check for errors */
  166. if err != nil {
  167. freeBytes(buf)
  168. return nil, err
  169. }
  170. /* htmlescape or correct UTF-8 if opts enable */
  171. old := buf
  172. buf = encodeFinish(old, opts)
  173. pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr
  174. pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr
  175. /* return when allocated a new buffer */
  176. if pbuf != pold {
  177. freeBytes(old)
  178. return buf, nil
  179. }
  180. /* make a copy of the result */
  181. ret = make([]byte, len(buf))
  182. copy(ret, buf)
  183. freeBytes(buf)
  184. /* return the buffer into pool */
  185. return ret, nil
  186. }
  187. // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
  188. // a new one.
  189. func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
  190. err := encodeInto(buf, val, opts)
  191. if err != nil {
  192. return err
  193. }
  194. *buf = encodeFinish(*buf, opts)
  195. return err
  196. }
  197. func encodeInto(buf *[]byte, val interface{}, opts Options) error {
  198. stk := newStack()
  199. efv := rt.UnpackEface(val)
  200. err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
  201. /* return the stack into pool */
  202. if err != nil {
  203. resetStack(stk)
  204. }
  205. freeStack(stk)
  206. /* avoid GC ahead */
  207. runtime.KeepAlive(buf)
  208. runtime.KeepAlive(efv)
  209. return err
  210. }
  211. func encodeFinish(buf []byte, opts Options) []byte {
  212. if opts & EscapeHTML != 0 {
  213. buf = HTMLEscape(nil, buf)
  214. }
  215. if opts & ValidateString != 0 && !utf8.Validate(buf) {
  216. buf = utf8.CorrectWith(nil, buf, `\ufffd`)
  217. }
  218. return buf
  219. }
  220. var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
  221. // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
  222. // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
  223. // so that the JSON will be safe to embed inside HTML <script> tags.
  224. // For historical reasons, web browsers don't honor standard HTML
  225. // escaping within <script> tags, so an alternative JSON encoding must
  226. // be used.
  227. func HTMLEscape(dst []byte, src []byte) []byte {
  228. return htmlEscape(dst, src)
  229. }
  230. // EncodeIndented is like Encode but applies Indent to format the output.
  231. // Each JSON element in the output will begin on a new line beginning with prefix
  232. // followed by one or more copies of indent according to the indentation nesting.
  233. func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
  234. var err error
  235. var out []byte
  236. var buf *bytes.Buffer
  237. /* encode into the buffer */
  238. out = newBytes()
  239. err = EncodeInto(&out, val, opts)
  240. /* check for errors */
  241. if err != nil {
  242. freeBytes(out)
  243. return nil, err
  244. }
  245. /* indent the JSON */
  246. buf = newBuffer()
  247. err = json.Indent(buf, out, prefix, indent)
  248. /* check for errors */
  249. if err != nil {
  250. freeBytes(out)
  251. freeBuffer(buf)
  252. return nil, err
  253. }
  254. /* copy to the result buffer */
  255. ret := make([]byte, buf.Len())
  256. copy(ret, buf.Bytes())
  257. /* return the buffers into pool */
  258. freeBytes(out)
  259. freeBuffer(buf)
  260. return ret, nil
  261. }
  262. // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
  263. // order to reduce the first-hit latency.
  264. //
  265. // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
  266. // a compile option to set the depth of recursive compile for the nested struct type.
  267. func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
  268. cfg := option.DefaultCompileOptions()
  269. for _, opt := range opts {
  270. opt(&cfg)
  271. }
  272. return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
  273. }
  274. // Valid validates json and returns first non-blank character position,
  275. // if it is only one valid json value.
  276. // Otherwise returns invalid character position using start.
  277. //
  278. // Note: it does not check for the invalid UTF-8 characters.
  279. func Valid(data []byte) (ok bool, start int) {
  280. n := len(data)
  281. if n == 0 {
  282. return false, -1
  283. }
  284. s := rt.Mem2Str(data)
  285. p := 0
  286. m := types.NewStateMachine()
  287. ret := native.ValidateOne(&s, &p, m, types.F_VALIDATE_STRING)
  288. types.FreeStateMachine(m)
  289. if ret < 0 {
  290. return false, p-1
  291. }
  292. /* check for trailing spaces */
  293. for ;p < n; p++ {
  294. if (types.SPACE_MASK & (1 << data[p])) == 0 {
  295. return false, p
  296. }
  297. }
  298. return true, ret
  299. }