encoder.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package encoder
  17. import (
  18. "bytes"
  19. "encoding/json"
  20. "reflect"
  21. "runtime"
  22. "unsafe"
  23. "github.com/bytedance/sonic/utf8"
  24. "github.com/bytedance/sonic/internal/encoder/alg"
  25. "github.com/bytedance/sonic/internal/encoder/vars"
  26. "github.com/bytedance/sonic/internal/rt"
  27. "github.com/bytedance/sonic/option"
  28. )
  29. // Options is a set of encoding options.
  30. type Options uint64
  31. const (
  32. // SortMapKeys indicates that the keys of a map needs to be sorted
  33. // before serializing into JSON.
  34. // WARNING: This hurts performance A LOT, USE WITH CARE.
  35. SortMapKeys Options = 1 << alg.BitSortMapKeys
  36. // EscapeHTML indicates encoder to escape all HTML characters
  37. // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
  38. // WARNING: This hurts performance A LOT, USE WITH CARE.
  39. EscapeHTML Options = 1 << alg.BitEscapeHTML
  40. // CompactMarshaler indicates that the output JSON from json.Marshaler
  41. // is always compact and needs no validation
  42. CompactMarshaler Options = 1 << alg.BitCompactMarshaler
  43. // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
  44. // is always escaped string and needs no quoting
  45. NoQuoteTextMarshaler Options = 1 << alg.BitNoQuoteTextMarshaler
  46. // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
  47. // instead of 'null'.
  48. // NOTE: The priority of this option is lower than json tag `omitempty`.
  49. NoNullSliceOrMap Options = 1 << alg.BitNoNullSliceOrMap
  50. // ValidateString indicates that encoder should validate the input string
  51. // before encoding it into JSON.
  52. ValidateString Options = 1 << alg.BitValidateString
  53. // NoValidateJSONMarshaler indicates that the encoder should not validate the output string
  54. // after encoding the JSONMarshaler to JSON.
  55. NoValidateJSONMarshaler Options = 1 << alg.BitNoValidateJSONMarshaler
  56. // NoEncoderNewline indicates that the encoder should not add a newline after every message
  57. NoEncoderNewline Options = 1 << alg.BitNoEncoderNewline
  58. // CompatibleWithStd is used to be compatible with std encoder.
  59. CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
  60. // Encode Infinity or Nan float into `null`, instead of returning an error.
  61. EncodeNullForInfOrNan Options = 1 << alg.BitEncodeNullForInfOrNan
  62. )
  63. // Encoder represents a specific set of encoder configurations.
  64. type Encoder struct {
  65. Opts Options
  66. prefix string
  67. indent string
  68. }
  69. // Encode returns the JSON encoding of v.
  70. func (self *Encoder) Encode(v interface{}) ([]byte, error) {
  71. if self.indent != "" || self.prefix != "" {
  72. return EncodeIndented(v, self.prefix, self.indent, self.Opts)
  73. }
  74. return Encode(v, self.Opts)
  75. }
  76. // SortKeys enables the SortMapKeys option.
  77. func (self *Encoder) SortKeys() *Encoder {
  78. self.Opts |= SortMapKeys
  79. return self
  80. }
  81. // SetEscapeHTML specifies if option EscapeHTML opens
  82. func (self *Encoder) SetEscapeHTML(f bool) {
  83. if f {
  84. self.Opts |= EscapeHTML
  85. } else {
  86. self.Opts &= ^EscapeHTML
  87. }
  88. }
  89. // SetValidateString specifies if option ValidateString opens
  90. func (self *Encoder) SetValidateString(f bool) {
  91. if f {
  92. self.Opts |= ValidateString
  93. } else {
  94. self.Opts &= ^ValidateString
  95. }
  96. }
  97. // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
  98. func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
  99. if f {
  100. self.Opts |= NoValidateJSONMarshaler
  101. } else {
  102. self.Opts &= ^NoValidateJSONMarshaler
  103. }
  104. }
  105. // SetNoEncoderNewline specifies if option NoEncoderNewline opens
  106. func (self *Encoder) SetNoEncoderNewline(f bool) {
  107. if f {
  108. self.Opts |= NoEncoderNewline
  109. } else {
  110. self.Opts &= ^NoEncoderNewline
  111. }
  112. }
  113. // SetCompactMarshaler specifies if option CompactMarshaler opens
  114. func (self *Encoder) SetCompactMarshaler(f bool) {
  115. if f {
  116. self.Opts |= CompactMarshaler
  117. } else {
  118. self.Opts &= ^CompactMarshaler
  119. }
  120. }
  121. // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
  122. func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
  123. if f {
  124. self.Opts |= NoQuoteTextMarshaler
  125. } else {
  126. self.Opts &= ^NoQuoteTextMarshaler
  127. }
  128. }
  129. // SetIndent instructs the encoder to format each subsequent encoded
  130. // value as if indented by the package-level function EncodeIndent().
  131. // Calling SetIndent("", "") disables indentation.
  132. func (enc *Encoder) SetIndent(prefix, indent string) {
  133. enc.prefix = prefix
  134. enc.indent = indent
  135. }
  136. // Quote returns the JSON-quoted version of s.
  137. func Quote(s string) string {
  138. buf := make([]byte, 0, len(s)+2)
  139. buf = alg.Quote(buf, s, false)
  140. return rt.Mem2Str(buf)
  141. }
  142. // Encode returns the JSON encoding of val, encoded with opts.
  143. func Encode(val interface{}, opts Options) ([]byte, error) {
  144. var ret []byte
  145. buf := vars.NewBytes()
  146. err := encodeIntoCheckRace(buf, val, opts)
  147. /* check for errors */
  148. if err != nil {
  149. vars.FreeBytes(buf)
  150. return nil, err
  151. }
  152. /* htmlescape or correct UTF-8 if opts enable */
  153. old := buf
  154. *buf = encodeFinish(*old, opts)
  155. pbuf := ((*rt.GoSlice)(unsafe.Pointer(buf))).Ptr
  156. pold := ((*rt.GoSlice)(unsafe.Pointer(old))).Ptr
  157. /* return when allocated a new buffer */
  158. if pbuf != pold {
  159. vars.FreeBytes(old)
  160. return *buf, nil
  161. }
  162. /* make a copy of the result */
  163. if rt.CanSizeResue(cap(*buf)) {
  164. ret = make([]byte, len(*buf))
  165. copy(ret, *buf)
  166. vars.FreeBytes(buf)
  167. } else {
  168. ret = *buf
  169. }
  170. /* return the buffer into pool */
  171. return ret, nil
  172. }
  173. // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
  174. // a new one.
  175. func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
  176. err := encodeIntoCheckRace(buf, val, opts)
  177. if err != nil {
  178. return err
  179. }
  180. *buf = encodeFinish(*buf, opts)
  181. return err
  182. }
  183. func encodeInto(buf *[]byte, val interface{}, opts Options) error {
  184. stk := vars.NewStack()
  185. efv := rt.UnpackEface(val)
  186. err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
  187. /* return the stack into pool */
  188. if err != nil {
  189. vars.ResetStack(stk)
  190. }
  191. vars.FreeStack(stk)
  192. /* avoid GC ahead */
  193. runtime.KeepAlive(buf)
  194. runtime.KeepAlive(efv)
  195. return err
  196. }
  197. func encodeFinish(buf []byte, opts Options) []byte {
  198. if opts & EscapeHTML != 0 {
  199. buf = HTMLEscape(nil, buf)
  200. }
  201. if (opts & ValidateString != 0) && !utf8.Validate(buf) {
  202. buf = utf8.CorrectWith(nil, buf, `\ufffd`)
  203. }
  204. return buf
  205. }
  206. // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
  207. // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
  208. // so that the JSON will be safe to embed inside HTML <script> tags.
  209. // For historical reasons, web browsers don't honor standard HTML
  210. // escaping within <script> tags, so an alternative JSON encoding must
  211. // be used.
  212. func HTMLEscape(dst []byte, src []byte) []byte {
  213. return alg.HtmlEscape(dst, src)
  214. }
  215. // EncodeIndented is like Encode but applies Indent to format the output.
  216. // Each JSON element in the output will begin on a new line beginning with prefix
  217. // followed by one or more copies of indent according to the indentation nesting.
  218. func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
  219. var err error
  220. var buf *bytes.Buffer
  221. /* encode into the buffer */
  222. out := vars.NewBytes()
  223. err = EncodeInto(out, val, opts)
  224. /* check for errors */
  225. if err != nil {
  226. vars.FreeBytes(out)
  227. return nil, err
  228. }
  229. /* indent the JSON */
  230. buf = vars.NewBuffer()
  231. err = json.Indent(buf, *out, prefix, indent)
  232. vars.FreeBytes(out)
  233. /* check for errors */
  234. if err != nil {
  235. vars.FreeBuffer(buf)
  236. return nil, err
  237. }
  238. /* copy to the result buffer */
  239. var ret []byte
  240. if rt.CanSizeResue(cap(buf.Bytes())) {
  241. ret = make([]byte, buf.Len())
  242. copy(ret, buf.Bytes())
  243. /* return the buffers into pool */
  244. vars.FreeBuffer(buf)
  245. } else {
  246. ret = buf.Bytes()
  247. }
  248. return ret, nil
  249. }
  250. // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
  251. // order to reduce the first-hit latency.
  252. //
  253. // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
  254. // a compile option to set the depth of recursive compile for the nested struct type.
  255. func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
  256. cfg := option.DefaultCompileOptions()
  257. for _, opt := range opts {
  258. opt(&cfg)
  259. }
  260. return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
  261. }
  262. // Valid validates json and returns first non-blank character position,
  263. // if it is only one valid json value.
  264. // Otherwise returns invalid character position using start.
  265. //
  266. // Note: it does not check for the invalid UTF-8 characters.
  267. func Valid(data []byte) (ok bool, start int) {
  268. return alg.Valid(data)
  269. }