parser.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. //
  2. // Copyright 2024 CloudWeGo Authors
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. package expr
  17. import (
  18. "strconv"
  19. "unicode"
  20. "unsafe"
  21. )
  22. type _TokenKind uint8
  23. const (
  24. _T_end _TokenKind = iota + 1
  25. _T_int
  26. _T_punc
  27. _T_name
  28. )
  29. const (
  30. _OP2 = 0x80
  31. _POW = _OP2 | '*'
  32. _SHL = _OP2 | '<'
  33. _SHR = _OP2 | '>'
  34. )
  35. type _Slice struct {
  36. p unsafe.Pointer
  37. n int
  38. c int
  39. }
  40. type _Token struct {
  41. pos int
  42. ptr *rune
  43. u64 uint64
  44. tag _TokenKind
  45. }
  46. func (self _Token) str() (v string) {
  47. return string(self.rbuf())
  48. }
  49. func (self _Token) rbuf() (v []rune) {
  50. (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
  51. (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
  52. (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
  53. return
  54. }
  55. func tokenEnd(p int) _Token {
  56. return _Token{
  57. pos: p,
  58. tag: _T_end,
  59. }
  60. }
  61. func tokenInt(p int, v uint64) _Token {
  62. return _Token{
  63. pos: p,
  64. u64: v,
  65. tag: _T_int,
  66. }
  67. }
  68. func tokenPunc(p int, v rune) _Token {
  69. return _Token{
  70. pos: p,
  71. tag: _T_punc,
  72. u64: uint64(v),
  73. }
  74. }
  75. func tokenName(p int, v []rune) _Token {
  76. return _Token{
  77. pos: p,
  78. ptr: &v[0],
  79. tag: _T_name,
  80. u64: uint64(len(v)),
  81. }
  82. }
  83. // Repository represents a repository of Term's.
  84. type Repository interface {
  85. Get(name string) (Term, error)
  86. }
  87. // Parser parses an expression string to it's AST representation.
  88. type Parser struct {
  89. pos int
  90. src []rune
  91. }
  92. var binaryOps = [...]func(*Expr, *Expr) *Expr{
  93. '+': (*Expr).Add,
  94. '-': (*Expr).Sub,
  95. '*': (*Expr).Mul,
  96. '/': (*Expr).Div,
  97. '%': (*Expr).Mod,
  98. '&': (*Expr).And,
  99. '^': (*Expr).Xor,
  100. '|': (*Expr).Or,
  101. _SHL: (*Expr).Shl,
  102. _SHR: (*Expr).Shr,
  103. _POW: (*Expr).Pow,
  104. }
  105. var precedence = [...]map[int]bool{
  106. {_SHL: true, _SHR: true},
  107. {'|': true},
  108. {'^': true},
  109. {'&': true},
  110. {'+': true, '-': true},
  111. {'*': true, '/': true, '%': true},
  112. {_POW: true},
  113. }
  114. func (self *Parser) ch() rune {
  115. return self.src[self.pos]
  116. }
  117. func (self *Parser) eof() bool {
  118. return self.pos >= len(self.src)
  119. }
  120. func (self *Parser) rch() (v rune) {
  121. v, self.pos = self.src[self.pos], self.pos+1
  122. return
  123. }
  124. func (self *Parser) hex(ss []rune) bool {
  125. if len(ss) == 1 && ss[0] == '0' {
  126. return unicode.ToLower(self.ch()) == 'x'
  127. } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
  128. return unicode.IsDigit(self.ch())
  129. } else {
  130. return ishexdigit(self.ch())
  131. }
  132. }
  133. func (self *Parser) int(p int, ss []rune) (_Token, error) {
  134. var err error
  135. var val uint64
  136. /* find all the digits */
  137. for !self.eof() && self.hex(ss) {
  138. ss = append(ss, self.rch())
  139. }
  140. /* parse the value */
  141. if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
  142. return _Token{}, err
  143. } else {
  144. return tokenInt(p, val), nil
  145. }
  146. }
  147. func (self *Parser) name(p int, ss []rune) _Token {
  148. for !self.eof() && isident(self.ch()) {
  149. ss = append(ss, self.rch())
  150. }
  151. return tokenName(p, ss)
  152. }
  153. func (self *Parser) read(p int, ch rune) (_Token, error) {
  154. if isdigit(ch) {
  155. return self.int(p, []rune{ch})
  156. } else if isident0(ch) {
  157. return self.name(p, []rune{ch}), nil
  158. } else if isop2ch(ch) && !self.eof() && self.ch() == ch {
  159. return tokenPunc(p, _OP2|self.rch()), nil
  160. } else if isop1ch(ch) {
  161. return tokenPunc(p, ch), nil
  162. } else {
  163. return _Token{}, newSyntaxError(self.pos, "invalid character "+strconv.QuoteRuneToASCII(ch))
  164. }
  165. }
  166. func (self *Parser) next() (_Token, error) {
  167. for {
  168. var p int
  169. var c rune
  170. /* check for EOF */
  171. if self.eof() {
  172. return tokenEnd(self.pos), nil
  173. }
  174. /* read the next char */
  175. p = self.pos
  176. c = self.rch()
  177. /* parse the token if not a space */
  178. if !unicode.IsSpace(c) {
  179. return self.read(p, c)
  180. }
  181. }
  182. }
  183. func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
  184. if repo == nil {
  185. return nil, newSyntaxError(tk.pos, "unresolved symbol: "+tk.str())
  186. } else if term, err := repo.Get(tk.str()); err != nil {
  187. return nil, err
  188. } else {
  189. return Ref(term), nil
  190. }
  191. }
  192. func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
  193. var err error
  194. var ret *Expr
  195. var ntk _Token
  196. /* evaluate the nested expression */
  197. if ret, err = self.expr(0, nest+1, repo); err != nil {
  198. return nil, err
  199. }
  200. /* must follows with a ')' */
  201. if ntk, err = self.next(); err != nil {
  202. return nil, err
  203. } else if ntk.tag != _T_punc || ntk.u64 != ')' {
  204. return nil, newSyntaxError(ntk.pos, "')' expected")
  205. } else {
  206. return ret, nil
  207. }
  208. }
  209. func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
  210. if tk, err := self.next(); err != nil {
  211. return nil, err
  212. } else if tk.tag == _T_int {
  213. return Int(int64(tk.u64)), nil
  214. } else if tk.tag == _T_name {
  215. return self.grab(tk, repo)
  216. } else if tk.tag == _T_punc && tk.u64 == '(' {
  217. return self.nest(nest, repo)
  218. } else if tk.tag == _T_punc && tk.u64 == '+' {
  219. return self.unit(nest, repo)
  220. } else if tk.tag == _T_punc && tk.u64 == '-' {
  221. return neg2(self.unit(nest, repo))
  222. } else if tk.tag == _T_punc && tk.u64 == '~' {
  223. return not2(self.unit(nest, repo))
  224. } else {
  225. return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
  226. }
  227. }
  228. func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
  229. var err error
  230. var val *Expr
  231. /* parse the LHS operand */
  232. if val, err = self.expr(prec+1, nest, repo); err != nil {
  233. return nil, err
  234. }
  235. /* parse all the operators of the same precedence */
  236. for {
  237. var op int
  238. var rv *Expr
  239. var tk _Token
  240. /* peek the next token */
  241. pp := self.pos
  242. tk, err = self.next()
  243. /* check for errors */
  244. if err != nil {
  245. return nil, err
  246. }
  247. /* encountered EOF */
  248. if tk.tag == _T_end {
  249. return val, nil
  250. }
  251. /* must be an operator */
  252. if tk.tag != _T_punc {
  253. return nil, newSyntaxError(tk.pos, "operators expected")
  254. }
  255. /* check for the operator precedence */
  256. if op = int(tk.u64); !precedence[prec][op] {
  257. self.pos = pp
  258. return val, nil
  259. }
  260. /* evaluate the RHS operand, and combine the value */
  261. if rv, err = self.expr(prec+1, nest, repo); err != nil {
  262. return nil, err
  263. } else {
  264. val = binaryOps[op](val, rv)
  265. }
  266. }
  267. }
  268. func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
  269. if prec >= len(precedence) {
  270. return self.unit(nest, repo)
  271. } else {
  272. return self.term(prec, nest, repo)
  273. }
  274. }
  275. // Parse parses the expression, and returns it's AST tree.
  276. func (self *Parser) Parse(repo Repository) (*Expr, error) {
  277. return self.expr(0, 0, repo)
  278. }
  279. // SetSource resets the expression parser and sets the expression source.
  280. func (self *Parser) SetSource(src string) *Parser {
  281. self.pos = 0
  282. self.src = []rune(src)
  283. return self
  284. }