parser.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. //
  2. // Copyright 2024 CloudWeGo Authors
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. package expr
  17. import (
  18. `strconv`
  19. `unicode`
  20. `unsafe`
  21. )
  22. type _TokenKind uint8
  23. const (
  24. _T_end _TokenKind = iota + 1
  25. _T_int
  26. _T_punc
  27. _T_name
  28. )
  29. const (
  30. _OP2 = 0x80
  31. _POW = _OP2 | '*'
  32. _SHL = _OP2 | '<'
  33. _SHR = _OP2 | '>'
  34. )
  35. type _Slice struct {
  36. p unsafe.Pointer
  37. n int
  38. c int
  39. }
  40. type _Token struct {
  41. pos int
  42. ptr *rune
  43. u64 uint64
  44. tag _TokenKind
  45. }
  46. func (self _Token) str() (v string) {
  47. return string(self.rbuf())
  48. }
  49. func (self _Token) rbuf() (v []rune) {
  50. (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
  51. (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
  52. (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
  53. return
  54. }
  55. func tokenEnd(p int) _Token {
  56. return _Token {
  57. pos: p,
  58. tag: _T_end,
  59. }
  60. }
  61. func tokenInt(p int, v uint64) _Token {
  62. return _Token {
  63. pos: p,
  64. u64: v,
  65. tag: _T_int,
  66. }
  67. }
  68. func tokenPunc(p int, v rune) _Token {
  69. return _Token {
  70. pos: p,
  71. tag: _T_punc,
  72. u64: uint64(v),
  73. }
  74. }
  75. func tokenName(p int, v []rune) _Token {
  76. return _Token {
  77. pos: p,
  78. ptr: &v[0],
  79. tag: _T_name,
  80. u64: uint64(len(v)),
  81. }
  82. }
  83. // Repository represents a repository of Term's.
  84. type Repository interface {
  85. Get(name string) (Term, error)
  86. }
  87. // Parser parses an expression string to it's AST representation.
  88. type Parser struct {
  89. pos int
  90. src []rune
  91. }
  92. var binaryOps = [...]func(*Expr, *Expr) *Expr {
  93. '+' : (*Expr).Add,
  94. '-' : (*Expr).Sub,
  95. '*' : (*Expr).Mul,
  96. '/' : (*Expr).Div,
  97. '%' : (*Expr).Mod,
  98. '&' : (*Expr).And,
  99. '^' : (*Expr).Xor,
  100. '|' : (*Expr).Or,
  101. _SHL : (*Expr).Shl,
  102. _SHR : (*Expr).Shr,
  103. _POW : (*Expr).Pow,
  104. }
  105. var precedence = [...]map[int]bool {
  106. {_SHL: true, _SHR: true},
  107. {'|' : true},
  108. {'^' : true},
  109. {'&' : true},
  110. {'+' : true, '-': true},
  111. {'*' : true, '/': true, '%': true},
  112. {_POW: true},
  113. }
  114. func (self *Parser) ch() rune {
  115. return self.src[self.pos]
  116. }
  117. func (self *Parser) eof() bool {
  118. return self.pos >= len(self.src)
  119. }
  120. func (self *Parser) rch() (v rune) {
  121. v, self.pos = self.src[self.pos], self.pos + 1
  122. return
  123. }
  124. func (self *Parser) hex(ss []rune) bool {
  125. if len(ss) == 1 && ss[0] == '0' {
  126. return unicode.ToLower(self.ch()) == 'x'
  127. } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
  128. return unicode.IsDigit(self.ch())
  129. } else {
  130. return ishexdigit(self.ch())
  131. }
  132. }
  133. func (self *Parser) int(p int, ss []rune) (_Token, error) {
  134. var err error
  135. var val uint64
  136. /* find all the digits */
  137. for !self.eof() && self.hex(ss) {
  138. ss = append(ss, self.rch())
  139. }
  140. /* parse the value */
  141. if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
  142. return _Token{}, err
  143. } else {
  144. return tokenInt(p, val), nil
  145. }
  146. }
  147. func (self *Parser) name(p int, ss []rune) _Token {
  148. for !self.eof() && isident(self.ch()) { ss = append(ss, self.rch()) }
  149. return tokenName(p, ss)
  150. }
  151. func (self *Parser) read(p int, ch rune) (_Token, error) {
  152. if isdigit(ch) {
  153. return self.int(p, []rune { ch })
  154. } else if isident0(ch) {
  155. return self.name(p, []rune { ch }), nil
  156. } else if isop2ch(ch) && !self.eof() && self.ch() == ch {
  157. return tokenPunc(p, _OP2 | self.rch()), nil
  158. } else if isop1ch(ch) {
  159. return tokenPunc(p, ch), nil
  160. } else {
  161. return _Token{}, newSyntaxError(self.pos, "invalid character " + strconv.QuoteRuneToASCII(ch))
  162. }
  163. }
  164. func (self *Parser) next() (_Token, error) {
  165. for {
  166. var p int
  167. var c rune
  168. /* check for EOF */
  169. if self.eof() {
  170. return tokenEnd(self.pos), nil
  171. }
  172. /* read the next char */
  173. p = self.pos
  174. c = self.rch()
  175. /* parse the token if not a space */
  176. if !unicode.IsSpace(c) {
  177. return self.read(p, c)
  178. }
  179. }
  180. }
  181. func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
  182. if repo == nil {
  183. return nil, newSyntaxError(tk.pos, "unresolved symbol: " + tk.str())
  184. } else if term, err := repo.Get(tk.str()); err != nil {
  185. return nil, err
  186. } else {
  187. return Ref(term), nil
  188. }
  189. }
  190. func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
  191. var err error
  192. var ret *Expr
  193. var ntk _Token
  194. /* evaluate the nested expression */
  195. if ret, err = self.expr(0, nest + 1, repo); err != nil {
  196. return nil, err
  197. }
  198. /* must follows with a ')' */
  199. if ntk, err = self.next(); err != nil {
  200. return nil, err
  201. } else if ntk.tag != _T_punc || ntk.u64 != ')' {
  202. return nil, newSyntaxError(ntk.pos, "')' expected")
  203. } else {
  204. return ret, nil
  205. }
  206. }
  207. func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
  208. if tk, err := self.next(); err != nil {
  209. return nil, err
  210. } else if tk.tag == _T_int {
  211. return Int(int64(tk.u64)), nil
  212. } else if tk.tag == _T_name {
  213. return self.grab(tk, repo)
  214. } else if tk.tag == _T_punc && tk.u64 == '(' {
  215. return self.nest(nest, repo)
  216. } else if tk.tag == _T_punc && tk.u64 == '+' {
  217. return self.unit(nest, repo)
  218. } else if tk.tag == _T_punc && tk.u64 == '-' {
  219. return neg2(self.unit(nest, repo))
  220. } else if tk.tag == _T_punc && tk.u64 == '~' {
  221. return not2(self.unit(nest, repo))
  222. } else {
  223. return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
  224. }
  225. }
  226. func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
  227. var err error
  228. var val *Expr
  229. /* parse the LHS operand */
  230. if val, err = self.expr(prec + 1, nest, repo); err != nil {
  231. return nil, err
  232. }
  233. /* parse all the operators of the same precedence */
  234. for {
  235. var op int
  236. var rv *Expr
  237. var tk _Token
  238. /* peek the next token */
  239. pp := self.pos
  240. tk, err = self.next()
  241. /* check for errors */
  242. if err != nil {
  243. return nil, err
  244. }
  245. /* encountered EOF */
  246. if tk.tag == _T_end {
  247. return val, nil
  248. }
  249. /* must be an operator */
  250. if tk.tag != _T_punc {
  251. return nil, newSyntaxError(tk.pos, "operators expected")
  252. }
  253. /* check for the operator precedence */
  254. if op = int(tk.u64); !precedence[prec][op] {
  255. self.pos = pp
  256. return val, nil
  257. }
  258. /* evaluate the RHS operand, and combine the value */
  259. if rv, err = self.expr(prec + 1, nest, repo); err != nil {
  260. return nil, err
  261. } else {
  262. val = binaryOps[op](val, rv)
  263. }
  264. }
  265. }
  266. func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
  267. if prec >= len(precedence) {
  268. return self.unit(nest, repo)
  269. } else {
  270. return self.term(prec, nest, repo)
  271. }
  272. }
  273. // Parse parses the expression, and returns it's AST tree.
  274. func (self *Parser) Parse(repo Repository) (*Expr, error) {
  275. return self.expr(0, 0, repo)
  276. }
  277. // SetSource resets the expression parser and sets the expression source.
  278. func (self *Parser) SetSource(src string) *Parser {
  279. self.pos = 0
  280. self.src = []rune(src)
  281. return self
  282. }