assembler.go 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819
  1. //
  2. // Copyright 2024 CloudWeGo Authors
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. package x86_64
  17. import (
  18. `bytes`
  19. `errors`
  20. `fmt`
  21. `math`
  22. `strconv`
  23. `strings`
  24. `unicode`
  25. `github.com/cloudwego/iasm/expr`
  26. )
  27. type (
  28. _TokenKind int
  29. _Punctuation int
  30. )
  31. const (
  32. _T_end _TokenKind = iota + 1
  33. _T_int
  34. _T_name
  35. _T_punc
  36. _T_space
  37. )
  38. const (
  39. _P_plus _Punctuation = iota + 1
  40. _P_minus
  41. _P_star
  42. _P_slash
  43. _P_percent
  44. _P_amp
  45. _P_bar
  46. _P_caret
  47. _P_shl
  48. _P_shr
  49. _P_tilde
  50. _P_lbrk
  51. _P_rbrk
  52. _P_dot
  53. _P_comma
  54. _P_colon
  55. _P_dollar
  56. _P_hash
  57. )
  58. var _PUNC_NAME = map[_Punctuation]string {
  59. _P_plus : "+",
  60. _P_minus : "-",
  61. _P_star : "*",
  62. _P_slash : "/",
  63. _P_percent : "%",
  64. _P_amp : "&",
  65. _P_bar : "|",
  66. _P_caret : "^",
  67. _P_shl : "<<",
  68. _P_shr : ">>",
  69. _P_tilde : "~",
  70. _P_lbrk : "(",
  71. _P_rbrk : ")",
  72. _P_dot : ".",
  73. _P_comma : ",",
  74. _P_colon : ":",
  75. _P_dollar : "$",
  76. _P_hash : "#",
  77. }
  78. func (self _Punctuation) String() string {
  79. if v, ok := _PUNC_NAME[self]; ok {
  80. return v
  81. } else {
  82. return fmt.Sprintf("_Punctuation(%d)", self)
  83. }
  84. }
  85. type _Token struct {
  86. pos int
  87. end int
  88. u64 uint64
  89. str string
  90. tag _TokenKind
  91. }
  92. func (self *_Token) punc() _Punctuation {
  93. return _Punctuation(self.u64)
  94. }
  95. func (self *_Token) String() string {
  96. switch self.tag {
  97. case _T_end : return "<END>"
  98. case _T_int : return fmt.Sprintf("<INT %d>", self.u64)
  99. case _T_punc : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64))
  100. case _T_name : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str))
  101. case _T_space : return "<SPACE>"
  102. default : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str))
  103. }
  104. }
  105. func tokenEnd(p int, end int) _Token {
  106. return _Token {
  107. pos: p,
  108. end: end,
  109. tag: _T_end,
  110. }
  111. }
  112. func tokenInt(p int, val uint64) _Token {
  113. return _Token {
  114. pos: p,
  115. u64: val,
  116. tag: _T_int,
  117. }
  118. }
  119. func tokenName(p int, name string) _Token {
  120. return _Token {
  121. pos: p,
  122. str: name,
  123. tag: _T_name,
  124. }
  125. }
  126. func tokenPunc(p int, punc _Punctuation) _Token {
  127. return _Token {
  128. pos: p,
  129. tag: _T_punc,
  130. u64: uint64(punc),
  131. }
  132. }
  133. func tokenSpace(p int, end int) _Token {
  134. return _Token {
  135. pos: p,
  136. end: end,
  137. tag: _T_space,
  138. }
  139. }
  140. // SyntaxError represents an error in the assembly syntax.
  141. type SyntaxError struct {
  142. Pos int
  143. Row int
  144. Src []rune
  145. Reason string
  146. }
  147. // Error implements the error interface.
  148. func (self *SyntaxError) Error() string {
  149. if self.Pos < 0 {
  150. return fmt.Sprintf("%s at line %d", self.Reason, self.Row)
  151. } else {
  152. return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1)
  153. }
  154. }
  155. type _Tokenizer struct {
  156. pos int
  157. row int
  158. src []rune
  159. }
  160. func (self *_Tokenizer) ch() rune {
  161. return self.src[self.pos]
  162. }
  163. func (self *_Tokenizer) eof() bool {
  164. return self.pos >= len(self.src)
  165. }
  166. func (self *_Tokenizer) rch() (ret rune) {
  167. ret, self.pos = self.src[self.pos], self.pos + 1
  168. return
  169. }
  170. func (self *_Tokenizer) err(pos int, msg string) *SyntaxError {
  171. return &SyntaxError {
  172. Pos : pos,
  173. Row : self.row,
  174. Src : self.src,
  175. Reason : msg,
  176. }
  177. }
  178. type _TrimState int
  179. const (
  180. _TS_normal _TrimState = iota
  181. _TS_slcomm
  182. _TS_hscomm
  183. _TS_string
  184. _TS_escape
  185. _TS_accept
  186. _TS_nolast
  187. )
  188. func (self *_Tokenizer) init(src string) {
  189. var i int
  190. var ch rune
  191. var st _TrimState
  192. /* set the source */
  193. self.pos = 0
  194. self.src = []rune(src)
  195. /* remove commends, including "//" and "##" */
  196. loop: for i, ch = range self.src {
  197. switch {
  198. case st == _TS_normal && ch == '/' : st = _TS_slcomm
  199. case st == _TS_normal && ch == '"' : st = _TS_string
  200. case st == _TS_normal && ch == ';' : st = _TS_accept; break loop
  201. case st == _TS_normal && ch == '#' : st = _TS_hscomm
  202. case st == _TS_slcomm && ch == '/' : st = _TS_nolast; break loop
  203. case st == _TS_slcomm : st = _TS_normal
  204. case st == _TS_hscomm && ch == '#' : st = _TS_nolast; break loop
  205. case st == _TS_hscomm : st = _TS_normal
  206. case st == _TS_string && ch == '"' : st = _TS_normal
  207. case st == _TS_string && ch == '\\' : st = _TS_escape
  208. case st == _TS_escape : st = _TS_string
  209. }
  210. }
  211. /* check for errors */
  212. switch st {
  213. case _TS_accept: self.src = self.src[:i]
  214. case _TS_nolast: self.src = self.src[:i - 1]
  215. case _TS_string: panic(self.err(i, "string is not terminated"))
  216. case _TS_escape: panic(self.err(i, "escape sequence is not terminated"))
  217. }
  218. }
  219. func (self *_Tokenizer) skip(check func(v rune) bool) {
  220. for !self.eof() && check(self.ch()) {
  221. self.pos++
  222. }
  223. }
  224. func (self *_Tokenizer) find(pos int, check func(v rune) bool) string {
  225. self.skip(check)
  226. return string(self.src[pos:self.pos])
  227. }
  228. func (self *_Tokenizer) chrv(p int) _Token {
  229. var err error
  230. var val uint64
  231. /* starting and ending position */
  232. p0 := p + 1
  233. p1 := p0 + 1
  234. /* find the end of the literal */
  235. for p1 < len(self.src) && self.src[p1] != '\'' {
  236. if p1++; self.src[p1 - 1] == '\\' {
  237. p1++
  238. }
  239. }
  240. /* empty literal */
  241. if p1 == p0 {
  242. panic(self.err(p1, "empty character constant"))
  243. }
  244. /* check for EOF */
  245. if p1 == len(self.src) {
  246. panic(self.err(p1, "unexpected EOF when scanning literals"))
  247. }
  248. /* parse the literal */
  249. if val, err = literal64(string(self.src[p0:p1])); err != nil {
  250. panic(self.err(p0, "cannot parse literal: " + err.Error()))
  251. }
  252. /* skip the closing '\'' */
  253. self.pos = p1 + 1
  254. return tokenInt(p, val)
  255. }
  256. func (self *_Tokenizer) numv(p int) _Token {
  257. if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil {
  258. panic(self.err(p, "invalid immediate value: " + err.Error()))
  259. } else {
  260. return tokenInt(p, val)
  261. }
  262. }
  263. func (self *_Tokenizer) defv(p int, cc rune) _Token {
  264. if isdigit(cc) {
  265. return self.numv(p)
  266. } else if isident0(cc) {
  267. return tokenName(p, self.find(p, isident))
  268. } else {
  269. panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc)))
  270. }
  271. }
  272. func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token {
  273. if self.eof() {
  274. panic(self.err(self.pos, "unexpected EOF when scanning operators"))
  275. } else if c := self.rch(); c != cc {
  276. panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c)))
  277. } else {
  278. return tokenPunc(p, pp)
  279. }
  280. }
  281. func (self *_Tokenizer) read() _Token {
  282. var p int
  283. var c rune
  284. var t _Token
  285. /* check for EOF */
  286. if self.eof() {
  287. return tokenEnd(self.pos, self.pos)
  288. }
  289. /* skip spaces as needed */
  290. if p = self.pos; unicode.IsSpace(self.src[p]) {
  291. self.skip(unicode.IsSpace)
  292. return tokenSpace(p, self.pos)
  293. }
  294. /* check for line comments */
  295. if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' {
  296. self.pos = len(self.src)
  297. return tokenEnd(p, self.pos)
  298. }
  299. /* read the next character */
  300. p = self.pos
  301. c = self.rch()
  302. /* parse the next character */
  303. switch c {
  304. case '+' : t = tokenPunc(p, _P_plus)
  305. case '-' : t = tokenPunc(p, _P_minus)
  306. case '*' : t = tokenPunc(p, _P_star)
  307. case '/' : t = tokenPunc(p, _P_slash)
  308. case '%' : t = tokenPunc(p, _P_percent)
  309. case '&' : t = tokenPunc(p, _P_amp)
  310. case '|' : t = tokenPunc(p, _P_bar)
  311. case '^' : t = tokenPunc(p, _P_caret)
  312. case '<' : t = self.rep2(p, _P_shl, '<')
  313. case '>' : t = self.rep2(p, _P_shr, '>')
  314. case '~' : t = tokenPunc(p, _P_tilde)
  315. case '(' : t = tokenPunc(p, _P_lbrk)
  316. case ')' : t = tokenPunc(p, _P_rbrk)
  317. case '.' : t = tokenPunc(p, _P_dot)
  318. case ',' : t = tokenPunc(p, _P_comma)
  319. case ':' : t = tokenPunc(p, _P_colon)
  320. case '$' : t = tokenPunc(p, _P_dollar)
  321. case '#' : t = tokenPunc(p, _P_hash)
  322. case '\'' : t = self.chrv(p)
  323. default : t = self.defv(p, c)
  324. }
  325. /* mark the end of token */
  326. t.end = self.pos
  327. return t
  328. }
  329. func (self *_Tokenizer) next() (tk _Token) {
  330. for {
  331. if tk = self.read(); tk.tag != _T_space {
  332. return
  333. }
  334. }
  335. }
  336. // LabelKind indicates the type of label reference.
  337. type LabelKind int
  338. // OperandKind indicates the type of the operand.
  339. type OperandKind int
  340. // InstructionPrefix indicates the prefix bytes prepended to the instruction.
  341. type InstructionPrefix byte
  342. const (
  343. // OpImm means the operand is an immediate value.
  344. OpImm OperandKind = 1 << iota
  345. // OpReg means the operand is a register.
  346. OpReg
  347. // OpMem means the operand is a memory address.
  348. OpMem
  349. // OpLabel means the operand is a label, specifically for
  350. // branch instructions.
  351. OpLabel
  352. )
  353. const (
  354. // Declaration means the label is a declaration.
  355. Declaration LabelKind = iota + 1
  356. // BranchTarget means the label should be treated as a branch target.
  357. BranchTarget
  358. // RelativeAddress means the label should be treated as a reference to
  359. // the code section (e.g. RIP-relative addressing).
  360. RelativeAddress
  361. )
  362. const (
  363. // PrefixLock causes the processor's LOCK# signal to be asserted during execution of
  364. // the accompanying instruction (turns the instruction into an atomic instruction).
  365. // In a multiprocessor environment, the LOCK# signal insures that the processor
  366. // has exclusive use of any shared memory while the signal is asserted.
  367. PrefixLock InstructionPrefix = iota
  368. // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment).
  369. PrefixSegmentCS
  370. // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment),
  371. // this is the default section for most instructions if not specified.
  372. PrefixSegmentDS
  373. // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment).
  374. PrefixSegmentES
  375. // PrefixSegmentFS overrides the memory operation of this instruction to FS.
  376. PrefixSegmentFS
  377. // PrefixSegmentGS overrides the memory operation of this instruction to GS.
  378. PrefixSegmentGS
  379. // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment).
  380. PrefixSegmentSS
  381. )
  382. // ParsedLabel represents a label in the source, either a jump target or
  383. // an RIP-relative addressing.
  384. type ParsedLabel struct {
  385. Name string
  386. Kind LabelKind
  387. }
  388. // ParsedOperand represents an operand of an instruction in the source.
  389. type ParsedOperand struct {
  390. Op OperandKind
  391. Imm int64
  392. Reg Register
  393. Label ParsedLabel
  394. Memory MemoryAddress
  395. }
  396. // ParsedInstruction represents an instruction in the source.
  397. type ParsedInstruction struct {
  398. Mnemonic string
  399. Operands []ParsedOperand
  400. Prefixes []InstructionPrefix
  401. }
  402. func (self *ParsedInstruction) imm(v int64) {
  403. self.Operands = append(self.Operands, ParsedOperand {
  404. Op : OpImm,
  405. Imm : v,
  406. })
  407. }
  408. func (self *ParsedInstruction) reg(v Register) {
  409. self.Operands = append(self.Operands, ParsedOperand {
  410. Op : OpReg,
  411. Reg : v,
  412. })
  413. }
  414. func (self *ParsedInstruction) mem(v MemoryAddress) {
  415. self.Operands = append(self.Operands, ParsedOperand {
  416. Op : OpMem,
  417. Memory : v,
  418. })
  419. }
  420. func (self *ParsedInstruction) target(v string) {
  421. self.Operands = append(self.Operands, ParsedOperand {
  422. Op : OpLabel,
  423. Label : ParsedLabel {
  424. Name: v,
  425. Kind: BranchTarget,
  426. },
  427. })
  428. }
  429. func (self *ParsedInstruction) reference(v string) {
  430. self.Operands = append(self.Operands, ParsedOperand {
  431. Op : OpLabel,
  432. Label : ParsedLabel {
  433. Name: v,
  434. Kind: RelativeAddress,
  435. },
  436. })
  437. }
  438. // LineKind indicates the type of ParsedLine.
  439. type LineKind int
  440. const (
  441. // LineLabel means the ParsedLine is a label.
  442. LineLabel LineKind = iota + 1
  443. // LineInstr means the ParsedLine is an instruction.
  444. LineInstr
  445. // LineCommand means the ParsedLine is a ParsedCommand.
  446. LineCommand
  447. )
  448. // ParsedLine represents a parsed source line.
  449. type ParsedLine struct {
  450. Row int
  451. Src []rune
  452. Kind LineKind
  453. Label ParsedLabel
  454. Command ParsedCommand
  455. Instruction ParsedInstruction
  456. }
  457. // ParsedCommand represents a parsed assembly directive command.
  458. type ParsedCommand struct {
  459. Cmd string
  460. Args []ParsedCommandArg
  461. }
  462. // ParsedCommandArg represents an argument of a ParsedCommand.
  463. type ParsedCommandArg struct {
  464. Value string
  465. IsString bool
  466. }
  467. // Parser parses the source, and generates a sequence of ParsedInstruction's.
  468. type Parser struct {
  469. lex _Tokenizer
  470. exp expr.Parser
  471. }
  472. const (
  473. rip Register64 = 0xff
  474. )
  475. var _RegBranch = map[string]bool {
  476. "jmp" : true,
  477. "jmpq" : true,
  478. "call" : true,
  479. "callq" : true,
  480. }
  481. var _SegPrefix = map[string]InstructionPrefix {
  482. "cs": PrefixSegmentCS,
  483. "ds": PrefixSegmentDS,
  484. "es": PrefixSegmentES,
  485. "fs": PrefixSegmentFS,
  486. "gs": PrefixSegmentGS,
  487. "ss": PrefixSegmentSS,
  488. }
  489. func (self *Parser) i32(tk _Token, v int64) int32 {
  490. if v >= math.MinInt32 && v <= math.MaxUint32 {
  491. return int32(v)
  492. } else {
  493. panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v)))
  494. }
  495. }
  496. func (self *Parser) err(pos int, msg string) *SyntaxError {
  497. return &SyntaxError {
  498. Pos : pos,
  499. Row : self.lex.row,
  500. Src : self.lex.src,
  501. Reason : msg,
  502. }
  503. }
  504. func (self *Parser) negv() int64 {
  505. tk := self.lex.read()
  506. tt := tk.tag
  507. /* must be an integer */
  508. if tt != _T_int {
  509. panic(self.err(tk.pos, "integer expected after '-'"))
  510. } else {
  511. return -int64(tk.u64)
  512. }
  513. }
  514. func (self *Parser) eval(p int) (r int64) {
  515. var e error
  516. var v *expr.Expr
  517. /* searching start */
  518. n := 1
  519. q := p + 1
  520. /* find the end of expression */
  521. for n > 0 && q < len(self.lex.src) {
  522. switch self.lex.src[q] {
  523. case '(' : q++; n++
  524. case ')' : q++; n--
  525. default : q++
  526. }
  527. }
  528. /* check for EOF */
  529. if n != 0 {
  530. panic(self.err(q, "unexpected EOF when parsing expressions"))
  531. }
  532. /* evaluate the expression */
  533. if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil {
  534. panic(self.err(p, "cannot evaluate expression: " + e.Error()))
  535. }
  536. /* evaluate the expression */
  537. if r, e = v.Evaluate(); e != nil {
  538. panic(self.err(p, "cannot evaluate expression: " + e.Error()))
  539. }
  540. /* skip the last ')' */
  541. v.Free()
  542. self.lex.pos = q
  543. return
  544. }
  545. func (self *Parser) relx(tk _Token) {
  546. if tk.tag != _T_punc || tk.punc() != _P_lbrk {
  547. panic(self.err(tk.pos, "'(' expected for RIP-relative addressing"))
  548. } else if tk = self.lex.next(); self.regx(tk) != rip {
  549. panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register"))
  550. } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk {
  551. panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
  552. }
  553. }
  554. func (self *Parser) immx(tk _Token) int64 {
  555. if tk.tag != _T_punc || tk.punc() != _P_dollar {
  556. panic(self.err(tk.pos, "'$' expected for registers"))
  557. } else if tk = self.lex.read(); tk.tag == _T_int {
  558. return int64(tk.u64)
  559. } else if tk.tag == _T_punc && tk.punc() == _P_lbrk {
  560. return self.eval(self.lex.pos)
  561. } else if tk.tag == _T_punc && tk.punc() == _P_minus {
  562. return self.negv()
  563. } else {
  564. panic(self.err(tk.pos, "immediate value expected"))
  565. }
  566. }
  567. func (self *Parser) regx(tk _Token) Register {
  568. if tk.tag != _T_punc || tk.punc() != _P_percent {
  569. panic(self.err(tk.pos, "'%' expected for registers"))
  570. } else if tk = self.lex.read(); tk.tag != _T_name {
  571. panic(self.err(tk.pos, "register name expected"))
  572. } else if tk.str == "rip" {
  573. return rip
  574. } else if reg, ok := Registers[tk.str]; ok {
  575. return reg
  576. } else {
  577. panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str)))
  578. }
  579. }
  580. func (self *Parser) regv(tk _Token) Register {
  581. if reg := self.regx(tk); reg == rip {
  582. panic(self.err(tk.pos, "%rip is not accessable as a dedicated register"))
  583. } else {
  584. return reg
  585. }
  586. }
  587. func (self *Parser) disp(vv int32) MemoryAddress {
  588. switch tk := self.lex.next(); tk.tag {
  589. case _T_end : return MemoryAddress { Displacement: vv }
  590. case _T_punc : return self.relm(tk, vv)
  591. default : panic(self.err(tk.pos, "',' or '(' expected"))
  592. }
  593. }
  594. func (self *Parser) relm(tv _Token, disp int32) MemoryAddress {
  595. var tk _Token
  596. var tt _TokenKind
  597. /* check for absolute addressing */
  598. if tv.punc() == _P_comma {
  599. self.lex.pos--
  600. return MemoryAddress { Displacement: disp }
  601. }
  602. /* must be '(' now */
  603. if tv.punc() != _P_lbrk {
  604. panic(self.err(tv.pos, "',' or '(' expected"))
  605. }
  606. /* read the next token */
  607. tk = self.lex.next()
  608. tt = tk.tag
  609. /* must be a punctuation */
  610. if tt != _T_punc {
  611. panic(self.err(tk.pos, "'%' or ',' expected"))
  612. }
  613. /* check for base */
  614. switch tk.punc() {
  615. case _P_percent : return self.base(tk, disp)
  616. case _P_comma : return self.index(nil, disp)
  617. default : panic(self.err(tk.pos, "'%' or ',' expected"))
  618. }
  619. }
  620. func (self *Parser) base(tk _Token, disp int32) MemoryAddress {
  621. rr := self.regx(tk)
  622. nk := self.lex.next()
  623. /* check for register indirection or base-index addressing */
  624. if !isReg64(rr) {
  625. panic(self.err(tk.pos, "not a valid base register"))
  626. } else if nk.tag != _T_punc {
  627. panic(self.err(nk.pos, "',' or ')' expected"))
  628. } else if nk.punc() == _P_comma {
  629. return self.index(rr, disp)
  630. } else if nk.punc() == _P_rbrk {
  631. return MemoryAddress { Base: rr, Displacement: disp }
  632. } else {
  633. panic(self.err(nk.pos, "',' or ')' expected"))
  634. }
  635. }
  636. func (self *Parser) index(base Register, disp int32) MemoryAddress {
  637. tk := self.lex.next()
  638. rr := self.regx(tk)
  639. nk := self.lex.next()
  640. /* check for scaled indexing */
  641. if base == rip {
  642. panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
  643. } else if !isIndexable(rr) {
  644. panic(self.err(tk.pos, "not a valid index register"))
  645. } else if nk.tag != _T_punc {
  646. panic(self.err(nk.pos, "',' or ')' expected"))
  647. } else if nk.punc() == _P_comma {
  648. return self.scale(base, rr, disp)
  649. } else if nk.punc() == _P_rbrk {
  650. return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp }
  651. } else {
  652. panic(self.err(nk.pos, "',' or ')' expected"))
  653. }
  654. }
  655. func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress {
  656. tk := self.lex.next()
  657. tt := tk.tag
  658. tv := tk.u64
  659. /* must be an integer */
  660. if tt != _T_int {
  661. panic(self.err(tk.pos, "integer expected"))
  662. }
  663. /* scale can only be 1, 2, 4 or 8 */
  664. if tv == 0 || (_Scales & (1 << tv)) == 0 {
  665. panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8"))
  666. }
  667. /* read next token */
  668. tk = self.lex.next()
  669. tt = tk.tag
  670. /* check for the closing ')' */
  671. if tt != _T_punc || tk.punc() != _P_rbrk {
  672. panic(self.err(tk.pos, "')' expected"))
  673. }
  674. /* construct the memory address */
  675. return MemoryAddress {
  676. Base : base,
  677. Index : index,
  678. Scale : uint8(tv),
  679. Displacement : disp,
  680. }
  681. }
  682. func (self *Parser) cmds() *ParsedLine {
  683. cmd := ""
  684. pos := self.lex.pos
  685. buf := []ParsedCommandArg(nil)
  686. /* find the end of command */
  687. for p := pos; pos < len(self.lex.src); pos++ {
  688. if unicode.IsSpace(self.lex.src[pos]) {
  689. cmd = string(self.lex.src[p:pos])
  690. break
  691. }
  692. }
  693. /* parse the arguments */
  694. loop: for {
  695. switch self.next(&pos) {
  696. case 0 : break loop
  697. case '#' : break loop
  698. case '"' : pos = self.strings(&buf, pos)
  699. default : pos = self.expressions(&buf, pos)
  700. }
  701. }
  702. /* construct the line */
  703. return &ParsedLine {
  704. Row : self.lex.row,
  705. Src : self.lex.src,
  706. Kind : LineCommand,
  707. Command : ParsedCommand {
  708. Cmd : cmd,
  709. Args : buf,
  710. },
  711. }
  712. }
  713. func (self *Parser) feed(line string) *ParsedLine {
  714. ff := true
  715. rr := false
  716. lk := false
  717. /* reset the lexer */
  718. self.lex.row++
  719. self.lex.init(line)
  720. /* parse the first token */
  721. tk := self.lex.next()
  722. tt := tk.tag
  723. /* it is a directive if it starts with a dot */
  724. if tk.tag == _T_punc && tk.punc() == _P_dot {
  725. return self.cmds()
  726. }
  727. /* otherwise it could be labels or instructions */
  728. if tt != _T_name {
  729. panic(self.err(tk.pos, "identifier expected"))
  730. }
  731. /* peek the next token */
  732. lex := self.lex
  733. tkx := lex.next()
  734. /* check for labels */
  735. if tkx.tag == _T_punc && tkx.punc() == _P_colon {
  736. tkx = lex.next()
  737. ttx := tkx.tag
  738. /* the line must end here */
  739. if ttx != _T_end {
  740. panic(self.err(tkx.pos, "garbage after label definition"))
  741. }
  742. /* construct the label */
  743. return &ParsedLine {
  744. Row : self.lex.row,
  745. Src : self.lex.src,
  746. Kind : LineLabel,
  747. Label : ParsedLabel {
  748. Kind: Declaration,
  749. Name: tk.str,
  750. },
  751. }
  752. }
  753. /* special case for the "lock" prefix */
  754. if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" {
  755. lk = true
  756. tk = self.lex.next()
  757. /* must be an instruction */
  758. if tk.tag != _T_name {
  759. panic(self.err(tk.pos, "identifier expected"))
  760. }
  761. }
  762. /* set the line kind and mnemonic */
  763. ret := &ParsedLine {
  764. Row : self.lex.row,
  765. Src : self.lex.src,
  766. Kind : LineInstr,
  767. Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) },
  768. }
  769. /* check for LOCK prefix */
  770. if lk {
  771. ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock)
  772. }
  773. /* parse all the operands */
  774. for {
  775. tk = self.lex.next()
  776. tt = tk.tag
  777. /* check for end of line */
  778. if tt == _T_end {
  779. break
  780. }
  781. /* expect a comma if not the first operand */
  782. if !ff {
  783. if tt == _T_punc && tk.punc() == _P_comma {
  784. tk = self.lex.next()
  785. } else {
  786. panic(self.err(tk.pos, "',' expected"))
  787. }
  788. }
  789. /* not the first operand anymore */
  790. ff = false
  791. tt = tk.tag
  792. /* encountered an integer, must be a SIB memory address */
  793. if tt == _T_int {
  794. ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
  795. continue
  796. }
  797. /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */
  798. if tt == _T_name {
  799. ts := tk.str
  800. tp := self.lex.pos
  801. /* if the next token is EOF or a comma, it's a jumpt target */
  802. if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) {
  803. self.lex.pos = tp
  804. ret.Instruction.target(ts)
  805. continue
  806. }
  807. /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */
  808. if tk.tag != _T_punc || tk.punc() != _P_colon {
  809. self.relx(tk)
  810. ret.Instruction.reference(ts)
  811. continue
  812. }
  813. /* lookup segment prefixes */
  814. if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok {
  815. panic(self.err(tk.pos, "invalid segment name"))
  816. } else {
  817. ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p)
  818. }
  819. /* read the next token */
  820. tk = self.lex.next()
  821. tt = tk.tag
  822. /* encountered an integer, must be a SIB memory address */
  823. if tt == _T_int {
  824. ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
  825. continue
  826. }
  827. }
  828. /* certain instructions may have a "*" before operands */
  829. if tt == _T_punc && tk.punc() == _P_star {
  830. tk = self.lex.next()
  831. tt = tk.tag
  832. rr = true
  833. }
  834. /* ... otherwise it must be a punctuation */
  835. if tt != _T_punc {
  836. panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
  837. }
  838. /* check the operator */
  839. switch tk.punc() {
  840. case _P_lbrk : break
  841. case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue
  842. case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue
  843. case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue
  844. default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
  845. }
  846. /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)`
  847. * read one more token to confirm */
  848. tk = self.lex.next()
  849. tt = tk.tag
  850. /* the next token is '%', it's a memory address,
  851. * or ',' if it's a memory address without base,
  852. * otherwise it must be in `(expr)(SIB)` form */
  853. if tk.tag == _T_punc && tk.punc() == _P_percent {
  854. ret.Instruction.mem(self.base(tk, 0))
  855. } else if tk.tag == _T_punc && tk.punc() == _P_comma {
  856. ret.Instruction.mem(self.index(nil, 0))
  857. } else {
  858. ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos))))
  859. }
  860. }
  861. /* check "jmp" and "call" instructions */
  862. if !_RegBranch[ret.Instruction.Mnemonic] {
  863. return ret
  864. } else if len(ret.Instruction.Operands) != 1 {
  865. panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic)))
  866. } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel {
  867. panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic)))
  868. } else {
  869. return ret
  870. }
  871. }
  872. func (self *Parser) next(p *int) rune {
  873. for {
  874. if *p >= len(self.lex.src) {
  875. return 0
  876. } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) {
  877. return cc
  878. } else {
  879. *p++
  880. }
  881. }
  882. }
  883. func (self *Parser) delim(p int) int {
  884. if cc := self.next(&p); cc == 0 {
  885. return p
  886. } else if cc == ',' {
  887. return p + 1
  888. } else {
  889. panic(self.err(p, "',' expected"))
  890. }
  891. }
  892. func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int {
  893. var i int
  894. var e error
  895. var v string
  896. /* find the end of string */
  897. for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ {
  898. if self.lex.src[i] == '\\' {
  899. i++
  900. }
  901. }
  902. /* check for EOF */
  903. if i == len(self.lex.src) {
  904. panic(self.err(i, "unexpected EOF when scanning strings"))
  905. }
  906. /* unquote the string */
  907. if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil {
  908. panic(self.err(p, "invalid string: " + e.Error()))
  909. }
  910. /* add the argument to buffer */
  911. *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true })
  912. return self.delim(i + 1)
  913. }
  914. func (self *Parser) directives(line string) {
  915. self.lex.row++
  916. self.lex.init(line)
  917. /* parse the first token */
  918. tk := self.lex.next()
  919. tt := tk.tag
  920. /* check for EOF */
  921. if tt == _T_end {
  922. return
  923. }
  924. /* must be a directive */
  925. if tt != _T_punc || tk.punc() != _P_hash {
  926. panic(self.err(tk.pos, "'#' expected"))
  927. }
  928. /* parse the line number */
  929. tk = self.lex.next()
  930. tt = tk.tag
  931. /* must be a line number, if it is, set the row number, and ignore the rest of the line */
  932. if tt != _T_int {
  933. panic(self.err(tk.pos, "line number expected"))
  934. } else {
  935. self.lex.row = int(tk.u64) - 1
  936. }
  937. }
  938. func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int {
  939. var i int
  940. var n int
  941. var s int
  942. /* scan until the first standalone ',' or EOF */
  943. loop: for i = p; i < len(self.lex.src); i++ {
  944. switch self.lex.src[i] {
  945. case ',' : if s == 0 { if n == 0 { break loop } }
  946. case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } }
  947. case '[', '{', '<' : if s == 0 { n++ }
  948. case '\\' : if s != 0 { i++ }
  949. case '\'' : if s != 2 { s ^= 1 }
  950. case '"' : if s != 1 { s ^= 2 }
  951. }
  952. }
  953. /* check for EOF in strings */
  954. if s != 0 {
  955. panic(self.err(i, "unexpected EOF when scanning strings"))
  956. }
  957. /* check for bracket matching */
  958. if n != 0 {
  959. panic(self.err(i, "unbalanced '{' or '[' or '<'"))
  960. }
  961. /* add the argument to buffer */
  962. *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) })
  963. return self.delim(i)
  964. }
  965. // Feed feeds the parser with one more line, and the parser
  966. // parses it into a ParsedLine.
  967. //
  968. // NOTE: Feed does not handle empty lines or multiple lines,
  969. // it panics when this happens. Use Parse to parse multiple
  970. // lines of assembly source.
  971. //
  972. func (self *Parser) Feed(src string) (ret *ParsedLine, err error) {
  973. var ok bool
  974. var ss string
  975. var vv interface{}
  976. /* check for multiple lines */
  977. if strings.ContainsRune(src, '\n') {
  978. return nil, errors.New("passing multiple lines to Feed()")
  979. }
  980. /* check for blank lines */
  981. if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") {
  982. return nil, errors.New("blank line or line with only comments or line-marks")
  983. }
  984. /* setup error handler */
  985. defer func() {
  986. if vv = recover(); vv != nil {
  987. if err, ok = vv.(*SyntaxError); !ok {
  988. panic(vv)
  989. }
  990. }
  991. }()
  992. /* call the actual parser */
  993. ret = self.feed(src)
  994. return
  995. }
  996. // Parse parses the entire assembly source (possibly multiple lines) into
  997. // a sequence of *ParsedLine.
  998. func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) {
  999. var ok bool
  1000. var ss string
  1001. var vv interface{}
  1002. /* setup error handler */
  1003. defer func() {
  1004. if vv = recover(); vv != nil {
  1005. if err, ok = vv.(*SyntaxError); !ok {
  1006. panic(vv)
  1007. }
  1008. }
  1009. }()
  1010. /* feed every line */
  1011. for _, line := range strings.Split(src, "\n") {
  1012. if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") {
  1013. self.lex.row++
  1014. } else if ss[0] == '#' {
  1015. self.directives(line)
  1016. } else {
  1017. ret = append(ret, self.feed(line))
  1018. }
  1019. }
  1020. /* all done */
  1021. err = nil
  1022. return
  1023. }
  1024. // Directive handles the directive.
  1025. func (self *Parser) Directive(line string) (err error) {
  1026. var ok bool
  1027. var ss string
  1028. var vv interface{}
  1029. /* check for directives */
  1030. if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' {
  1031. return errors.New("not a directive")
  1032. }
  1033. /* setup error handler */
  1034. defer func() {
  1035. if vv = recover(); vv != nil {
  1036. if err, ok = vv.(*SyntaxError); !ok {
  1037. panic(vv)
  1038. }
  1039. }
  1040. }()
  1041. /* call the directive parser */
  1042. self.directives(line)
  1043. return
  1044. }
  1045. type _TermRepo struct {
  1046. terms map[string]expr.Term
  1047. }
  1048. func (self *_TermRepo) Get(name string) (expr.Term, error) {
  1049. if ret, ok := self.terms[name]; ok {
  1050. return ret, nil
  1051. } else {
  1052. return nil, errors.New("undefined name: " + name)
  1053. }
  1054. }
  1055. func (self *_TermRepo) label(name string) (*Label, error) {
  1056. var ok bool
  1057. var lb *Label
  1058. var tr expr.Term
  1059. /* check for existing terms */
  1060. if tr, ok = self.terms[name]; ok {
  1061. if lb, ok = tr.(*Label); ok {
  1062. return lb, nil
  1063. } else {
  1064. return nil, errors.New("name is not a label: " + name)
  1065. }
  1066. }
  1067. /* create a new one as needed */
  1068. lb = new(Label)
  1069. lb.Name = name
  1070. /* create the map if needed */
  1071. if self.terms == nil {
  1072. self.terms = make(map[string]expr.Term, 1)
  1073. }
  1074. /* register the label */
  1075. self.terms[name] = lb
  1076. return lb, nil
  1077. }
  1078. func (self *_TermRepo) define(name string, term expr.Term) {
  1079. var ok bool
  1080. var tr expr.Term
  1081. /* create the map if needed */
  1082. if self.terms == nil {
  1083. self.terms = make(map[string]expr.Term, 1)
  1084. }
  1085. /* check for existing terms */
  1086. if tr, ok = self.terms[name]; !ok {
  1087. self.terms[name] = term
  1088. } else if _, ok = tr.(*Label); !ok {
  1089. self.terms[name] = term
  1090. } else {
  1091. panic("conflicting term types: " + name)
  1092. }
  1093. }
  1094. // _Command describes an assembler command.
  1095. //
  1096. // The _Command.args describes both the arity and argument type with characters,
  1097. // the length is the number of arguments, the character itself represents the
  1098. // argument type.
  1099. //
  1100. // Possible values are:
  1101. //
  1102. // s This argument should be a string
  1103. // e This argument should be an expression
  1104. // ? The next argument is optional, and must be the last argument.
  1105. //
  1106. type _Command struct {
  1107. args string
  1108. handler func(*Assembler, *Program, []ParsedCommandArg) error
  1109. }
  1110. // Options controls the behavior of Assembler.
  1111. type Options struct {
  1112. // InstructionAliasing specifies whether to enable instruction aliasing.
  1113. // Set to true enables instruction aliasing, and the Assembler will try harder to find instructions.
  1114. InstructionAliasing bool
  1115. // IgnoreUnknownDirectives specifies whether to report errors when encountered unknown directives.
  1116. // Set to true ignores all unknwon directives silently, useful for parsing generated assembly.
  1117. IgnoreUnknownDirectives bool
  1118. }
  1119. // Assembler assembles the entire assembly program and generates the corresponding
  1120. // machine code representations.
  1121. type Assembler struct {
  1122. cc int
  1123. ps Parser
  1124. pc uintptr
  1125. buf []byte
  1126. main string
  1127. opts Options
  1128. repo _TermRepo
  1129. expr expr.Parser
  1130. line *ParsedLine
  1131. }
  1132. var asmCommands = map[string]_Command {
  1133. "org" : { "e" , (*Assembler).assembleCommandOrg },
  1134. "set" : { "ee" , (*Assembler).assembleCommandSet },
  1135. "byte" : { "e" , (*Assembler).assembleCommandByte },
  1136. "word" : { "e" , (*Assembler).assembleCommandWord },
  1137. "long" : { "e" , (*Assembler).assembleCommandLong },
  1138. "quad" : { "e" , (*Assembler).assembleCommandQuad },
  1139. "fill" : { "e?e" , (*Assembler).assembleCommandFill },
  1140. "space" : { "e?e" , (*Assembler).assembleCommandFill },
  1141. "align" : { "e?e" , (*Assembler).assembleCommandAlign },
  1142. "entry" : { "e" , (*Assembler).assembleCommandEntry },
  1143. "ascii" : { "s" , (*Assembler).assembleCommandAscii },
  1144. "asciz" : { "s" , (*Assembler).assembleCommandAsciz },
  1145. "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align },
  1146. }
  1147. func (self *Assembler) err(msg string) *SyntaxError {
  1148. return &SyntaxError {
  1149. Pos : -1,
  1150. Row : self.line.Row,
  1151. Src : self.line.Src,
  1152. Reason : msg,
  1153. }
  1154. }
  1155. func (self *Assembler) eval(expr string) (int64, error) {
  1156. if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil {
  1157. return 0, err
  1158. } else {
  1159. return exp.Evaluate()
  1160. }
  1161. }
  1162. func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error {
  1163. if i >= len(v.Args) {
  1164. return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n))
  1165. } else if isString && !v.Args[i].IsString {
  1166. return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd)))
  1167. } else if !isString && v.Args[i].IsString {
  1168. return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd)))
  1169. } else {
  1170. return nil
  1171. }
  1172. }
  1173. func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error {
  1174. if v, err := self.repo.label(lb.Name); err != nil {
  1175. return err
  1176. } else {
  1177. p.Link(v)
  1178. return nil
  1179. }
  1180. }
  1181. func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) {
  1182. var ok bool
  1183. var pfx []byte
  1184. var ops []interface{}
  1185. var enc _InstructionEncoder
  1186. /* convert to lower-case */
  1187. opts := self.opts
  1188. name := strings.ToLower(line.Mnemonic)
  1189. /* fix register-addressing branches if needed */
  1190. if opts.InstructionAliasing && len(line.Operands) == 1 {
  1191. switch {
  1192. case name == "retq" : name = "ret"
  1193. case name == "movabsq" : name = "movq"
  1194. case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq"
  1195. case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp"
  1196. case name == "call" && line.Operands[0].Op != OpLabel : name = "callq"
  1197. case name == "callq" && line.Operands[0].Op == OpLabel : name = "call"
  1198. }
  1199. }
  1200. /* lookup from the alias table if needed */
  1201. if opts.InstructionAliasing {
  1202. enc, ok = _InstructionAliases[name]
  1203. }
  1204. /* lookup from the instruction table */
  1205. if !ok {
  1206. enc, ok = Instructions[name]
  1207. }
  1208. /* remove size suffix if possible */
  1209. if !ok && opts.InstructionAliasing {
  1210. switch i := len(name) - 1; name[i] {
  1211. case 'b', 'w', 'l', 'q': {
  1212. enc, ok = Instructions[name[:i]]
  1213. }
  1214. }
  1215. }
  1216. /* check for instruction name */
  1217. if !ok {
  1218. return self.err("no such instruction: " + strconv.Quote(name))
  1219. }
  1220. /* allocate memory for prefix if any */
  1221. if len(line.Prefixes) != 0 {
  1222. pfx = make([]byte, len(line.Prefixes))
  1223. }
  1224. /* convert the prefixes */
  1225. for i, v := range line.Prefixes {
  1226. switch v {
  1227. case PrefixLock : pfx[i] = _P_lock
  1228. case PrefixSegmentCS : pfx[i] = _P_cs
  1229. case PrefixSegmentDS : pfx[i] = _P_ds
  1230. case PrefixSegmentES : pfx[i] = _P_es
  1231. case PrefixSegmentFS : pfx[i] = _P_fs
  1232. case PrefixSegmentGS : pfx[i] = _P_gs
  1233. case PrefixSegmentSS : pfx[i] = _P_ss
  1234. default : panic("unreachable: invalid segment prefix")
  1235. }
  1236. }
  1237. /* convert the operands */
  1238. for _, op := range line.Operands {
  1239. switch op.Op {
  1240. case OpImm : ops = append(ops, op.Imm)
  1241. case OpReg : ops = append(ops, op.Reg)
  1242. case OpMem : self.assembleInstrMem(&ops, op.Memory)
  1243. case OpLabel : self.assembleInstrLabel(&ops, op.Label)
  1244. default : panic("parser yields an invalid operand kind")
  1245. }
  1246. }
  1247. /* catch any exceptions in the encoder */
  1248. defer func() {
  1249. if v := recover(); v != nil {
  1250. err = self.err(fmt.Sprint(v))
  1251. }
  1252. }()
  1253. /* encode the instruction */
  1254. enc(p, ops...).prefix = pfx
  1255. return nil
  1256. }
  1257. func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) {
  1258. mem := new(MemoryOperand)
  1259. *ops = append(*ops, mem)
  1260. /* check for RIP-relative addressing */
  1261. if addr.Base != rip {
  1262. mem.Addr.Type = Memory
  1263. mem.Addr.Memory = addr
  1264. } else {
  1265. mem.Addr.Type = Offset
  1266. mem.Addr.Offset = RelativeOffset(addr.Displacement)
  1267. }
  1268. }
  1269. func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) {
  1270. vk := label.Kind
  1271. tr, err := self.repo.label(label.Name)
  1272. /* check for errors */
  1273. if err != nil {
  1274. panic(err)
  1275. }
  1276. /* check for branch target */
  1277. if vk == BranchTarget {
  1278. *ops = append(*ops, tr)
  1279. return
  1280. }
  1281. /* add to ops */
  1282. *ops = append(*ops, &MemoryOperand {
  1283. Addr: Addressable {
  1284. Type : Reference,
  1285. Reference : tr,
  1286. },
  1287. })
  1288. }
  1289. func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error {
  1290. var iv int
  1291. var cc rune
  1292. var ok bool
  1293. var va bool
  1294. var fn _Command
  1295. /* find the command */
  1296. if fn, ok = asmCommands[line.Cmd]; !ok {
  1297. if self.opts.IgnoreUnknownDirectives {
  1298. return nil
  1299. } else {
  1300. return self.err("no such command: " + strconv.Quote(line.Cmd))
  1301. }
  1302. }
  1303. /* expected & real argument count */
  1304. argx := len(fn.args)
  1305. argc := len(line.Args)
  1306. /* check the arguments */
  1307. loop: for iv, cc = range fn.args {
  1308. switch cc {
  1309. case '?' : va = true; break loop
  1310. case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err }
  1311. case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err }
  1312. default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1313. }
  1314. }
  1315. /* simple case: non-variadic command */
  1316. if !va {
  1317. if argc == argx {
  1318. return fn.handler(self, p, line.Args)
  1319. } else {
  1320. return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx))
  1321. }
  1322. }
  1323. /* check for the descriptor */
  1324. if iv != argx - 2 {
  1325. panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1326. }
  1327. /* variadic command and the final optional argument is set */
  1328. if argc == argx - 1 {
  1329. switch fn.args[argx - 1] {
  1330. case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err }
  1331. case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err }
  1332. default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1333. }
  1334. }
  1335. /* check argument count */
  1336. if argc == argx - 1 || argc == argx - 2 {
  1337. return fn.handler(self, p, line.Args)
  1338. } else {
  1339. return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1))
  1340. }
  1341. }
  1342. func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error {
  1343. var err error
  1344. var val *expr.Expr
  1345. /* parse the expression */
  1346. if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil {
  1347. return err
  1348. }
  1349. /* add to the program */
  1350. addfn(p, val)
  1351. return nil
  1352. }
  1353. func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error {
  1354. var err error
  1355. var val int64
  1356. /* evaluate the expression */
  1357. if val, err = self.eval(argv[0].Value); err != nil {
  1358. return err
  1359. }
  1360. /* check for origin */
  1361. if val < 0 {
  1362. return self.err(fmt.Sprintf("negative origin: %d", val))
  1363. }
  1364. /* ".org" must be the first command if any */
  1365. if self.cc != 1 {
  1366. return self.err(".org must be the first command if present")
  1367. }
  1368. /* set the initial program counter */
  1369. self.pc = uintptr(val)
  1370. return nil
  1371. }
  1372. func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error {
  1373. var err error
  1374. var val *expr.Expr
  1375. /* parse the expression */
  1376. if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil {
  1377. return err
  1378. }
  1379. /* define the new identifier */
  1380. self.repo.define(argv[0].Value, val)
  1381. return nil
  1382. }
  1383. func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error {
  1384. return self.assembleCommandInt(p, argv, (*Program).Byte)
  1385. }
  1386. func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error {
  1387. return self.assembleCommandInt(p, argv, (*Program).Word)
  1388. }
  1389. func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error {
  1390. return self.assembleCommandInt(p, argv, (*Program).Long)
  1391. }
  1392. func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error {
  1393. return self.assembleCommandInt(p, argv, (*Program).Quad)
  1394. }
  1395. func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error {
  1396. var fv byte
  1397. var nb int64
  1398. var ex error
  1399. /* evaluate the size */
  1400. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1401. return ex
  1402. }
  1403. /* check for filling size */
  1404. if nb < 0 {
  1405. return self.err(fmt.Sprintf("negative filling size: %d", nb))
  1406. }
  1407. /* check for optional filling value */
  1408. if len(argv) == 2 {
  1409. if val, err := self.eval(argv[1].Value); err != nil {
  1410. return err
  1411. } else if val < math.MinInt8 || val > math.MaxUint8 {
  1412. return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val))
  1413. } else {
  1414. fv = byte(val)
  1415. }
  1416. }
  1417. /* fill with specified byte */
  1418. p.Data(bytes.Repeat([]byte { fv }, int(nb)))
  1419. return nil
  1420. }
  1421. func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error {
  1422. var nb int64
  1423. var ex error
  1424. var fv *expr.Expr
  1425. /* evaluate the size */
  1426. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1427. return ex
  1428. }
  1429. /* check for alignment value */
  1430. if nb <= 0 {
  1431. return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1432. }
  1433. /* alignment must be a power of 2 */
  1434. if (nb & (nb - 1)) != 0 {
  1435. return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb))
  1436. }
  1437. /* check for optional filling value */
  1438. if len(argv) == 2 {
  1439. if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1440. fv = v
  1441. } else {
  1442. return err
  1443. }
  1444. }
  1445. /* fill with specified byte, default to 0 if not specified */
  1446. p.Align(uint64(nb), fv)
  1447. return nil
  1448. }
  1449. func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error {
  1450. name := argv[0].Value
  1451. rbuf := []rune(name)
  1452. /* check all the characters */
  1453. for i, cc := range rbuf {
  1454. if !isident0(cc) && (i == 0 || !isident(cc)) {
  1455. return self.err("entry point must be a label name")
  1456. }
  1457. }
  1458. /* set the main entry point */
  1459. self.main = name
  1460. return nil
  1461. }
  1462. func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error {
  1463. p.Data([]byte(argv[0].Value))
  1464. return nil
  1465. }
  1466. func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error {
  1467. p.Data(append([]byte(argv[0].Value), 0))
  1468. return nil
  1469. }
  1470. func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error {
  1471. var nb int64
  1472. var ex error
  1473. var fv *expr.Expr
  1474. /* evaluate the size */
  1475. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1476. return ex
  1477. }
  1478. /* check for alignment value */
  1479. if nb <= 0 {
  1480. return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1481. }
  1482. /* check for optional filling value */
  1483. if len(argv) == 2 {
  1484. if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1485. fv = v
  1486. } else {
  1487. return err
  1488. }
  1489. }
  1490. /* fill with specified byte, default to 0 if not specified */
  1491. p.Align(1 << nb, fv)
  1492. return nil
  1493. }
  1494. // Base returns the origin.
  1495. func (self *Assembler) Base() uintptr {
  1496. return self.pc
  1497. }
  1498. // Code returns the assembled machine code.
  1499. func (self *Assembler) Code() []byte {
  1500. return self.buf
  1501. }
  1502. // Entry returns the address of the specified entry point, or the origin if not specified.
  1503. func (self *Assembler) Entry() uintptr {
  1504. if self.main == "" {
  1505. return self.pc
  1506. } else if tr, err := self.repo.Get(self.main); err != nil {
  1507. panic(err)
  1508. } else if val, err := tr.Evaluate(); err != nil {
  1509. panic(err)
  1510. } else {
  1511. return uintptr(val)
  1512. }
  1513. }
  1514. // Options returns the internal options reference, changing it WILL affect this Assembler instance.
  1515. func (self *Assembler) Options() *Options {
  1516. return &self.opts
  1517. }
  1518. // WithBase resets the origin to pc.
  1519. func (self *Assembler) WithBase(pc uintptr) *Assembler {
  1520. self.pc = pc
  1521. return self
  1522. }
  1523. // Assemble assembles the assembly source and save the machine code to internal buffer.
  1524. func (self *Assembler) Assemble(src string) error {
  1525. var err error
  1526. var buf []*ParsedLine
  1527. /* parse the source */
  1528. if buf, err = self.ps.Parse(src); err != nil {
  1529. return err
  1530. }
  1531. /* create a new program */
  1532. p := DefaultArch.CreateProgram()
  1533. defer p.Free()
  1534. /* process every line */
  1535. for _, self.line = range buf {
  1536. switch self.cc++; self.line.Kind {
  1537. case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err }
  1538. case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err }
  1539. case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err }
  1540. default : panic("parser yields an invalid line kind")
  1541. }
  1542. }
  1543. /* assemble the program */
  1544. self.buf = p.Assemble(self.pc)
  1545. return nil
  1546. }