// File: inst.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package x86asm implements decoding of x86 machine code.
package x86asm

import (
	"bytes"
	"fmt"
)

  10. // An Inst is a single instruction.
  11. type Inst struct {
  12. Prefix Prefixes // Prefixes applied to the instruction.
  13. Op Op // Opcode mnemonic
  14. Opcode uint32 // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
  15. Args Args // Instruction arguments, in Intel order
  16. Mode int // processor mode in bits: 16, 32, or 64
  17. AddrSize int // address size in bits: 16, 32, or 64
  18. DataSize int // operand size in bits: 16, 32, or 64
  19. MemBytes int // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
  20. Len int // length of encoded instruction in bytes
  21. PCRel int // length of PC-relative address in instruction encoding
  22. PCRelOff int // index of start of PC-relative address in instruction encoding
  23. }
  24. // Prefixes is an array of prefixes associated with a single instruction.
  25. // The prefixes are listed in the same order as found in the instruction:
  26. // each prefix byte corresponds to one slot in the array. The first zero
  27. // in the array marks the end of the prefixes.
  28. type Prefixes [14]Prefix
  29. // A Prefix represents an Intel instruction prefix.
  30. // The low 8 bits are the actual prefix byte encoding,
  31. // and the top 8 bits contain distinguishing bits and metadata.
  32. type Prefix uint16
  33. const (
  34. // Metadata about the role of a prefix in an instruction.
  35. PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
  36. PrefixIgnored Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
  37. PrefixInvalid Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
  38. // Memory segment overrides.
  39. PrefixES Prefix = 0x26 // ES segment override
  40. PrefixCS Prefix = 0x2E // CS segment override
  41. PrefixSS Prefix = 0x36 // SS segment override
  42. PrefixDS Prefix = 0x3E // DS segment override
  43. PrefixFS Prefix = 0x64 // FS segment override
  44. PrefixGS Prefix = 0x65 // GS segment override
  45. // Branch prediction.
  46. PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
  47. PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
  48. // Size attributes.
  49. PrefixDataSize Prefix = 0x66 // operand size override
  50. PrefixData16 Prefix = 0x166
  51. PrefixData32 Prefix = 0x266
  52. PrefixAddrSize Prefix = 0x67 // address size override
  53. PrefixAddr16 Prefix = 0x167
  54. PrefixAddr32 Prefix = 0x267
  55. // One of a kind.
  56. PrefixLOCK Prefix = 0xF0 // lock
  57. PrefixREPN Prefix = 0xF2 // repeat not zero
  58. PrefixXACQUIRE Prefix = 0x1F2
  59. PrefixBND Prefix = 0x2F2
  60. PrefixREP Prefix = 0xF3 // repeat
  61. PrefixXRELEASE Prefix = 0x1F3
  62. // The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
  63. // the other bits are set or not according to the intended use.
  64. PrefixREX Prefix = 0x40 // REX 64-bit extension prefix
  65. PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
  66. PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
  67. PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
  68. PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
  69. PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix
  70. PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix
  71. )
  72. // IsREX reports whether p is a REX prefix byte.
  73. func (p Prefix) IsREX() bool {
  74. return p&0xF0 == PrefixREX
  75. }
  76. func (p Prefix) IsVEX() bool {
  77. return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes
  78. }
  79. func (p Prefix) String() string {
  80. p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
  81. if s := prefixNames[p]; s != "" {
  82. return s
  83. }
  84. if p.IsREX() {
  85. s := "REX."
  86. if p&PrefixREXW != 0 {
  87. s += "W"
  88. }
  89. if p&PrefixREXR != 0 {
  90. s += "R"
  91. }
  92. if p&PrefixREXX != 0 {
  93. s += "X"
  94. }
  95. if p&PrefixREXB != 0 {
  96. s += "B"
  97. }
  98. return s
  99. }
  100. return fmt.Sprintf("Prefix(%#x)", int(p))
  101. }
  102. // An Op is an x86 opcode.
  103. type Op uint32
  104. func (op Op) String() string {
  105. i := int(op)
  106. if i < 0 || i >= len(opNames) || opNames[i] == "" {
  107. return fmt.Sprintf("Op(%d)", i)
  108. }
  109. return opNames[i]
  110. }
  111. // An Args holds the instruction arguments.
  112. // If an instruction has fewer than 4 arguments,
  113. // the final elements in the array are nil.
  114. type Args [4]Arg
  115. // An Arg is a single instruction argument,
  116. // one of these types: Reg, Mem, Imm, Rel.
  117. type Arg interface {
  118. String() string
  119. isArg()
  120. }
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.

  124. // A Reg is a single register.
  125. // The zero Reg value has no name but indicates “no register.”
  126. type Reg uint8
  127. const (
  128. _ Reg = iota
  129. // 8-bit
  130. AL
  131. CL
  132. DL
  133. BL
  134. AH
  135. CH
  136. DH
  137. BH
  138. SPB
  139. BPB
  140. SIB
  141. DIB
  142. R8B
  143. R9B
  144. R10B
  145. R11B
  146. R12B
  147. R13B
  148. R14B
  149. R15B
  150. // 16-bit
  151. AX
  152. CX
  153. DX
  154. BX
  155. SP
  156. BP
  157. SI
  158. DI
  159. R8W
  160. R9W
  161. R10W
  162. R11W
  163. R12W
  164. R13W
  165. R14W
  166. R15W
  167. // 32-bit
  168. EAX
  169. ECX
  170. EDX
  171. EBX
  172. ESP
  173. EBP
  174. ESI
  175. EDI
  176. R8L
  177. R9L
  178. R10L
  179. R11L
  180. R12L
  181. R13L
  182. R14L
  183. R15L
  184. // 64-bit
  185. RAX
  186. RCX
  187. RDX
  188. RBX
  189. RSP
  190. RBP
  191. RSI
  192. RDI
  193. R8
  194. R9
  195. R10
  196. R11
  197. R12
  198. R13
  199. R14
  200. R15
  201. // Instruction pointer.
  202. IP // 16-bit
  203. EIP // 32-bit
  204. RIP // 64-bit
  205. // 387 floating point registers.
  206. F0
  207. F1
  208. F2
  209. F3
  210. F4
  211. F5
  212. F6
  213. F7
  214. // MMX registers.
  215. M0
  216. M1
  217. M2
  218. M3
  219. M4
  220. M5
  221. M6
  222. M7
  223. // XMM registers.
  224. X0
  225. X1
  226. X2
  227. X3
  228. X4
  229. X5
  230. X6
  231. X7
  232. X8
  233. X9
  234. X10
  235. X11
  236. X12
  237. X13
  238. X14
  239. X15
  240. // Segment registers.
  241. ES
  242. CS
  243. SS
  244. DS
  245. FS
  246. GS
  247. // System registers.
  248. GDTR
  249. IDTR
  250. LDTR
  251. MSW
  252. TASK
  253. // Control registers.
  254. CR0
  255. CR1
  256. CR2
  257. CR3
  258. CR4
  259. CR5
  260. CR6
  261. CR7
  262. CR8
  263. CR9
  264. CR10
  265. CR11
  266. CR12
  267. CR13
  268. CR14
  269. CR15
  270. // Debug registers.
  271. DR0
  272. DR1
  273. DR2
  274. DR3
  275. DR4
  276. DR5
  277. DR6
  278. DR7
  279. DR8
  280. DR9
  281. DR10
  282. DR11
  283. DR12
  284. DR13
  285. DR14
  286. DR15
  287. // Task registers.
  288. TR0
  289. TR1
  290. TR2
  291. TR3
  292. TR4
  293. TR5
  294. TR6
  295. TR7
  296. )
  297. const regMax = TR7
  298. func (Reg) isArg() {}
  299. func (r Reg) String() string {
  300. i := int(r)
  301. if i < 0 || i >= len(regNames) || regNames[i] == "" {
  302. return fmt.Sprintf("Reg(%d)", i)
  303. }
  304. return regNames[i]
  305. }
  306. // A Mem is a memory reference.
  307. // The general form is Segment:[Base+Scale*Index+Disp].
  308. type Mem struct {
  309. Segment Reg
  310. Base Reg
  311. Scale uint8
  312. Index Reg
  313. Disp int64
  314. }
  315. func (Mem) isArg() {}
  316. func (m Mem) String() string {
  317. var base, plus, scale, index, disp string
  318. if m.Base != 0 {
  319. base = m.Base.String()
  320. }
  321. if m.Scale != 0 {
  322. if m.Base != 0 {
  323. plus = "+"
  324. }
  325. if m.Scale > 1 {
  326. scale = fmt.Sprintf("%d*", m.Scale)
  327. }
  328. index = m.Index.String()
  329. }
  330. if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
  331. disp = fmt.Sprintf("%+#x", m.Disp)
  332. }
  333. return "[" + base + plus + scale + index + disp + "]"
  334. }
  335. // A Rel is an offset relative to the current instruction pointer.
  336. type Rel int32
  337. func (Rel) isArg() {}
  338. func (r Rel) String() string {
  339. return fmt.Sprintf(".%+d", r)
  340. }
  341. // An Imm is an integer constant.
  342. type Imm int64
  343. func (Imm) isArg() {}
  344. func (i Imm) String() string {
  345. return fmt.Sprintf("%#x", int64(i))
  346. }
  347. func (i Inst) String() string {
  348. var buf bytes.Buffer
  349. for _, p := range i.Prefix {
  350. if p == 0 {
  351. break
  352. }
  353. if p&PrefixImplicit != 0 {
  354. continue
  355. }
  356. fmt.Fprintf(&buf, "%v ", p)
  357. }
  358. fmt.Fprintf(&buf, "%v", i.Op)
  359. sep := " "
  360. for _, v := range i.Args {
  361. if v == nil {
  362. break
  363. }
  364. fmt.Fprintf(&buf, "%s%v", sep, v)
  365. sep = ", "
  366. }
  367. return buf.String()
  368. }
  369. func isReg(a Arg) bool {
  370. _, ok := a.(Reg)
  371. return ok
  372. }
  373. func isSegReg(a Arg) bool {
  374. r, ok := a.(Reg)
  375. return ok && ES <= r && r <= GS
  376. }
  377. func isMem(a Arg) bool {
  378. _, ok := a.(Mem)
  379. return ok
  380. }
  381. func isImm(a Arg) bool {
  382. _, ok := a.(Imm)
  383. return ok
  384. }
  385. func regBytes(a Arg) int {
  386. r, ok := a.(Reg)
  387. if !ok {
  388. return 0
  389. }
  390. if AL <= r && r <= R15B {
  391. return 1
  392. }
  393. if AX <= r && r <= R15W {
  394. return 2
  395. }
  396. if EAX <= r && r <= R15L {
  397. return 4
  398. }
  399. if RAX <= r && r <= R15 {
  400. return 8
  401. }
  402. return 0
  403. }
  404. func isSegment(p Prefix) bool {
  405. switch p {
  406. case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
  407. return true
  408. }
  409. return false
  410. }
// The Op definitions and string list are in tables.go.

  412. var prefixNames = map[Prefix]string{
  413. PrefixCS: "CS",
  414. PrefixDS: "DS",
  415. PrefixES: "ES",
  416. PrefixFS: "FS",
  417. PrefixGS: "GS",
  418. PrefixSS: "SS",
  419. PrefixLOCK: "LOCK",
  420. PrefixREP: "REP",
  421. PrefixREPN: "REPN",
  422. PrefixAddrSize: "ADDRSIZE",
  423. PrefixDataSize: "DATASIZE",
  424. PrefixAddr16: "ADDR16",
  425. PrefixData16: "DATA16",
  426. PrefixAddr32: "ADDR32",
  427. PrefixData32: "DATA32",
  428. PrefixBND: "BND",
  429. PrefixXACQUIRE: "XACQUIRE",
  430. PrefixXRELEASE: "XRELEASE",
  431. PrefixREX: "REX",
  432. PrefixPT: "PT",
  433. PrefixPN: "PN",
  434. }
  435. var regNames = [...]string{
  436. AL: "AL",
  437. CL: "CL",
  438. BL: "BL",
  439. DL: "DL",
  440. AH: "AH",
  441. CH: "CH",
  442. BH: "BH",
  443. DH: "DH",
  444. SPB: "SPB",
  445. BPB: "BPB",
  446. SIB: "SIB",
  447. DIB: "DIB",
  448. R8B: "R8B",
  449. R9B: "R9B",
  450. R10B: "R10B",
  451. R11B: "R11B",
  452. R12B: "R12B",
  453. R13B: "R13B",
  454. R14B: "R14B",
  455. R15B: "R15B",
  456. AX: "AX",
  457. CX: "CX",
  458. BX: "BX",
  459. DX: "DX",
  460. SP: "SP",
  461. BP: "BP",
  462. SI: "SI",
  463. DI: "DI",
  464. R8W: "R8W",
  465. R9W: "R9W",
  466. R10W: "R10W",
  467. R11W: "R11W",
  468. R12W: "R12W",
  469. R13W: "R13W",
  470. R14W: "R14W",
  471. R15W: "R15W",
  472. EAX: "EAX",
  473. ECX: "ECX",
  474. EDX: "EDX",
  475. EBX: "EBX",
  476. ESP: "ESP",
  477. EBP: "EBP",
  478. ESI: "ESI",
  479. EDI: "EDI",
  480. R8L: "R8L",
  481. R9L: "R9L",
  482. R10L: "R10L",
  483. R11L: "R11L",
  484. R12L: "R12L",
  485. R13L: "R13L",
  486. R14L: "R14L",
  487. R15L: "R15L",
  488. RAX: "RAX",
  489. RCX: "RCX",
  490. RDX: "RDX",
  491. RBX: "RBX",
  492. RSP: "RSP",
  493. RBP: "RBP",
  494. RSI: "RSI",
  495. RDI: "RDI",
  496. R8: "R8",
  497. R9: "R9",
  498. R10: "R10",
  499. R11: "R11",
  500. R12: "R12",
  501. R13: "R13",
  502. R14: "R14",
  503. R15: "R15",
  504. IP: "IP",
  505. EIP: "EIP",
  506. RIP: "RIP",
  507. F0: "F0",
  508. F1: "F1",
  509. F2: "F2",
  510. F3: "F3",
  511. F4: "F4",
  512. F5: "F5",
  513. F6: "F6",
  514. F7: "F7",
  515. M0: "M0",
  516. M1: "M1",
  517. M2: "M2",
  518. M3: "M3",
  519. M4: "M4",
  520. M5: "M5",
  521. M6: "M6",
  522. M7: "M7",
  523. X0: "X0",
  524. X1: "X1",
  525. X2: "X2",
  526. X3: "X3",
  527. X4: "X4",
  528. X5: "X5",
  529. X6: "X6",
  530. X7: "X7",
  531. X8: "X8",
  532. X9: "X9",
  533. X10: "X10",
  534. X11: "X11",
  535. X12: "X12",
  536. X13: "X13",
  537. X14: "X14",
  538. X15: "X15",
  539. CS: "CS",
  540. SS: "SS",
  541. DS: "DS",
  542. ES: "ES",
  543. FS: "FS",
  544. GS: "GS",
  545. GDTR: "GDTR",
  546. IDTR: "IDTR",
  547. LDTR: "LDTR",
  548. MSW: "MSW",
  549. TASK: "TASK",
  550. CR0: "CR0",
  551. CR1: "CR1",
  552. CR2: "CR2",
  553. CR3: "CR3",
  554. CR4: "CR4",
  555. CR5: "CR5",
  556. CR6: "CR6",
  557. CR7: "CR7",
  558. CR8: "CR8",
  559. CR9: "CR9",
  560. CR10: "CR10",
  561. CR11: "CR11",
  562. CR12: "CR12",
  563. CR13: "CR13",
  564. CR14: "CR14",
  565. CR15: "CR15",
  566. DR0: "DR0",
  567. DR1: "DR1",
  568. DR2: "DR2",
  569. DR3: "DR3",
  570. DR4: "DR4",
  571. DR5: "DR5",
  572. DR6: "DR6",
  573. DR7: "DR7",
  574. DR8: "DR8",
  575. DR9: "DR9",
  576. DR10: "DR10",
  577. DR11: "DR11",
  578. DR12: "DR12",
  579. DR13: "DR13",
  580. DR14: "DR14",
  581. DR15: "DR15",
  582. TR0: "TR0",
  583. TR1: "TR1",
  584. TR2: "TR2",
  585. TR3: "TR3",
  586. TR4: "TR4",
  587. TR5: "TR5",
  588. TR6: "TR6",
  589. TR7: "TR7",
  590. }