decode.go 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Table-driven decoding of x86 instructions.
  5. package x86asm
  6. import (
  7. "encoding/binary"
  8. "errors"
  9. "fmt"
  10. "runtime"
  11. )
  12. // Set trace to true to cause the decoder to print the PC sequence
  13. // of the executed instruction codes. This is typically only useful
  14. // when you are running a test of a single input case.
  15. const trace = false
  16. // A decodeOp is a single instruction in the decoder bytecode program.
  17. //
  18. // The decodeOps correspond to consuming and conditionally branching
  19. // on input bytes, consuming additional fields, and then interpreting
  20. // consumed data as instruction arguments. The names of the xRead and xArg
  21. // operations are taken from the Intel manual conventions, for example
  22. // Volume 2, Section 3.1.1, page 487 of
  23. // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
  24. //
  25. // The actual decoding program is generated by ../x86map.
  26. //
  27. // TODO(rsc): We may be able to merge various of the memory operands
  28. // since we don't care about, say, the distinction between m80dec and m80bcd.
  29. // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
  30. type decodeOp uint16
  31. const ( // values are assigned by iota, so the order below fixes each op's numeric encoding
  32. xFail decodeOp = iota // invalid instruction (return)
  33. xMatch // completed match
  34. xJump // jump to pc
  35. xCondByte // switch on instruction byte value
  36. xCondSlashR // read and switch on instruction /r value
  37. xCondPrefix // switch on presence of instruction prefix
  38. xCondIs64 // switch on 64-bit processor mode
  39. xCondDataSize // switch on operand size
  40. xCondAddrSize // switch on address size
  41. xCondIsMem // switch on memory vs register argument
  42. xSetOp // set instruction opcode
  43. xReadSlashR // read /r
  44. xReadIb // read ib
  45. xReadIw // read iw
  46. xReadId // read id
  47. xReadIo // read io
  48. xReadCb // read cb
  49. xReadCw // read cw
  50. xReadCd // read cd
  51. xReadCp // read cp
  52. xReadCm // read cm
  53. xArg1 // arg 1
  54. xArg3 // arg 3
  55. xArgAL // arg AL
  56. xArgAX // arg AX
  57. xArgCL // arg CL
  58. xArgCR0dashCR7 // arg CR0-CR7
  59. xArgCS // arg CS
  60. xArgDR0dashDR7 // arg DR0-DR7
  61. xArgDS // arg DS
  62. xArgDX // arg DX
  63. xArgEAX // arg EAX
  64. xArgEDX // arg EDX
  65. xArgES // arg ES
  66. xArgFS // arg FS
  67. xArgGS // arg GS
  68. xArgImm16 // arg imm16
  69. xArgImm32 // arg imm32
  70. xArgImm64 // arg imm64
  71. xArgImm8 // arg imm8
  72. xArgImm8u // arg imm8 but record as unsigned
  73. xArgImm16u // arg imm16 but record as unsigned
  74. xArgM // arg m
  75. xArgM128 // arg m128
  76. xArgM256 // arg m256
  77. xArgM1428byte // arg m14/28byte
  78. xArgM16 // arg m16
  79. xArgM16and16 // arg m16&16
  80. xArgM16and32 // arg m16&32
  81. xArgM16and64 // arg m16&64
  82. xArgM16colon16 // arg m16:16
  83. xArgM16colon32 // arg m16:32
  84. xArgM16colon64 // arg m16:64
  85. xArgM16int // arg m16int
  86. xArgM2byte // arg m2byte
  87. xArgM32 // arg m32
  88. xArgM32and32 // arg m32&32
  89. xArgM32fp // arg m32fp
  90. xArgM32int // arg m32int
  91. xArgM512byte // arg m512byte
  92. xArgM64 // arg m64
  93. xArgM64fp // arg m64fp
  94. xArgM64int // arg m64int
  95. xArgM8 // arg m8
  96. xArgM80bcd // arg m80bcd
  97. xArgM80dec // arg m80dec
  98. xArgM80fp // arg m80fp
  99. xArgM94108byte // arg m94/108byte
  100. xArgMm // arg mm
  101. xArgMm1 // arg mm1
  102. xArgMm2 // arg mm2
  103. xArgMm2M64 // arg mm2/m64
  104. xArgMmM32 // arg mm/m32
  105. xArgMmM64 // arg mm/m64
  106. xArgMem // arg mem
  107. xArgMoffs16 // arg moffs16
  108. xArgMoffs32 // arg moffs32
  109. xArgMoffs64 // arg moffs64
  110. xArgMoffs8 // arg moffs8
  111. xArgPtr16colon16 // arg ptr16:16
  112. xArgPtr16colon32 // arg ptr16:32
  113. xArgR16 // arg r16
  114. xArgR16op // arg r16 with +rw in opcode
  115. xArgR32 // arg r32
  116. xArgR32M16 // arg r32/m16
  117. xArgR32M8 // arg r32/m8
  118. xArgR32op // arg r32 with +rd in opcode
  119. xArgR64 // arg r64
  120. xArgR64M16 // arg r64/m16
  121. xArgR64op // arg r64 with +rd in opcode
  122. xArgR8 // arg r8
  123. xArgR8op // arg r8 with +rb in opcode
  124. xArgRAX // arg RAX
  125. xArgRDX // arg RDX
  126. xArgRM // arg r/m
  127. xArgRM16 // arg r/m16
  128. xArgRM32 // arg r/m32
  129. xArgRM64 // arg r/m64
  130. xArgRM8 // arg r/m8
  131. xArgReg // arg reg
  132. xArgRegM16 // arg reg/m16
  133. xArgRegM32 // arg reg/m32
  134. xArgRegM8 // arg reg/m8
  135. xArgRel16 // arg rel16
  136. xArgRel32 // arg rel32
  137. xArgRel8 // arg rel8
  138. xArgSS // arg SS
  139. xArgST // arg ST, aka ST(0)
  140. xArgSTi // arg ST(i) with +i in opcode
  141. xArgSreg // arg Sreg
  142. xArgTR0dashTR7 // arg TR0-TR7
  143. xArgXmm // arg xmm
  144. xArgXMM0 // arg <XMM0>
  145. xArgXmm1 // arg xmm1
  146. xArgXmm2 // arg xmm2
  147. xArgXmm2M128 // arg xmm2/m128
  148. xArgYmm2M256 // arg ymm2/m256
  149. xArgXmm2M16 // arg xmm2/m16
  150. xArgXmm2M32 // arg xmm2/m32
  151. xArgXmm2M64 // arg xmm2/m64
  152. xArgXmmM128 // arg xmm/m128
  153. xArgXmmM32 // arg xmm/m32
  154. xArgXmmM64 // arg xmm/m64
  155. xArgYmm1 // arg ymm1
  156. xArgRmf16 // arg r/m16 but force mod=3
  157. xArgRmf32 // arg r/m32 but force mod=3
  158. xArgRmf64 // arg r/m64 but force mod=3
  159. )
  160. // instPrefix returns an Inst describing just one prefix byte.
  161. // It is only used if there is a prefix followed by an unintelligible
  162. // or invalid instruction byte sequence.
  163. func instPrefix(b byte, mode int) (Inst, error) {
  164. // When tracing it is useful to see what called instPrefix to report an error.
  165. if trace {
  166. _, file, line, _ := runtime.Caller(1)
  167. fmt.Printf("%s:%d\n", file, line)
  168. }
  169. p := Prefix(b)
  170. switch p {
  171. case PrefixDataSize:
  172. if mode == 16 {
  173. p = PrefixData32
  174. } else {
  175. p = PrefixData16
  176. }
  177. case PrefixAddrSize:
  178. if mode == 32 {
  179. p = PrefixAddr16
  180. } else {
  181. p = PrefixAddr32
  182. }
  183. }
  184. // Note: using composite literal with Prefix key confuses 'bundle' tool.
  185. inst := Inst{Len: 1}
  186. inst.Prefix = Prefixes{p}
  187. return inst, nil
  188. }
  189. // truncated reports a truncated instruction.
  190. // For now we use instPrefix but perhaps later we will return
  191. // a specific error here.
  192. func truncated(src []byte, mode int) (Inst, error) {
  193. if len(src) == 0 {
  194. return Inst{}, ErrTruncated
  195. }
  196. return instPrefix(src[0], mode) // too long
  197. }
  198. // These are the errors returned by Decode.
  199. var (
  200. ErrInvalidMode = errors.New("invalid x86 mode in Decode") // mode was not 16, 32, or 64
  201. ErrTruncated = errors.New("truncated instruction") // returned by truncated when src is empty
  202. ErrUnrecognized = errors.New("unrecognized instruction")
  203. )
  204. // decoderCover records coverage information for which parts
  205. // of the byte code have been executed.
  206. var decoderCover []bool // indexed by decoder pc; left nil, nothing is recorded
  207. // Decode decodes the leading bytes in src as a single instruction.
  208. // The mode arguments specifies the assumed processor mode:
  209. // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
  210. func Decode(src []byte, mode int) (inst Inst, err error) {
  211. return decode1(src, mode, false)
  212. }
  213. // decode1 is the implementation of Decode but takes an extra
  214. // gnuCompat flag to cause it to change its behavior to mimic
  215. // bugs (or at least unique features) of GNU libopcodes as used
  216. // by objdump. We don't believe that logic is the right thing to do
  217. // in general, but when testing against libopcodes it simplifies the
  218. // comparison if we adjust a few small pieces of logic.
  219. // The affected logic is in the conditional branch for "mandatory" prefixes,
  220. // case xCondPrefix.
  221. func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
  222. switch mode {
  223. case 16, 32, 64:
  224. // ok
  225. // TODO(rsc): 64-bit mode not tested, probably not working.
  226. default:
  227. return Inst{}, ErrInvalidMode
  228. }
  229. // Maximum instruction size is 15 bytes.
  230. // If we need to read more, return 'truncated instruction'.
  231. if len(src) > 15 {
  232. src = src[:15]
  233. }
  234. var (
  235. // prefix decoding information
  236. pos = 0 // position reading src
  237. nprefix = 0 // number of prefixes
  238. lockIndex = -1 // index of LOCK prefix in src and inst.Prefix
  239. repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix
  240. segIndex = -1 // index of Group 2 prefix in src and inst.Prefix
  241. dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix
  242. addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix
  243. rex Prefix // rex byte if present (or 0)
  244. rexUsed Prefix // bits used in rex byte
  245. rexIndex = -1 // index of rex byte
  246. vex Prefix // use vex encoding
  247. vexIndex = -1 // index of vex prefix
  248. addrMode = mode // address mode (width in bits)
  249. dataMode = mode // operand mode (width in bits)
  250. // decoded ModR/M fields
  251. haveModrm bool
  252. modrm int
  253. mod int
  254. regop int
  255. rm int
  256. // if ModR/M is memory reference, Mem form
  257. mem Mem
  258. haveMem bool
  259. // decoded SIB fields
  260. haveSIB bool
  261. sib int
  262. scale int
  263. index int
  264. base int
  265. displen int
  266. dispoff int
  267. // decoded immediate values
  268. imm int64
  269. imm8 int8
  270. immc int64
  271. immcpos int
  272. // output
  273. opshift int
  274. inst Inst
  275. narg int // number of arguments written to inst
  276. )
  277. if mode == 64 {
  278. dataMode = 32
  279. }
  280. // Prefixes are certainly the most complex and underspecified part of
  281. // decoding x86 instructions. Although the manuals say things like
  282. // up to four prefixes, one from each group, nearly everyone seems to
  283. // agree that in practice as many prefixes as possible, including multiple
  284. // from a particular group or repetitions of a given prefix, can be used on
  285. // an instruction, provided the total instruction length including prefixes
  286. // does not exceed the agreed-upon maximum of 15 bytes.
  287. // Everyone also agrees that if one of these prefixes is the LOCK prefix
  288. // and the instruction is not one of the instructions that can be used with
  289. // the LOCK prefix or if the destination is not a memory operand,
  290. // then the instruction is invalid and produces the #UD exception.
  291. // However, that is the end of any semblance of agreement.
  292. //
  293. // What happens if prefixes are given that conflict with other prefixes?
  294. // For example, the memory segment overrides CS, DS, ES, FS, GS, SS
  295. // conflict with each other: only one segment can be in effect.
  296. // Disassemblers seem to agree that later prefixes take priority over
  297. // earlier ones. I have not taken the time to write assembly programs
  298. // to check to see if the hardware agrees.
  299. //
  300. // What happens if prefixes are given that have no meaning for the
  301. // specific instruction to which they are attached? It depends.
  302. // If they really have no meaning, they are ignored. However, a future
  303. // processor may assign a different meaning. As a disassembler, we
  304. // don't really know whether we're seeing a meaningless prefix or one
  305. // whose meaning we simply haven't been told yet.
  306. //
  307. // Combining the two questions, what happens when conflicting
  308. // extension prefixes are given? No one seems to know for sure.
  309. // For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
  310. // and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
  311. // Which prefix wins? See the xCondPrefix case for more.
  312. //
  313. // Writing assembly test cases to divine which interpretation the
  314. // CPU uses might clarify the situation, but more likely it would
  315. // make the situation even less clear.
  316. // Read non-REX prefixes.
  317. ReadPrefixes:
  318. for ; pos < len(src); pos++ {
  319. p := Prefix(src[pos])
  320. switch p {
  321. default:
  322. nprefix = pos
  323. break ReadPrefixes
  324. // Group 1 - lock and repeat prefixes
  325. // According to Intel, there should only be one from this set,
  326. // but according to AMD both can be present.
  327. case 0xF0:
  328. if lockIndex >= 0 {
  329. inst.Prefix[lockIndex] |= PrefixIgnored
  330. }
  331. lockIndex = pos
  332. case 0xF2, 0xF3:
  333. if repIndex >= 0 {
  334. inst.Prefix[repIndex] |= PrefixIgnored
  335. }
  336. repIndex = pos
  337. // Group 2 - segment override / branch hints
  338. case 0x26, 0x2E, 0x36, 0x3E:
  339. if mode == 64 {
  340. p |= PrefixIgnored
  341. break
  342. }
  343. fallthrough
  344. case 0x64, 0x65:
  345. if segIndex >= 0 {
  346. inst.Prefix[segIndex] |= PrefixIgnored
  347. }
  348. segIndex = pos
  349. // Group 3 - operand size override
  350. case 0x66:
  351. if mode == 16 {
  352. dataMode = 32
  353. p = PrefixData32
  354. } else {
  355. dataMode = 16
  356. p = PrefixData16
  357. }
  358. if dataSizeIndex >= 0 {
  359. inst.Prefix[dataSizeIndex] |= PrefixIgnored
  360. }
  361. dataSizeIndex = pos
  362. // Group 4 - address size override
  363. case 0x67:
  364. if mode == 32 {
  365. addrMode = 16
  366. p = PrefixAddr16
  367. } else {
  368. addrMode = 32
  369. p = PrefixAddr32
  370. }
  371. if addrSizeIndex >= 0 {
  372. inst.Prefix[addrSizeIndex] |= PrefixIgnored
  373. }
  374. addrSizeIndex = pos
  375. //Group 5 - Vex encoding
  376. case 0xC5:
  377. if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
  378. vex = p
  379. vexIndex = pos
  380. inst.Prefix[pos] = p
  381. inst.Prefix[pos+1] = Prefix(src[pos+1])
  382. pos += 1
  383. continue
  384. } else {
  385. nprefix = pos
  386. break ReadPrefixes
  387. }
  388. case 0xC4:
  389. if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
  390. vex = p
  391. vexIndex = pos
  392. inst.Prefix[pos] = p
  393. inst.Prefix[pos+1] = Prefix(src[pos+1])
  394. inst.Prefix[pos+2] = Prefix(src[pos+2])
  395. pos += 2
  396. continue
  397. } else {
  398. nprefix = pos
  399. break ReadPrefixes
  400. }
  401. }
  402. if pos >= len(inst.Prefix) {
  403. return instPrefix(src[0], mode) // too long
  404. }
  405. inst.Prefix[pos] = p
  406. }
  407. // Read REX prefix.
  408. if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
  409. rex = Prefix(src[pos])
  410. rexIndex = pos
  411. if pos >= len(inst.Prefix) {
  412. return instPrefix(src[0], mode) // too long
  413. }
  414. inst.Prefix[pos] = rex
  415. pos++
  416. if rex&PrefixREXW != 0 {
  417. dataMode = 64
  418. if dataSizeIndex >= 0 {
  419. inst.Prefix[dataSizeIndex] |= PrefixIgnored
  420. }
  421. }
  422. }
  423. // Decode instruction stream, interpreting decoding instructions.
  424. // opshift gives the shift to use when saving the next
  425. // opcode byte into inst.Opcode.
  426. opshift = 24
  427. // Decode loop, executing decoder program.
  428. var oldPC, prevPC int
  429. Decode:
  430. for pc := 1; ; { // TODO uint
  431. oldPC = prevPC
  432. prevPC = pc
  433. if trace {
  434. println("run", pc)
  435. }
  436. x := decoder[pc]
  437. if decoderCover != nil {
  438. decoderCover[pc] = true
  439. }
  440. pc++
  441. // Read and decode ModR/M if needed by opcode.
  442. switch decodeOp(x) {
  443. case xCondSlashR, xReadSlashR:
  444. if haveModrm {
  445. return Inst{Len: pos}, errInternal
  446. }
  447. haveModrm = true
  448. if pos >= len(src) {
  449. return truncated(src, mode)
  450. }
  451. modrm = int(src[pos])
  452. pos++
  453. if opshift >= 0 {
  454. inst.Opcode |= uint32(modrm) << uint(opshift)
  455. opshift -= 8
  456. }
  457. mod = modrm >> 6
  458. regop = (modrm >> 3) & 07
  459. rm = modrm & 07
  460. if rex&PrefixREXR != 0 {
  461. rexUsed |= PrefixREXR
  462. regop |= 8
  463. }
  464. if addrMode == 16 {
  465. // 16-bit modrm form
  466. if mod != 3 {
  467. haveMem = true
  468. mem = addr16[rm]
  469. if rm == 6 && mod == 0 {
  470. mem.Base = 0
  471. }
  472. // Consume disp16 if present.
  473. if mod == 0 && rm == 6 || mod == 2 {
  474. if pos+2 > len(src) {
  475. return truncated(src, mode)
  476. }
  477. mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
  478. pos += 2
  479. }
  480. // Consume disp8 if present.
  481. if mod == 1 {
  482. if pos >= len(src) {
  483. return truncated(src, mode)
  484. }
  485. mem.Disp = int64(int8(src[pos]))
  486. pos++
  487. }
  488. }
  489. } else {
  490. haveMem = mod != 3
  491. // 32-bit or 64-bit form
  492. // Consume SIB encoding if present.
  493. if rm == 4 && mod != 3 {
  494. haveSIB = true
  495. if pos >= len(src) {
  496. return truncated(src, mode)
  497. }
  498. sib = int(src[pos])
  499. pos++
  500. if opshift >= 0 {
  501. inst.Opcode |= uint32(sib) << uint(opshift)
  502. opshift -= 8
  503. }
  504. scale = sib >> 6
  505. index = (sib >> 3) & 07
  506. base = sib & 07
  507. if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
  508. rexUsed |= PrefixREXB
  509. base |= 8
  510. }
  511. if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
  512. rexUsed |= PrefixREXX
  513. index |= 8
  514. }
  515. mem.Scale = 1 << uint(scale)
  516. if index == 4 {
  517. // no mem.Index
  518. } else {
  519. mem.Index = baseRegForBits(addrMode) + Reg(index)
  520. }
  521. if base&7 == 5 && mod == 0 {
  522. // no mem.Base
  523. } else {
  524. mem.Base = baseRegForBits(addrMode) + Reg(base)
  525. }
  526. } else {
  527. if rex&PrefixREXB != 0 {
  528. rexUsed |= PrefixREXB
  529. rm |= 8
  530. }
  531. if mod == 0 && rm&7 == 5 || rm&7 == 4 {
  532. // base omitted
  533. } else if mod != 3 {
  534. mem.Base = baseRegForBits(addrMode) + Reg(rm)
  535. }
  536. }
  537. // Consume disp32 if present.
  538. if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
  539. if pos+4 > len(src) {
  540. return truncated(src, mode)
  541. }
  542. dispoff = pos
  543. displen = 4
  544. mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
  545. pos += 4
  546. }
  547. // Consume disp8 if present.
  548. if mod == 1 {
  549. if pos >= len(src) {
  550. return truncated(src, mode)
  551. }
  552. dispoff = pos
  553. displen = 1
  554. mem.Disp = int64(int8(src[pos]))
  555. pos++
  556. }
  557. // In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
  558. // See Vol 2A. Table 2-7.
  559. if mode == 64 && mod == 0 && rm&7 == 5 {
  560. if addrMode == 32 {
  561. mem.Base = EIP
  562. } else {
  563. mem.Base = RIP
  564. }
  565. }
  566. }
  567. if segIndex >= 0 {
  568. mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  569. }
  570. }
  571. // Execute single opcode.
  572. switch decodeOp(x) {
  573. default:
  574. println("bad op", x, "at", pc-1, "from", oldPC)
  575. return Inst{Len: pos}, errInternal
  576. case xFail:
  577. inst.Op = 0
  578. break Decode
  579. case xMatch:
  580. break Decode
  581. case xJump:
  582. pc = int(decoder[pc])
  583. // Conditional branches.
  584. case xCondByte:
  585. if pos >= len(src) {
  586. return truncated(src, mode)
  587. }
  588. b := src[pos]
  589. n := int(decoder[pc])
  590. pc++
  591. for i := 0; i < n; i++ {
  592. xb, xpc := decoder[pc], int(decoder[pc+1])
  593. pc += 2
  594. if b == byte(xb) {
  595. pc = xpc
  596. pos++
  597. if opshift >= 0 {
  598. inst.Opcode |= uint32(b) << uint(opshift)
  599. opshift -= 8
  600. }
  601. continue Decode
  602. }
  603. }
  604. // xCondByte is the only conditional with a fall through,
  605. // so that it can be used to pick off special cases before
  606. // an xCondSlash. If the fallthrough instruction is xFail,
  607. // advance the position so that the decoded instruction
  608. // size includes the byte we just compared against.
  609. if decodeOp(decoder[pc]) == xJump {
  610. pc = int(decoder[pc+1])
  611. }
  612. if decodeOp(decoder[pc]) == xFail {
  613. pos++
  614. }
  615. case xCondIs64:
  616. if mode == 64 {
  617. pc = int(decoder[pc+1])
  618. } else {
  619. pc = int(decoder[pc])
  620. }
  621. case xCondIsMem:
  622. mem := haveMem
  623. if !haveModrm {
  624. if pos >= len(src) {
  625. return instPrefix(src[0], mode) // too long
  626. }
  627. mem = src[pos]>>6 != 3
  628. }
  629. if mem {
  630. pc = int(decoder[pc+1])
  631. } else {
  632. pc = int(decoder[pc])
  633. }
  634. case xCondDataSize:
  635. switch dataMode {
  636. case 16:
  637. if dataSizeIndex >= 0 {
  638. inst.Prefix[dataSizeIndex] |= PrefixImplicit
  639. }
  640. pc = int(decoder[pc])
  641. case 32:
  642. if dataSizeIndex >= 0 {
  643. inst.Prefix[dataSizeIndex] |= PrefixImplicit
  644. }
  645. pc = int(decoder[pc+1])
  646. case 64:
  647. rexUsed |= PrefixREXW
  648. pc = int(decoder[pc+2])
  649. }
  650. case xCondAddrSize:
  651. switch addrMode {
  652. case 16:
  653. if addrSizeIndex >= 0 {
  654. inst.Prefix[addrSizeIndex] |= PrefixImplicit
  655. }
  656. pc = int(decoder[pc])
  657. case 32:
  658. if addrSizeIndex >= 0 {
  659. inst.Prefix[addrSizeIndex] |= PrefixImplicit
  660. }
  661. pc = int(decoder[pc+1])
  662. case 64:
  663. pc = int(decoder[pc+2])
  664. }
  665. case xCondPrefix:
  666. // Conditional branch based on presence or absence of prefixes.
  667. // The conflict cases here are completely undocumented and
  668. // differ significantly between GNU libopcodes and Intel xed.
  669. // I have not written assembly code to divine what various CPUs
  670. // do, but it wouldn't surprise me if they are not consistent either.
  671. //
  672. // The basic idea is to switch on the presence of a prefix, so that
  673. // for example:
  674. //
  675. // xCondPrefix, 4
  676. // 0xF3, 123,
  677. // 0xF2, 234,
  678. // 0x66, 345,
  679. // 0, 456
  680. //
  681. // branch to 123 if the F3 prefix is present, 234 if the F2 prefix
  682. // is present, 345 if the 66 prefix is present, and 456 otherwise.
  683. // The prefixes are given in descending order so that the 0 will be last.
  684. //
  685. // It is unclear what should happen if multiple conditions are
  686. // satisfied: what if F2 and F3 are both present, or if 66 and F2
  687. // are present, or if all three are present? The one chosen becomes
  688. // part of the opcode and the others do not. Perhaps the answer
  689. // depends on the specific opcodes in question.
  690. //
  691. // The only clear example is that CRC32 is F2 0F 38 F1 /r, and
  692. // it comes in 16-bit and 32-bit forms based on the 66 prefix,
  693. // so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
  694. // with the 66 being only an operand size override, and probably
  695. // F2 66 0F 38 F1 /r should be treated the same.
  696. // Perhaps that rule is specific to the case of CRC32, since no
  697. // 66 0F 38 F1 instruction is defined (today) (that we know of).
  698. // However, both libopcodes and xed seem to generalize this
  699. // example and choose F2/F3 in preference to 66, and we
  700. // do the same.
  701. //
  702. // Next, what if both F2 and F3 are present? Which wins?
  703. // The Intel xed rule, and ours, is that the one that occurs last wins.
  704. // The GNU libopcodes rule, which we implement only in gnuCompat mode,
  705. // is that F3 beats F2 unless F3 has no special meaning, in which
  706. // case F3 can be a modifier on an F2 special meaning.
  707. //
  708. // Concretely,
  709. // 66 0F D6 /r is MOVQ
  710. // F2 0F D6 /r is MOVDQ2Q
  711. // F3 0F D6 /r is MOVQ2DQ.
  712. //
  713. // F2 66 0F D6 /r is 66 + MOVDQ2Q always.
  714. // 66 F2 0F D6 /r is 66 + MOVDQ2Q always.
  715. // F3 66 0F D6 /r is 66 + MOVQ2DQ always.
  716. // 66 F3 0F D6 /r is 66 + MOVQ2DQ always.
  717. // F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
  718. // F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
  719. // Adding 66 anywhere in the prefix section of the
  720. // last two cases does not change the outcome.
  721. //
  722. // Finally, what if there is a variant in which 66 is a mandatory
  723. // prefix rather than an operand size override, but we know of
  724. // no corresponding F2/F3 form, and we see both F2/F3 and 66.
  725. // Does F2/F3 still take priority, so that the result is an unknown
  726. // instruction, or does the 66 take priority, so that the extended
  727. // 66 instruction should be interpreted as having a REP/REPN prefix?
  728. // Intel xed does the former and GNU libopcodes does the latter.
  729. // We side with Intel xed, unless we are trying to match libopcodes
  730. // more closely during the comparison-based test suite.
  731. //
  732. // In 64-bit mode REX.W is another valid prefix to test for, but
  733. // there is less ambiguity about that. When present, REX.W is
  734. // always the first entry in the table.
  735. n := int(decoder[pc])
  736. pc++
  737. sawF3 := false
  738. for j := 0; j < n; j++ {
  739. prefix := Prefix(decoder[pc+2*j])
  740. if prefix.IsREX() {
  741. rexUsed |= prefix
  742. if rex&prefix == prefix {
  743. pc = int(decoder[pc+2*j+1])
  744. continue Decode
  745. }
  746. continue
  747. }
  748. ok := false
  749. if prefix == 0 {
  750. ok = true
  751. } else if prefix.IsREX() {
  752. rexUsed |= prefix
  753. if rex&prefix == prefix {
  754. ok = true
  755. }
  756. } else if prefix == 0xC5 || prefix == 0xC4 {
  757. if vex == prefix {
  758. ok = true
  759. }
  760. } else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
  761. prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
  762. var vexM, vexP Prefix
  763. if vex == 0xC5 {
  764. vexM = 1 // 2 byte vex always implies 0F
  765. vexP = inst.Prefix[vexIndex+1]
  766. } else {
  767. vexM = inst.Prefix[vexIndex+1]
  768. vexP = inst.Prefix[vexIndex+2]
  769. }
  770. switch prefix {
  771. case 0x66:
  772. ok = vexP&3 == 1
  773. case 0xF3:
  774. ok = vexP&3 == 2
  775. case 0xF2:
  776. ok = vexP&3 == 3
  777. case 0x0F:
  778. ok = vexM&3 == 1
  779. case 0x0F38:
  780. ok = vexM&3 == 2
  781. case 0x0F3A:
  782. ok = vexM&3 == 3
  783. }
  784. } else {
  785. if prefix == 0xF3 {
  786. sawF3 = true
  787. }
  788. switch prefix {
  789. case PrefixLOCK:
  790. if lockIndex >= 0 {
  791. inst.Prefix[lockIndex] |= PrefixImplicit
  792. ok = true
  793. }
  794. case PrefixREP, PrefixREPN:
  795. if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
  796. inst.Prefix[repIndex] |= PrefixImplicit
  797. ok = true
  798. }
  799. if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
  800. // Check to see if earlier prefix F3 is present.
  801. for i := repIndex - 1; i >= 0; i-- {
  802. if inst.Prefix[i]&0xFF == prefix {
  803. inst.Prefix[i] |= PrefixImplicit
  804. ok = true
  805. }
  806. }
  807. }
  808. if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
  809. // Check to see if earlier prefix F2 is present.
  810. for i := repIndex - 1; i >= 0; i-- {
  811. if inst.Prefix[i]&0xFF == prefix {
  812. inst.Prefix[i] |= PrefixImplicit
  813. ok = true
  814. }
  815. }
  816. }
  817. case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
  818. if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
  819. inst.Prefix[segIndex] |= PrefixImplicit
  820. ok = true
  821. }
  822. case PrefixDataSize:
  823. // Looking for 66 mandatory prefix.
  824. // The F2/F3 mandatory prefixes take priority when both are present.
  825. // If we got this far in the xCondPrefix table and an F2/F3 is present,
  826. // it means the table didn't have any entry for that prefix. But if 66 has
  827. // special meaning, perhaps F2/F3 have special meaning that we don't know.
  828. // Intel xed works this way, treating the F2/F3 as inhibiting the 66.
  829. // GNU libopcodes allows the 66 to match. We do what Intel xed does
  830. // except in gnuCompat mode.
  831. if repIndex >= 0 && !gnuCompat {
  832. inst.Op = 0
  833. break Decode
  834. }
  835. if dataSizeIndex >= 0 {
  836. inst.Prefix[dataSizeIndex] |= PrefixImplicit
  837. ok = true
  838. }
  839. case PrefixAddrSize:
  840. if addrSizeIndex >= 0 {
  841. inst.Prefix[addrSizeIndex] |= PrefixImplicit
  842. ok = true
  843. }
  844. }
  845. }
  846. if ok {
  847. pc = int(decoder[pc+2*j+1])
  848. continue Decode
  849. }
  850. }
  851. inst.Op = 0
  852. break Decode
  853. case xCondSlashR:
  854. pc = int(decoder[pc+regop&7])
  855. // Input.
  856. case xReadSlashR:
  857. // done above
  858. case xReadIb:
  859. if pos >= len(src) {
  860. return truncated(src, mode)
  861. }
  862. imm8 = int8(src[pos])
  863. pos++
  864. case xReadIw:
  865. if pos+2 > len(src) {
  866. return truncated(src, mode)
  867. }
  868. imm = int64(binary.LittleEndian.Uint16(src[pos:]))
  869. pos += 2
  870. case xReadId:
  871. if pos+4 > len(src) {
  872. return truncated(src, mode)
  873. }
  874. imm = int64(binary.LittleEndian.Uint32(src[pos:]))
  875. pos += 4
  876. case xReadIo:
  877. if pos+8 > len(src) {
  878. return truncated(src, mode)
  879. }
  880. imm = int64(binary.LittleEndian.Uint64(src[pos:]))
  881. pos += 8
  882. case xReadCb:
  883. if pos >= len(src) {
  884. return truncated(src, mode)
  885. }
  886. immcpos = pos
  887. immc = int64(src[pos])
  888. pos++
  889. case xReadCw:
  890. if pos+2 > len(src) {
  891. return truncated(src, mode)
  892. }
  893. immcpos = pos
  894. immc = int64(binary.LittleEndian.Uint16(src[pos:]))
  895. pos += 2
  896. case xReadCm:
  897. immcpos = pos
  898. if addrMode == 16 {
  899. if pos+2 > len(src) {
  900. return truncated(src, mode)
  901. }
  902. immc = int64(binary.LittleEndian.Uint16(src[pos:]))
  903. pos += 2
  904. } else if addrMode == 32 {
  905. if pos+4 > len(src) {
  906. return truncated(src, mode)
  907. }
  908. immc = int64(binary.LittleEndian.Uint32(src[pos:]))
  909. pos += 4
  910. } else {
  911. if pos+8 > len(src) {
  912. return truncated(src, mode)
  913. }
  914. immc = int64(binary.LittleEndian.Uint64(src[pos:]))
  915. pos += 8
  916. }
  917. case xReadCd:
  918. immcpos = pos
  919. if pos+4 > len(src) {
  920. return truncated(src, mode)
  921. }
  922. immc = int64(binary.LittleEndian.Uint32(src[pos:]))
  923. pos += 4
  924. case xReadCp:
  925. immcpos = pos
  926. if pos+6 > len(src) {
  927. return truncated(src, mode)
  928. }
  929. w := binary.LittleEndian.Uint32(src[pos:])
  930. w2 := binary.LittleEndian.Uint16(src[pos+4:])
  931. immc = int64(w2)<<32 | int64(w)
  932. pos += 6
  933. // Output.
  934. case xSetOp:
  935. inst.Op = Op(decoder[pc])
  936. pc++
  937. case xArg1,
  938. xArg3,
  939. xArgAL,
  940. xArgAX,
  941. xArgCL,
  942. xArgCS,
  943. xArgDS,
  944. xArgDX,
  945. xArgEAX,
  946. xArgEDX,
  947. xArgES,
  948. xArgFS,
  949. xArgGS,
  950. xArgRAX,
  951. xArgRDX,
  952. xArgSS,
  953. xArgST,
  954. xArgXMM0:
  955. inst.Args[narg] = fixedArg[x]
  956. narg++
  957. case xArgImm8:
  958. inst.Args[narg] = Imm(imm8)
  959. narg++
  960. case xArgImm8u:
  961. inst.Args[narg] = Imm(uint8(imm8))
  962. narg++
  963. case xArgImm16:
  964. inst.Args[narg] = Imm(int16(imm))
  965. narg++
  966. case xArgImm16u:
  967. inst.Args[narg] = Imm(uint16(imm))
  968. narg++
  969. case xArgImm32:
  970. inst.Args[narg] = Imm(int32(imm))
  971. narg++
  972. case xArgImm64:
  973. inst.Args[narg] = Imm(imm)
  974. narg++
  975. case xArgM,
  976. xArgM128,
  977. xArgM256,
  978. xArgM1428byte,
  979. xArgM16,
  980. xArgM16and16,
  981. xArgM16and32,
  982. xArgM16and64,
  983. xArgM16colon16,
  984. xArgM16colon32,
  985. xArgM16colon64,
  986. xArgM16int,
  987. xArgM2byte,
  988. xArgM32,
  989. xArgM32and32,
  990. xArgM32fp,
  991. xArgM32int,
  992. xArgM512byte,
  993. xArgM64,
  994. xArgM64fp,
  995. xArgM64int,
  996. xArgM8,
  997. xArgM80bcd,
  998. xArgM80dec,
  999. xArgM80fp,
  1000. xArgM94108byte,
  1001. xArgMem:
  1002. if !haveMem {
  1003. inst.Op = 0
  1004. break Decode
  1005. }
  1006. inst.Args[narg] = mem
  1007. inst.MemBytes = int(memBytes[decodeOp(x)])
  1008. if mem.Base == RIP {
  1009. inst.PCRel = displen
  1010. inst.PCRelOff = dispoff
  1011. }
  1012. narg++
  1013. case xArgPtr16colon16:
  1014. inst.Args[narg] = Imm(immc >> 16)
  1015. inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
  1016. narg += 2
  1017. case xArgPtr16colon32:
  1018. inst.Args[narg] = Imm(immc >> 32)
  1019. inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
  1020. narg += 2
  1021. case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
  1022. // TODO(rsc): Can address be 64 bits?
  1023. mem = Mem{Disp: int64(immc)}
  1024. if segIndex >= 0 {
  1025. mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  1026. inst.Prefix[segIndex] |= PrefixImplicit
  1027. }
  1028. inst.Args[narg] = mem
  1029. inst.MemBytes = int(memBytes[decodeOp(x)])
  1030. if mem.Base == RIP {
  1031. inst.PCRel = displen
  1032. inst.PCRelOff = dispoff
  1033. }
  1034. narg++
  1035. case xArgYmm1:
  1036. base := baseReg[x]
  1037. index := Reg(regop)
  1038. if inst.Prefix[vexIndex+1]&0x80 == 0 {
  1039. index += 8
  1040. }
  1041. inst.Args[narg] = base + index
  1042. narg++
  1043. case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
  1044. base := baseReg[x]
  1045. index := Reg(regop)
  1046. if rex != 0 && base == AL && index >= 4 {
  1047. rexUsed |= PrefixREX
  1048. index -= 4
  1049. base = SPB
  1050. }
  1051. inst.Args[narg] = base + index
  1052. narg++
  1053. case xArgMm, xArgMm1, xArgTR0dashTR7:
  1054. inst.Args[narg] = baseReg[x] + Reg(regop&7)
  1055. narg++
  1056. case xArgCR0dashCR7:
  1057. // AMD documents an extension that the LOCK prefix
  1058. // can be used in place of a REX prefix in order to access
  1059. // CR8 from 32-bit mode. The LOCK prefix is allowed in
  1060. // all modes, provided the corresponding CPUID bit is set.
  1061. if lockIndex >= 0 {
  1062. inst.Prefix[lockIndex] |= PrefixImplicit
  1063. regop += 8
  1064. }
  1065. inst.Args[narg] = CR0 + Reg(regop)
  1066. narg++
  1067. case xArgSreg:
  1068. regop &= 7
  1069. if regop >= 6 {
  1070. inst.Op = 0
  1071. break Decode
  1072. }
  1073. inst.Args[narg] = ES + Reg(regop)
  1074. narg++
  1075. case xArgRmf16, xArgRmf32, xArgRmf64:
  1076. base := baseReg[x]
  1077. index := Reg(modrm & 07)
  1078. if rex&PrefixREXB != 0 {
  1079. rexUsed |= PrefixREXB
  1080. index += 8
  1081. }
  1082. inst.Args[narg] = base + index
  1083. narg++
  1084. case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
  1085. n := inst.Opcode >> uint(opshift+8) & 07
  1086. base := baseReg[x]
  1087. index := Reg(n)
  1088. if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
  1089. rexUsed |= PrefixREXB
  1090. index += 8
  1091. }
  1092. if rex != 0 && base == AL && index >= 4 {
  1093. rexUsed |= PrefixREX
  1094. index -= 4
  1095. base = SPB
  1096. }
  1097. inst.Args[narg] = base + index
  1098. narg++
  1099. case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
  1100. xArgMmM32, xArgMmM64, xArgMm2M64,
  1101. xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
  1102. xArgYmm2M256:
  1103. if haveMem {
  1104. inst.Args[narg] = mem
  1105. inst.MemBytes = int(memBytes[decodeOp(x)])
  1106. if mem.Base == RIP {
  1107. inst.PCRel = displen
  1108. inst.PCRelOff = dispoff
  1109. }
  1110. } else {
  1111. base := baseReg[x]
  1112. index := Reg(rm)
  1113. switch decodeOp(x) {
  1114. case xArgMmM32, xArgMmM64, xArgMm2M64:
  1115. // There are only 8 MMX registers, so these ignore the REX.X bit.
  1116. index &= 7
  1117. case xArgRM8:
  1118. if rex != 0 && index >= 4 {
  1119. rexUsed |= PrefixREX
  1120. index -= 4
  1121. base = SPB
  1122. }
  1123. case xArgYmm2M256:
  1124. if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
  1125. index += 8
  1126. }
  1127. }
  1128. inst.Args[narg] = base + index
  1129. }
  1130. narg++
  1131. case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1132. if haveMem {
  1133. inst.Op = 0
  1134. break Decode
  1135. }
  1136. inst.Args[narg] = baseReg[x] + Reg(rm&7)
  1137. narg++
  1138. case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1139. if haveMem {
  1140. inst.Op = 0
  1141. break Decode
  1142. }
  1143. inst.Args[narg] = baseReg[x] + Reg(rm)
  1144. narg++
  1145. case xArgRel8:
  1146. inst.PCRelOff = immcpos
  1147. inst.PCRel = 1
  1148. inst.Args[narg] = Rel(int8(immc))
  1149. narg++
  1150. case xArgRel16:
  1151. inst.PCRelOff = immcpos
  1152. inst.PCRel = 2
  1153. inst.Args[narg] = Rel(int16(immc))
  1154. narg++
  1155. case xArgRel32:
  1156. inst.PCRelOff = immcpos
  1157. inst.PCRel = 4
  1158. inst.Args[narg] = Rel(int32(immc))
  1159. narg++
  1160. }
  1161. }
  1162. if inst.Op == 0 {
  1163. // Invalid instruction.
  1164. if nprefix > 0 {
  1165. return instPrefix(src[0], mode) // invalid instruction
  1166. }
  1167. return Inst{Len: pos}, ErrUnrecognized
  1168. }
  1169. // Matched! Hooray!
  1170. // 90 decodes as XCHG EAX, EAX but is NOP.
  1171. // 66 90 decodes as XCHG AX, AX and is NOP too.
  1172. // 48 90 decodes as XCHG RAX, RAX and is NOP too.
  1173. // 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
  1174. // F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
  1175. // It's all too special to handle in the decoding tables, at least for now.
  1176. if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
  1177. if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
  1178. inst.Op = NOP
  1179. if dataSizeIndex >= 0 {
  1180. inst.Prefix[dataSizeIndex] &^= PrefixImplicit
  1181. }
  1182. inst.Args[0] = nil
  1183. inst.Args[1] = nil
  1184. }
  1185. if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
  1186. inst.Prefix[repIndex] |= PrefixImplicit
  1187. inst.Op = PAUSE
  1188. inst.Args[0] = nil
  1189. inst.Args[1] = nil
  1190. } else if gnuCompat {
  1191. for i := nprefix - 1; i >= 0; i-- {
  1192. if inst.Prefix[i]&0xFF == 0xF3 {
  1193. inst.Prefix[i] |= PrefixImplicit
  1194. inst.Op = PAUSE
  1195. inst.Args[0] = nil
  1196. inst.Args[1] = nil
  1197. break
  1198. }
  1199. }
  1200. }
  1201. }
  1202. // defaultSeg returns the default segment for an implicit
  1203. // memory reference: the final override if present, or else DS.
  1204. defaultSeg := func() Reg {
  1205. if segIndex >= 0 {
  1206. inst.Prefix[segIndex] |= PrefixImplicit
  1207. return prefixToSegment(inst.Prefix[segIndex])
  1208. }
  1209. return DS
  1210. }
  1211. // Add implicit arguments not present in the tables.
  1212. // Normally we shy away from making implicit arguments explicit,
  1213. // following the Intel manuals, but adding the arguments seems
  1214. // the best way to express the effect of the segment override prefixes.
  1215. // TODO(rsc): Perhaps add these to the tables and
  1216. // create bytecode instructions for them.
  1217. usedAddrSize := false
  1218. switch inst.Op {
  1219. case INSB, INSW, INSD:
  1220. inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1221. inst.Args[1] = DX
  1222. usedAddrSize = true
  1223. case OUTSB, OUTSW, OUTSD:
  1224. inst.Args[0] = DX
  1225. inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1226. usedAddrSize = true
  1227. case MOVSB, MOVSW, MOVSD, MOVSQ:
  1228. inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1229. inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1230. usedAddrSize = true
  1231. case CMPSB, CMPSW, CMPSD, CMPSQ:
  1232. inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1233. inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1234. usedAddrSize = true
  1235. case LODSB, LODSW, LODSD, LODSQ:
  1236. switch inst.Op {
  1237. case LODSB:
  1238. inst.Args[0] = AL
  1239. case LODSW:
  1240. inst.Args[0] = AX
  1241. case LODSD:
  1242. inst.Args[0] = EAX
  1243. case LODSQ:
  1244. inst.Args[0] = RAX
  1245. }
  1246. inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1247. usedAddrSize = true
  1248. case STOSB, STOSW, STOSD, STOSQ:
  1249. inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1250. switch inst.Op {
  1251. case STOSB:
  1252. inst.Args[1] = AL
  1253. case STOSW:
  1254. inst.Args[1] = AX
  1255. case STOSD:
  1256. inst.Args[1] = EAX
  1257. case STOSQ:
  1258. inst.Args[1] = RAX
  1259. }
  1260. usedAddrSize = true
  1261. case SCASB, SCASW, SCASD, SCASQ:
  1262. inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1263. switch inst.Op {
  1264. case SCASB:
  1265. inst.Args[0] = AL
  1266. case SCASW:
  1267. inst.Args[0] = AX
  1268. case SCASD:
  1269. inst.Args[0] = EAX
  1270. case SCASQ:
  1271. inst.Args[0] = RAX
  1272. }
  1273. usedAddrSize = true
  1274. case XLATB:
  1275. inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
  1276. usedAddrSize = true
  1277. }
  1278. // If we used the address size annotation to construct the
  1279. // argument list, mark that prefix as implicit: it doesn't need
  1280. // to be shown when printing the instruction.
  1281. if haveMem || usedAddrSize {
  1282. if addrSizeIndex >= 0 {
  1283. inst.Prefix[addrSizeIndex] |= PrefixImplicit
  1284. }
  1285. }
  1286. // Similarly, if there's some memory operand, the segment
  1287. // will be shown there and doesn't need to be shown as an
  1288. // explicit prefix.
  1289. if haveMem {
  1290. if segIndex >= 0 {
  1291. inst.Prefix[segIndex] |= PrefixImplicit
  1292. }
  1293. }
  1294. // Branch predict prefixes are overloaded segment prefixes,
  1295. // since segment prefixes don't make sense on conditional jumps.
  1296. // Rewrite final instance to prediction prefix.
  1297. // The set of instructions to which the prefixes apply (other than the
  1298. // Jcc conditional jumps) is not 100% clear from the manuals, but
  1299. // the disassemblers seem to agree about the LOOP and JCXZ instructions,
  1300. // so we'll follow along.
  1301. // TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1302. if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
  1303. PredictLoop:
  1304. for i := nprefix - 1; i >= 0; i-- {
  1305. p := inst.Prefix[i]
  1306. switch p & 0xFF {
  1307. case PrefixCS:
  1308. inst.Prefix[i] = PrefixPN
  1309. break PredictLoop
  1310. case PrefixDS:
  1311. inst.Prefix[i] = PrefixPT
  1312. break PredictLoop
  1313. }
  1314. }
  1315. }
  1316. // The BND prefix is part of the Intel Memory Protection Extensions (MPX).
  1317. // A REPN applied to certain control transfers is a BND prefix to bound
  1318. // the range of possible destinations. There's surprisingly little documentation
  1319. // about this, so we just do what libopcodes and xed agree on.
  1320. // In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
  1321. // does not turn into a BND.
  1322. // TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1323. if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
  1324. for i := nprefix - 1; i >= 0; i-- {
  1325. p := inst.Prefix[i]
  1326. if p&^PrefixIgnored == PrefixREPN {
  1327. inst.Prefix[i] = PrefixBND
  1328. break
  1329. }
  1330. }
  1331. }
  1332. // The LOCK prefix only applies to certain instructions, and then only
  1333. // to instances of the instruction with a memory destination.
  1334. // Other uses of LOCK are invalid and cause a processor exception,
  1335. // in contrast to the "just ignore it" spirit applied to all other prefixes.
  1336. // Mark invalid lock prefixes.
  1337. hasLock := false
  1338. if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
  1339. switch inst.Op {
  1340. // TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1341. case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
  1342. if isMem(inst.Args[0]) {
  1343. hasLock = true
  1344. break
  1345. }
  1346. fallthrough
  1347. default:
  1348. inst.Prefix[lockIndex] |= PrefixInvalid
  1349. }
  1350. }
  1351. // In certain cases, all of which require a memory destination,
  1352. // the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
  1353. // from the Intel Transactional Synchronization Extensions (TSX).
  1354. //
  1355. // The specific rules are:
  1356. // (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
  1357. // (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
  1358. // (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
  1359. if isMem(inst.Args[0]) {
  1360. if inst.Op == XCHG {
  1361. hasLock = true
  1362. }
  1363. for i := len(inst.Prefix) - 1; i >= 0; i-- {
  1364. p := inst.Prefix[i] &^ PrefixIgnored
  1365. switch p {
  1366. case PrefixREPN:
  1367. if hasLock {
  1368. inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
  1369. }
  1370. case PrefixREP:
  1371. if hasLock {
  1372. inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1373. }
  1374. if inst.Op == MOV {
  1375. op := (inst.Opcode >> 24) &^ 1
  1376. if op == 0x88 || op == 0xC6 {
  1377. inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1378. }
  1379. }
  1380. }
  1381. }
  1382. }
  1383. // If REP is used on a non-REP-able instruction, mark the prefix as ignored.
  1384. if repIndex >= 0 {
  1385. switch inst.Prefix[repIndex] {
  1386. case PrefixREP, PrefixREPN:
  1387. switch inst.Op {
  1388. // According to the manuals, the REP/REPE prefix applies to all of these,
  1389. // while the REPN applies only to some of them. However, both libopcodes
  1390. // and xed show both prefixes explicitly for all instructions, so we do the same.
  1391. // TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1392. case INSB, INSW, INSD,
  1393. MOVSB, MOVSW, MOVSD, MOVSQ,
  1394. OUTSB, OUTSW, OUTSD,
  1395. LODSB, LODSW, LODSD, LODSQ,
  1396. CMPSB, CMPSW, CMPSD, CMPSQ,
  1397. SCASB, SCASW, SCASD, SCASQ,
  1398. STOSB, STOSW, STOSD, STOSQ:
  1399. // ok
  1400. default:
  1401. inst.Prefix[repIndex] |= PrefixIgnored
  1402. }
  1403. }
  1404. }
  1405. // If REX was present, mark implicit if all the 1 bits were consumed.
  1406. if rexIndex >= 0 {
  1407. if rexUsed != 0 {
  1408. rexUsed |= PrefixREX
  1409. }
  1410. if rex&^rexUsed == 0 {
  1411. inst.Prefix[rexIndex] |= PrefixImplicit
  1412. }
  1413. }
  1414. inst.DataSize = dataMode
  1415. inst.AddrSize = addrMode
  1416. inst.Mode = mode
  1417. inst.Len = pos
  1418. return inst, nil
  1419. }
  1420. var errInternal = errors.New("internal error")
  1421. // addr16 records the eight 16-bit addressing modes.
  1422. var addr16 = [8]Mem{
  1423. {Base: BX, Scale: 1, Index: SI},
  1424. {Base: BX, Scale: 1, Index: DI},
  1425. {Base: BP, Scale: 1, Index: SI},
  1426. {Base: BP, Scale: 1, Index: DI},
  1427. {Base: SI},
  1428. {Base: DI},
  1429. {Base: BP},
  1430. {Base: BX},
  1431. }
  1432. // baseRegForBits returns the base register for a given register size in bits.
  1433. func baseRegForBits(bits int) Reg {
  1434. switch bits {
  1435. case 8:
  1436. return AL
  1437. case 16:
  1438. return AX
  1439. case 32:
  1440. return EAX
  1441. case 64:
  1442. return RAX
  1443. }
  1444. return 0
  1445. }
  1446. // baseReg records the base register for argument types that specify
  1447. // a range of registers indexed by op, regop, or rm.
  1448. var baseReg = [...]Reg{
  1449. xArgDR0dashDR7: DR0,
  1450. xArgMm1: M0,
  1451. xArgMm2: M0,
  1452. xArgMm2M64: M0,
  1453. xArgMm: M0,
  1454. xArgMmM32: M0,
  1455. xArgMmM64: M0,
  1456. xArgR16: AX,
  1457. xArgR16op: AX,
  1458. xArgR32: EAX,
  1459. xArgR32M16: EAX,
  1460. xArgR32M8: EAX,
  1461. xArgR32op: EAX,
  1462. xArgR64: RAX,
  1463. xArgR64M16: RAX,
  1464. xArgR64op: RAX,
  1465. xArgR8: AL,
  1466. xArgR8op: AL,
  1467. xArgRM16: AX,
  1468. xArgRM32: EAX,
  1469. xArgRM64: RAX,
  1470. xArgRM8: AL,
  1471. xArgRmf16: AX,
  1472. xArgRmf32: EAX,
  1473. xArgRmf64: RAX,
  1474. xArgSTi: F0,
  1475. xArgTR0dashTR7: TR0,
  1476. xArgXmm1: X0,
  1477. xArgYmm1: X0,
  1478. xArgXmm2: X0,
  1479. xArgXmm2M128: X0,
  1480. xArgYmm2M256: X0,
  1481. xArgXmm2M16: X0,
  1482. xArgXmm2M32: X0,
  1483. xArgXmm2M64: X0,
  1484. xArgXmm: X0,
  1485. xArgXmmM128: X0,
  1486. xArgXmmM32: X0,
  1487. xArgXmmM64: X0,
  1488. }
  1489. // prefixToSegment returns the segment register
  1490. // corresponding to a particular segment prefix.
  1491. func prefixToSegment(p Prefix) Reg {
  1492. switch p &^ PrefixImplicit {
  1493. case PrefixCS:
  1494. return CS
  1495. case PrefixDS:
  1496. return DS
  1497. case PrefixES:
  1498. return ES
  1499. case PrefixFS:
  1500. return FS
  1501. case PrefixGS:
  1502. return GS
  1503. case PrefixSS:
  1504. return SS
  1505. }
  1506. return 0
  1507. }
  1508. // fixedArg records the fixed arguments corresponding to the given bytecodes.
  1509. var fixedArg = [...]Arg{
  1510. xArg1: Imm(1),
  1511. xArg3: Imm(3),
  1512. xArgAL: AL,
  1513. xArgAX: AX,
  1514. xArgDX: DX,
  1515. xArgEAX: EAX,
  1516. xArgEDX: EDX,
  1517. xArgRAX: RAX,
  1518. xArgRDX: RDX,
  1519. xArgCL: CL,
  1520. xArgCS: CS,
  1521. xArgDS: DS,
  1522. xArgES: ES,
  1523. xArgFS: FS,
  1524. xArgGS: GS,
  1525. xArgSS: SS,
  1526. xArgST: F0,
  1527. xArgXMM0: X0,
  1528. }
  1529. // memBytes records the size of the memory pointed at
  1530. // by a memory argument of the given form.
  1531. var memBytes = [...]int8{
  1532. xArgM128: 128 / 8,
  1533. xArgM256: 256 / 8,
  1534. xArgM16: 16 / 8,
  1535. xArgM16and16: (16 + 16) / 8,
  1536. xArgM16colon16: (16 + 16) / 8,
  1537. xArgM16colon32: (16 + 32) / 8,
  1538. xArgM16int: 16 / 8,
  1539. xArgM2byte: 2,
  1540. xArgM32: 32 / 8,
  1541. xArgM32and32: (32 + 32) / 8,
  1542. xArgM32fp: 32 / 8,
  1543. xArgM32int: 32 / 8,
  1544. xArgM64: 64 / 8,
  1545. xArgM64fp: 64 / 8,
  1546. xArgM64int: 64 / 8,
  1547. xArgMm2M64: 64 / 8,
  1548. xArgMmM32: 32 / 8,
  1549. xArgMmM64: 64 / 8,
  1550. xArgMoffs16: 16 / 8,
  1551. xArgMoffs32: 32 / 8,
  1552. xArgMoffs64: 64 / 8,
  1553. xArgMoffs8: 8 / 8,
  1554. xArgR32M16: 16 / 8,
  1555. xArgR32M8: 8 / 8,
  1556. xArgR64M16: 16 / 8,
  1557. xArgRM16: 16 / 8,
  1558. xArgRM32: 32 / 8,
  1559. xArgRM64: 64 / 8,
  1560. xArgRM8: 8 / 8,
  1561. xArgXmm2M128: 128 / 8,
  1562. xArgYmm2M256: 256 / 8,
  1563. xArgXmm2M16: 16 / 8,
  1564. xArgXmm2M32: 32 / 8,
  1565. xArgXmm2M64: 64 / 8,
  1566. xArgXmm: 128 / 8,
  1567. xArgXmmM128: 128 / 8,
  1568. xArgXmmM32: 32 / 8,
  1569. xArgXmmM64: 64 / 8,
  1570. }
  1571. // isCondJmp records the conditional jumps.
  1572. var isCondJmp = [maxOp + 1]bool{
  1573. JA: true,
  1574. JAE: true,
  1575. JB: true,
  1576. JBE: true,
  1577. JE: true,
  1578. JG: true,
  1579. JGE: true,
  1580. JL: true,
  1581. JLE: true,
  1582. JNE: true,
  1583. JNO: true,
  1584. JNP: true,
  1585. JNS: true,
  1586. JO: true,
  1587. JP: true,
  1588. JS: true,
  1589. }
  1590. // isLoop records the loop operators.
  1591. var isLoop = [maxOp + 1]bool{
  1592. LOOP: true,
  1593. LOOPE: true,
  1594. LOOPNE: true,
  1595. JECXZ: true,
  1596. JRCXZ: true,
  1597. }