Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

506 lines
19 KiB

  1. /***********************************************************************
  2. * Microsoft Disassembler
  3. *
  4. * Microsoft Confidential. Copyright (c) Microsoft Corporation. All rights reserved.
  5. *
  6. * File Comments:
  7. *
  8. * This file is a copy of the master version owned by richards.
  9. * Contact richards for any changes.
  10. *
  11. ***********************************************************************/
  12. #ifndef MSDIS_H
  13. #define MSDIS_H
  14. #pragma pack(push, 8)
  15. #include <stddef.h> // For size_t
  16. #include <strstream> // For std::ostream
  17. // ------------------------------------------------------------
  18. // Start of internal vs external definitions: DISDLL selects the linkage of the exported DIS members
  19. // ------------------------------------------------------------
  20. #if defined(DISDLL) // Building the MSDIS DLL (the build defines DISDLL before including this header)
  21. #undef DISDLL // Discard the build's definition so DISDLL can be redefined as the export attribute
  22. #define DISDLL __declspec(dllexport)
  23. #else // Building an MSDIS client
  24. #define DISDLL __declspec(dllimport)
  25. #endif
  26. // ------------------------------------------------------------
  27. // End of internal vs external definitions
  28. // ------------------------------------------------------------
  29. class __declspec(novtable) DIS // Abstract disassembler interface (pure virtuals below); the constructor is protected, see PdisNew()
  30. {
  31. public:
  32. enum DIST // Disassembler type; passed to PdisNew() and returned by Dist()
  33. {
  34. distAM33, // Matsushita AM33
  35. distArm, // ARM
  36. distCee, // MSIL
  37. distIa64, // IA-64
  38. distM32R, // Mitsubishi M32R
  39. distMips, // MIPS R-Series
  40. distMips16, // MIPS16
  41. distPowerPc, // Motorola PowerPC
  42. distSh3, // Hitachi SuperH 3
  43. distSHcompact, // Hitachi SuperH (Compact mode)
  44. distSHmedia, // Hitachi SuperH (Media mode)
  45. distThumb, // Thumb
  46. distTriCore, // Infineon TriCore
  47. distX86, // x86 (32 bit mode)
  48. distX8616, // x86 (16 bit mode)
  49. distX8664, // x86 (64 bit mode)
  50. distArmConcan, // ARM Concan coprocessor
  51. distArmXmac, // ARM XMAC coprocessor
  52. };
  53. // A branch is defined as a transfer of control that doesn't
  54. // record the location of following block so that control may
  55. // return. A call does record the location of the following
  56. // block so that a subsequent indirect branch may return there.
  57. // The first number in the comments below is the number of
  58. // successors determinable by static analysis. There is a dependency
  59. // in SEC::FDoDisassembly() that trmtBra and above represent branch
  60. // or call types that are not valid in a delay slot of any of the
  61. // Def variants of termination type.
  62. enum TRMT // Architecture independent termination type
  63. {
  64. trmtUnknown, // Block hasn't been analyzed
  65. trmtFallThrough, // 1 Fall into following block
  66. trmtBra, // 1 Branch, Unconditional, Direct
  67. trmtBraCase, // ? Conditional, Direct, Multiple targets
  68. trmtBraCc, // 2 Branch, Conditional, Direct
  69. trmtBraCcDef, // 2 Branch, Conditional, Direct, Deferred
  70. trmtBraCcInd, // 1 Branch, Conditional, Indirect
  71. trmtBraCcIndDef, // 1 Branch, Conditional, Indirect, Deferred
  72. trmtBraDef, // 1 Branch, Unconditional, Direct, Deferred
  73. trmtBraInd, // 0 Branch, Unconditional, Indirect
  74. trmtBraIndDef, // 0 Branch, Unconditional, Indirect, Deferred
  75. trmtCall, // 2 Call, Unconditional, Direct
  76. trmtCallCc, // 2 Call, Conditional, Direct
  77. trmtCallCcDef, // 2 Call, Conditional, Direct, Deferred
  78. trmtCallCcInd, // 1 Call, Conditional, Indirect
  79. trmtCallDef, // 2 Call, Unconditional, Direct, Deferred
  80. trmtCallInd, // 1 Call, Unconditional, Indirect
  81. trmtCallIndDef, // 1 Call, Unconditional, Indirect, Deferred
  82. trmtTrap, // 1 Trap, Unconditional
  83. trmtTrapCc, // 1 Trap, Conditional
  84. };
  85. enum TRMTA // Architecture dependent termination type
  86. {
  87. trmtaUnknown = trmtUnknown,
  88. trmtaFallThrough = trmtFallThrough
  89. };
  90. typedef unsigned char BYTE;
  91. typedef unsigned short WORD;
  92. typedef unsigned long DWORD;
  93. typedef unsigned __int64 DWORDLONG;
  94. typedef DWORDLONG ADDR; // Address type used throughout the interface (unsigned 64-bit, per DWORDLONG)
  95. enum { addrNil = 0 }; // Nil address value; e.g. returned by AddrJumpTable() when there is no jump table
  96. // MEMREFT describes the types of memory references that an instruction
  97. // can make. If the memory reference can't be described by the defined
  98. // values, memreftOther is returned.
  99. enum MEMREFT
  100. {
  101. memreftNone, // Does not reference memory
  102. memreftRead, // Reads from single address
  103. memreftWrite, // Writes to single address
  104. memreftRdWr, // Read/Modify/Write of single address
  105. memreftOther, // None of the above
  106. };
  107. enum REGA // Architecture dependent register number
  108. {
  109. regaNil = -1,
  110. };
  111. enum OPA // Architecture dependent operation type
  112. {
  113. opaInvalid = -1,
  114. };
  115. enum OPCLS // Operand type
  116. {
  117. opclsNone = 0,
  118. opclsRegister,
  119. opclsImmediate,
  120. opclsMemory,
  121. };
  122. // OPERAND and INSTRUCTION are the structures used in
  123. // the interface between the disassembler and the routines that convert
  124. // native platform instructions into Vulcan IR.
  125. struct OPERAND
  126. {
  127. OPCLS opcls; // operand type
  128. REGA rega1; // arch dependent enum -- 1st register
  129. REGA rega2; // arch dependent enum -- 2nd register
  130. REGA rega3; // arch dependent enum -- 3rd register
  131. DWORDLONG dwl; // const, addr, etc. based on OPCLS
  132. size_t cb; // only valid for opclsMemory - some architectures add to this e.g. x86
  133. bool fImmediate; // true if dwl is valid
  134. WORD wScale; // any scaling factor to be applied to rega1
  135. };
  136. struct INSTRUCTION
  137. {
  138. OPA opa; // arch dependent enum -- opcode
  139. DWORD dwModifiers; // arch dependent bits modifying opa
  140. size_t coperand; // count of operands
  141. };
  142. // PFNCCHADDR is the type of the callback function that can be set
  143. // via PfncchaddrSet().
  144. typedef size_t (__stdcall *PFNCCHADDR)(const DIS *, ADDR, char *, size_t, DWORDLONG *);
  145. // PFNCCHCONST is the type of the callback function that can be set
  146. // via PfncchconstSet().
  147. typedef size_t (__stdcall *PFNCCHCONST)(const DIS *, DWORD, char *, size_t);
  148. // PFNCCHFIXUP is the type of the callback function that can be set
  149. // via PfncchfixupSet().
  150. typedef size_t (__stdcall *PFNCCHFIXUP)(const DIS *, ADDR, size_t, char *, size_t, DWORDLONG *);
  151. // PFNCCHREGREL is the type of the callback function that can be set
  152. // via PfncchregrelSet().
  153. typedef size_t (__stdcall *PFNCCHREGREL)(const DIS *, REGA, DWORD, char *, size_t, DWORD *);
  154. // PFNCCHREG is the type of the callback function that can be set
  155. // via PfncchregSet().
  156. typedef size_t (__stdcall *PFNCCHREG)(const DIS *, REGA, char *, size_t);
  157. // PFNDWGETREG is the type of the callback function that can be set
  158. // via PfndwgetregSet().
  159. typedef DWORDLONG (__stdcall *PFNDWGETREG)(const DIS *, REGA);
  160. // Methods
  161. ///////////////////////////////////////////////////////////////////////////
  162. // In these comments, please note that "current instruction" is defined
  163. // by the results of the most recent call to CbDisassemble() and of any
  164. // intervening call(s) to FSelectInstruction().
  165. ///////////////////////////////////////////////////////////////////////////
  166. virtual ~DIS();
  167. // PdisNew() is the exported static entry point that takes a DIST and returns a DIS *. UNDONE: Document the failure result and how the object is released (not shown in this header).
  168. static DISDLL DIS * __stdcall PdisNew(DIST);
  169. // Addr() returns the address of the current instruction. This
  170. // is the same value as the ADDR parameter passed to CbDisassemble.
  171. // The return value of this method is not valid if the last call to
  172. // CbDisassemble returned zero.
  173. DISDLL ADDR Addr() const;
  174. // AddrAddress() returns an ADDR for the given size_t (presumably an operand index -- confirm); the offset stored by CbGenerateLoadAddress() is only valid when this returns something other than addrNil. UNDONE: Full comment.
  175. virtual ADDR AddrAddress(size_t) const;
  176. // AddrInstruction() returns an ADDR, presumably for the current instruction; how it differs from Addr() is not documented here. UNDONE: Full comment.
  177. virtual ADDR AddrInstruction() const;
  178. // AddrJumpTable() returns the address of a potential jump table used by
  179. // the current instruction. The return value of this method is not valid
  180. // if the last call to CbDisassemble returned zero or if the termination
  181. // type is an indirect branch variant. If the last instruction does not
  182. // identify a potential jump table, this method returns addrNil.
  183. virtual ADDR AddrJumpTable() const;
  184. // AddrOperand() returns an ADDR for the operand selected by the size_t argument (presumably an operand index -- confirm). UNDONE: Full comment.
  185. virtual ADDR AddrOperand(size_t) const;
  186. // AddrTarget() returns the address of the branch target of the specified
  187. // operand (first operand by default) of the current instruction.
  188. // The return value of this method is not valid if the last call to
  189. // CbDisassemble returned zero or if the termination type is not
  190. // one of the direct branch or call variants.
  191. virtual ADDR AddrTarget(size_t = 1) const = 0;
  192. // Cb() returns the size in bytes of the current instruction,
  193. // or the size of a 'bundle' on those architectures that group multiple
  194. // instructions together.
  195. // The return value of this method is not valid if the last call to
  196. // CbDisassemble returned zero.
  197. virtual size_t Cb() const = 0;
  198. // CbAssemble() will assemble a single instruction into the provided
  199. // buffer assuming the provided address. On bundled architectures,
  200. // this function is not yet implemented. If the resulting buffer contains
  201. // a valid instruction, CbAssemble will return the number of bytes in
  202. // the instruction, otherwise it returns zero.
  203. virtual size_t CbAssemble(ADDR, void *, size_t);
  204. // CbDisassemble() will disassemble a single instruction from the provided
  205. // buffer assuming the provided address. On those architectures which
  206. // 'bundle' multiple instructions together, CbDisassemble() will process
  207. // the entire 'bundle' and the caller is responsible for calling both
  208. // Cinstruction() and FSelectInstruction() as appropriate. If the buffer
  209. // contains a valid instruction, CbDisassemble will return the number of
  210. // bytes in the instruction (on bundled architectures, the number of bytes
  211. // in the bundle *if* the buffer contained a valid bundle), otherwise it
  212. // returns zero.
  213. virtual size_t CbDisassemble(ADDR, const void *, size_t) = 0;
  214. // CbGenerateLoadAddress generates one or more instructions to load
  215. // the address of the memory operand from the current instruction into
  216. // a register. UNDONE: This register is currently hard coded for each
  217. // architecture. When pibAddress is non-NULL, this method will store
  218. // the offset of a possible address immediate in this location. The
  219. // value stored is only valid if the AddrAddress method returns a
  220. // value other than addrNil. It is not valid to call this method after
  221. // a call to CbDisassemble that returned 0 or when the return value of
  222. // Memreft is memreftNone. It is architecture dependent whether this
  223. // method will succeed when the return value of Memreft is memreftOther.
  224. //
  225. // UNDONE: Add reg parameter.
  226. virtual size_t CbGenerateLoadAddress(size_t, void *, size_t, size_t * = NULL) const;
  227. // CbJumpEntry() returns the size of the individual entries in the jump
  228. // table identified by AddrJumpTable(). The return value of this method
  229. // is not valid if either the return value of AddrJumpTable() is not valid
  230. // or AddrJumpTable() returned addrNil.
  231. virtual size_t CbJumpEntry() const;
  232. // CbOperand() returns the size of the memory operand of the current
  233. // instruction. The return value of this method is not valid if Memreft()
  234. // returns memreftNone or memreftOther or if the last call to CbDisassemble
  235. // returned zero.
  236. virtual size_t CbOperand(size_t) const;
  237. // CchFormatAddr() formats the provided address in the style used for the
  238. // architecture. The return value is the size of the formatted address
  239. // not including the terminating null. If the provided buffer is not
  240. // large enough, this method returns 0.
  241. DISDLL size_t CchFormatAddr(ADDR, char *, size_t) const;
  242. // CchFormatBytes() formats the data bytes of the current instruction
  243. // and returns the size of the formatted buffer not including the
  244. // terminating null. If the provided buffer is not large enough, this
  245. // method returns 0. It is not valid to call this method after a call to
  246. // CbDisassemble that returned zero.
  247. virtual size_t CchFormatBytes(char *, size_t) const = 0;
  248. // CchFormatBytesMax() returns the maximum size possibly returned by
  249. // CchFormatBytes().
  250. virtual size_t CchFormatBytesMax() const = 0;
  251. // CchFormatInstr() formats the current instruction and returns the
  252. // size of the formatted instruction not including the terminating
  253. // null. If the provided buffer is not large enough, this method returns
  254. // 0. It is not valid to call this method after a call to CbDisassemble
  255. // that returned zero.
  256. DISDLL size_t CchFormatInstr(char *, size_t) const;
  257. // Cinstruction() tells how many machine instructions resulted from the
  258. // most recent call to CbDisassemble(). On most architectures this value
  259. // will always be one (1); when it is not, the caller is responsible for
  260. // using FSelectInstruction() as appropriate to access each instruction
  261. // in turn.
  262. virtual size_t Cinstruction() const;
  263. // Coperand() returns the number of operands in the current instruction.
  264. virtual size_t Coperand() const = 0;
  265. // CregaRead() takes a REGA array and a size_t (presumably its capacity) and returns a count; presumably it reports the registers read by the current instruction. UNDONE: Confirm and document.
  266. virtual size_t CregaRead(REGA *, size_t) const;
  267. // CregaWritten() takes a REGA array and a size_t (presumably its capacity) and returns a count; presumably it reports the registers written by the current instruction. UNDONE: Confirm and document.
  268. virtual size_t CregaWritten(REGA *, size_t) const;
  269. // Dist() returns the disassembler type of this instance.
  270. DISDLL DIST Dist() const;
  271. // FDecode converts the current machine instruction into a decoded opcode
  272. // and operand set. The OPERAND * points to an array of decoded operands.
  273. // The size_t argument is the size of the input array. The number of
  274. // actual operands is returned in the INSTRUCTION.
  275. virtual bool FDecode(INSTRUCTION *, OPERAND *, size_t) const;
  276. // FEncode converts the INSTRUCTION and the array of decoded
  277. // operands into a machine instruction.
  278. virtual bool FEncode(const INSTRUCTION *, const OPERAND *, size_t);
  279. // FormatAddr() takes an output stream and an ADDR; presumably the std::ostream counterpart of CchFormatAddr(). UNDONE: Confirm and document.
  280. virtual void FormatAddr(std::ostream&, ADDR) const;
  281. // FormatInstr() takes an output stream; presumably the std::ostream counterpart of CchFormatInstr(). Pure virtual, so derived classes must supply it. UNDONE: Confirm and document.
  282. virtual void FormatInstr(std::ostream&) const = 0;
  283. // For those architectures in which calls to CbDisassemble() will generate
  284. // more than one resulting instruction, FSelectInstruction() determines
  285. // which instruction (0-based) all following calls will process.
  286. virtual bool FSelectInstruction(size_t);
  287. // Memreft() returns the memory reference type of the specified operand of
  288. // the current instruction. It is not valid to call this method
  289. // after a call to CbDisassemble that returned zero.
  290. virtual MEMREFT Memreft(size_t) const = 0;
  291. // PfncchaddrSet() sets the callback function for symbol lookup. This
  292. // function returns the previous value of the callback function address.
  293. // If the address is non-zero, the callback function is called during
  294. // CchFormatInstr to query the symbol for the supplied address. If there
  295. // is no symbol at this address, the callback should return 0.
  296. DISDLL PFNCCHADDR PfncchaddrSet(PFNCCHADDR);
  297. // PfncchconstSet() sets the callback function for constant pool lookup.
  298. // This function returns the previous value of the callback function address.
  299. // If the address is non-zero, the callback function is called during
  300. // CchFormatInstr to query the string for the supplied constant index.
  301. // If there is no constant with this index, the callback should return 0.
  302. DISDLL PFNCCHCONST PfncchconstSet(PFNCCHCONST);
  303. // PfncchfixupSet() sets the callback function for symbol lookup. This
  304. // function returns the previous value of the callback function address.
  305. // If the address is non-zero, the callback function is called during
  306. // CchFormatInstr to query the symbol and displacement referenced by
  307. // operands of the current instruction. The callback should examine the
  308. // contents of the memory identified by the supplied address and size and
  309. // return the name of any symbol targeted by a fixup on this memory and the
  310. // displacement from that symbol. If there is no fixup on the specified
  311. // memory, the callback should return 0.
  312. DISDLL PFNCCHFIXUP PfncchfixupSet(PFNCCHFIXUP);
  313. // PfncchregrelSet() takes a PFNCCHREGREL callback and returns a PFNCCHREGREL (presumably the previous callback, as with the setters above). UNDONE: Document when the callback is invoked.
  314. DISDLL PFNCCHREGREL PfncchregrelSet(PFNCCHREGREL);
  315. // PfncchregSet() takes a PFNCCHREG callback and returns a PFNCCHREG (presumably the previous callback, as with the setters above). UNDONE: Document when the callback is invoked.
  316. DISDLL PFNCCHREG PfncchregSet(PFNCCHREG);
  317. // PfndwgetregSet() takes a PFNDWGETREG callback and returns a PFNDWGETREG (presumably the previous callback, as with the setters above). UNDONE: Document when the callback is invoked.
  318. DISDLL PFNDWGETREG PfndwgetregSet(PFNDWGETREG);
  319. // PvClient() returns the current value of the client pointer.
  320. DISDLL void *PvClient() const;
  321. // PvClientSet() sets the value of a void pointer that the client can
  322. // later query with PvClient(). This function returns the previous value
  323. // of the client pointer.
  324. DISDLL void *PvClientSet(void *);
  325. // SetAddr64() sets whether addresses are 32 bit or 64 bit. The default
  326. // is 32 bit.
  327. DISDLL void SetAddr64(bool);
  328. // Trmt() returns the architecture independent termination type of the
  329. // current instruction. The return value of this method is not
  330. // valid if the last call to CbDisassemble returned zero.
  331. virtual TRMT Trmt() const = 0;
  332. // Trmta() returns the architecture dependent termination type of the
  333. // current instruction. The return value of this method is not
  334. // valid if the last call to CbDisassemble returned zero.
  335. virtual TRMTA Trmta() const = 0;
  336. // UNDONE : These functions have been placed at the end of the vtable
  337. // to maintain compatibility for the time-being. These should be
  338. // moved back into alphabetical order in the future.
  339. // DwModifiers() returns the architecture dependent modifier flags
  340. // of the current decoded instruction. The return value of this
  341. // method is not valid if the last call to FDecode returned false.
  342. virtual DWORD DwModifiers() const;
  343. // Opa() returns the architecture dependent operation type of the
  344. // current decoded instruction. The return value of this method is
  345. // not valid if the last call to FDecode returned false.
  346. virtual OPA Opa() const;
  347. protected:
  348. DIS(DIST); // Protected constructor taking the disassembler type, so only derived classes can construct directly
  349. void FormatHex(std::ostream&, DWORDLONG) const; // Presumably writes the value to the stream as hex -- body not in this header
  350. void FormatSignedHex(std::ostream&, DWORDLONG) const; // Presumably the signed variant of FormatHex() -- body not in this header
  351. DIST m_dist; // State from here down presumably backs Dist(), SetAddr64(), the Pfn*Set() callbacks, PvClient() and Addr() -- accessor bodies are not in this header
  352. bool m_fAddr64;
  353. PFNCCHADDR m_pfncchaddr;
  354. PFNCCHCONST m_pfncchconst;
  355. PFNCCHFIXUP m_pfncchfixup;
  356. PFNCCHREGREL m_pfncchregrel;
  357. PFNCCHREG m_pfncchreg;
  358. PFNDWGETREG m_pfndwgetreg;
  359. void *m_pvClient;
  360. ADDR m_addr;
  361. };
  362. #pragma pack(pop)
  363. #endif // MSDIS_H