Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1188 lines
50 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. /******************************************************************************
  3. Copyright (c) 1999 Advanced Micro Devices, Inc.
  4. LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
  5. EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
  6. NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
  7. PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
  8. DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
  9. BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
  10. INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
  11. OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
  12. OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
  13. NOT APPLY TO YOU.
  14. AMD does not assume any responsibility for any errors which may appear in the
  15. Materials nor any responsibility to support or update the Materials. AMD retains
  16. the right to make changes to its test specifications at any time, without notice.
  17. NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
  18. further information, software, technical information, know-how, or show-how
  19. available to you.
  20. So that all may benefit from your experience, please report any problems
  21. or suggestions about this software to 3dsdk.support@amd.com
  22. AMD Developer Technologies, M/S 585
  23. Advanced Micro Devices, Inc.
  24. 5900 E. Ben White Blvd.
  25. Austin, TX 78741
  26. 3dsdk.support@amd.com
  27. *******************************************************************************
  28. AMD3DX.H
  29. MACRO FORMAT
  30. ============
  31. This file contains inline assembly macros that
  32. generate AMD-3D instructions in binary format.
  33. Therefore, C or C++ programmer can use AMD-3D instructions
  34. without any penalty in their C or C++ source code.
The macro's name and format conventions are as follows:
  36. 1. First argument of macro is a destination and
  37. second argument is a source operand.
  38. ex) _asm PFCMPEQ (mm3, mm4)
  39. | |
  40. dst src
  41. 2. The destination operand can be m0 to m7 only.
  42. The source operand can be any one of the register
  43. m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
  44. that contains effective address.
  45. ex) _asm PFRCP (MM7, MM6)
  46. ex) _asm PFRCPIT2 (mm0, mm4)
  47. ex) _asm PFMUL (mm3, _edi)
3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
_ebx, _esi, or _edi that contains the effective address.
  50. ex) _asm PREFETCH (_edi)
For WATCOM C/C++ users, when using #pragma aux instead of
_asm, all macro names should be prefixed by a p_ or P_.
  53. Macros should not be enclosed in quotes.
  54. ex) p_pfrcp (MM7,MM6)
NOTE: Not all instruction macros, nor all possible
combinations of operands, have been explicitly
tested. If any errors are found, please report
them.
  59. EXAMPLE
  60. =======
  61. Following program doesn't do anything but it shows you
  62. how to use inline assembly AMD-3D instructions in C.
Note that this will only work in a flat memory model, in which
the segment registers cs, ds, ss and es all point to the same
linear address space of less than 4GB in total.
  66. Used Microsoft VC++ 5.0
  67. #include <stdio.h>
  68. #include "amd3d.h"
  69. void main ()
  70. {
  71. float x = (float)1.25;
  72. float y = (float)1.25;
  73. float z, zz;
  74. _asm {
  75. movd mm1, x
  76. movd mm2, y
  77. pfmul (mm1, mm2)
  78. movd z, mm1
  79. femms
  80. }
  81. printf ("value of z = %f\n", z);
  82. //
  83. // Demonstration of using the memory instead of
  84. // multimedia register
  85. //
  86. _asm {
  87. movd mm3, x
  88. lea esi, y // load effective address of y
  89. pfmul (mm3, _esi)
  90. movd zz, mm3
  91. femms
  92. }
  93. printf ("value of zz = %f\n", zz);
  94. }
  95. #pragma aux EXAMPLE with WATCOM C/C++ v11.x
  96. ===========================================
  97. extern void Add(float *__Dest, float *__A, float *__B);
  98. #pragma aux Add = \
  99. p_femms \
  100. "movd mm6,[esi]" \
  101. p_pfadd(mm6,_edi) \
  102. "movd [ebx],mm6" \
  103. p_femms \
  104. parm [ebx] [esi] [edi];
  105. *******************************************************************************/
  106. #ifndef _K3DMACROSINCLUDED_
  107. #define _K3DMACROSINCLUDED_
  108. #if defined (__WATCOMC__)
  109. // The WATCOM C/C++ version of the 3DNow! macros.
  110. //
// The older, combined register style for WATCOM C/C++ macros is not
// supported.
/* Operand defines for instructions with two operands */
  114. #define _k3d_mm0_mm0 0xc0
  115. #define _k3d_mm0_mm1 0xc1
  116. #define _k3d_mm0_mm2 0xc2
  117. #define _k3d_mm0_mm3 0xc3
  118. #define _k3d_mm0_mm4 0xc4
  119. #define _k3d_mm0_mm5 0xc5
  120. #define _k3d_mm0_mm6 0xc6
  121. #define _k3d_mm0_mm7 0xc7
  122. #define _k3d_mm0_eax 0x00
  123. #define _k3d_mm0_ecx 0x01
  124. #define _k3d_mm0_edx 0x02
  125. #define _k3d_mm0_ebx 0x03
  126. #define _k3d_mm0_esi 0x06
  127. #define _k3d_mm0_edi 0x07
  128. #define _k3d_mm1_mm0 0xc8
  129. #define _k3d_mm1_mm1 0xc9
  130. #define _k3d_mm1_mm2 0xca
  131. #define _k3d_mm1_mm3 0xcb
  132. #define _k3d_mm1_mm4 0xcc
  133. #define _k3d_mm1_mm5 0xcd
  134. #define _k3d_mm1_mm6 0xce
  135. #define _k3d_mm1_mm7 0xcf
  136. #define _k3d_mm1_eax 0x08
  137. #define _k3d_mm1_ecx 0x09
  138. #define _k3d_mm1_edx 0x0a
  139. #define _k3d_mm1_ebx 0x0b
  140. #define _k3d_mm1_esi 0x0e
  141. #define _k3d_mm1_edi 0x0f
  142. #define _k3d_mm2_mm0 0xd0
  143. #define _k3d_mm2_mm1 0xd1
  144. #define _k3d_mm2_mm2 0xd2
  145. #define _k3d_mm2_mm3 0xd3
  146. #define _k3d_mm2_mm4 0xd4
  147. #define _k3d_mm2_mm5 0xd5
  148. #define _k3d_mm2_mm6 0xd6
  149. #define _k3d_mm2_mm7 0xd7
  150. #define _k3d_mm2_eax 0x10
  151. #define _k3d_mm2_ecx 0x11
  152. #define _k3d_mm2_edx 0x12
  153. #define _k3d_mm2_ebx 0x13
  154. #define _k3d_mm2_esi 0x16
  155. #define _k3d_mm2_edi 0x17
  156. #define _k3d_mm3_mm0 0xd8
  157. #define _k3d_mm3_mm1 0xd9
  158. #define _k3d_mm3_mm2 0xda
  159. #define _k3d_mm3_mm3 0xdb
  160. #define _k3d_mm3_mm4 0xdc
  161. #define _k3d_mm3_mm5 0xdd
  162. #define _k3d_mm3_mm6 0xde
  163. #define _k3d_mm3_mm7 0xdf
  164. #define _k3d_mm3_eax 0x18
  165. #define _k3d_mm3_ecx 0x19
  166. #define _k3d_mm3_edx 0x1a
  167. #define _k3d_mm3_ebx 0x1b
  168. #define _k3d_mm3_esi 0x1e
  169. #define _k3d_mm3_edi 0x1f
  170. #define _k3d_mm4_mm0 0xe0
  171. #define _k3d_mm4_mm1 0xe1
  172. #define _k3d_mm4_mm2 0xe2
  173. #define _k3d_mm4_mm3 0xe3
  174. #define _k3d_mm4_mm4 0xe4
  175. #define _k3d_mm4_mm5 0xe5
  176. #define _k3d_mm4_mm6 0xe6
  177. #define _k3d_mm4_mm7 0xe7
  178. #define _k3d_mm4_eax 0x20
  179. #define _k3d_mm4_ecx 0x21
  180. #define _k3d_mm4_edx 0x22
  181. #define _k3d_mm4_ebx 0x23
  182. #define _k3d_mm4_esi 0x26
  183. #define _k3d_mm4_edi 0x27
  184. #define _k3d_mm5_mm0 0xe8
  185. #define _k3d_mm5_mm1 0xe9
  186. #define _k3d_mm5_mm2 0xea
  187. #define _k3d_mm5_mm3 0xeb
  188. #define _k3d_mm5_mm4 0xec
  189. #define _k3d_mm5_mm5 0xed
  190. #define _k3d_mm5_mm6 0xee
  191. #define _k3d_mm5_mm7 0xef
  192. #define _k3d_mm5_eax 0x28
  193. #define _k3d_mm5_ecx 0x29
  194. #define _k3d_mm5_edx 0x2a
  195. #define _k3d_mm5_ebx 0x2b
  196. #define _k3d_mm5_esi 0x2e
  197. #define _k3d_mm5_edi 0x2f
  198. #define _k3d_mm6_mm0 0xf0
  199. #define _k3d_mm6_mm1 0xf1
  200. #define _k3d_mm6_mm2 0xf2
  201. #define _k3d_mm6_mm3 0xf3
  202. #define _k3d_mm6_mm4 0xf4
  203. #define _k3d_mm6_mm5 0xf5
  204. #define _k3d_mm6_mm6 0xf6
  205. #define _k3d_mm6_mm7 0xf7
  206. #define _k3d_mm6_eax 0x30
  207. #define _k3d_mm6_ecx 0x31
  208. #define _k3d_mm6_edx 0x32
  209. #define _k3d_mm6_ebx 0x33
  210. #define _k3d_mm6_esi 0x36
  211. #define _k3d_mm6_edi 0x37
  212. #define _k3d_mm7_mm0 0xf8
  213. #define _k3d_mm7_mm1 0xf9
  214. #define _k3d_mm7_mm2 0xfa
  215. #define _k3d_mm7_mm3 0xfb
  216. #define _k3d_mm7_mm4 0xfc
  217. #define _k3d_mm7_mm5 0xfd
  218. #define _k3d_mm7_mm6 0xfe
  219. #define _k3d_mm7_mm7 0xff
  220. #define _k3d_mm7_eax 0x38
  221. #define _k3d_mm7_ecx 0x39
  222. #define _k3d_mm7_edx 0x3a
  223. #define _k3d_mm7_ebx 0x3b
  224. #define _k3d_mm7_esi 0x3e
  225. #define _k3d_mm7_edi 0x3f
  226. #define _k3d_name_xlat_m0 _mm0
  227. #define _k3d_name_xlat_m1 _mm1
  228. #define _k3d_name_xlat_m2 _mm2
  229. #define _k3d_name_xlat_m3 _mm3
  230. #define _k3d_name_xlat_m4 _mm4
  231. #define _k3d_name_xlat_m5 _mm5
  232. #define _k3d_name_xlat_m6 _mm6
  233. #define _k3d_name_xlat_m7 _mm7
  234. #define _k3d_name_xlat_M0 _mm0
  235. #define _k3d_name_xlat_M1 _mm1
  236. #define _k3d_name_xlat_M2 _mm2
  237. #define _k3d_name_xlat_M3 _mm3
  238. #define _k3d_name_xlat_M4 _mm4
  239. #define _k3d_name_xlat_M5 _mm5
  240. #define _k3d_name_xlat_M6 _mm6
  241. #define _k3d_name_xlat_M7 _mm7
  242. #define _k3d_name_xlat_mm0 _mm0
  243. #define _k3d_name_xlat_mm1 _mm1
  244. #define _k3d_name_xlat_mm2 _mm2
  245. #define _k3d_name_xlat_mm3 _mm3
  246. #define _k3d_name_xlat_mm4 _mm4
  247. #define _k3d_name_xlat_mm5 _mm5
  248. #define _k3d_name_xlat_mm6 _mm6
  249. #define _k3d_name_xlat_mm7 _mm7
  250. #define _k3d_name_xlat_MM0 _mm0
  251. #define _k3d_name_xlat_MM1 _mm1
  252. #define _k3d_name_xlat_MM2 _mm2
  253. #define _k3d_name_xlat_MM3 _mm3
  254. #define _k3d_name_xlat_MM4 _mm4
  255. #define _k3d_name_xlat_MM5 _mm5
  256. #define _k3d_name_xlat_MM6 _mm6
  257. #define _k3d_name_xlat_MM7 _mm7
  258. #define _k3d_name_xlat_eax _eax
  259. #define _k3d_name_xlat_ebx _ebx
  260. #define _k3d_name_xlat_ecx _ecx
  261. #define _k3d_name_xlat_edx _edx
  262. #define _k3d_name_xlat_esi _esi
  263. #define _k3d_name_xlat_edi _edi
  264. #define _k3d_name_xlat_ebp _ebp
  265. #define _k3d_name_xlat_EAX _eax
  266. #define _k3d_name_xlat_EBX _ebx
  267. #define _k3d_name_xlat_ECX _ecx
  268. #define _k3d_name_xlat_EDX _edx
  269. #define _k3d_name_xlat_ESI _esi
  270. #define _k3d_name_xlat_EDI _edi
  271. #define _k3d_name_xlat_EBP _ebp
  272. #define _k3d_name_xlat__eax _eax
  273. #define _k3d_name_xlat__ebx _ebx
  274. #define _k3d_name_xlat__ecx _ecx
  275. #define _k3d_name_xlat__edx _edx
  276. #define _k3d_name_xlat__esi _esi
  277. #define _k3d_name_xlat__edi _edi
  278. #define _k3d_name_xlat__ebp _ebp
  279. #define _k3d_name_xlat__EAX _eax
  280. #define _k3d_name_xlat__EBX _ebx
  281. #define _k3d_name_xlat__ECX _ecx
  282. #define _k3d_name_xlat__EDX _edx
  283. #define _k3d_name_xlat__ESI _esi
  284. #define _k3d_name_xlat__EDI _edi
  285. #define _k3d_name_xlat__EBP _ebp
  286. #define _k3d_xglue3(a,b,c) a##b##c
  287. #define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c)
  288. #define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src)
  289. /* Operand defines for prefetch and prefetchw */
  290. #define _k3d_pref_eax 0x00
  291. #define _k3d_pref_ecx 0x01
  292. #define _k3d_pref_edx 0x02
  293. #define _k3d_pref_ebx 0x03
  294. #define _k3d_pref_esi 0x06
  295. #define _k3d_pref_edi 0x07
  296. #define _k3d_pref_EAX 0x00
  297. #define _k3d_pref_ECX 0x01
  298. #define _k3d_pref_EDX 0x02
  299. #define _k3d_pref_EBX 0x03
  300. #define _k3d_pref_ESI 0x06
  301. #define _k3d_pref_EDI 0x07
  302. #define _k3d_prefw_eax 0x08
  303. #define _k3d_prefw_ecx 0x09
  304. #define _k3d_prefw_edx 0x0A
  305. #define _k3d_prefw_ebx 0x0B
  306. #define _k3d_prefw_esi 0x0E
  307. #define _k3d_prefw_edi 0x0F
  308. #define _k3d_prefw_EAX 0x08
  309. #define _k3d_prefw_ECX 0x09
  310. #define _k3d_prefw_EDX 0x0A
  311. #define _k3d_prefw_EBX 0x0B
  312. #define _k3d_prefw_ESI 0x0E
  313. #define _k3d_prefw_EDI 0x0F
  314. /* Defines for 3DNow! instructions */
  315. #define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d
  316. #define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae
  317. #define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e
  318. #define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0
  319. #define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90
  320. #define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0
  321. #define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4
  322. #define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94
  323. #define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4
  324. #define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96
  325. #define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6
  326. #define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6
  327. #define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97
  328. #define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7
  329. #define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a
  330. #define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa
  331. #define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d
  332. #define FEMMS db 0x0f, 0x0e
  333. #define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf
  334. #define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7
  335. #define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src
  336. #define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src
  337. #define CPUID db 0x0f, 0xa2
  338. /* Defines for new, K7 opcodes */
  339. #define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a
  340. #define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e
  341. #define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb
  342. #define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src)
  343. #define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src)
  344. #define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src)
  345. #define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src)
  346. #define PMULHUW(dst,src) db 0x0f, 0xe4, _k3d_MODRM(dst,src)
  347. #define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src)
  348. #define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src)
  349. #define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src)
  350. #define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src)
  351. #define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src)
  352. #define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk
  353. #define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk
  354. #define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk
  355. #define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst)
  356. #define SFENCE db 0x0f, 0xae, 0xf8
  357. /* Memory/offset versions of the opcodes */
  358. #define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d
  359. #define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae
  360. #define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e
  361. #define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0
  362. #define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90
  363. #define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0
  364. #define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4
  365. #define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94
  366. #define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4
  367. #define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96
  368. #define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6
  369. #define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6
  370. #define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97
  371. #define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7
  372. #define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a
  373. #define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xaa
  374. #define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d
  375. #define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf
  376. #define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7
  377. /* Memory/offset versions of the new, K7 opcodes */
  378. #define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a
  379. #define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e
  380. #define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb
  381. #define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off
  382. #define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off
  383. #define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off
  384. #define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off
  385. #define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off
  386. #define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off
  387. #define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off
  388. #define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off
  389. #define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off
  390. #define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off
  391. #define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off
  392. #define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk
  393. #define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk
  394. /* Defines for 3DNow! instructions for use in pragmas */
  395. #define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d
  396. #define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae
  397. #define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e
  398. #define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0
  399. #define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90
  400. #define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0
  401. #define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4
  402. #define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94
  403. #define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4
  404. #define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96
  405. #define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6
  406. #define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6
  407. #define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97
  408. #define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7
  409. #define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a
  410. #define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa
  411. #define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d
  412. #define p_femms 0x0f 0x0e
  413. #define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf
  414. #define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7
  415. #define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src
  416. #define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src
  417. #define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
  418. #define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
  419. #define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
  420. #define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
  421. #define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
  422. #define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
  423. #define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
  424. #define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
  425. #define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
  426. #define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
  427. #define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
  428. #define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
  429. #define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
  430. #define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
  431. #define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk
  432. #define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
  433. #define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
  434. #define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
  435. #define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
  436. #define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
  437. #define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
  438. #define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
  439. #define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
  440. #define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
  441. #define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
  442. #define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
  443. #define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
  444. #define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
  445. #define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
  446. #define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
  447. #define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
  448. #define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
  449. #define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
  450. #define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
  451. #define P_PAVGUSBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
  452. #define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
  453. #define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
  454. #define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
  455. #define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
  456. #define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
  457. #define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
  458. #define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
  459. #define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
  460. #define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
  461. #define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
  462. #define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
  463. #define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
  464. #define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
  465. #define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
  466. #define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
  467. #define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
  468. #define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
  469. #define P_PF2ID(dst,src) p_pf2id(dst,src)
  470. #define P_PFACC(dst,src) p_pfacc(dst,src)
  471. #define P_PFADD(dst,src) p_pfadd(dst,src)
  472. #define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src)
  473. #define P_PFCMPGE(dst,src) p_pfcmpge(dst,src)
  474. #define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src)
  475. #define P_PFMAX(dst,src) p_pfmax(dst,src)
  476. #define P_PFMIN(dst,src) p_pfmin(dst,src)
  477. #define P_PFMUL(dst,src) p_pfmul(dst,src)
  478. #define P_PFRCP(dst,src) p_pfrcp(dst,src)
  479. #define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src)
  480. #define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src)
  481. #define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src)
  482. #define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src)
  483. #define P_PFSUB(dst,src) p_pfsub(dst,src)
  484. #define P_PFSUBR(dst,src) p_pfsubr(dst,src)
  485. #define P_PI2FD(dst,src) p_pi2fd(dst,src)
  486. #define P_FEMMS p_femms
  487. #define P_PAVGUSB(dst,src) p_pavgusb(dst,src)
  488. #define P_PMULHRW(dst,src) p_pmulhrw(dst,src)
  489. #define P_PREFETCH(src) p_prefetch(src)
  490. #define P_PREFETCHW(src) p_prefetchw(src)
  491. #define p_CPUID 0x0f 0xa2
  492. #define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off)
  493. #define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off)
  494. #define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off)
  495. #define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off)
  496. #define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off)
  497. #define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off)
  498. #define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off)
  499. #define p_pfminm(dst,src,off) P_PFMINM(dst,src,off)
  500. #define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off)
  501. #define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off)
  502. #define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off)
  503. #define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off)
  504. #define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off)
  505. #define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off)
  506. #define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off)
  507. #define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off)
  508. #define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off)
  509. #define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off)
  510. #define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off)
  511. #define P_PFNACC(dst,src) p_pfnacc(dst,src)
  512. #define P_FPPNACC(dst,src) p_pfpnacc(dst,src)
  513. #define P_PSWAPD(dst,src) p_pswapd(dst,src)
  514. #define P_PMINUB(dst,src) p_pminub(dst,src)
  515. #define P_PMAXUB(dst,src) p_pmaxub(dst,src)
  516. #define P_PMINSW(dst,src) p_pminsw(dst,src)
  517. #define P_PMAXSW(dst,src) p_pmaxsw(dst,src)
  518. #define P_PMULHUW(dst,src) p_pmulhuw(dst,src)
  519. #define P_PAVGB(dst,src) p_pavgb(dst,src)
  520. #define P_PAVGW(dst,src) p_avgw(dst,src)
  521. #define P_PSADBW(dst,src) p_psadbw(dst,src)
  522. #define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src)
  523. #define P_PMASKMOVQ(dst,src) p_pmaskmovq(dst,src)
  524. #define P_PINSRW(dst,src,msk) p_pinsrw(dst,src)
  525. #define P_PEXTRW(dst,src,msk) p_pextrw(dst,src)
  526. #define P_PSHUFW(dst,src,msk) p_pshufw(dst,src)
  527. #define P_MOVNTQ(dst,src) p_movntq(dst,src)
  528. #define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off)
  529. #define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off)
  530. #define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off)
  531. #define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off)
  532. #define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off)
  533. #define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off)
  534. #define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off)
  535. #define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off)
  536. #define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off)
  537. #define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off)
  538. #define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off)
  539. #define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off)
  540. #define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off)
  541. #define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk)
  542. #define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk)
  543. #define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off)
  544. #elif defined (_MSC_VER) && !defined (__MWERKS__)
  545. // The Microsoft Visual C++ version of the 3DNow! macros.
  546. // Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
  547. #pragma warning(disable:4799)
  548. // Defines for operands.
  549. #define _K3D_MM0 0xc0
  550. #define _K3D_MM1 0xc1
  551. #define _K3D_MM2 0xc2
  552. #define _K3D_MM3 0xc3
  553. #define _K3D_MM4 0xc4
  554. #define _K3D_MM5 0xc5
  555. #define _K3D_MM6 0xc6
  556. #define _K3D_MM7 0xc7
  557. #define _K3D_mm0 0xc0
  558. #define _K3D_mm1 0xc1
  559. #define _K3D_mm2 0xc2
  560. #define _K3D_mm3 0xc3
  561. #define _K3D_mm4 0xc4
  562. #define _K3D_mm5 0xc5
  563. #define _K3D_mm6 0xc6
  564. #define _K3D_mm7 0xc7
  565. #define _K3D_EAX 0x00
  566. #define _K3D_ECX 0x01
  567. #define _K3D_EDX 0x02
  568. #define _K3D_EBX 0x03
  569. #define _K3D_ESI 0x06
  570. #define _K3D_EDI 0x07
  571. #define _K3D_eax 0x00
  572. #define _K3D_ecx 0x01
  573. #define _K3D_edx 0x02
  574. #define _K3D_ebx 0x03
  575. #define _K3D_esi 0x06
  576. #define _K3D_edi 0x07
  577. // These defines are for compatibility with the previous version of the header file.
  578. #define _K3D_M0 0xc0
  579. #define _K3D_M1 0xc1
  580. #define _K3D_M2 0xc2
  581. #define _K3D_M3 0xc3
  582. #define _K3D_M4 0xc4
  583. #define _K3D_M5 0xc5
  584. #define _K3D_M6 0xc6
  585. #define _K3D_M7 0xc7
  586. #define _K3D_m0 0xc0
  587. #define _K3D_m1 0xc1
  588. #define _K3D_m2 0xc2
  589. #define _K3D_m3 0xc3
  590. #define _K3D_m4 0xc4
  591. #define _K3D_m5 0xc5
  592. #define _K3D_m6 0xc6
  593. #define _K3D_m7 0xc7
  594. #define _K3D__EAX 0x00
  595. #define _K3D__ECX 0x01
  596. #define _K3D__EDX 0x02
  597. #define _K3D__EBX 0x03
  598. #define _K3D__ESI 0x06
  599. #define _K3D__EDI 0x07
  600. #define _K3D__eax 0x00
  601. #define _K3D__ecx 0x01
  602. #define _K3D__edx 0x02
  603. #define _K3D__ebx 0x03
  604. #define _K3D__esi 0x06
  605. #define _K3D__edi 0x07
  606. // General 3DNow! instruction format that is supported by
  607. // these macros. Note that only the most basic form of memory
  608. // operands are supported by these macros.
  609. #define InjK3DOps(dst,src,inst) \
  610. { \
  611. _asm _emit 0x0f \
  612. _asm _emit 0x0f \
  613. _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
  614. _asm _emit _3DNowOpcode##inst \
  615. }
  616. #define InjK3DMOps(dst,src,off,inst) \
  617. { \
  618. _asm _emit 0x0f \
  619. _asm _emit 0x0f \
  620. _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
  621. _asm _emit off \
  622. _asm _emit _3DNowOpcode##inst \
  623. }
  624. #define InjMMXOps(dst,src,inst) \
  625. { \
  626. _asm _emit 0x0f \
  627. _asm _emit _3DNowOpcode##inst \
  628. _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
  629. }
  630. #define InjMMXMOps(dst,src,off,inst) \
  631. { \
  632. _asm _emit 0x0f \
  633. _asm _emit _3DNowOpcode##inst \
  634. _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
  635. _asm _emit off \
  636. }
  637. #define _3DNowOpcodePF2ID 0x1d
  638. #define _3DNowOpcodePFACC 0xae
  639. #define _3DNowOpcodePFADD 0x9e
  640. #define _3DNowOpcodePFCMPEQ 0xb0
  641. #define _3DNowOpcodePFCMPGE 0x90
  642. #define _3DNowOpcodePFCMPGT 0xa0
  643. #define _3DNowOpcodePFMAX 0xa4
  644. #define _3DNowOpcodePFMIN 0x94
  645. #define _3DNowOpcodePFMUL 0xb4
  646. #define _3DNowOpcodePFRCP 0x96
  647. #define _3DNowOpcodePFRCPIT1 0xa6
  648. #define _3DNowOpcodePFRCPIT2 0xb6
  649. #define _3DNowOpcodePFRSQRT 0x97
  650. #define _3DNowOpcodePFRSQIT1 0xa7
  651. #define _3DNowOpcodePFSUB 0x9a
  652. #define _3DNowOpcodePFSUBR 0xaa
  653. #define _3DNowOpcodePI2FD 0x0d
  654. #define _3DNowOpcodePAVGUSB 0xbf
  655. #define _3DNowOpcodePMULHRW 0xb7
  656. #define _3DNowOpcodePFNACC 0x8a
  657. #define _3DNowOpcodeFPPNACC 0x8e
  658. #define _3DNowOpcodePSWAPD 0xbb
  659. #define _3DNowOpcodePMINUB 0xda
  660. #define _3DNowOpcodePMAXUB 0xde
  661. #define _3DNowOpcodePMINSW 0xea
  662. #define _3DNowOpcodePMAXSW 0xee
  663. #define _3DNowOpcodePMULHUW 0xe4
  664. #define _3DNowOpcodePAVGB 0xe0
  665. #define _3DNowOpcodePAVGW 0xe3
  666. #define _3DNowOpcodePSADBW 0xf6
  667. #define _3DNowOpcodePMOVMSKB 0xd7
  668. #define _3DNowOpcodePMASKMOVQ 0xf7
  669. #define _3DNowOpcodePINSRW 0xc4
  670. #define _3DNowOpcodePEXTRW 0xc5
  671. #define _3DNowOpcodePSHUFW 0x70
  672. #define _3DNowOpcodeMOVNTQ 0xe7
  673. #define _3DNowOpcodePREFETCHT 0x18
  674. #define PF2ID(dst,src) InjK3DOps(dst, src, PF2ID)
  675. #define PFACC(dst,src) InjK3DOps(dst, src, PFACC)
  676. #define PFADD(dst,src) InjK3DOps(dst, src, PFADD)
  677. #define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ)
  678. #define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE)
  679. #define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT)
  680. #define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX)
  681. #define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN)
  682. #define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL)
  683. #define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP)
  684. #define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1)
  685. #define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2)
  686. #define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT)
  687. #define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1)
  688. #define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB)
  689. #define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR)
  690. #define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD)
  691. #define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB)
  692. #define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW)
  693. #define FEMMS \
  694. { \
  695. _asm _emit 0x0f \
  696. _asm _emit 0x0e \
  697. }
  698. #define PREFETCH(src) \
  699. { \
  700. _asm _emit 0x0f \
  701. _asm _emit 0x0d \
  702. _asm _emit (_K3D_##src & 0x07) \
  703. }
  704. /* Prefetch with a short offset, < 127 or > -127
  705. Carefull! Doesn't check for your offset being
  706. in range. */
  707. #define PREFETCHM(src,off) \
  708. { \
  709. _asm _emit 0x0f \
  710. _asm _emit 0x0d \
  711. _asm _emit (0x40 | (_K3D_##src & 0x07)) \
  712. _asm _emit off \
  713. }
  714. /* Prefetch with a long offset */
  715. #define PREFETCHMLONG(src,off) \
  716. { \
  717. _asm _emit 0x0f \
  718. _asm _emit 0x0d \
  719. _asm _emit (0x80 | (_K3D_##src & 0x07)) \
  720. _asm _emit (off & 0x000000ff) \
  721. _asm _emit (off & 0x0000ff00) >> 8 \
  722. _asm _emit (off & 0x00ff0000) >> 16 \
  723. _asm _emit (off & 0xff000000) >> 24 \
  724. }
  725. #define PREFETCHW(src) \
  726. { \
  727. _asm _emit 0x0f \
  728. _asm _emit 0x0d \
  729. _asm _emit (0x08 | (_K3D_##src & 0x07)) \
  730. }
  731. #define PREFETCHWM(src,off) \
  732. { \
  733. _asm _emit 0x0f \
  734. _asm _emit 0x0d \
  735. _asm _emit 0x48 | (_K3D_##src & 0x07) \
  736. _asm _emit off \
  737. }
  738. #define PREFETCHWMLONG(src,off) \
  739. { \
  740. _asm _emit 0x0f \
  741. _asm _emit 0x0d \
  742. _asm _emit 0x88 | (_K3D_##src & 0x07) \
  743. _asm _emit (off & 0x000000ff) \
  744. _asm _emit (off & 0x0000ff00) >> 8 \
  745. _asm _emit (off & 0x00ff0000) >> 16 \
  746. _asm _emit (off & 0xff000000) >> 24 \
  747. }
  748. #define CPUID \
  749. { \
  750. _asm _emit 0x0f \
  751. _asm _emit 0xa2 \
  752. }
  753. /* Defines for new, K7 opcodes */
  754. #define SFENCE \
  755. { \
  756. _asm _emit 0x0f \
  757. _asm _emit 0xae \
  758. _asm _emit 0xf8 \
  759. }
  760. #define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
  761. #define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
  762. #define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
  763. #define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
  764. #define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
  765. #define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
  766. #define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
  767. #define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
  768. #define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
  769. #define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
  770. #define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
  771. #define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
  772. #define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
  773. #define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk
  774. #define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) _asm _emit msk
  775. #define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk
  776. #define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
  777. #define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
  778. #define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
  779. #define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
  780. #define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
  781. /* Memory/offset versions of the opcodes */
  782. #define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB)
  783. #define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID)
  784. #define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC)
  785. #define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD)
  786. #define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ)
  787. #define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE)
  788. #define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT)
  789. #define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX)
  790. #define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN)
  791. #define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL)
  792. #define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP)
  793. #define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1)
  794. #define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2)
  795. #define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT)
  796. #define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1)
  797. #define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB)
  798. #define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR)
  799. #define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD)
  800. #define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW)
  801. /* Memory/offset versions of the K7 opcodes */
  802. #define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
  803. #define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
  804. #define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
  805. #define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
  806. #define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
  807. #define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
  808. #define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
  809. #define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
  810. #define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
  811. #define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
  812. #define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
  813. #define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
  814. #define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
  815. #define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk
  816. #define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk
  817. #define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
  818. #define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
  819. #define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
  820. #define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
  821. #define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
  822. #else
  823. /* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */
  824. #define PAVGUSB(dst,src) pavgusb dst,src
  825. #define PF2ID(dst,src) pf2id dst,src
  826. #define PFACC(dst,src) pfacc dst,src
  827. #define PFADD(dst,src) pfadd dst,src
  828. #define PFCMPEQ(dst,src) pfcmpeq dst,src
  829. #define PFCMPGE(dst,src) pfcmpge dst,src
  830. #define PFCMPGT(dst,src) pfcmpgt dst,src
  831. #define PFMAX(dst,src) pfmax dst,src
  832. #define PFMIN(dst,src) pfmin dst,src
  833. #define PFMUL(dst,src) pfmul dst,src
  834. #define PFRCP(dst,src) pfrcp dst,src
  835. #define PFRCPIT1(dst,src) pfrcpit1 dst,src
  836. #define PFRCPIT2(dst,src) pfrcpit2 dst,src
  837. #define PFRSQRT(dst,src) pfrsqrt dst,src
  838. #define PFRSQIT1(dst,src) pfrsqit1 dst,src
  839. #define PFSUB(dst,src) pfsub dst,src
  840. #define PFSUBR(dst,src) pfsubr dst,src
  841. #define PI2FD(dst,src) pi2fd dst,src
  842. #define PMULHRW(dst,src) pmulhrw dst,src
  843. #define PREFETCH(src) prefetch src
  844. #define PREFETCHW(src) prefetchw src
  845. #define PAVGUSBM(dst,src,off) pavgusb dst,[src+off]
  846. #define PF2IDM(dst,src,off) PF2ID dst,[src+off]
  847. #define PFACCM(dst,src,off) PFACC dst,[src+off]
  848. #define PFADDM(dst,src,off) PFADD dst,[src+off]
  849. #define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off]
  850. #define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off]
  851. #define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off]
  852. #define PFMAXM(dst,src,off) PFMAX dst,[src+off]
  853. #define PFMINM(dst,src,off) PFMIN dst,[src+off]
  854. #define PFMULM(dst,src,off) PFMUL dst,[src+off]
  855. #define PFRCPM(dst,src,off) PFRCP dst,[src+off]
  856. #define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off]
  857. #define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off]
  858. #define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off]
  859. #define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off]
  860. #define PFSUBM(dst,src,off) PFSUB dst,[src+off]
  861. #define PFSUBRM(dst,src,off) PFSUBR dst,[src+off]
  862. #define PI2FDM(dst,src,off) PI2FD dst,[src+off]
  863. #define PMULHRWM(dst,src,off) PMULHRW dst,[src+off]
  864. #if defined (__MWERKS__)
  865. // At the moment, CodeWarrior does not support these opcodes, so hand-assemble them
  866. // Defines for operands.
  867. #define _K3D_MM0 0xc0
  868. #define _K3D_MM1 0xc1
  869. #define _K3D_MM2 0xc2
  870. #define _K3D_MM3 0xc3
  871. #define _K3D_MM4 0xc4
  872. #define _K3D_MM5 0xc5
  873. #define _K3D_MM6 0xc6
  874. #define _K3D_MM7 0xc7
  875. #define _K3D_mm0 0xc0
  876. #define _K3D_mm1 0xc1
  877. #define _K3D_mm2 0xc2
  878. #define _K3D_mm3 0xc3
  879. #define _K3D_mm4 0xc4
  880. #define _K3D_mm5 0xc5
  881. #define _K3D_mm6 0xc6
  882. #define _K3D_mm7 0xc7
  883. #define _K3D_EAX 0x00
  884. #define _K3D_ECX 0x01
  885. #define _K3D_EDX 0x02
  886. #define _K3D_EBX 0x03
  887. #define _K3D_ESI 0x06
  888. #define _K3D_EDI 0x07
  889. #define _K3D_eax 0x00
  890. #define _K3D_ecx 0x01
  891. #define _K3D_edx 0x02
  892. #define _K3D_ebx 0x03
  893. #define _K3D_esi 0x06
  894. #define _K3D_edi 0x07
  895. #define _K3D_EAX 0x00
  896. #define _K3D_ECX 0x01
  897. #define _K3D_EDX 0x02
  898. #define _K3D_EBX 0x03
  899. #define _K3D_ESI 0x06
  900. #define _K3D_EDI 0x07
  901. #define _K3D_eax 0x00
  902. #define _K3D_ecx 0x01
  903. #define _K3D_edx 0x02
  904. #define _K3D_ebx 0x03
  905. #define _K3D_esi 0x06
  906. #define _K3D_edi 0x07
  907. #define InjK3DOps(dst,src,inst) \
  908. db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst
  909. #define InjK3DMOps(dst,src,off,inst) \
  910. db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
  911. #define InjMMXOps(dst,src,inst) \
  912. db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
  913. #define InjMMXMOps(dst,src,off,inst) \
  914. db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
  915. #define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
  916. #define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
  917. #define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
  918. #define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
  919. #define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
  920. #define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
  921. #define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
  922. #define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
  923. #define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
  924. #define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
  925. #define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
  926. #define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
  927. #define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
  928. #define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) db msk
  929. #define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) db msk
  930. #define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) db msk
  931. #define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
  932. #define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
  933. #define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
  934. #define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
  935. #define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
  936. /* Memory/offset versions of the K7 opcodes */
  937. #define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
  938. #define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
  939. #define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
  940. #define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
  941. #define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
  942. #define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
  943. #define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
  944. #define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
  945. #define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
  946. #define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
  947. #define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
  948. #define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
  949. #define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
  950. #define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk
  951. #define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk
  952. #define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk
  953. #define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
  954. #define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
  955. #define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
  956. #define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
  957. #define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
  958. #else
  959. #define PFNACC(dst,src) PFNACC dst,src
  960. #define PFPNACC(dst,src) PFPNACC dst,src
  961. #define PSWAPD(dst,src) PSWAPD dst,src
  962. #define PMINUB(dst,src) PMINUB dst,src
  963. #define PMAXUB(dst,src) PMAXUB dst,src
  964. #define PMINSW(dst,src) PMINSW dst,src
  965. #define PMAXSW(dst,src) PMAXSW dst,src
  966. #define PMULHUW(dst,src) PMULHUW dst,src
  967. #define PAVGB(dst,src) PAVGB dst,src
  968. #define PAVGW(dst,src) PAVGW dst,src
  969. #define PSADBW(dst,src) PSADBW dst,src
  970. #define PMOVMSKB(dst,src) PMOVMSKB dst,src
  971. #define PMASKMOVQ(dst,src) PMASKMOVQ dst,src
  972. #define PINSRW(dst,src,msk) PINSRW dst,src,msk
  973. #define PEXTRW(dst,src,msk) PEXTRW dst,src,msk
  974. #define PSHUFW(dst,src,msk) PSHUFW dst,src,msk
  975. #define MOVNTQ(dst,src) MOVNTQ dst,src
  976. #define PFNACCM(dst,src,off) PFNACC dst,[src+off]
  977. #define PFPNACCM(dst,src,off) PFPNACC dst,[src+off]
  978. #define PSWAPDM(dst,src,off) PSWAPD dst,[src+off]
  979. #define PMINUBM(dst,src,off) PMINUB dst,[src+off]
  980. #define PMAXUBM(dst,src,off) PMAXUB dst,[src+off]
  981. #define PMINSWM(dst,src,off) PMINSW dst,[src+off]
  982. #define PMAXSWM(dst,src,off) PMAXSW dst,[src+off]
  983. #define PMULHUWM(dst,src,off) PMULHUW dst,[src+off]
  984. #define PAVGBM(dst,src,off) PAVGB dst,[src+off]
  985. #define PAVGWM(dst,src,off) PAVGW dst,[src+off]
  986. #define PSADBWM(dst,src,off) PSADBW dst,[src+off]
  987. #define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off]
  988. #define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off]
  989. #define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk
  990. #define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk
  991. #define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk
  992. #define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off]
  993. #endif
  994. #endif
  995. /* Just to deal with lower case. */
  996. #define pf2id(dst,src) PF2ID(dst,src)
  997. #define pfacc(dst,src) PFACC(dst,src)
  998. #define pfadd(dst,src) PFADD(dst,src)
  999. #define pfcmpeq(dst,src) PFCMPEQ(dst,src)
  1000. #define pfcmpge(dst,src) PFCMPGE(dst,src)
  1001. #define pfcmpgt(dst,src) PFCMPGT(dst,src)
  1002. #define pfmax(dst,src) PFMAX(dst,src)
  1003. #define pfmin(dst,src) PFMIN(dst,src)
  1004. #define pfmul(dst,src) PFMUL(dst,src)
  1005. #define pfrcp(dst,src) PFRCP(dst,src)
  1006. #define pfrcpit1(dst,src) PFRCPIT1(dst,src)
  1007. #define pfrcpit2(dst,src) PFRCPIT2(dst,src)
  1008. #define pfrsqrt(dst,src) PFRSQRT(dst,src)
  1009. #define pfrsqit1(dst,src) PFRSQIT1(dst,src)
  1010. #define pfsub(dst,src) PFSUB(dst,src)
  1011. #define pfsubr(dst,src) PFSUBR(dst,src)
  1012. #define pi2fd(dst,src) PI2FD(dst,src)
  1013. #define femms FEMMS
  1014. #define pavgusb(dst,src) PAVGUSB(dst,src)
  1015. #define pmulhrw(dst,src) PMULHRW(dst,src)
  1016. #define prefetch(src) PREFETCH(src)
  1017. #define prefetchw(src) PREFETCHW(src)
  1018. #define prefetchm(src,off) PREFETCHM(src,off)
  1019. #define prefetchmlong(src,off) PREFETCHMLONG(src,off)
  1020. #define prefetchwm(src,off) PREFETCHWM(src,off)
  1021. #define prefetchwmlong(src,off) PREFETCHWMLONG(src,off)
  1022. #define pfnacc(dst,src) PFNACC(dst,src)
  1023. #define pfpnacc(dst,src) PFPNACC(dst,src)
  1024. #define pswapd(dst,src) PSWAPD(dst,src)
  1025. #define pminub(dst,src) PMINUB(dst,src)
  1026. #define pmaxub(dst,src) PMAXUB(dst,src)
  1027. #define pminsw(dst,src) PMINSW(dst,src)
  1028. #define pmaxsw(dst,src) PMAXSW(dst,src)
  1029. #define pmulhuw(dst,src) PMULHUW(dst,src)
  1030. #define pavgb(dst,src) PAVGB(dst,src)
  1031. #define pavgw(dst,src) PAVGW(dst,src)
  1032. #define psadbw(dst,src) PSADBW(dst,src)
  1033. #define pmovmskb(dst,src) PMOVMSKB(dst,src)
  1034. #define pmaskmovq(dst,src) PMASKMOVQ(dst,src)
  1035. #define pinsrw(dst,src,msk) PINSRW(dst,src,msk)
  1036. #define pextrw(dst,src,msk) PEXTRW(dst,src,msk)
  1037. #define pshufw(dst,src,msk) PSHUFW(dst,src,msk)
  1038. #define movntq(dst,src) MOVNTQ(dst,src)
  1039. #define prefetchnta(mem) PREFETCHNTA(mem)
  1040. #define prefetcht0(mem) PREFETCHT0(mem)
  1041. #define prefetcht1(mem) PREFETCHT1(mem)
  1042. #define prefetcht2(mem) PREFETCHT2(mem)
  1043. #define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off)
  1044. #define pf2idm(dst,src,off) PF2IDM(dst,src,off)
  1045. #define pfaccm(dst,src,off) PFACCM(dst,src,off)
  1046. #define pfaddm(dst,src,off) PFADDM(dst,src,off)
  1047. #define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off)
  1048. #define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off)
  1049. #define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off)
  1050. #define pfmaxm(dst,src,off) PFMAXM(dst,src,off)
  1051. #define pfminm(dst,src,off) PFMINM(dst,src,off)
  1052. #define pfmulm(dst,src,off) PFMULM(dst,src,off)
  1053. #define pfrcpm(dst,src,off) PFRCPM(dst,src,off)
  1054. #define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off)
  1055. #define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off)
  1056. #define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off)
  1057. #define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off)
  1058. #define pfsubm(dst,src,off) PFSUBM(dst,src,off)
  1059. #define pfsubrm(dst,src,off) PFSUBRM(dst,src,off)
  1060. #define pi2fdm(dst,src,off) PI2FDM(dst,src,off)
  1061. #define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off)
  1062. #define cpuid CPUID
  1063. #define sfence SFENCE
  1064. #define pfnaccm(dst,src,off) PFNACCM(dst,src,off)
  1065. #define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off)
  1066. #define pswapdm(dst,src,off) PSWAPDM(dst,src,off)
  1067. #define pminubm(dst,src,off) PMINUBM(dst,src,off)
  1068. #define pmaxubm(dst,src,off) PMAXUBM(dst,src,off)
  1069. #define pminswm(dst,src,off) PMINSWM(dst,src,off)
  1070. #define pmaxswm(dst,src,off) PMAXSWM(dst,src,off)
  1071. #define pmulhuwm(dst,src,off) PMULHUWM(dst,src,off)
  1072. #define pavgbm(dst,src,off) PAVGBM(dst,src,off)
  1073. #define pavgwm(dst,src,off) PAVGWM(dst,src,off)
  1074. #define psadbwm(dst,src,off) PSADBWM(dst,src,off)
  1075. #define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off)
  1076. #define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
  1077. #define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk)
  1078. #define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk)
  1079. #define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk)
  1080. #define movntqm(dst,src,off) MOVNTQM(dst,src,off)
  1081. #define prefetchntam(mem,off) PREFETCHNTA(mem,off)
  1082. #define prefetcht0m(mem,off) PREFETCHT0(mem,off)
  1083. #define prefetcht1m(mem,off) PREFETCHT1(mem,off)
  1084. #define prefetcht2m(mem,off) PREFETCHT2(mem,off)
  1085. #endif