Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1795 lines
49 KiB

  1. // TITLE("Capture and Restore Context")
  2. //++
  3. //
  4. // Module Name:
  5. //
  6. // capture.s
  7. //
  8. // Abstract:
  9. //
  10. // This module implements the code necessary to capture and restore
  11. // the context of the caller.
  12. //
  13. // Author:
  14. //
  15. // William K. Cheung (wcheung) 08-Jan-1996
  16. //
  17. // Environment:
  18. //
  19. // Any mode.
  20. //
  21. // Revision History:
  22. //
  23. //--
  24. #include "ksia64.h"
  25. .global ZwContinue
  26. .type ZwContinue, @function
  27. //++
  28. //
  29. // VOID
  30. // RtlCaptureContext (
  31. // OUT PCONTEXT ContextRecord
  32. // )
  33. //
  34. // Routine Description:
  35. //
  36. // This function captures the context of the caller in the specified
  37. // context record.
  38. //
  39. // N.B. This header states that the context record is not guaranteed to be
  40. // quadword aligned and that no double floating stores are used; the body,
  41. // however, uses 16-byte stf.spill stores. TODO: confirm the alignment rule.
  42. //
  43. // Arguments:
  44. //
  45. // ContextRecord (a0) - Supplies the address of a context record.
  46. //
  47. // Return Value:
  48. //
  49. // None.
  50. //
  51. //--
  52. LEAF_ENTRY(RtlCaptureContext)
  53. //
  54. // Save all integer registers and flush the RSE
  55. //
  56. .prologue
  57. .regstk 1, 10, 0, 0
  58. rbsp = loc9 // receives ar.bsp; adjusted below before being stored to CxRsBSP/CxRsBSPSTORE
  59. rpfs = loc8 // ar.pfs as returned by alloc
  60. rbrp = loc7 // copy of brp; stored to CxBrRp and CxStIIP
  61. rpr = loc6 // copy of the predicate registers
  62. runat = loc4 // ar.unat on entry; restored just before returning
  63. flag = t16 // value written to CxContextFlags
  64. rpsr = t22 // psr.um, used only for the PSR_MFH test (rpsr is re-aliased to t5 further down)
  65. alloc rpfs = ar.pfs, 1, 10, 0, 0
  66. add loc0 = CxIntGp, a0 // loc0 walks gp, t0-t1, s0-s3, v0, teb, t2-t3, sp, t4-t7
  67. add loc1 = CxIntT8, a0 // loc1 walks t8-t22, then CxPreds and CxIntNats
  68. ;;
  69. flushrs
  70. .save ar.unat, loc4
  71. mov runat = ar.unat
  72. mov rpr = pr
  73. PROLOGUE_END
  74. .mem.offset 0,0
  75. st8.spill.nta [loc0] = gp, CxIntT0 - CxIntGp
  76. .mem.offset 8,0
  77. st8.spill.nta [loc1] = t8, CxIntT9 - CxIntT8
  78. add loc2 = CxIntGp, a0 // address of the gp slot, used below to position the NaT bits
  79. ;;
  80. .mem.offset 0,0
  81. st8.spill.nta [loc0] = t0, CxIntT1 - CxIntT0
  82. .mem.offset 8,0
  83. st8.spill.nta [loc1] = t9, CxIntT10 - CxIntT9
  84. shr loc2 = loc2, 3
  85. ;;
  86. .mem.offset 0,0
  87. st8.spill.nta [loc0] = t1, CxIntS0 - CxIntT1
  88. .mem.offset 8,0
  89. st8.spill.nta [loc1] = t10, CxIntT11 - CxIntT10
  90. and t0 = 0x3f, loc2 // t0 = bits 8:3 of the gp slot address (presumably the ar.unat bit st8.spill used for gp)
  91. ;;
  92. .mem.offset 0,0
  93. st8.spill.nta [loc0] = s0, CxIntS1 - CxIntS0
  94. .mem.offset 8,0
  95. st8.spill.nta [loc1] = t11, CxIntT12 - CxIntT11
  96. cmp4.ge pt1, pt0 = 1, t0 // pt1: t0 <= 1 (plain left shift); pt0: t0 > 1 (rotate right)
  97. ;;
  98. .mem.offset 0,0
  99. st8.spill.nta [loc0] = s1, CxIntS2 - CxIntS1
  100. .mem.offset 8,0
  101. st8.spill.nta [loc1] = t12, CxIntT13 - CxIntT12
  102. (pt1) sub t1 = 1, t0 // left-shift count = 1 - t0
  103. ;;
  104. .mem.offset 0,0
  105. st8.spill.nta [loc0] = s2, CxIntS3 - CxIntS2
  106. .mem.offset 8,0
  107. st8.spill.nta [loc1] = t13, CxIntT14 - CxIntT13
  108. (pt0) add t1 = -1, t0 // right-rotate count = t0 - 1
  109. ;;
  110. .mem.offset 0,0
  111. st8.spill.nta [loc0] = s3, CxIntV0 - CxIntS3
  112. .mem.offset 8,0
  113. st8.spill.nta [loc1] = t14, CxIntT15 - CxIntT14
  114. (pt0) sub t8 = 65, t0 // complementary left-shift count = 64 - (t0 - 1)
  115. ;;
  116. .mem.offset 0,0
  117. st8.spill.nta [loc0] = v0, CxIntTeb - CxIntV0
  118. .mem.offset 8,0
  119. st8.spill.nta [loc1] = t15, CxIntT16 - CxIntT15
  120. nop.i 0
  121. ;;
  122. .mem.offset 0,0
  123. st8.spill.nta [loc0] = teb, CxIntT2 - CxIntTeb
  124. .mem.offset 8,0
  125. st8.spill.nta [loc1] = t16, CxIntT17 - CxIntT16
  126. mov rbrp = brp
  127. ;;
  128. .mem.offset 0,0
  129. st8.spill.nta [loc0] = t2, CxIntT3 - CxIntT2
  130. .mem.offset 8,0
  131. st8.spill.nta [loc1] = t17, CxIntT18 - CxIntT17
  132. mov t11 = bs0 // t11-t17 were spilled above and are now reused to hold branch registers
  133. ;;
  134. .mem.offset 0,0
  135. st8.spill.nta [loc0] = t3, CxIntSp - CxIntT3
  136. .mem.offset 8,0
  137. st8.spill.nta [loc1] = t18, CxIntT19 - CxIntT18
  138. mov t12 = bs1
  139. ;;
  140. .mem.offset 0,0
  141. st8.spill.nta [loc0] = sp, CxIntT4 - CxIntSp
  142. .mem.offset 8,0
  143. st8.spill.nta [loc1] = t19, CxIntT20 - CxIntT19
  144. mov t13 = bs2
  145. ;;
  146. .mem.offset 0,0
  147. st8.spill.nta [loc0] = t4, CxIntT5 - CxIntT4
  148. .mem.offset 8,0
  149. st8.spill.nta [loc1] = t20, CxIntT21 - CxIntT20
  150. mov t14 = bs3
  151. ;;
  152. .mem.offset 0,0
  153. st8.spill.nta [loc0] = t5, CxIntT6 - CxIntT5
  154. .mem.offset 8,0
  155. st8.spill.nta [loc1] = t21, CxIntT22 - CxIntT21
  156. mov t15 = bs4
  157. ;;
  158. .mem.offset 0,0
  159. st8.spill.nta [loc0] = t6, CxIntT7 - CxIntT6
  160. .mem.offset 8,0
  161. st8.spill.nta [loc1] = t22, CxPreds - CxIntT22
  162. mov t16 = bt0
  163. ;;
  164. st8.spill.nta [loc0] = t7
  165. st8.nta [loc1] = rpr, CxIntNats - CxPreds // save predicates
  166. mov t17 = bt1
  167. ;;
  168. mov t9 = ar.unat // ar.unat as left by the st8.spill sequence above
  169. mov t4 = ar.fpsr
  170. add loc2 = CxBrRp, a0
  171. ;;
  172. add loc3 = CxBrS3, a0
  173. (pt1) shl t9 = t9, t1 // t0 <= 1: shift left so that bit t0 lands on bit 1
  174. (pt0) shr.u t2 = t9, t1 // t0 > 1: low part of the rotate right by (t0 - 1)
  175. ;;
  176. //
  177. // Save branch registers.
  178. //
  179. st8.nta [loc2] = rbrp, CxBrS0 - CxBrRp // save brp
  180. st8.nta [loc3] = t14, CxBrS4 - CxBrS3 // save bs3
  181. (pt0) shl t3 = t9, t8 // high part of the rotate
  182. ;;
  183. st8.nta [loc2] = t11, CxBrS1 - CxBrS0 // save bs0
  184. st8.nta [loc3] = t15, CxBrT0 - CxBrS4 // save bs4
  185. (pt0) or t9 = t2, t3 // t9 = unat rotated right by (t0 - 1): bit t0 lands on bit 1
  186. ;;
  187. st8.nta [loc2] = t12, CxBrS2 - CxBrS1 // save bs1
  188. st8.nta [loc3] = t16, CxBrT1 - CxBrT0 // save bt0
  189. add loc0 = CxStFPSR, a0
  190. ;;
  191. st8.nta [loc2] = t13 // save bs2
  192. st8.nta [loc3] = t17 // save bt1
  193. nop.i 0
  194. ;;
  195. st8.nta [loc0] = t4 // save fpsr
  196. st8.nta [loc1] = t9 // save nat bits
  197. ;;
  198. #if !defined(NTOS_KERNEL_RUNTIME)
  199. mov t0 = ar21 // ar21, ar25, ar27, ar29 are stored at CxStFCR + 0, 16, 32, 48
  200. mov t1 = ar24 // ar24, ar26, ar28, ar30 are stored at CxEflag + 0, 16, 32, 48
  201. add loc0 = CxStFCR, a0
  202. add loc1 = CxEflag, a0
  203. ;;
  204. mov t2 = ar25
  205. mov t3 = ar26
  206. st8.nta [loc0] = t0, 16
  207. st8.nta [loc1] = t1, 16
  208. ;;
  209. mov t0 = ar27
  210. mov t1 = ar28
  211. st8.nta [loc0] = t2, 16
  212. st8.nta [loc1] = t3, 16
  213. ;;
  214. mov t2 = ar29
  215. mov t3 = ar30
  216. st8.nta [loc0] = t0, 16
  217. st8.nta [loc1] = t1, 16
  218. ;;
  219. st8.nta [loc0] = t2, 16
  220. st8.nta [loc1] = t3, 16
  221. #endif // !defined(NTOS_KERNEL_RUNTIME)
  222. mov rbsp = ar.bsp
  223. add loc2 = CxFltS0, a0
  224. add loc3 = CxFltS1, a0
  225. ;;
  226. //
  227. // Save floating registers fs0 - fs19 and ft0 - ft9. f32 - f127 are saved
  228. // further below, only in the user-mode build and only when psr.mfh is set.
  229. stf.spill.nta [loc2] = fs0, CxFltS2 - CxFltS0
  230. stf.spill.nta [loc3] = fs1, CxFltS3 - CxFltS1
  231. shr t0 = rpfs, 7
  232. ;;
  233. stf.spill.nta [loc2] = fs2, CxFltT0 - CxFltS2
  234. stf.spill.nta [loc3] = fs3, CxFltT1 - CxFltS3
  235. and t0 = 0x7f, t0 // t0 = bits 13:7 of the saved ar.pfs (presumably the sol field)
  236. ;;
  237. stf.spill.nta [loc2] = ft0, CxFltT2 - CxFltT0
  238. stf.spill.nta [loc3] = ft1, CxFltT3 - CxFltT1
  239. shr t1 = rbsp, 3
  240. ;;
  241. stf.spill.nta [loc2] = ft2, CxFltT4 - CxFltT2
  242. stf.spill.nta [loc3] = ft3, CxFltT5 - CxFltT3
  243. and t1 = 0x3f, t1 // t1 = bits 8:3 of ar.bsp (8-byte slot index within a 64-slot group)
  244. ;;
  245. stf.spill.nta [loc2] = ft4, CxFltT6 - CxFltT4
  246. stf.spill.nta [loc3] = ft5, CxFltT7 - CxFltT5
  247. sub t2 = t0, t1
  248. ;;
  249. stf.spill.nta [loc2] = ft6, CxFltT8 - CxFltT6
  250. stf.spill.nta [loc3] = ft7, CxFltT9 - CxFltT7
  251. cmp4.le pt1, pt0 = t2, zero // pt1: t2 <= 0, t0 needs no extra slot; pt0: at least one extra slot
  252. ;;
  253. stf.spill.nta [loc2] = ft8, CxFltS4 - CxFltT8
  254. stf.spill.nta [loc3] = ft9, CxFltS5 - CxFltT9
  255. (pt0) add t2 = -1, t2
  256. ;;
  257. stf.spill.nta [loc2] = fs4, CxFltS6 - CxFltS4
  258. stf.spill.nta [loc3] = fs5, CxFltS7 - CxFltS5
  259. (pt0) add t0 = 1, t0 // count one extra 8-byte slot (presumably an RNAT collection slot)
  260. ;;
  261. stf.spill.nta [loc2] = fs6, CxFltS8 - CxFltS6
  262. stf.spill.nta [loc3] = fs7, CxFltS9 - CxFltS7
  263. (pt0) add t2 = -63, t2
  264. ;;
  265. stf.spill.nta [loc2] = fs8, CxFltS10 - CxFltS8
  266. stf.spill.nta [loc3] = fs9, CxFltS11 - CxFltS9
  267. (pt0) cmp4.ge.unc pt2, pt3 = t2, zero // pt2: t2 still >= 0, Rcc10p adds another slot; pt3: done
  268. ;;
  269. stf.spill.nta [loc2] = fs10, CxFltS12 - CxFltS10
  270. stf.spill.nta [loc3] = fs11, CxFltS13 - CxFltS11
  271. mov rpsr = psr.um
  272. (pt1) br.cond.spnt Rcc20p
  273. ;;
  274. Rcc10p:
  275. (pt2) add t0 = 1, t0
  276. (pt2) add t2 = -63, t2
  277. (pt3) br.cond.sptk Rcc20p
  278. ;;
  279. cmp4.ge pt2, pt3 = t2, zero
  280. nop.m 0
  281. br Rcc10p
  282. ;;
  283. Rcc20p:
  284. stf.spill.nta [loc2] = fs12, CxFltS14 - CxFltS12
  285. stf.spill.nta [loc3] = fs13, CxFltS15 - CxFltS13
  286. shl t0 = t0, 3 // slot count -> byte count
  287. ;;
  288. stf.spill.nta [loc2] = fs14, CxFltS16 - CxFltS14
  289. stf.spill.nta [loc3] = fs15, CxFltS17 - CxFltS15
  290. sub rbsp = rbsp, t0 // rbsp = ar.bsp - 8 * t0; this is the value stored to CxRsBSP
  291. ;;
  292. stf.spill.nta [loc2] = fs16, CxFltS18 - CxFltS16
  293. stf.spill.nta [loc3] = fs17, CxFltS19 - CxFltS17
  294. tbit.z pt2, pt1 = rpsr, PSR_MFH // pt2: psr.mfh clear; pt1: psr.mfh set
  295. ;;
  296. stf.spill.nta [loc2] = fs18, CxFltF32 - CxFltS18
  297. stf.spill.nta [loc3] = fs19, CxFltF33 - CxFltS19
  298. mov flag = CONTEXT_CONTROL | CONTEXT_LOWER_FLOATING_POINT | CONTEXT_INTEGER
  299. ;;
  300. #if !defined(NTOS_KERNEL_RUNTIME)
  301. //
  302. // there is no need to capture the high fp set if the privilege
  303. // mode is kernel or the psr.mfh bit is not set in user mode.
  304. //
  305. (pt1) mov flag = CONTEXT_FULL
  306. (pt2) br.cond.sptk Rcc30p // psr.mfh clear: skip the f32 - f127 stores
  307. ;;
  308. stf.spill.nta [loc2] = f32, CxFltF34 - CxFltF32
  309. stf.spill.nta [loc3] = f33, CxFltF35 - CxFltF33
  310. nop.i 0
  311. ;;
  312. stf.spill.nta [loc2] = f34, CxFltF36 - CxFltF34
  313. stf.spill.nta [loc3] = f35, CxFltF37 - CxFltF35
  314. nop.i 0
  315. ;;
  316. stf.spill.nta [loc2] = f36, CxFltF38 - CxFltF36
  317. stf.spill.nta [loc3] = f37, CxFltF39 - CxFltF37
  318. nop.i 0
  319. ;;
  320. stf.spill.nta [loc2] = f38, CxFltF40 - CxFltF38
  321. stf.spill.nta [loc3] = f39, CxFltF41 - CxFltF39
  322. nop.i 0
  323. ;;
  324. stf.spill.nta [loc2] = f40, CxFltF42 - CxFltF40
  325. stf.spill.nta [loc3] = f41, CxFltF43 - CxFltF41
  326. nop.i 0
  327. ;;
  328. stf.spill.nta [loc2] = f42, CxFltF44 - CxFltF42
  329. stf.spill.nta [loc3] = f43, CxFltF45 - CxFltF43
  330. nop.i 0
  331. ;;
  332. stf.spill.nta [loc2] = f44, CxFltF46 - CxFltF44
  333. stf.spill.nta [loc3] = f45, CxFltF47 - CxFltF45
  334. nop.i 0
  335. ;;
  336. stf.spill.nta [loc2] = f46, CxFltF48 - CxFltF46
  337. stf.spill.nta [loc3] = f47, CxFltF49 - CxFltF47
  338. nop.i 0
  339. ;;
  340. stf.spill.nta [loc2] = f48, CxFltF50 - CxFltF48
  341. stf.spill.nta [loc3] = f49, CxFltF51 - CxFltF49
  342. nop.i 0
  343. ;;
  344. stf.spill.nta [loc2] = f50, CxFltF52 - CxFltF50
  345. stf.spill.nta [loc3] = f51, CxFltF53 - CxFltF51
  346. nop.i 0
  347. ;;
  348. stf.spill.nta [loc2] = f52, CxFltF54 - CxFltF52
  349. stf.spill.nta [loc3] = f53, CxFltF55 - CxFltF53
  350. nop.i 0
  351. ;;
  352. stf.spill.nta [loc2] = f54, CxFltF56 - CxFltF54
  353. stf.spill.nta [loc3] = f55, CxFltF57 - CxFltF55
  354. nop.i 0
  355. ;;
  356. stf.spill.nta [loc2] = f56, CxFltF58 - CxFltF56
  357. stf.spill.nta [loc3] = f57, CxFltF59 - CxFltF57
  358. nop.i 0
  359. ;;
  360. stf.spill.nta [loc2] = f58, CxFltF60 - CxFltF58
  361. stf.spill.nta [loc3] = f59, CxFltF61 - CxFltF59
  362. nop.i 0
  363. ;;
  364. stf.spill.nta [loc2] = f60, CxFltF62 - CxFltF60
  365. stf.spill.nta [loc3] = f61, CxFltF63 - CxFltF61
  366. nop.i 0
  367. ;;
  368. stf.spill.nta [loc2] = f62, CxFltF64 - CxFltF62
  369. stf.spill.nta [loc3] = f63, CxFltF65 - CxFltF63
  370. nop.i 0
  371. ;;
  372. stf.spill.nta [loc2] = f64, CxFltF66 - CxFltF64
  373. stf.spill.nta [loc3] = f65, CxFltF67 - CxFltF65
  374. nop.i 0
  375. ;;
  376. stf.spill.nta [loc2] = f66, CxFltF68 - CxFltF66
  377. stf.spill.nta [loc3] = f67, CxFltF69 - CxFltF67
  378. nop.i 0
  379. ;;
  380. stf.spill.nta [loc2] = f68, CxFltF70 - CxFltF68
  381. stf.spill.nta [loc3] = f69, CxFltF71 - CxFltF69
  382. nop.i 0
  383. ;;
  384. stf.spill.nta [loc2] = f70, CxFltF72 - CxFltF70
  385. stf.spill.nta [loc3] = f71, CxFltF73 - CxFltF71
  386. nop.i 0
  387. ;;
  388. stf.spill.nta [loc2] = f72, CxFltF74 - CxFltF72
  389. stf.spill.nta [loc3] = f73, CxFltF75 - CxFltF73
  390. nop.i 0
  391. ;;
  392. stf.spill.nta [loc2] = f74, CxFltF76 - CxFltF74
  393. stf.spill.nta [loc3] = f75, CxFltF77 - CxFltF75
  394. nop.i 0
  395. ;;
  396. stf.spill.nta [loc2] = f76, CxFltF78 - CxFltF76
  397. stf.spill.nta [loc3] = f77, CxFltF79 - CxFltF77
  398. nop.i 0
  399. ;;
  400. stf.spill.nta [loc2] = f78, CxFltF80 - CxFltF78
  401. stf.spill.nta [loc3] = f79, CxFltF81 - CxFltF79
  402. nop.i 0
  403. ;;
  404. stf.spill.nta [loc2] = f80, CxFltF82 - CxFltF80
  405. stf.spill.nta [loc3] = f81, CxFltF83 - CxFltF81
  406. nop.i 0
  407. ;;
  408. stf.spill.nta [loc2] = f82, CxFltF84 - CxFltF82
  409. stf.spill.nta [loc3] = f83, CxFltF85 - CxFltF83
  410. nop.i 0
  411. ;;
  412. stf.spill.nta [loc2] = f84, CxFltF86 - CxFltF84
  413. stf.spill.nta [loc3] = f85, CxFltF87 - CxFltF85
  414. nop.i 0
  415. ;;
  416. stf.spill.nta [loc2] = f86, CxFltF88 - CxFltF86
  417. stf.spill.nta [loc3] = f87, CxFltF89 - CxFltF87
  418. nop.i 0
  419. ;;
  420. stf.spill.nta [loc2] = f88, CxFltF90 - CxFltF88
  421. stf.spill.nta [loc3] = f89, CxFltF91 - CxFltF89
  422. nop.i 0
  423. ;;
  424. stf.spill.nta [loc2] = f90, CxFltF92 - CxFltF90
  425. stf.spill.nta [loc3] = f91, CxFltF93 - CxFltF91
  426. nop.i 0
  427. ;;
  428. stf.spill.nta [loc2] = f92, CxFltF94 - CxFltF92
  429. stf.spill.nta [loc3] = f93, CxFltF95 - CxFltF93
  430. nop.i 0
  431. ;;
  432. stf.spill.nta [loc2] = f94, CxFltF96 - CxFltF94
  433. stf.spill.nta [loc3] = f95, CxFltF97 - CxFltF95
  434. nop.i 0
  435. ;;
  436. stf.spill.nta [loc2] = f96, CxFltF98 - CxFltF96
  437. stf.spill.nta [loc3] = f97, CxFltF99 - CxFltF97
  438. nop.i 0
  439. ;;
  440. stf.spill.nta [loc2] = f98, CxFltF100 - CxFltF98
  441. stf.spill.nta [loc3] = f99, CxFltF101 - CxFltF99
  442. nop.i 0
  443. ;;
  444. stf.spill.nta [loc2] = f100, CxFltF102 - CxFltF100
  445. stf.spill.nta [loc3] = f101, CxFltF103 - CxFltF101
  446. nop.i 0
  447. ;;
  448. stf.spill.nta [loc2] = f102, CxFltF104 - CxFltF102
  449. stf.spill.nta [loc3] = f103, CxFltF105 - CxFltF103
  450. nop.i 0
  451. ;;
  452. stf.spill.nta [loc2] = f104, CxFltF106 - CxFltF104
  453. stf.spill.nta [loc3] = f105, CxFltF107 - CxFltF105
  454. nop.i 0
  455. ;;
  456. stf.spill.nta [loc2] = f106, CxFltF108 - CxFltF106
  457. stf.spill.nta [loc3] = f107, CxFltF109 - CxFltF107
  458. nop.i 0
  459. ;;
  460. stf.spill.nta [loc2] = f108, CxFltF110 - CxFltF108
  461. stf.spill.nta [loc3] = f109, CxFltF111 - CxFltF109
  462. nop.i 0
  463. ;;
  464. stf.spill.nta [loc2] = f110, CxFltF112 - CxFltF110
  465. stf.spill.nta [loc3] = f111, CxFltF113 - CxFltF111
  466. nop.i 0
  467. ;;
  468. stf.spill.nta [loc2] = f112, CxFltF114 - CxFltF112
  469. stf.spill.nta [loc3] = f113, CxFltF115 - CxFltF113
  470. nop.i 0
  471. ;;
  472. stf.spill.nta [loc2] = f114, CxFltF116 - CxFltF114
  473. stf.spill.nta [loc3] = f115, CxFltF117 - CxFltF115
  474. nop.i 0
  475. ;;
  476. stf.spill.nta [loc2] = f116, CxFltF118 - CxFltF116
  477. stf.spill.nta [loc3] = f117, CxFltF119 - CxFltF117
  478. nop.i 0
  479. ;;
  480. stf.spill.nta [loc2] = f118, CxFltF120 - CxFltF118
  481. stf.spill.nta [loc3] = f119, CxFltF121 - CxFltF119
  482. nop.i 0
  483. ;;
  484. stf.spill.nta [loc2] = f120, CxFltF122 - CxFltF120
  485. stf.spill.nta [loc3] = f121, CxFltF123 - CxFltF121
  486. nop.i 0
  487. ;;
  488. stf.spill.nta [loc2] = f122, CxFltF124 - CxFltF122
  489. stf.spill.nta [loc3] = f123, CxFltF125 - CxFltF123
  490. nop.i 0
  491. ;;
  492. stf.spill.nta [loc2] = f124, CxFltF126 - CxFltF124
  493. stf.spill.nta [loc3] = f125, CxFltF127 - CxFltF125
  494. nop.i 0
  495. ;;
  496. stf.spill.nta [loc2] = f126
  497. stf.spill.nta [loc3] = f127
  498. nop.i 0
  499. ;;
  500. Rcc30p:
  501. #endif // !defined(NTOS_KERNEL_RUNTIME)
  502. //
  503. // Save application registers, control information and set context flags.
  504. //
  505. User=pt0 // set when bit 62 of sp is 0
  506. Krnl=pt1 // set when bit 62 of sp is 1
  507. rdcr=t1 // cr.dcr, loaded only under Krnl
  508. sol=t4 // not referenced below in this routine
  509. rpsr=t5 // psr (Krnl) or psr.um (User); replaces the earlier t22 alias
  510. is=t6 // not referenced below in this routine
  511. rccv=t7 // ar.ccv, later reused for ar.csd
  512. rlc=t8 // ar.lc
  513. rec=t9 // ar.ec
  514. rrsc=t10 // ar.rsc on entry to this section; restored at the end
  515. rrnat=t11 // ar.rnat
  516. addr0=t17 // store pointer starting at CxApUNAT
  517. addr1=t18 // store pointer starting at CxApLC
  518. tmp=t19 // scratch
  519. mov rrsc = ar.rsc
  520. tbit.nz Krnl, User = sp, 62 // bit 62 of sp is 1 when in kernel
  521. mov rlc = ar.lc
  522. ;;
  523. mov ar.rsc = r0 // put RSE in lazy mode
  524. mov rccv = ar.ccv
  525. mov rec = ar.ec
  526. ;;
  527. (Krnl) mov rpsr = psr
  528. (User) mov rpsr = psr.um
  529. add addr0 = CxApUNAT, a0
  530. mov rrnat = ar.rnat
  531. add addr1 = CxApLC, a0
  532. (Krnl) mov rdcr = cr.dcr
  533. (Krnl) movl tmp = 1 << PSR_BN
  534. ;;
  535. st8.nta [addr0] = runat, CxApEC - CxApUNAT // CxApUNAT = ar.unat as it was on entry
  536. st8.nta [addr1] = rlc, CxApCCV - CxApLC
  537. (Krnl) or rpsr = tmp, rpsr // kernel only: set the PSR_BN bit in the saved psr
  538. ;;
  539. st8.nta [addr0] = rec, CxApDCR - CxApEC
  540. st8.nta [addr1] = rccv, CxRsPFS - CxApCCV
  541. mov tmp = 1
  542. ;;
  543. st8.nta [addr0] = rdcr, CxRsBSP - CxApDCR // NOTE(review): under User, rdcr (t1) was not loaded above, so the stale t1 value is stored - confirm this is intended
  544. st8.nta [addr1] = rpfs, CxRsBSPSTORE - CxRsPFS
  545. shl tmp = tmp, 63 // tmp = bit 63
  546. ;;
  547. st8.nta [addr0] = rbsp, CxRsRSC - CxRsBSP // CxRsBSP = adjusted bsp computed at Rcc20p
  548. st8.nta [addr1] = rbsp, CxRsRNAT - CxRsBSPSTORE // CxRsBSPSTORE receives the same value
  549. or rpfs = rpfs, tmp // validate IFS
  550. ;;
  551. st8.nta [addr0] = rrsc, CxStIIP - CxRsRSC
  552. st8.nta [addr1] = rrnat, CxStIFS - CxRsRNAT
  553. mov rccv = ar.csd // rccv now carries ar.csd (ar.ccv was already stored)
  554. mov rbsp = ar.bsp // reload the current, unadjusted bsp for the RNAT store below
  555. ;;
  556. st8.nta [addr0] = rbrp, CxStIPSR - CxStIIP // CxStIIP = brp, the return address
  557. st8.nta [addr1] = rpfs, CxSegCSD - CxStIFS // CxStIFS = ar.pfs with bit 63 set
  558. add tmp = CxContextFlags, a0
  559. ;;
  560. st8.nta [addr0] = rpsr // save psr
  561. st8.nta [addr1] = rccv // save CxSegCSD
  562. dep rbsp = 1, rbsp, 3, 6 // Calculate the address of the NAT collection
  563. ;;
  564. mov ar.rsc = rrsc // restore RSC
  565. st4.nta [tmp] = flag
  566. st8.nta [rbsp] = rrnat // Save the current RNAT in NAT collection for the BSP.
  567. // This handles the case where the collection location is in
  568. // the middle of the out registers, and never gets written, but
  569. // is read when we do a continue with this context.
  570. mov ar.unat = runat // restore ar.unat
  571. (p0) br.ret.sptk brp // return to caller.
  572. LEAF_EXIT(RtlCaptureContext)
  573. //++
  574. //
  575. // VOID
  576. // RtlCaptureNonVolatileContext (
  577. // OUT PCONTEXT ContextRecord
  578. // )
  579. //
  580. // Routine Description:
  581. //
  582. // This function captures the non-volatile context of the caller in the specified
  583. // context record.
  584. //
  585. // N.B. This header states that the context record is not guaranteed to be
  586. // quadword aligned and that no double floating stores are used; the body,
  587. // however, uses 16-byte stf.spill stores. TODO: confirm the alignment rule.
  588. //
  589. // Arguments:
  590. //
  591. // ContextRecord (a0) - Supplies the address of a context record.
  592. //
  593. // Return Value:
  594. //
  595. // None.
  596. //
  597. //--
  598. LEAF_ENTRY(RtlCaptureNonVolatileContext)
  599. //
  600. // Save all non-volatile integer registers and flush the RSE
  601. //
  602. .prologue
  603. .regstk 1, 10, 0, 0
  604. rbsp = loc9 // receives ar.bsp; adjusted below before being stored to CxRsBSP/CxRsBSPSTORE
  605. rpfs = loc8 // ar.pfs as returned by alloc
  606. rbrp = loc7 // copy of brp; stored to CxBrRp and CxStIIP
  607. rpr = loc6 // copy of the predicate registers
  608. runat = loc4 // ar.unat on entry; restored just before returning
  609. flag = t16 // value written to CxContextFlags
  610. rpsr = t22 // psr.um (rpsr is re-aliased to t5 further down)
  611. alloc rpfs = ar.pfs, 1, 10, 0, 0
  612. add loc0 = CxIntGp, a0 // loc0 walks gp, s1, v0, teb, sp
  613. add loc1 = CxIntS0, a0 // loc1 walks s0, s2, s3, then CxPreds and CxIntNats
  614. ;;
  615. flushrs
  616. .save ar.unat, loc4
  617. mov runat = ar.unat
  618. mov rpr = pr
  619. PROLOGUE_END
  620. .mem.offset 0,0
  621. st8.spill.nta [loc0] = gp, CxIntS1 - CxIntGp
  622. .mem.offset 8,0
  623. st8.spill.nta [loc1] = s0, CxIntS2 - CxIntS0
  624. add loc2 = CxIntGp, a0 // address of the gp slot, used below to position the NaT bits
  625. ;;
  626. .mem.offset 0,0
  627. st8.spill.nta [loc0] = s1, CxIntV0 - CxIntS1
  628. .mem.offset 8,0
  629. st8.spill.nta [loc1] = s2, CxIntS3 - CxIntS2
  630. shr loc2 = loc2, 3
  631. ;;
  632. .mem.offset 0,0
  633. st8.spill.nta [loc0] = v0, CxIntTeb - CxIntV0
  634. .mem.offset 8,0
  635. st8.spill.nta [loc1] = s3, CxPreds - CxIntS3
  636. and t0 = 0x3f, loc2 // t0 = bits 8:3 of the gp slot address (presumably the ar.unat bit st8.spill used for gp)
  637. ;;
  638. .mem.offset 0,0
  639. st8.spill.nta [loc0] = teb, CxIntSp - CxIntTeb
  640. .mem.offset 8,0
  641. st8.nta [loc1] = rpr, CxIntNats - CxPreds // save predicates
  642. cmp4.ge pt1, pt0 = 1, t0 // pt1: t0 <= 1 (plain left shift); pt0: t0 > 1 (rotate right)
  643. ;;
  644. (pt1) sub t1 = 1, t0 // left-shift count = 1 - t0
  645. (pt0) add t1 = -1, t0 // right-rotate count = t0 - 1
  646. (pt0) sub t8 = 65, t0 // complementary left-shift count = 64 - (t0 - 1)
  647. ;;
  648. mov rbrp = brp
  649. mov t11 = bs0
  650. mov t12 = bs1
  651. ;;
  652. mov t13 = bs2
  653. mov t14 = bs3
  654. mov t15 = bs4
  655. ;;
  656. st8.spill.nta [loc0] = sp
  657. ;;
  658. mov t9 = ar.unat // ar.unat as left by the st8.spill sequence above
  659. mov t4 = ar.fpsr
  660. ;;
  661. add loc2 = CxBrRp, a0
  662. add loc3 = CxBrS3, a0
  663. add loc0 = CxStFPSR, a0
  664. ;;
  665. (pt1) shl t9 = t9, t1 // t0 <= 1: shift left so that bit t0 lands on bit 1
  666. (pt0) shr.u t2 = t9, t1 // t0 > 1: low part of the rotate right by (t0 - 1)
  667. nop.i 0
  668. ;;
  669. //
  670. // Save branch registers.
  671. //
  672. st8.nta [loc2] = rbrp, CxBrS0 - CxBrRp // save brp
  673. st8.nta [loc3] = t14, CxBrS4 - CxBrS3 // save bs3
  674. (pt0) shl t3 = t9, t8 // high part of the rotate
  675. ;;
  676. st8.nta [loc2] = t11, CxBrS1 - CxBrS0 // save bs0
  677. st8.nta [loc3] = t15, CxBrT0 - CxBrS4 // save bs4
  678. (pt0) or t9 = t2, t3 // t9 = unat rotated right by (t0 - 1): bit t0 lands on bit 1
  679. ;;
  680. st8.nta [loc2] = t12, CxBrS2 - CxBrS1 // save bs1
  681. st8.nta [loc0] = t4 // save fpsr
  682. nop.i 0
  683. ;;
  684. st8.nta [loc2] = t13 // save bs2
  685. st8.nta [loc1] = t9 // save nat bits
  686. nop.i 0
  687. ;;
  688. #if !defined(NTOS_KERNEL_RUNTIME)
  689. mov t0 = ar21 // ar21, ar25, ar27, ar29 are stored at CxStFCR + 0, 16, 32, 48
  690. mov t1 = ar24 // ar24, ar26, ar28, ar30 are stored at CxEflag + 0, 16, 32, 48
  691. add loc0 = CxStFCR, a0
  692. add loc1 = CxEflag, a0
  693. ;;
  694. mov t2 = ar25
  695. mov t3 = ar26
  696. st8.nta [loc0] = t0, 16
  697. st8.nta [loc1] = t1, 16
  698. ;;
  699. mov t0 = ar27
  700. mov t1 = ar28
  701. st8.nta [loc0] = t2, 16
  702. st8.nta [loc1] = t3, 16
  703. ;;
  704. mov t2 = ar29
  705. mov t3 = ar30
  706. st8.nta [loc0] = t0, 16
  707. st8.nta [loc1] = t1, 16
  708. ;;
  709. st8.nta [loc0] = t2, 16
  710. st8.nta [loc1] = t3, 16
  711. #endif // !defined(NTOS_KERNEL_RUNTIME)
  712. mov rbsp = ar.bsp
  713. add loc2 = CxFltS0, a0
  714. add loc3 = CxFltS1, a0
  715. ;;
  716. //
  717. // Save floating registers fs0 - fs19 only; the CxFltT0 - CxFltT9 slots
  718. // are stepped over by the post-increment from CxFltS2/S3 to CxFltS4/S5.
  719. stf.spill.nta [loc2] = fs0, CxFltS2 - CxFltS0
  720. stf.spill.nta [loc3] = fs1, CxFltS3 - CxFltS1
  721. shr t0 = rpfs, 7
  722. ;;
  723. stf.spill.nta [loc2] = fs2, CxFltS4 - CxFltS2
  724. stf.spill.nta [loc3] = fs3, CxFltS5 - CxFltS3
  725. and t0 = 0x7f, t0 // t0 = bits 13:7 of the saved ar.pfs (presumably the sol field)
  726. ;;
  727. shr t1 = rbsp, 3
  728. ;;
  729. and t1 = 0x3f, t1 // t1 = bits 8:3 of ar.bsp (8-byte slot index within a 64-slot group)
  730. ;;
  731. sub t2 = t0, t1
  732. ;;
  733. cmp4.le pt1, pt0 = t2, zero // pt1: t2 <= 0, t0 needs no extra slot; pt0: at least one extra slot
  734. ;;
  735. (pt0) add t2 = -1, t2
  736. ;;
  737. stf.spill.nta [loc2] = fs4, CxFltS6 - CxFltS4
  738. stf.spill.nta [loc3] = fs5, CxFltS7 - CxFltS5
  739. (pt0) add t0 = 1, t0 // count one extra 8-byte slot (presumably an RNAT collection slot)
  740. ;;
  741. stf.spill.nta [loc2] = fs6, CxFltS8 - CxFltS6
  742. stf.spill.nta [loc3] = fs7, CxFltS9 - CxFltS7
  743. (pt0) add t2 = -63, t2
  744. ;;
  745. stf.spill.nta [loc2] = fs8, CxFltS10 - CxFltS8
  746. stf.spill.nta [loc3] = fs9, CxFltS11 - CxFltS9
  747. (pt0) cmp4.ge.unc pt2, pt3 = t2, zero // pt2: t2 still >= 0, Rcc10 adds another slot; pt3: done
  748. ;;
  749. stf.spill.nta [loc2] = fs10, CxFltS12 - CxFltS10
  750. stf.spill.nta [loc3] = fs11, CxFltS13 - CxFltS11
  751. mov rpsr = psr.um
  752. (pt1) br.cond.spnt Rcc20
  753. ;;
  754. Rcc10:
  755. (pt2) add t0 = 1, t0
  756. (pt2) add t2 = -63, t2
  757. (pt3) br.cond.sptk Rcc20
  758. ;;
  759. cmp4.ge pt2, pt3 = t2, zero
  760. nop.m 0
  761. br Rcc10
  762. ;;
  763. Rcc20:
  764. stf.spill.nta [loc2] = fs12, CxFltS14 - CxFltS12
  765. stf.spill.nta [loc3] = fs13, CxFltS15 - CxFltS13
  766. shl t0 = t0, 3 // slot count -> byte count
  767. ;;
  768. stf.spill.nta [loc2] = fs14, CxFltS16 - CxFltS14
  769. stf.spill.nta [loc3] = fs15, CxFltS17 - CxFltS15
  770. sub rbsp = rbsp, t0 // rbsp = ar.bsp - 8 * t0; this is the value stored to CxRsBSP
  771. ;;
  772. stf.spill.nta [loc2] = fs16, CxFltS18 - CxFltS16
  773. stf.spill.nta [loc3] = fs17, CxFltS19 - CxFltS17
  774. tbit.z pt2, pt1 = rpsr, PSR_MFH // NOTE(review): pt1/pt2 are not consumed before pt1 is redefined as Krnl below
  775. ;;
  776. stf.spill.nta [loc2] = fs18, CxFltF32 - CxFltS18
  777. stf.spill.nta [loc3] = fs19, CxFltF33 - CxFltS19
  778. mov flag = CONTEXT_CONTROL | CONTEXT_LOWER_FLOATING_POINT | CONTEXT_INTEGER
  779. ;;
  780. //
  781. // Save application registers, control information and set context flags.
  782. //
  783. User=pt0 // set when bit 62 of sp is 0
  784. Krnl=pt1 // set when bit 62 of sp is 1
  785. rdcr=t1 // cr.dcr, loaded only under Krnl
  786. sol=t4 // not referenced below in this routine
  787. rpsr=t5 // psr (Krnl) or psr.um (User); replaces the earlier t22 alias
  788. is=t6 // not referenced below in this routine
  789. rccv=t7 // ar.ccv, later reused for ar.csd
  790. rlc=t8 // ar.lc
  791. rec=t9 // ar.ec
  792. rrsc=t10 // ar.rsc on entry to this section; restored at the end
  793. rrnat=t11 // ar.rnat
  794. addr0=t17 // store pointer starting at CxApUNAT
  795. addr1=t18 // store pointer starting at CxApLC
  796. tmp=t19 // scratch
  797. mov rrsc = ar.rsc
  798. tbit.nz Krnl, User = sp, 62 // bit 62 of sp is 1 when in kernel
  799. mov rlc = ar.lc
  800. ;;
  801. mov ar.rsc = r0 // put RSE in lazy mode
  802. mov rccv = ar.ccv
  803. mov rec = ar.ec
  804. ;;
  805. (Krnl) mov rpsr = psr
  806. (User) mov rpsr = psr.um
  807. add addr0 = CxApUNAT, a0
  808. mov rrnat = ar.rnat
  809. add addr1 = CxApLC, a0
  810. (Krnl) mov rdcr = cr.dcr
  811. (Krnl) movl tmp = 1 << PSR_BN
  812. ;;
  813. st8.nta [addr0] = runat, CxApEC - CxApUNAT // CxApUNAT = ar.unat as it was on entry
  814. st8.nta [addr1] = rlc, CxApCCV - CxApLC
  815. (Krnl) or rpsr = tmp, rpsr // kernel only: set the PSR_BN bit in the saved psr
  816. ;;
  817. st8.nta [addr0] = rec, CxApDCR - CxApEC
  818. st8.nta [addr1] = rccv, CxRsPFS - CxApCCV
  819. mov tmp = 1
  820. ;;
  821. st8.nta [addr0] = rdcr, CxRsBSP - CxApDCR // NOTE(review): under User, rdcr (t1) was not loaded above, so the stale t1 value is stored - confirm this is intended
  822. st8.nta [addr1] = rpfs, CxRsBSPSTORE - CxRsPFS
  823. shl tmp = tmp, 63 // tmp = bit 63
  824. ;;
  825. st8.nta [addr0] = rbsp, CxRsRSC - CxRsBSP // CxRsBSP = adjusted bsp computed at Rcc20
  826. st8.nta [addr1] = rbsp, CxRsRNAT - CxRsBSPSTORE // CxRsBSPSTORE receives the same value
  827. or rpfs = rpfs, tmp // validate IFS
  828. ;;
  829. st8.nta [addr0] = rrsc, CxStIIP - CxRsRSC
  830. st8.nta [addr1] = rrnat, CxStIFS - CxRsRNAT
  831. mov rccv = ar.csd // rccv now carries ar.csd (ar.ccv was already stored)
  832. ;;
  833. st8.nta [addr0] = rbrp, CxStIPSR - CxStIIP // CxStIIP = brp, the return address
  834. st8.nta [addr1] = rpfs, CxSegCSD - CxStIFS // CxStIFS = ar.pfs with bit 63 set
  835. add tmp = CxContextFlags, a0
  836. mov rbsp = ar.bsp // reload the current, unadjusted bsp for the RNAT store below
  837. ;;
  838. st8.nta [addr0] = rpsr // save psr
  839. st8.nta [addr1] = rccv // Save CxSegCSD
  840. dep rbsp = 1, rbsp, 3, 6 // Calculate the address of the NAT collection
  841. ;;
  842. mov ar.rsc = rrsc // restore RSC
  843. st4.nta [tmp] = flag
  844. st8.nta [rbsp] = rrnat // Save the current RNAT in NAT collection for the BSP.
  845. // This handles the case where the collection location is in
  846. // the middle of the out registers, and never gets written, but
  847. // is read when we do a continue with this context.
  848. mov ar.unat = runat // restore ar.unat
  849. (p0) br.ret.sptk brp // return to caller.
  850. LEAF_EXIT(RtlCaptureNonVolatileContext)
  851. //++
  852. //
  853. // VOID
  854. // RtlRestoreContext (
  855. // IN PCONTEXT ContextRecord,
  856. // IN PEXCEPTION_RECORD ExceptionRecord OPTIONAL
  857. // )
  858. //
  859. // Routine Description:
  860. //
  861. // This function restores the context of the caller to the specified
  862. // context.
  863. //
  864. // N.B. The context record is assumed to be 16-byte aligned.
  865. //
  866. // N.B. This is a special routine that is used by RtlUnwind2 to restore
  867. // context in the current mode.
  868. //
  869. // N.B. RFI is used to resume execution in kernel mode.
  870. //
  871. // Arguments:
  872. //
  873. // ContextRecord (a0) - Supplies the address of a context record.
  874. //
  875. // ExceptionRecord (a1) - Supplies an optional pointer to an exception
  876. // record.
  877. //
  878. // Return Value:
  879. //
  880. // None.
  881. //
  882. // N.B. There is no return from this routine.
  883. //
  884. //--
  885. NESTED_ENTRY(RtlRestoreContext)
  886. dest1=t8
  887. dest2=t9
  888. rlc=t10
  889. rpreds=t11
  890. rbrp=t12
  891. rbsp=t13
  892. rpfs=t14
  893. runat=t15
  894. rpreds=t16
  895. rsp=t17
  896. rfpsr=t18
  897. jb=t19
  898. tmp=t20
  899. src1=t21
  900. src2=t22
  901. NESTED_SETUP(2, 13, 2, 0)
  902. cmp.eq pt1, p0 = zero, a1
  903. PROLOGUE_END
  904. //
  905. // If an exception record is specified and the exception status is the unwind
  906. // consolidation code and there is at least one parameter, then consolidate
  907. // all the frames that have been unwound and call back to a language specified
  908. // routine.
  909. //
  910. add t1 = ErNumberParameters, a1
  911. (pt1) br.cond.sptk.few Rrc10
  912. ;;
  913. ld4 t0 = [t1], ErExceptionCode - ErNumberParameters
  914. movl t3 = STATUS_UNWIND_CONSOLIDATE
  915. ;;
  916. cmp4.ne pt1, p0 = 0, t0
  917. ld4 t2 = [t1], ErExceptionInformation - ErExceptionCode
  918. ;;
  919. cmp4.eq.and pt1, p0 = t3, t2 // if ne, not a long jump
  920. movl t4 = STATUS_LONGJUMP
  921. ;;
  922. ld8 jb = [t1] // get address of jump buffer
  923. add loc2 = -STACK_SCRATCH_AREA, r32 // Create a vframe for context record.
  924. (pt1) br.cond.dptk.many RtlRcConsolidateFrames
  925. //
  926. // If exception status is STATUS_LONGJUMP, then restore the
  927. // nonvolatile registers to their state at the call to setjmp
  928. // before restoring the context record.
  929. //
  930. cmp4.ne pt1, p0 = t4, t2 // if ne, not a long jump
  931. (pt1) br.cond.sptk.few Rrc10
  932. ;;
  933. //
  934. // restore unat, non-volatile general and branch registers from
  935. // jump buffer and then save them in the context buffer.
  936. //
  937. add src1 = JbIntS0, jb
  938. add src2 = JbIntS1, jb
  939. nop.i 0
  940. ;;
  941. ld8.nt1 s0 = [src1], JbIntS2 - JbIntS0
  942. ld8.nt1 s1 = [src2], JbIntS3 - JbIntS1
  943. nop.i 0
  944. ;;
  945. ld8.nt1 s2 = [src1], JbIntSp - JbIntS2
  946. ld8.nt1 s3 = [src2], JbIntNats - JbIntS3
  947. nop.i 0
  948. ;;
  949. ld8.nt1 rsp = [src1], JbPreds - JbIntSp
  950. ld8.nt1 t2 = [src2]
  951. add t1 = 0x10f0, r0
  952. ;;
  953. ld8.nt1 rpreds = [src1]
  954. add loc11 = CxIntNats, a0
  955. and t2 = t2, t1
  956. ;;
  957. ld8 runat = [loc11]
  958. add dest1 = CxIntS0, a0
  959. add dest2 = CxIntS1, a0
  960. ;;
  961. st8 [dest1] = s0, CxIntS2 - CxIntS0
  962. st8 [dest2] = s1, CxIntS3 - CxIntS1
  963. nop.b 0
  964. ;;
  965. st8 [dest1] = s2, CxIntSp - CxIntS2
  966. st8 [dest2] = s3, CxPreds - CxIntS3
  967. andcm runat = runat, t1
  968. ;;
  969. st8 [dest1] = rsp
  970. st8 [dest2] = rpreds
  971. or runat = runat, t2
  972. ;;
  973. st8 [loc11] = runat
  974. add src1 = JbFltS0, jb
  975. add src2 = JbFltS1, jb
  976. ;;
  977. ldf.fill.nt1 fs0 = [src1], JbFltS2 - JbFltS0
  978. ldf.fill.nt1 fs1 = [src2], JbFltS3 - JbFltS1
  979. nop.i 0
  980. ;;
  981. ldf.fill.nt1 fs2 = [src1], JbFltS4 - JbFltS2
  982. ldf.fill.nt1 fs3 = [src2], JbFltS5 - JbFltS3
  983. nop.i 0
  984. ;;
  985. ldf.fill.nt1 fs4 = [src1], JbFltS6 - JbFltS4
  986. ldf.fill.nt1 fs5 = [src2], JbFltS7 - JbFltS5
  987. nop.i 0
  988. ;;
  989. ldf.fill.nt1 fs6 = [src1], JbFltS8 - JbFltS6
  990. ldf.fill.nt1 fs7 = [src2], JbFltS9 - JbFltS7
  991. nop.i 0
  992. ;;
  993. ldf.fill.nt1 fs8 = [src1], JbFltS10 - JbFltS8
  994. ldf.fill.nt1 fs9 = [src2], JbFltS11 - JbFltS9
  995. nop.i 0
  996. ;;
  997. ldf.fill.nt1 fs10 = [src1], JbFltS12 - JbFltS10
  998. ldf.fill.nt1 fs11 = [src2], JbFltS13 - JbFltS11
  999. nop.i 0
  1000. ;;
  1001. ldf.fill.nt1 fs12 = [src1], JbFltS14 - JbFltS12
  1002. ldf.fill.nt1 fs13 = [src2], JbFltS15 - JbFltS13
  1003. nop.i 0
  1004. ;;
  1005. ldf.fill.nt1 fs14 = [src1], JbFltS16 - JbFltS14
  1006. ldf.fill.nt1 fs15 = [src2], JbFltS17 - JbFltS15
  1007. nop.i 0
  1008. ;;
  1009. ldf.fill.nt1 fs16 = [src1], JbFltS18 - JbFltS16
  1010. ldf.fill.nt1 fs17 = [src2], JbFltS19 - JbFltS17
  1011. nop.i 0
  1012. ;;
  1013. ldf.fill.nt1 fs18 = [src1], JbFPSR - JbFltS18
  1014. ldf.fill.nt1 fs19 = [src2]
  1015. nop.i 0
  1016. ;;
  1017. ld8.nt1 rfpsr = [src1]
  1018. add dest1 = CxFltS0, a0
  1019. add dest2 = CxFltS1, a0
  1020. ;;
  1021. stf.spill [dest1] = fs0, CxFltS2 - CxFltS0
  1022. stf.spill [dest2] = fs1, CxFltS3 - CxFltS1
  1023. nop.i 0
  1024. ;;
  1025. stf.spill [dest1] = fs2, CxFltS4 - CxFltS2
  1026. stf.spill [dest2] = fs3, CxFltS5 - CxFltS3
  1027. nop.i 0
  1028. ;;
  1029. stf.spill [dest1] = fs4, CxFltS6 - CxFltS4
  1030. stf.spill [dest2] = fs5, CxFltS7 - CxFltS5
  1031. nop.i 0
  1032. ;;
  1033. stf.spill [dest1] = fs6, CxFltS8 - CxFltS6
  1034. stf.spill [dest2] = fs7, CxFltS9 - CxFltS7
  1035. nop.i 0
  1036. ;;
  1037. stf.spill [dest1] = fs8, CxFltS10 - CxFltS8
  1038. stf.spill [dest2] = fs9, CxFltS11 - CxFltS9
  1039. nop.i 0
  1040. ;;
  1041. stf.spill [dest1] = fs10, CxFltS12 - CxFltS10
  1042. stf.spill [dest2] = fs11, CxFltS13 - CxFltS11
  1043. nop.i 0
  1044. ;;
  1045. stf.spill [dest1] = fs12, CxFltS14 - CxFltS12
  1046. stf.spill [dest2] = fs13, CxFltS15 - CxFltS13
  1047. nop.i 0
  1048. ;;
  1049. stf.spill [dest1] = fs14, CxFltS16 - CxFltS14
  1050. stf.spill [dest2] = fs15, CxFltS17 - CxFltS15
  1051. nop.i 0
  1052. ;;
  1053. stf.spill [dest1] = fs16, CxFltS18 - CxFltS16
  1054. stf.spill [dest2] = fs17, CxFltS19 - CxFltS17
  1055. nop.i 0
  1056. ;;
  1057. stf.spill [dest1] = fs18
  1058. stf.spill [dest2] = fs19
  1059. add dest1 = CxStFPSR, a0
  1060. ;;
  1061. st8 [dest1] = rfpsr
  1062. add src1 = JbStIIP, jb
  1063. add src2 = JbBrS0, jb
  1064. ;;
  1065. ld8.nt1 loc11 = [src1], JbBrS1 - JbStIIP
  1066. ld8.nt1 loc12 = [src2], JbBrS2 - JbBrS0
  1067. ;;
  1068. ld8.nt1 loc2 = [src1], JbBrS3 - JbBrS1
  1069. ld8.nt1 loc3 = [src2], JbBrS4 - JbBrS2
  1070. ;;
  1071. ld8.nt1 loc4 = [src1], JbRsBSP - JbBrS3
  1072. ld8.nt1 loc5 = [src2], JbRsPFS - JbBrS4
  1073. ;;
  1074. ld8.nt1 rbsp = [src1], JbApUNAT - JbRsBSP
  1075. ld8.nt1 rpfs = [src2], JbApLC - JbRsPFS
  1076. ;;
  1077. ld8.nt1 runat = [src1]
  1078. add dest1 = CxStIIP, a0
  1079. add dest2 = CxBrS0, a0
  1080. ld8.nt1 rlc = [src2]
  1081. movl t0 = 1 << IFS_V
  1082. ;;
  1083. st8 [dest1] = loc11, CxBrS1 - CxStIIP
  1084. st8 [dest2] = loc12, CxBrS2 - CxBrS0
  1085. or rpfs = t0, rpfs // validate the ifs
  1086. ;;
  1087. st8 [dest1] = loc2, CxBrS3 - CxBrS1
  1088. st8 [dest2] = loc3, CxBrS4 - CxBrS2
  1089. ;;
  1090. st8 [dest1] = loc4, CxApUNAT - CxBrS3
  1091. st8 [dest2] = loc5, CxStIFS - CxBrS4
  1092. ;;
  1093. st8 [dest1] = runat, CxRsBSP - CxApUNAT
  1094. st8 [dest2] = rpfs, CxApLC - CxStIFS
  1095. ;;
  1096. st8 [dest2] = rlc
  1097. st8 [dest1] = rbsp
  1098. ;;
  1099. //
  1100. // If the call is from user mode, then use the continue system service to
  1101. // continue execution. Otherwise, restore the context directly since the
  1102. // current mode is kernel and threads can't be arbitrarily interrupted.
  1103. //
  1104. Rrc10:
  1105. #ifndef NTOS_KERNEL_RUNTIME
  1106. mov out0 = a0
  1107. mov out1 = zero
  1108. br.call.sptk.few brp = ZwContinue
  1109. #else
  1110. //
  1111. // Kernel mode; simply restore the registers and rfi
  1112. //
  1113. add src1 = CxIntNats, a0
  1114. add src2 = CxPreds, a0
  1115. add tmp = CxIntGp, a0
  1116. ;;
  1117. ld8.nt1 t17 = [src1], CxBrRp - CxIntNats
  1118. ld8.nt1 t16 = [src2], CxBrS0 - CxPreds
  1119. shr tmp = tmp, 3
  1120. ;;
  1121. ld8.nt1 t0 = [src1], CxBrS1 - CxBrRp
  1122. ld8.nt1 t1 = [src2], CxBrS2 - CxBrS0
  1123. and tmp = 0x3f, tmp
  1124. ;;
  1125. ld8.nt1 t2 = [src1], CxBrS3 - CxBrS1
  1126. ld8.nt1 t3 = [src2], CxBrS4 - CxBrS2
  1127. cmp4.ge pt1, pt0 = 1, tmp
  1128. ;;
  1129. ld8.nt1 t4 = [src1], CxBrT0 - CxBrS3
  1130. ld8.nt1 t5 = [src2], CxBrT1 - CxBrS4
  1131. (pt1) sub loc5 = 1, tmp
  1132. ;;
  1133. ld8.nt1 t6 = [src1], CxApUNAT - CxBrT0
  1134. ld8.nt1 t7 = [src2], CxApLC - CxBrT1
  1135. (pt0) add loc5 = -1, tmp
  1136. ;;
  1137. ld8.nt1 loc11 = [src1], CxApEC - CxApUNAT
  1138. ld8.nt1 t8 = [src2], CxApCCV - CxApLC
  1139. (pt0) sub loc6 = 65, tmp
  1140. ;;
  1141. ld8.nt1 t9 = [src1], CxApDCR - CxApEC
  1142. ld8.nt1 t10 = [src2], CxRsPFS - CxApCCV
  1143. (pt1) shr.u t17 = t17, loc5
  1144. ;;
  1145. ld8.nt1 loc12 = [src1], CxRsBSP - CxApDCR
  1146. ld8.nt1 t11 = [src2], CxRsRSC - CxRsPFS
  1147. (pt0) shl loc7 = t17, loc5
  1148. ;;
  1149. ld8.nt1 loc2 = [src1], CxStIIP - CxRsBSP
  1150. ld8.nt1 loc3 = [src2], CxStIFS - CxRsRSC
  1151. (pt0) shr.u loc8 = t17, loc6
  1152. ;;
  1153. ld8.nt1 loc9 = [src1], CxSegCSD - CxStIIP
  1154. ld8.nt1 loc10 = [src2]
  1155. (pt0) or t17 = loc7, loc8
  1156. ;;
  1157. mov ar.unat = t17
  1158. ld8.nt1 t17 = [src1]
  1159. shr t12 = loc2, 3
  1160. ;;
  1161. add src1 = CxFltS0, a0
  1162. add src2 = CxFltS1, a0
  1163. and t12 = 0x3f, t12 // current rnat save index
  1164. and t13 = 0x7f, loc10 // total frame size
  1165. ;;
  1166. mov ar.ccv = t10
  1167. mov ar.csd = t17
  1168. add t14 = t13, t12
  1169. mov ar.pfs = t11
  1170. ;;
  1171. Rrc20:
  1172. cmp4.gt pt1, pt0 = 63, t14
  1173. ;;
  1174. (pt0) add t14 = -63, t14
  1175. (pt0) add t13 = 1, t13
  1176. ;;
  1177. nop.m 0
  1178. (pt1) shl t13 = t13, 3
  1179. (pt0) br.cond.spnt Rrc20
  1180. ;;
  1181. add loc2 = loc2, t13
  1182. nop.f 0
  1183. mov pr = t16, -1
  1184. ldf.fill.nt1 fs0 = [src1], CxFltS2 - CxFltS0
  1185. ldf.fill.nt1 fs1 = [src2], CxFltS3 - CxFltS1
  1186. mov brp = t0
  1187. ;;
  1188. ldf.fill.nt1 fs2 = [src1], CxFltT0 - CxFltS2
  1189. ldf.fill.nt1 fs3 = [src2], CxFltT1 - CxFltS3
  1190. mov bs0 = t1
  1191. ;;
  1192. ldf.fill.nt1 ft0 = [src1], CxFltT2 - CxFltT0
  1193. ldf.fill.nt1 ft1 = [src2], CxFltT3 - CxFltT1
  1194. mov bs1 = t2
  1195. ;;
  1196. ldf.fill.nt1 ft2 = [src1], CxFltT4 - CxFltT2
  1197. ldf.fill.nt1 ft3 = [src2], CxFltT5 - CxFltT3
  1198. mov bs2 = t3
  1199. ;;
  1200. ldf.fill.nt1 ft4 = [src1], CxFltT6 - CxFltT4
  1201. ldf.fill.nt1 ft5 = [src2], CxFltT7 - CxFltT5
  1202. mov bs3 = t4
  1203. ;;
  1204. ldf.fill.nt1 ft6 = [src1], CxFltT8 - CxFltT6
  1205. ldf.fill.nt1 ft7 = [src2], CxFltT9 - CxFltT7
  1206. mov bs4 = t5
  1207. ;;
  1208. ldf.fill.nt1 ft8 = [src1], CxFltS4 - CxFltT8
  1209. ldf.fill.nt1 ft9 = [src2], CxFltS5 - CxFltT9
  1210. mov bt0 = t6
  1211. ;;
  1212. ldf.fill.nt1 fs4 = [src1], CxFltS6 - CxFltS4
  1213. ldf.fill.nt1 fs5 = [src2], CxFltS7 - CxFltS5
  1214. mov bt1 = t7
  1215. ;;
  1216. ldf.fill.nt1 fs6 = [src1], CxFltS8 - CxFltS6
  1217. ldf.fill.nt1 fs7 = [src2], CxFltS9 - CxFltS7
  1218. mov ar.lc = t8
  1219. ;;
  1220. ldf.fill.nt1 fs8 = [src1], CxFltS10 - CxFltS8
  1221. ldf.fill.nt1 fs9 = [src2], CxFltS11 - CxFltS9
  1222. mov ar.ec = t9
  1223. ;;
  1224. ldf.fill.nt1 fs10 = [src1], CxFltS12 - CxFltS10
  1225. ldf.fill.nt1 fs11 = [src2], CxFltS13 - CxFltS11
  1226. nop.i 0
  1227. ;;
  1228. ldf.fill.nt1 fs12 = [src1], CxFltS14 - CxFltS12
  1229. ldf.fill.nt1 fs13 = [src2], CxFltS15 - CxFltS13
  1230. add loc6 = CxIntGp, a0
  1231. ;;
  1232. ldf.fill.nt1 fs14 = [src1], CxFltS16 - CxFltS14
  1233. ldf.fill.nt1 fs15 = [src2], CxFltS17 - CxFltS15
  1234. add loc7 = CxIntT0, a0
  1235. ;;
  1236. ldf.fill.nt1 fs16 = [src1], CxFltS18 - CxFltS16
  1237. ldf.fill.nt1 fs17 = [src2], CxFltS19 - CxFltS17
  1238. add t19 = CxRsRNAT, a0
  1239. ;;
  1240. ldf.fill.nt1 fs18 = [src1]
  1241. ldf.fill.nt1 fs19 = [src2]
  1242. add t7 = CxStFPSR, a0
  1243. ;;
  1244. ld8.nt1 loc8 = [t7] // load fpsr from context
  1245. ld8.nt1 loc5 = [t19] // load rnat from context
  1246. nop.i 0
  1247. ld8.fill.nt1 gp = [loc6], CxIntT1 - CxIntGp
  1248. ld8.fill.nt1 t0 = [loc7], CxIntS0 - CxIntT0
  1249. ;;
  1250. ld8.fill.nt1 t1 = [loc6], CxIntS1 - CxIntT1
  1251. ld8.fill.nt1 s0 = [loc7], CxIntS2 - CxIntS0
  1252. ;;
  1253. ld8.fill.nt1 s1 = [loc6], CxIntS3 - CxIntS1
  1254. ld8.fill.nt1 s2 = [loc7], CxIntV0 - CxIntS2
  1255. ;;
  1256. ld8.fill.nt1 s3 = [loc6], CxIntTeb - CxIntS3
  1257. ld8.fill.nt1 v0 = [loc7], CxIntT2 - CxIntV0
  1258. ;;
  1259. ld8.fill.nt1 teb = [loc6], CxIntT3 - CxIntTeb
  1260. ld8.fill.nt1 t2 = [loc7], CxIntSp - CxIntT2
  1261. ;;
  1262. ld8.fill.nt1 t3 = [loc6], CxIntT4 - CxIntT3
  1263. ld8.fill.nt1 loc4 = [loc7], CxIntT5 - CxIntSp
  1264. ;;
  1265. ld8.fill.nt1 t4 = [loc6], CxIntT6 - CxIntT4
  1266. ld8.fill.nt1 t5 = [loc7], CxIntT7 - CxIntT5
  1267. ;;
  1268. ld8.fill.nt1 t6 = [loc6], CxIntT8 - CxIntT6
  1269. ld8.fill.nt1 t7 = [loc7], CxIntT9 - CxIntT7
  1270. ;;
  1271. ld8.fill.nt1 t8 = [loc6], CxIntT10 - CxIntT8
  1272. ld8.fill.nt1 t9 = [loc7], CxIntT11 - CxIntT9
  1273. ;;
  1274. ld8.fill.nt1 t10 = [loc6], CxIntT12 - CxIntT10
  1275. ld8.fill.nt1 t11 = [loc7], CxIntT13 - CxIntT11
  1276. ;;
  1277. ld8.fill.nt1 t12 = [loc6], CxIntT14 - CxIntT12
  1278. ld8.fill.nt1 t13 = [loc7], CxIntT15 - CxIntT13
  1279. ;;
  1280. ld8.fill.nt1 t14 = [loc6], CxIntT16 - CxIntT14
  1281. ld8.fill.nt1 t15 = [loc7], CxIntT17 - CxIntT15
  1282. ;;
  1283. ld8.fill.nt1 t16 = [loc6], CxIntT18 - CxIntT16
  1284. ld8.fill.nt1 t17 = [loc7], CxIntT19 - CxIntT17
  1285. ;;
  1286. ld8.fill.nt1 t18 = [loc6], CxIntT20 - CxIntT18
  1287. ld8.fill.nt1 t19 = [loc7], CxIntT21 - CxIntT19
  1288. ;;
  1289. ld8.fill.nt1 t20 = [loc6], CxIntT22 - CxIntT20
  1290. ld8.fill.nt1 t21 = [loc7]
  1291. ;;
  1292. rsm 1 << PSR_I
  1293. ld8.fill.nt1 t22 = [loc6]
  1294. ;;
  1295. bsw.0
  1296. ;;
  1297. add r20 = CxStIPSR, a0
  1298. ;;
  1299. ld8.nt1 r20 = [r20] // load IPSR
  1300. movl r23 = 1 << IFS_V
  1301. ;;
  1302. mov ar.fpsr = loc8 // set fpsr
  1303. mov ar.unat = loc11
  1304. ;;
  1305. or r21 = r23, loc10 // set ifs valid bit
  1306. ;;
  1307. mov cr.dcr = loc12
  1308. mov r17 = loc2 // put BSP in a shadow reg
  1309. or r16 = 0x3, loc3 // put RSE in eager mode
  1310. mov ar.rsc = r0 // put RSE in enforced lazy
  1311. mov r22 = loc9 // put iip in a shadow reg
  1312. dep r21 = 0, r21, IFS_MBZ0, IFS_V-IFS_MBZ0
  1313. ;;
  1314. mov r18 = loc4 // put SP in a shadow reg
  1315. mov r19 = loc5 // put RNaTs in a shadow reg
  1316. ;;
  1317. alloc r23 = 0, 0, 0, 0
  1318. mov sp = r18
  1319. ;;
  1320. loadrs
  1321. ;;
  1322. rsm 1 << PSR_IC
  1323. ;;
  1324. srlz.d
  1325. ;;
  1326. mov cr.iip = r22
  1327. mov cr.ifs = r21
  1328. ;;
  1329. mov ar.bspstore = r17
  1330. mov cr.ipsr = r20
  1331. nop.i 0
  1332. ;;
  1333. mov ar.rnat = r19 // set rnat register
  1334. mov ar.rsc = r16 // restore RSC
  1335. ;;
  1336. invala
  1337. nop.i 0
  1338. rfi
  1339. ;;
  1340. #endif // NTOS_KERNEL_RUNTIME
  1341. LEAF_EXIT(RtlRestoreContext)
  1342. //++
  1343. //
  1344. // VOID
  1345. // RtlpFlushRSE (
  1346. // OUT PULONGLONG Bsp,
  1347. // OUT PULONGLONG Rnat
  1348. // )
  1349. //
  1350. // Routine Description:
  1351. //
  1352. // This function flushes the RSE, then captures the values of bsp
  1353. // and rnat into the input buffers.
  1354. //
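  1355. // Arguments:
  1356. //
  // Bsp (a0) - Supplies the address of the quadword that receives the
  // value of ar.bsp read after the flush.
  //
  // Rnat (a1) - Supplies the address of the quadword that receives the
  // value of ar.rnat.
  //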
  1357. // Return Value:
  1358. //
  1359. // None.
  1360. //
  1361. //--
  1362. LEAF_ENTRY(RtlpFlushRSE) // flush the RSE, then store ar.bsp to [a0] and ar.rnat to [a1]
  1363. flushrs // flush the register stack (see routine description) before sampling bsp/rnat
  1364. mov t2 = ar.rsc // t2 = caller's RSC, restored before returning
  1365. ;;
  1366. mov t0 = ar.bsp // t0 = backing store pointer as of the flush
  1367. mov ar.rsc = r0 // put RSE in lazy mode (presumably required before ar.rnat is read - confirm against the Itanium RSE rules)
  1368. ;;
  1369. st8 [a0] = t0 // *Bsp = ar.bsp
  1370. mov t1 = ar.rnat // t1 = RNaT collection, read while RSC is zero
  1371. nop.i 0
  1372. ;;
  1373. st8 [a1] = t1 // *Rnat = ar.rnat
  1374. mov ar.rsc = t2 // restore the RSC value saved on entry
  1375. ;;
  1376. br.ret.sptk brp // return to the caller; no value is returned
  1377. LEAF_EXIT(RtlpFlushRSE)
  1378. //++
  1379. //
  1380. // VOID
  1381. // RtlRcConsolidateFrames (
  1382. // IN PCONTEXT ContextRecord,
  1383. // IN PEXCEPTION_RECORD ExceptionRecord
  1384. // )
  1385. //
  1386. // Routine Description:
  1387. //
  1388. // This routine is called at the end of an unwind operation to logically
  1389. // remove unwound frames from the stack. This is accomplished by specifying
  1390. // the variable frame pointer and a context ABI unwind.
  1391. //
  1392. // The following code calls the language callback function specified in the
  1393. // exception record. If the function returns, then the destination frame
  1394. // context is restored and control is transferred to the address returned by
  1395. // the language callback function. If control does not return, then another
  1396. // exception must be raised.
  1397. //
  1398. // Arguments:
  1399. //
  1400. // ContextRecord - Supplies a pointer to the context record.
  1401. //
  1402. // ExceptionRecord - Supplies a pointer to an exception record.
  1403. //
  1404. // Implicit Arguments:
  1405. //
  1406. // Virtual frame pointer (r34) - Supplies a pointer to the context record minus the stack area.
  1407. //
  1408. // LanguageSpecificHandler (jb) - Supplies a pointer to the language specific handler
  1409. //
  1410. // Return Value:
  1411. //
  1412. // None.
  1413. //
  1414. //--
  1415. .global RtlRcConsolidateFrames; // calls the language-specific handler, then resumes via Rrc10 with the updated context
  1416. .proc RtlRcConsolidateFrames;
  1417. RtlRcConsolidateFrames::
  1418. .prologue
  1419. .unwabi @nt, CONTEXT_FRAME // unwind through this frame using the context-record ABI
  1420. .regstk 2, 13, 2, 0 // 2 inputs, 13 locals, 2 outputs, 0 rotating
  1421. .vframe loc2 // Specify that loc2 contains the saved sp. NOTE(review): the old comment said r32 and the routine header says r34 - confirm which register loc2 aliases
  1422. PROLOGUE_END
  1423. ld8 t3 = [jb], 8 // t3 = first quadword at [jb] (used below as the call target); jb advances by 8
  1424. add loc3 = CxStIIP, a0 // loc3 = address of the StIIP field in the context record, kept across the call
  1425. ;;
  1426. ld8 gp = [jb] // gp = second quadword at the original jb, i.e. the handler's gp
  1427. mov bt0 = t3 // branch register for the indirect call
  1428. ;;
  1429. mov out0 = a1 // Pass exception record as argument.
  1430. br.call.sptk brp = bt0 // call the language specific handler
  1431. ;;
  1432. //
  1433. // The language specific handler returns the address where control
  1434. // should be returned using the passed context. Update the context
  1435. // record with the new address.
  1436. //
  1437. st8 [loc3] = r8 // ContextRecord StIIP = address returned by the handler in r8
  1438. br.cond.sptk Rrc10 // join RtlRestoreContext at Rrc10 to continue with the updated context (a0 still = ContextRecord)
  1439. ;;
  1440. LEAF_EXIT(RtlRcConsolidateFrames)