Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

511 lines
14 KiB

  1. // TITLE("Hibernation wake dispatcher")
  2. //++
  3. //
  4. // Copyright (c) 1999 Intel Corporation
  5. //
  6. // Module Name:
  7. //
  8. // wakes.s
  9. //
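  10. // Abstract:
  11. //
  12. // Final stage of resuming from hibernation on IA64: copies pages that were loaded into temporary buffers to their final physical locations, then restores the saved processor context and re-enters NT.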
  13. // Author:
  14. //
  15. // Allen Kay ([email protected]) 8 June, 1999
  16. //
  17. // Environment:
  18. //
  19. // Firmware, OS Loader. Position-independent.
  20. //
  21. // Revision History:
  22. //
  23. //--
  24. #include "ksia64.h"
  25. #include "paldef.h"
  26. .global HiberMapPage // holds pointer to the list of source page numbers
  27. .global HiberRemapPage // holds pointer to the list of target page numbers
  28. .global HiberWakeState // holds pointer to the KPROCESSOR_STATE used to restart the image
  29. .global HiberFirstRemap // first page index to copy (read as 4 bytes)
  30. .global HiberLastRemap // last page index to copy (read as 4 bytes)
  31. .global HiberImagePageSelf // PFN where MemImage ends up (read as 8 bytes)
  32. .global HiberBreakOnWake // "break on wake" flag (read as 1 byte)
  //++
  //
  // VOID
  // WakeDispatcher(
  //     VOID
  //     )
  //
  // Routine description:
  //
  //   This code performs the final stages of restarting the hibernation
  //   image.  Pages that were loaded in temporary buffer space because
  //   the memory they belong in was in use by the firmware are copied
  //   to their final destination; the break-on-wake signature is patched
  //   into the memory image if requested; and the saved processor context
  //   is loaded into registers and NT is reentered with an rfi.
  //
  //   The PAL call that would flush the I-cache after the copy is
  //   currently compiled out (#if 0), so no explicit I-cache
  //   synchronization is performed by this routine.
  //
  //   Because this code is part of the loader image that may be overwritten
  //   by this copy process, it must itself have been copied to a free
  //   page before it is executed.  Note that because there is presently
  //   no mechanism for allocating multiple contiguous pages, this code cannot
  //   exceed one page (8K).
  //
  //   The entry label is WakeDispatcherStartLocal; the labels
  //   WakeDispatcherStartLocal and WakeDispatcherEndLocal bracket the code
  //   that has to be relocated.
  //
  // Arguments:
  //
  //   None.  All inputs are read from the Hiber* globals:
  //     HiberMapPage / HiberRemapPage   - tables of source / target page numbers
  //     HiberFirstRemap / HiberLastRemap - first / last table index to copy
  //     HiberWakeState                  - KPROCESSOR_STATE (CONTEXT first)
  //     HiberImagePageSelf              - PFN where MemImage ends up
  //     HiberBreakOnWake                - nonzero to patch the 'brkp' signature
  //
  // Return Value:
  //
  //   Never returns.
  //
  //--

          LEAF_ENTRY(WakeDispatcherStartLocal)

          .prologue
          .regstk     1, 30, 2, 0
          alloc       t4 = ar.pfs, 1, 30, 2, 0
          ARGPTR(a0)

  //
  // Register aliases.  Everything that must survive the page copy lives in
  // stacked local registers.
  //

          rMapPage     = loc11                // cursor into source page table
          rRemapPage   = loc12                // cursor into target page table
          rWakeState   = loc13                // KPROCESSOR_STATE pointer
          rPageCount   = loc14                // pages left to copy
          rPageSelf    = loc15                // PFN, later address, of MemImage
          rBreakOnWake = loc16                // break-on-wake flag
          src1         = loc17                // context read pointer #1
          src2         = loc18                // context read pointer #2
          tmp          = loc19                // scratch for UNAT alignment
          rKSEG0       = loc20                // KSEG0_BASE
          rpT0         = loc21                // temporary pointers
          rpT1         = loc22
          rpT2         = loc23
          rpT3         = loc24

  //
  // Get variables into registers before copying any pages, as they may be
  // overwritten.
  //
  // KSEG0_BASE is loaded here, once, rather than inside the copy loop: it is
  // loop invariant, and the break-on-wake path below needs it even when
  // there are no pages to copy and the loop is skipped entirely.
  //

          movl        rpT0 = HiberMapPage     // pointer to source pages
          movl        rpT1 = HiberRemapPage   // pointer to target pages
          movl        rpT2 = HiberWakeState   // pointer to KPROCESSOR_STATE for
                                              // restarting the hibernation image
          movl        rKSEG0 = KSEG0_BASE     // physical -> KSEG0 offset
          ;;

          ld8         rMapPage = [rpT0]       // load them
          ld8         rRemapPage = [rpT1]
          ld8         rWakeState = [rpT2]
          ;;

          movl        rpT0 = HiberFirstRemap  // first page index
          movl        rpT1 = HiberLastRemap   // last page index
          movl        rpT2 = HiberImagePageSelf // PFN where MemImage ends up
          movl        rpT3 = HiberBreakOnWake // "break on wake" flag
          ;;

          ld4         t0 = [rpT0]             // load them
          ld4         t1 = [rpT1]
          ld8         rPageSelf = [rpT2]
          ld1         rBreakOnWake = [rpT3]
          ;;

  //
  // The page tables hold 8-byte entries (they are walked with ld8 and a
  // post-increment of 8), so the first index must be scaled by 8 before it
  // is added to the table base.
  //

          sub         rPageCount = t1, t0             // number of pages to copy
          shladd      rMapPage = t0, 3, rMapPage      // &source[first]
          shladd      rRemapPage = t0, 3, rRemapPage  // &target[first]
          ;;

          cmp.eq      pt1, pt0 = rPageCount, zero     // nothing to copy
          ;;
  (pt1)   br.cond.spnt CopyDone

  //
  // Copy pages.
  //
  // Plain ld8/st8 are used throughout.  The .fill/.spill forms would take
  // the NaT bit of each loaded value from ar.unat, which has not been set up
  // at this point; a stale bit would turn a page number into a NaT and fault
  // when the resulting address is used.
  //

  NextPage:
          add         rPageCount = -1, rPageCount     // count page copied
          ld8         t0 = [rMapPage], 8              // source page number
          ld8         t1 = [rRemapPage], 8            // destination page number
          ;;

          shl         t0 = t0, PAGE_SHIFT             // page -> physical address
          shl         t1 = t1, PAGE_SHIFT             // page -> physical address
          movl        t2 = 1024                       // 8KB = 1024 quadwords
                                                      // (assumes 8KB pages - TODO
                                                      // confirm PAGE_SHIFT == 13)
          ;;

          add         t0 = rKSEG0, t0                 // physical -> KSEG0
          add         t1 = rKSEG0, t1                 // physical -> KSEG0
          ;;

  NextQuadWord:
          add         t2 = -1, t2                     // count quadword
          ld8         t3 = [t0], 8                    // load a quadword
          ;;

          st8         [t1] = t3, 8                    // store it
          cmp.eq      pt0, pt1 = t2, zero             // pt1 = more quadwords
          ;;

  (pt1)   br.cond.spnt NextQuadWord
          cmp.eq      pt0, pt1 = rPageCount, zero     // pt1 = more pages
          ;;
  (pt1)   br.cond.spnt NextPage

  //
  // All necessary pages have been copied. Check the break-on-wake flag,
  // and change the signature NT will see when it wakes up if it was set.
  //

  CopyDone:
          cmp.eq      pt1, pt0 = rBreakOnWake, zero   // no flag set, do nothing
          ;;
  (pt1)   br.cond.spnt SkipSigChange

          shl         rPageSelf = rPageSelf, PAGE_SHIFT // convert to physical
          ;;
          add         rPageSelf = rKSEG0, rPageSelf   // make superpage address
          movl        t0 = 0x706b7262                 // 'brkp'
          ;;
          st4         [rPageSelf] = t0        // signature is first longword of MemImage

  //
  // The I-cache flush through PAL is disabled; the sequence is kept for
  // reference.
  //

  SkipSigChange:

  #if 0
          PublicFunction(PalProc)
          mov         out0 = PAL_CACHE_FLUSH  // call PAL cache flush routine
          mov         out1 = 1                // flush I-cache only
          movl        rpT0 = PalProc
          ;;
          ld8         t0 = [rpT0]
          ;;
          mov         bt0 = t0
          ;;
          br.call.spnt brp = bt0
  #endif

  //
  // Restore context from the CONTEXT record at the start of the
  // KPROCESSOR_STATE.  The sequence below reloads the predicates, branch
  // registers, application registers, preserved and temporary floating
  // point registers, the integer registers, and finally the RSE and
  // interruption state (IIP, IFS, IPSR) before resuming with an rfi.
  //
  // The instruction order and stop bits in this sequence are significant;
  // do not reschedule it casually.
  //

          mov         a0 = rWakeState         // CONTEXT is the first thing
                                              // in the KPROCESSOR_STATE
          ;;

  //
  // Load the saved NaT collection, predicates, branch registers and
  // application/RSE state.  The third instruction of each group derives a
  // bit position from the address of the integer save area (bits 8:3 of
  // &CxIntGp) and rotates the saved IntNats (t17) accordingly before it is
  // written to ar.unat, so that the ld8.fill instructions further down pick
  // up the matching NaT bits.
  //

          add         src1 = CxIntNats, a0
          add         src2 = CxPreds, a0
          add         tmp = CxIntGp, a0
          ;;

          ld8.nt1     t17 = [src1], CxBrRp - CxIntNats     // saved IntNats
          ld8.nt1     t16 = [src2], CxBrS0 - CxPreds       // saved predicates
          shr         tmp = tmp, 3
          ;;

          ld8.nt1     t0 = [src1], CxBrS1 - CxBrRp         // brp
          ld8.nt1     t1 = [src2], CxBrS2 - CxBrS0         // bs0
          and         tmp = 0x3f, tmp
          ;;

          ld8.nt1     t2 = [src1], CxBrS3 - CxBrS1         // bs1
          ld8.nt1     t3 = [src2], CxBrS4 - CxBrS2         // bs2
          cmp4.ge     pt1, pt0 = 1, tmp
          ;;

          ld8.nt1     t4 = [src1], CxBrT0 - CxBrS3         // bs3
          ld8.nt1     t5 = [src2], CxBrT1 - CxBrS4         // bs4
  (pt1)   sub         loc5 = 1, tmp
          ;;

          ld8.nt1     t6 = [src1], CxApUNAT - CxBrT0       // bt0
          ld8.nt1     t7 = [src2], CxApLC - CxBrT1         // bt1
  (pt0)   add         loc5 = -1, tmp
          ;;

          ld8.nt1     loc0 = [src1], CxApEC - CxApUNAT     // ar.unat (final value)
          ld8.nt1     t8 = [src2], CxApCCV - CxApLC        // ar.lc
  (pt0)   sub         loc6 = 65, tmp
          ;;

          ld8.nt1     t9 = [src1], CxApDCR - CxApEC        // ar.ec
          ld8.nt1     t10 = [src2], CxRsPFS - CxApCCV      // ar.ccv
  (pt1)   shr.u       t17 = t17, loc5
          ;;

          ld8.nt1     loc1 = [src1], CxRsBSP - CxApDCR     // cr.dcr
          ld8.nt1     t11 = [src2], CxRsRSC - CxRsPFS      // ar.pfs
  (pt0)   shl         loc7 = t17, loc5
          ;;

          ld8.nt1     loc2 = [src1], CxStIIP - CxRsBSP     // BSP
          ld8.nt1     loc3 = [src2], CxStIFS - CxRsRSC     // RSC
  (pt0)   shr.u       loc8 = t17, loc6
          ;;

          ld8.nt1     loc9 = [src1]                        // IIP
          ld8.nt1     loc10 = [src2]                       // IFS
  (pt0)   or          t17 = loc7, loc8
          ;;

          mov         ar.unat = t17           // NaT bits for the integer fills
          add         src1 = CxFltS0, a0
          shr         t12 = loc2, 3
          ;;

          add         src2 = CxFltS1, a0
          and         t12 = 0x3f, t12         // current rnat save index
          and         t13 = 0x7f, loc10       // total frame size
          ;;

          mov         ar.ccv = t10
          add         t14 = t13, t12
          mov         ar.pfs = t11
          ;;

  //
  // Add one slot to the frame size for every 63 slots spanned from the
  // current save index, then convert the slot count to bytes and advance
  // the saved BSP by it.
  //

  Rrc20:
          cmp4.gt     pt1, pt0 = 63, t14
          ;;

  (pt0)   add         t14 = -63, t14
  (pt0)   add         t13 = 1, t13
          ;;

          nop.m       0
  (pt1)   shl         t13 = t13, 3
  (pt0)   br.cond.spnt Rrc20
          ;;

  //
  // Restore the predicates, floating point registers, branch registers and
  // loop/epilog counters.
  //

          add         loc2 = loc2, t13
          nop.f       0
          mov         pr = t16, -1

          ldf.fill.nt1 fs0 = [src1], CxFltS2 - CxFltS0
          ldf.fill.nt1 fs1 = [src2], CxFltS3 - CxFltS1
          mov         brp = t0
          ;;

          ldf.fill.nt1 fs2 = [src1], CxFltT0 - CxFltS2
          ldf.fill.nt1 fs3 = [src2], CxFltT1 - CxFltS3
          mov         bs0 = t1
          ;;

          ldf.fill.nt1 ft0 = [src1], CxFltT2 - CxFltT0
          ldf.fill.nt1 ft1 = [src2], CxFltT3 - CxFltT1
          mov         bs1 = t2
          ;;

          ldf.fill.nt1 ft2 = [src1], CxFltT4 - CxFltT2
          ldf.fill.nt1 ft3 = [src2], CxFltT5 - CxFltT3
          mov         bs2 = t3
          ;;

          ldf.fill.nt1 ft4 = [src1], CxFltT6 - CxFltT4
          ldf.fill.nt1 ft5 = [src2], CxFltT7 - CxFltT5
          mov         bs3 = t4
          ;;

          ldf.fill.nt1 ft6 = [src1], CxFltT8 - CxFltT6
          ldf.fill.nt1 ft7 = [src2], CxFltT9 - CxFltT7
          mov         bs4 = t5
          ;;

          ldf.fill.nt1 ft8 = [src1], CxFltS4 - CxFltT8
          ldf.fill.nt1 ft9 = [src2], CxFltS5 - CxFltT9
          mov         bt0 = t6
          ;;

          ldf.fill.nt1 fs4 = [src1], CxFltS6 - CxFltS4
          ldf.fill.nt1 fs5 = [src2], CxFltS7 - CxFltS5
          mov         bt1 = t7
          ;;

          ldf.fill.nt1 fs6 = [src1], CxFltS8 - CxFltS6
          ldf.fill.nt1 fs7 = [src2], CxFltS9 - CxFltS7
          mov         ar.lc = t8
          ;;

          ldf.fill.nt1 fs8 = [src1], CxFltS10 - CxFltS8
          ldf.fill.nt1 fs9 = [src2], CxFltS11 - CxFltS9
          mov         ar.ec = t9
          ;;

          ldf.fill.nt1 fs10 = [src1], CxFltS12 - CxFltS10
          ldf.fill.nt1 fs11 = [src2], CxFltS13 - CxFltS11
          nop.i       0
          ;;

          ldf.fill.nt1 fs12 = [src1], CxFltS14 - CxFltS12
          ldf.fill.nt1 fs13 = [src2], CxFltS15 - CxFltS13
          add         loc6 = CxIntGp, a0      // integer fill pointer #1
          ;;

          ldf.fill.nt1 fs14 = [src1], CxFltS16 - CxFltS14
          ldf.fill.nt1 fs15 = [src2], CxFltS17 - CxFltS15
          add         loc7 = CxIntT0, a0      // integer fill pointer #2
          ;;

          ldf.fill.nt1 fs16 = [src1], CxFltS18 - CxFltS16
          ldf.fill.nt1 fs17 = [src2], CxFltS19 - CxFltS17
          add         t19 = CxRsRNAT, a0
          ;;

          ldf.fill.nt1 fs18 = [src1]
          ldf.fill.nt1 fs19 = [src2]
          add         t7 = CxStFPSR, a0
          ;;

  //
  // Restore the integer registers.  Two pointers walk the integer save area
  // in an interleaved fashion.  The saved stack pointer is parked in loc4
  // and installed later.
  //

          ld8.nt1     loc8 = [t7]             // load fpsr from context
          ld8.nt1     loc5 = [t19]            // load rnat from context
          nop.i       0

          ld8.fill.nt1 gp = [loc6], CxIntT1 - CxIntGp
          ld8.fill.nt1 t0 = [loc7], CxIntS0 - CxIntT0
          ;;

          ld8.fill.nt1 t1 = [loc6], CxIntS1 - CxIntT1
          ld8.fill.nt1 s0 = [loc7], CxIntS2 - CxIntS0
          ;;

          ld8.fill.nt1 s1 = [loc6], CxIntS3 - CxIntS1
          ld8.fill.nt1 s2 = [loc7], CxIntV0 - CxIntS2
          ;;

          ld8.fill.nt1 s3 = [loc6], CxIntTeb - CxIntS3
          ld8.fill.nt1 v0 = [loc7], CxIntT2 - CxIntV0
          ;;

          ld8.fill.nt1 teb = [loc6], CxIntT3 - CxIntTeb
          ld8.fill.nt1 t2 = [loc7], CxIntSp - CxIntT2
          ;;

          ld8.fill.nt1 t3 = [loc6], CxIntT4 - CxIntT3
          ld8.fill.nt1 loc4 = [loc7], CxIntT5 - CxIntSp   // saved sp
          ;;

          ld8.fill.nt1 t4 = [loc6], CxIntT6 - CxIntT4
          ld8.fill.nt1 t5 = [loc7], CxIntT7 - CxIntT5
          ;;

          ld8.fill.nt1 t6 = [loc6], CxIntT8 - CxIntT6
          ld8.fill.nt1 t7 = [loc7], CxIntT9 - CxIntT7
          ;;

          ld8.fill.nt1 t8 = [loc6], CxIntT10 - CxIntT8
          ld8.fill.nt1 t9 = [loc7], CxIntT11 - CxIntT9
          ;;

          ld8.fill.nt1 t10 = [loc6], CxIntT12 - CxIntT10
          ld8.fill.nt1 t11 = [loc7], CxIntT13 - CxIntT11
          ;;

          ld8.fill.nt1 t12 = [loc6], CxIntT14 - CxIntT12
          ld8.fill.nt1 t13 = [loc7], CxIntT15 - CxIntT13
          ;;

          ld8.fill.nt1 t14 = [loc6], CxIntT16 - CxIntT14
          ld8.fill.nt1 t15 = [loc7], CxIntT17 - CxIntT15
          ;;

          ld8.fill.nt1 t16 = [loc6], CxIntT18 - CxIntT16
          ld8.fill.nt1 t17 = [loc7], CxIntT19 - CxIntT17
          ;;

          ld8.fill.nt1 t18 = [loc6], CxIntT20 - CxIntT18
          ld8.fill.nt1 t19 = [loc7], CxIntT21 - CxIntT19
          ;;

          ld8.fill.nt1 t20 = [loc6], CxIntT22 - CxIntT20
          ld8.fill.nt1 t21 = [loc7]
          ;;

  //
  // Clear PSR.i and PSR.ic, then load the interruption registers that the
  // closing rfi consumes.  Note that t0 is reused as scratch from here on,
  // so the IntT0 value filled above does not survive.
  //

          rsm         1 << PSR_I
          ld8.fill.nt1 t22 = [loc6]
          ;;

          rsm         1 << PSR_IC
          movl        t0 = 1 << IFS_V
          ;;

          mov         ar.fpsr = loc8          // set fpsr
          mov         ar.unat = loc0          // final ar.unat from the context
          ;;

          srlz.d
          or          loc10 = t0, loc10       // set ifs valid bit
          ;;

          mov         cr.iip = loc9
          mov         cr.ifs = loc10
          bsw.0
          ;;

  //
  // After bsw.0, r16-r20 are used as shadow registers to carry values
  // across the alloc below, which discards the stacked locals.
  //

          mov         cr.dcr = loc1
          mov         r17 = loc2              // put BSP in a shadow reg
          or          r16 = 0x3, loc3         // put RSE in eager mode
          mov         ar.rsc = r0             // put RSE in enforced lazy
          nop.m       0
          add         r20 = CxStIPSR, a0
          ;;

          ld8.nt1     r20 = [r20]             // load IPSR
          mov         r18 = loc4              // put SP in a shadow reg
          mov         r19 = loc5              // put RNaTs in a shadow reg
          ;;

          alloc       t0 = 0, 0, 0, 0
          mov         cr.ipsr = r20
          mov         sp = r18
          ;;

          loadrs
          ;;

          mov         ar.bspstore = r17
          nop.i       0
          ;;

          mov         ar.rnat = r19           // set rnat register
          mov         ar.rsc = r16            // restore RSC
          bsw.1
          ;;

          invala
          nop.i       0
          rfi
          ;;

  //
  // This label is used to determine the size of the wake dispatcher code in the
  // process of copying it to a free page.
  //
  // NOTE(review): LEAF_EXIT names WakeDispatcherEndLocal while LEAF_ENTRY
  // names WakeDispatcherStartLocal; kept exactly as in the original -
  // confirm that the macro pair tolerates the mismatch.
  //

  WakeDispatcherEndLocal::

          LEAF_EXIT(WakeDispatcherEndLocal)
  411. .sdata // the two words below are emitted into the .sdata section
  412. WakeDispatcherStart:: // 4-byte section-relative offset of the dispatcher's first label
  413. data4 @secrel(WakeDispatcherStartLocal)
  414. WakeDispatcherEnd:: // 4-byte section-relative offset of the label just past the dispatcher's last instruction
  415. data4 @secrel(WakeDispatcherEndLocal) // presumably End - Start gives the code size used when copying it to a free page; verify against the caller