Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

548 lines
18 KiB

  1. ;/* *************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;** *************************************************************************
  13. ;*/
  14. ;--------------------------------------------------------------------------;
  15. ;
  16. ; $Header: S:\h26x\src\dec\d35bimot.asv 1.5 08 Mar 1996 16:46:04 AGUPTA2 $
  17. ; $Log: S:\h26x\src\dec\d35bimot.asv $
  18. ;//
  19. ;// Rev 1.5 08 Mar 1996 16:46:04 AGUPTA2
  20. ;// Added segment declaration to place the rtn in the right segment.
  21. ;//
  22. ;//
  23. ;// Rev 1.4 19 Jan 1996 17:51:16 RMCKENZX
  24. ;// changed local variables to live on the stack
  25. ;//
  26. ;// Rev 1.3 19 Jan 1996 13:30:34 RMCKENZX
  27. ;// Added rounding to int-half bidirectional prediction
  28. ;//
  29. ;// Rev 1.2 05 Jan 1996 15:58:36 RMCKENZX
  30. ;// Eliminated separate chroma entry point, using a
  31. ;// block number check instead
  32. ;//
  33. ;// Rev 1.1 27 Dec 1995 14:35:50 RMCKENZX
  34. ;// Added copyright notice
  35. ;
  36. ; D35BiMot.asm
  37. ;
  38. ; Description:
  39. ; This module does bi-directional motion compensated prediction for
  40. ; B frames. It is called after forward prediction has been computed
  41. ; and will average in the backward prediction for those pels where
  42. ; the backward motion vector points inside of the referenced P frame.
  43. ;
  44. ; Routines: prototypes in:
  45. ; H263BiMotionComp none
  46. ;
  47. ; Data
  48. ; This routine assumes that the PITCH is 384.
  49. ;
  50. ;--------------------------------------------------------------------------;
  51. ;--------------------------------------------------------------------------;
  52. ;
  53. ; $Header: S:\h26x\src\dec\d35bimot.asv 1.5 08 Mar 1996 16:46:04 AGUPTA2 $
  54. ; $Log$
  55. ;//
  56. ;// Rev 1.0 22 Nov 1995 13:33:52 RMCKENZX
  57. ;// Initial revision.
  58. ;
  59. ;--------------------------------------------------------------------------;
  .586

  ; Byte distance between vertically adjacent pels; the loops below add it
  ; to both the reference pointer and the destination pointer once per row.
  PITCH           =       384

  ;
  ; Stack frame, as seen once the prologue of _H263BiMotionComp has run
  ; (four register pushes followed by "sub esp, 24"):
  ;
  ;   [esp+04] .. [esp+20]   scratch locals
  ;   [esp+24] .. [esp+36]   saved ebp, esi, ebx, edi
  ;   [esp+40]               return address
  ;   [esp+44] .. [esp+60]   caller's arguments
  ;
  ; These are text equates relative to esp, so they are only valid while
  ; esp sits at its post-prologue value (i.e. not between a push and the
  ; matching pop).
  ;

  ; scratch locals
  uColStart       EQU     <[esp+04]>
  uColEnd         EQU     <[esp+08]>
  uRowStart       EQU     <[esp+12]>
  uRowEnd         EQU     <[esp+16]>
  iColCount       EQU     <[esp+20]>

  ; arguments
  uDst            EQU     <[esp+44]>
  uRef            EQU     <[esp+48]>
  mvx             EQU     <[esp+52]>
  mvy             EQU     <[esp+56]>
  iNum            EQU     <[esp+60]>

  ; Declare the code segment once with its full attributes, then reopen it.
  IACODE2 SEGMENT PARA USE32 PUBLIC 'CODE'
  IACODE2 ENDS

  IACODE2 SEGMENT

  PUBLIC  _H263BiMotionComp
  76. ;--------------------------------------------------------------------------;
  77. ;
  78. ; Name:
  79. ; H263BiMotionComp(U32, U32, I32, I32, I32)
  80. ;
  81. ; Inputs -- C calling convention:
  82. ; uDst flat pointer to block's forward predicted values.
  83. ; uRef flat pointer to backward predicted values.
  84. ; mvx x component of backward motion vector for this block.
  85. ; mvy y component of backward motion vector for this block.
  86. ; iNum block number.
  87. ;
  88. ; Returns:
  89. ; updates the values pointed to by uDst.
  90. ;
  91. ;--------------------------------------------------------------------------;
  92. ;
  93. ; Version: 2.0
  94. ; Date: 9 November 1995
  95. ; Author: R. McKenzie
  96. ;
  97. ;--------------------------------------------------------------------------;
  98. ;
  99. ; set up
  100. ;
  ;
  ; _H263BiMotionComp -- entry point and luma set-up.
  ;
  ; Saves edi/ebx/esi/ebp and reserves 24 bytes of stack locals, then:
  ;   * iNum >= 4 : branches to Chroma;
  ;   * otherwise : biases the motion vector by the block's position
  ;                 (+16 in x when bit 0 of iNum is set, +16 in y when
  ;                 bit 1 of iNum is set), leaves without touching the
  ;                 destination when either biased component is outside
  ;                 -14..30, clamps the usable column and row ranges to
  ;                 0..7 and jumps to H263BBlockAdjust.
  ;
  ; State handed to H263BBlockAdjust:
  ;   esi = uRef + (mvx >> 1) + PITCH*(mvy >> 1)
  ;   edi = uDst
  ;   eax = mvx, ebx = mvy          (only their low bits are examined there)
  ;   uColStart, uColEnd, uRowStart, uRowEnd = clamped index ranges
  ;
  _H263BiMotionComp:
          push    edi
          push    ebx
          push    esi
          push    ebp
          sub     esp, 24                 ; stack locals (uColStart .. iColCount)

          mov     ebx, mvy
          mov     edx, iNum
          cmp     edx, 4                  ; block numbers 4 and up are chroma
          jge     Chroma

  ; adjusted_mvx = mvx + 16*(iNum & 1), adjusted_mvy = mvy + 8*(iNum & 2)
          mov     ecx, edx
          and     edx, 2
          and     ecx, 1
          mov     eax, mvx
          sal     ecx, 4                  ; 0 or 16
          lea     ebx, [ebx+8*edx]        ; adjusted_mvy (lea avoids a shift)
          add     eax, ecx                ; adjusted_mvx

  ; Nothing to average in unless both components lie in -14..30.
          cmp     eax, -14
          jl      hasta_la_vista_baby
          cmp     eax, 30
          jg      hasta_la_vista_baby
          cmp     ebx, -14
          jl      hasta_la_vista_baby
          cmp     ebx, 30
          jg      hasta_la_vista_baby

  ; uColStart = max(0, (1 - adjusted_mvx) >> 1)
          mov     esi, 1
          sub     esi, eax
          sar     esi, 1                  ; Start
          mov     ecx, esi
          sar     esi, 31                 ; 0ffffffffh if Start < 0, else 0
          xor     esi, -1                 ; invert the mask
          and     esi, ecx                ; max(0, Start)
          mov     uColStart, esi

  ; uColEnd = min(7, (30 - adjusted_mvx) >> 1), computed as min(0, End-7) + 7
          mov     edi, 30
          sub     edi, eax
          sar     edi, 1                  ; End
          sub     edi, 7
          mov     edx, edi
          sar     edi, 31                 ; 0ffffffffh if End < 7, else 0
          and     edi, edx                ; min(0, End-7)
          add     edi, 7
          mov     uColEnd, edi

  ; uRowStart = max(0, (1 - adjusted_mvy) >> 1)
          mov     eax, 1
          sub     eax, ebx
          sar     eax, 1                  ; Start
          mov     ecx, eax
          sar     eax, 31                 ; 0ffffffffh if Start < 0, else 0
          xor     eax, -1                 ; invert the mask
          and     ecx, eax                ; max(0, Start)
          mov     uRowStart, ecx

  ; uRowEnd = min(7, (30 - adjusted_mvy) >> 1)
          mov     ebp, 30
          sub     ebp, ebx
          sar     ebp, 1                  ; End
          sub     ebp, 7
          mov     edx, ebp
          sar     ebp, 31                 ; 0ffffffffh if End < 7, else 0
          and     ebp, edx                ; min(0, End-7)
          add     ebp, 7
          mov     uRowEnd, ebp

  ; pBackRef = uRef + (mvx >> 1) + PITCH*(mvy >> 1), using the unbiased
  ; vector.  The row offset is taken from PITCH itself so the constant has
  ; a single definition.
          mov     esi, uRef
          mov     eax, mvx
          sar     eax, 1
          add     esi, eax
          mov     ebx, mvy
          sar     ebx, 1
          imul    ebx, ebx, PITCH
          add     esi, ebx

          mov     eax, mvx                ; parity of mvx is tested later
          mov     ebx, mvy                ; parity of mvy is tested later
          mov     edi, uDst
          jmp     H263BBlockAdjust        ; do the actual averaging
  ;
  ; Chroma -- set-up for blocks with iNum >= 4.
  ;
  ; Same work as the luma path, but the motion vector is used unbiased and
  ; the limit is 14 instead of 30: leaves without touching the destination
  ; when either component is outside -14..14, otherwise clamps the usable
  ; column and row ranges to 0..7.
  ;
  ; Falls through into H263BBlockAdjust with:
  ;   esi = uRef + (mvx >> 1) + PITCH*(mvy >> 1)
  ;   edi = uDst
  ;   eax = mvx, ebx = mvy
  ;   uColStart, uColEnd, uRowStart, uRowEnd = clamped index ranges
  ;
  Chroma:
          mov     eax, mvx
          mov     ebx, mvy

          cmp     eax, -14
          jl      hasta_la_vista_baby
          cmp     eax, 14
          jg      hasta_la_vista_baby
          cmp     ebx, -14
          jl      hasta_la_vista_baby
          cmp     ebx, 14
          jg      hasta_la_vista_baby

  ; uColStart = max(0, (1 - mvx) >> 1)
          mov     esi, 1
          sub     esi, eax
          sar     esi, 1                  ; Start
          mov     ecx, esi
          sar     esi, 31                 ; 0ffffffffh if Start < 0, else 0
          xor     esi, -1                 ; invert the mask
          and     esi, ecx                ; max(0, Start)
          mov     uColStart, esi

  ; uColEnd = min(7, (14 - mvx) >> 1), computed as min(0, End-7) + 7
          mov     edi, 14
          sub     edi, eax
          sar     edi, 1                  ; End
          sub     edi, 7
          mov     edx, edi
          sar     edi, 31                 ; 0ffffffffh if End < 7, else 0
          and     edi, edx                ; min(0, End-7)
          add     edi, 7
          mov     uColEnd, edi

  ; uRowStart = max(0, (1 - mvy) >> 1)
          mov     eax, 1
          sub     eax, ebx
          sar     eax, 1                  ; Start
          mov     ecx, eax
          sar     eax, 31                 ; 0ffffffffh if Start < 0, else 0
          xor     eax, -1                 ; invert the mask
          and     ecx, eax                ; max(0, Start)
          mov     uRowStart, ecx

  ; uRowEnd = min(7, (14 - mvy) >> 1)
          mov     ebp, 14
          sub     ebp, ebx
          sar     ebp, 1                  ; End
          sub     ebp, 7
          mov     edx, ebp
          sar     ebp, 31                 ; 0ffffffffh if End < 7, else 0
          and     ebp, edx                ; min(0, End-7)
          add     ebp, 7
          mov     uRowEnd, ebp

  ; pBackRef = uRef + (mvx >> 1) + PITCH*(mvy >> 1).  The row offset is
  ; taken from PITCH itself so the constant has a single definition.
          mov     esi, uRef
          mov     eax, mvx
          sar     eax, 1
          add     esi, eax
          sar     ebx, 1                  ; ebx still holds mvy here
          imul    ebx, ebx, PITCH
          add     esi, ebx

          mov     eax, mvx                ; parity of mvx is tested next
          mov     ebx, mvy                ; parity of mvy is tested next
          mov     edi, uDst
  ; execution continues at H263BBlockAdjust
  243. ;--------------------------------------------------------------------------;
  244. ;
  245. ; Name:
  246. ; H263BBlockAdjust
  247. ;
  248. ; Inputs:
  249. ; pBiRef edi flat pointer to block's forward predicted values
  250. ; pBackRef esi flat pointer to block's backward predicted values as
  251. ; adjusted by the motion vectors
  252. ; mvx eax x component of backward motion vector,
  253. ; used for parity only
  254. ; mvy ebx y component of backward motion vector,
  255. ; used for parity only
  256. ; uColStart starting index for columns
  257. ; uColEnd ending index for columns
  258. ; uRowStart starting index for rows
  259. ; uRowEnd ending index for rows
  260. ;
  261. ; Returns:
  262. ; Updated values pointed to by pBiRef.
  263. ;
  264. ; Notes:
  265. ; 1. This routine is reached from H263BiMotionComp, either by a jump
  266. ; (luma path) or by falling through (Chroma path), and performs the return for that routine.
  267. ;
  268. ; 2. The values of the starting and ending indices MUST satisfy:
  269. ; 0 <= Start <= End <= 7
  270. ;
  271. ; 3. Only the last (least significant) bits of mvx and mvy are used
  272. ; to determine whether we need to use half-pel or full-pel
  273. ; prediction.
  274. ;
  275. ; 4. The address in pBackRef must have been adjusted by the motion
  276. ; vectors to point to the target pels.
  277. ;
  278. ;--------------------------------------------------------------------------;
  279. ;
  280. ; Version: 1.1
  281. ; Date: 10 November 1995
  282. ; Author: R. McKenzie
  283. ;
  284. ;--------------------------------------------------------------------------;
  ;--------------------------------------------------------------------;
  ; H263BBlockAdjust -- set-up shared by the four averaging loops.
  ;
  ; On entry:
  ;   edi = destination (forward prediction), esi = adjusted reference,
  ;   eax = mvx, ebx = mvy, and uColStart/uColEnd/uRowStart/uRowEnd set.
  ;
  ; On exit to a loop:
  ;   esi, edi  advanced by PITCH*uRowStart + uColEnd
  ;   ebp       = uColStart - uColEnd, also saved in iColCount; the loops
  ;               count it upward, so it is expected to be <= 0
  ;   ecx       = 0
  ;   eax       = mvx & 1
  ;   ebx       = mvy & 1 when mvx is odd; even_mvx masks it otherwise
  ;
  ; The row offset is computed from PITCH itself so the constant has a
  ; single definition.
  ;--------------------------------------------------------------------;
  H263BBlockAdjust:
          mov     ecx, uRowStart
          imul    ecx, ecx, PITCH         ; byte offset of the first row
          mov     edx, uColEnd
          mov     ebp, uColStart
          sub     ebp, edx                ; uColStart - uColEnd
          mov     iColCount, ebp          ; inner loop starting position
          add     edx, ecx                ; PITCH*row + uColEnd
          xor     ecx, ecx                ; loops rely on a clean ecx
          add     esi, edx                ; pBackRef += PITCH*row + uColEnd
          add     edi, edx                ; pBiRef   += PITCH*row + uColEnd

  ; Dispatch on the parity (half-pel bit) of each vector component.
          and     eax, 1
          je      even_mvx
          and     ebx, 1
          je      odd_even
  ; both odd: execution continues at odd_odd
  ;
  ; odd_odd -- mvx odd and mvy odd (half-pel motion in both directions).
  ;
  ; For each pel the backward prediction is (a + b + c + d + 2) >> 2 over
  ; the 2x2 reference bytes at [esi+ebp], [esi+ebp+1], [esi+ebp+PITCH] and
  ; [esi+ebp+PITCH+1]; the destination byte becomes (dst + pred) >> 1.
  ;
  ; The column loop is software pipelined.  Stage I gathers the four
  ; reference bytes of the current column, stage II finishes and stores
  ; the previous column.
  ;
  ;   ebp    column index: starts at iColCount, incremented per column
  ;   ecx    stage I sum, then stage II rounded average
  ;   edx    destination byte / result (upper 24 bits stay zero)
  ;   [esp]  row counter while a row is in flight -- esp is 4 lower
  ;          between the push and the pop, so the stack equates are not
  ;          used inside that window
  ;
  odd_odd:
          xor     edx, edx
          mov     ebx, uRowStart
          mov     eax, uRowEnd
          sub     eax, ebx                ; rows remaining after this one

  oo_row_start:
          push    eax                     ; park the row counter
          mov     al, [esi+ebp]           ; I
          mov     bl, [esi+ebp+1]         ; I
          add     eax, ebx                ; I
          mov     bl, [esi+ebp+PITCH]     ; I
          add     eax, ebx                ; I
          mov     cl, [esi+ebp+PITCH+1]   ; I
          add     ecx, eax                ; I   ecx = a + b + c + d
          inc     ebp                     ; I   sets the flags for the branch
          mov     eax, 0                  ; I   mov, not xor: flags must survive
          jg      oo_row_finish           ; only one column in this row

  oo_pel_loop:
          add     ecx, 2                  ; II
          mov     al, [esi+ebp]           ; I
          shr     ecx, 2                  ; II  rounded 4-pel average
          mov     bl, [esi+ebp+1]         ; I
          mov     dl, [edi+ebp-1]         ; II  forward prediction
          add     eax, ebx                ; I
          add     edx, ecx                ; II
          mov     bl, [esi+ebp+PITCH]     ; I
          shr     edx, 1                  ; II  (dst + pred) >> 1
          add     eax, ebx                ; I
          mov     [edi+ebp-1], dl         ; II
          mov     cl, [esi+ebp+PITCH+1]   ; I
          add     ecx, eax                ; I
          inc     ebp                     ;     sets the flags for the branch
          mov     eax, 0                  ; I   mov, not xor: flags must survive
          jle     oo_pel_loop

  oo_row_finish:
          add     ecx, 2                  ; II
          add     esi, PITCH              ;     next reference row
          shr     ecx, 2                  ; II
          mov     dl, [edi+ebp-1]         ; II
          add     edx, ecx                ; II
          add     edi, PITCH              ;     next destination row
          shr     edx, 1                  ; II
          pop     eax                     ;     recover the row counter
          mov     [edi+ebp-1-PITCH], dl   ; II  store into the row just left
          mov     ebp, iColCount          ;     rewind the column index
          dec     eax
          jge     oo_row_start

          jmp     hasta_la_vista_baby     ; release the locals and return
  ;
  ; odd_even -- mvx odd, mvy even (horizontal half-pel, vertical full-pel).
  ;
  ; For each pel the backward prediction is (a + b + 1) >> 1 over the
  ; reference bytes at [esi+ebp] and [esi+ebp+1]; the destination byte
  ; becomes (dst + pred) >> 1.
  ;
  ; Software pipelined like the other loops: stage I loads the two
  ; reference bytes of the current column, stage II finishes and stores
  ; the previous column.
  ;
  ;   ebp    column index: starts at iColCount, incremented per column
  ;   dl     row counter: uRowStart - uRowEnd, counted up past zero
  ;   edi    kept one row behind between rows; each row start adds PITCH
  ;
  odd_even:
          sub     edi, PITCH              ; compensated at the top of each row
          mov     dl, BYTE PTR uRowStart
          sub     dl, BYTE PTR uRowEnd    ; row counter

  oe_row_start:
          mov     al, [esi+ebp]           ; I
          mov     bl, [esi+ebp+1]         ; I
          add     edi, PITCH
          inc     ebp                     ;     sets the flags for the branch
          lea     ecx, [eax+ebx+1]        ; I   lea leaves the flags alone
          jg      oe_row_finish           ; only one column in this row

  oe_pel_loop:
          shr     ecx, 1                  ; II  rounded 2-pel average
          mov     al, [edi+ebp-1]         ; II  forward prediction
          add     ecx, eax                ; II
          mov     al, [esi+ebp]           ; I
          shr     ecx, 1                  ; II  (dst + pred) >> 1
          mov     bl, [esi+ebp+1]         ; I
          mov     [edi+ebp-1], cl         ; II
          inc     ebp                     ;     sets the flags for the branch
          lea     ecx, [eax+ebx+1]        ; I   lea leaves the flags alone
          jle     oe_pel_loop

  oe_row_finish:
          shr     ecx, 1                  ; II
          mov     al, [edi+ebp-1]         ; II
          add     ecx, eax                ; II
          add     esi, PITCH              ;     next reference row
          shr     ecx, 1                  ; II
          inc     dl                      ;     flags consumed by the jle below
          mov     [edi+ebp-1], cl         ; II
          mov     ebp, iColCount          ;     rewind the column index
          jle     oe_row_start

          jmp     hasta_la_vista_baby     ; release the locals and return
  ;
  ; even_mvx -- mvx is even; choose the loop from the parity of mvy.
  ;
  even_mvx:
          and     ebx, 1
          je      even_even

  ;
  ; even_odd -- mvx even, mvy odd (horizontal full-pel, vertical half-pel).
  ;
  ; For each pel the backward prediction is (a + c + 1) >> 1 over the
  ; reference bytes at [esi+ebp] and [esi+ebp+PITCH]; the destination byte
  ; becomes (dst + pred) >> 1.
  ;
  ; Software pipelined: stage I loads the two reference bytes of the
  ; current column, stage II finishes and stores the previous column.
  ;
  ;   ebp    column index: starts at iColCount, incremented per column
  ;   dl     row counter: uRowStart - uRowEnd, counted up past zero
  ;   edi    kept one row behind between rows; each row start adds PITCH
  ;
  even_odd:
          sub     edi, PITCH              ; compensated at the top of each row
          mov     dl, BYTE PTR uRowStart
          sub     dl, BYTE PTR uRowEnd    ; row counter

  eo_row_start:
          mov     al, [esi+ebp]           ; I
          mov     bl, [esi+ebp+PITCH]     ; I
          add     edi, PITCH
          inc     ebp                     ;     sets the flags for the branch
          lea     ecx, [eax+ebx+1]        ; I   lea leaves the flags alone
          jg      eo_row_finish           ; only one column in this row

  eo_pel_loop:
          shr     ecx, 1                  ; II  rounded 2-pel average
          mov     al, [edi+ebp-1]         ; II  forward prediction
          add     ecx, eax                ; II
          mov     al, [esi+ebp]           ; I
          shr     ecx, 1                  ; II  (dst + pred) >> 1
          mov     bl, [esi+ebp+PITCH]     ; I
          mov     [edi+ebp-1], cl         ; II
          inc     ebp                     ;     sets the flags for the branch
          lea     ecx, [eax+ebx+1]        ; I   lea leaves the flags alone
          jle     eo_pel_loop

  eo_row_finish:
          shr     ecx, 1                  ; II
          mov     al, [edi+ebp-1]         ; II
          add     ecx, eax                ; II
          add     esi, PITCH              ;     next reference row
          shr     ecx, 1                  ; II
          inc     dl                      ;     flags consumed by the jle below
          mov     [edi+ebp-1], cl         ; II
          mov     ebp, iColCount          ;     rewind the column index
          jle     eo_row_start

          jmp     hasta_la_vista_baby     ; release the locals and return
  ;
  ; even_even -- mvx even and mvy even (full-pel motion in both directions).
  ;
  ; The reference byte at [esi+ebp] is the backward prediction as is; the
  ; destination byte becomes (ref + dst) >> 1, with no rounding term.
  ;
  ; Software pipelined: stage I loads the reference and destination bytes
  ; of the next column, stage II averages and stores the current one.
  ;
  ;   ebp    column index: starts at iColCount, counted up to zero
  ;   dl     row counter: uRowStart - uRowEnd, counted up past zero
  ;
  ; When the last row is done, execution runs on into the common exit code
  ; that follows this block.
  ;
  even_even:
          mov     dl, BYTE PTR uRowStart
          mov     cl, BYTE PTR uRowEnd
          sub     dl, cl                  ; row counter

  ee_row_start:
          mov     al, [esi+ebp]           ; I
          mov     bl, [edi+ebp]           ; I
          test    ebp, ebp
          je      ee_row_finish           ; only one column in this row

  ee_pel_loop:
          lea     ecx, [eax+ebx]          ; II  ref + dst
          mov     al, [esi+ebp+1]         ; I
          shr     ecx, 1                  ; II
          mov     bl, [edi+ebp+1]         ; I
          mov     [edi+ebp], cl           ; II
          inc     ebp
          jl      ee_pel_loop

  ee_row_finish:
          add     eax, ebx                ; II  ref + dst for the last column
          add     edi, PITCH              ;     next destination row
          shr     eax, 1                  ; II
          add     esi, PITCH              ;     next reference row
          mov     [edi+ebp-PITCH], al     ; II  store into the row just left
          mov     ebp, iColCount          ;     rewind the column index
          inc     dl
          jle     ee_row_start
  ;
  ; Common exit.
  ;
  ;   hasta_la_vista_baby   entered with the 24 bytes of locals still
  ;                         allocated; releases them first
  ;   bye_bye               entered with the locals already released
  ;
  ; Restores the four registers saved by the prologue, in reverse order of
  ; the pushes, and returns.  The routine is a bare label rather than a
  ; PROC, so there is no ENDP to close.
  ;
  hasta_la_vista_baby:
          add     esp, 24                 ; drop the stack locals

  bye_bye:
          pop     ebp
          pop     esi
          pop     ebx
          pop     edi
          ret

  ;--------------------------------------------------------------------------;
  IACODE2 ENDS

  END
  489. // bimot.asm page 9 1:41 PM, 11/21/95 //