mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
489 lines
18 KiB
489 lines
18 KiB
// TITLE("Stretch Blt")
|
|
//++
|
|
//
|
|
// Copyright (c) 1994 Microsoft Corporation
|
|
//
|
|
// Module Name:
|
|
//
|
|
// Str.s
|
|
//
|
|
// Abstract:
|
|
//
|
|
// Stretch blt DIB->DIB same format
|
|
//
|
|
// Author:
|
|
//
|
|
// Mark Enstrom (marke) 17-Oct-1994
|
|
//
|
|
// Environment:
|
|
//
|
|
// User mode.
|
|
//
|
|
// Revision History:
|
|
//
|
|
//--
|
|
|
|
#include "ksmips.h"
|
|
#include "gdimips.h"
|
|
|
|
|
|
SBTTL("vDirectStretch8")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vDirectStretch8(
|
|
// PSTR_BLT pStrBlt
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// StretchBlt from 8Bpp -> 8Bpp
|
|
//
|
|
// Arguments:
|
|
//
|
|
// a0 - PSTR_BLT, pointer to stretch blt param block
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None
|
|
//
|
|
//--
|
|
|
|
.struct 0
|
|
// Stack frame layout for vDirectStretch8: nine save slots for s0-s8,
// one pad word, then six local words. SpFrameLength is the total size.
SpS0: .space 4 // save slot for s0
|
|
SpS1: .space 4 // save slot for s1
|
|
SpS2: .space 4 // save slot for s2
|
|
SpS3: .space 4 // save slot for s3
|
|
SpS4: .space 4 // save slot for s4
|
|
SpS5: .space 4 // save slot for s5
|
|
SpS6: .space 4 // save slot for s6
|
|
SpS7: .space 4 // save slot for s7
|
|
SpS8: .space 4 // save slot for s8
|
|
// pad to align 0 mod 8
|
|
.space 4
|
|
SpLeftCase: .space 4 // number of leading dst bytes before DWORD alignment
|
|
SpRightCase: .space 4 // number of trailing dst bytes after the last full DWORD
|
|
SppjSrcScan: .space 4 // address of first src pixel of the current src scan
|
|
SpSrcIntStep: .space 4 // not referenced by the routine below
|
|
SpXCount: .space 4 // not referenced by the routine below
|
|
SpDstStride: .space 4 // not referenced by the routine below (stride is kept in s5)
|
|
SpFrameLength:
|
|
|
|
NESTED_ENTRY(vDirectStretch8, SpFrameLength, zero, _TEXT$00)
|
|
|
|
//
// Register usage once setup is complete (as established by the code below):
//
// a0 - pStrBlt parameter block (read throughout)
// a1 - current src pixel address
// a2 - current dst pixel address
// a3 - lDeltaSrc
// s4 - lDeltaSrc * ulYDstToSrcIntCeil (integer src step per dst scan)
// s5 - lDeltaDst - XCount (added to a2 at the end of every scan)
// s6 - remaining Y count
// s7 - XCount - left bytes - right bytes (byte span written as DWORDs)
// t8 - computed entry point into the RightCase table
// t9 - computed entry point into the LeftCase table
// t5 - Y DDA carry flag, consumed at ScanLineComplete
// t0/t1/t2 - X DDA int step / frac step / frac accumulator in the pixel loops
// s0-s3, s8, t3, v0, v1 - scratch
//
// NOTE(review): nothing here guards against a span narrower than the
// left + right edge bytes (s7 would go negative and the bne-terminated
// DWORD loops would not stop at v0) or against YDstCount == 0 (s6 would
// step past zero). Presumably the caller guarantees both - confirm.
//
subu sp,sp,SpFrameLength
|
|
|
|
sw s0,SpS0(sp)
|
|
sw s1,SpS1(sp)
|
|
sw s2,SpS2(sp)
|
|
sw s3,SpS3(sp)
|
|
sw s4,SpS4(sp)
|
|
sw s5,SpS5(sp)
|
|
sw s6,SpS6(sp)
|
|
sw s7,SpS7(sp)
|
|
sw s8,SpS8(sp)
|
|
|
|
PROLOGUE_END
|
|
|
|
//
|
|
// calculate starting addressing parameters
|
|
//
|
|
|
|
lw a1,str_pjSrcScan(a0) // load starting src scan line address
|
|
lw t0,str_XSrcStart(a0) // left most src pixel
|
|
lw a2,str_pjDstScan(a0) // load starting dst scan line address
|
|
lw t1,str_XDstStart(a0) // left most dst pixel
|
|
lw t2,str_XDstEnd(a0) // right edge
|
|
lw t4,str_lDeltaDst(a0) // load delta dst
|
|
lw t5,str_ulYDstToSrcIntCeil(a0) // load integer part of dst to src Y mapping
|
|
lw a3,str_lDeltaSrc(a0) // load and save lDeltaSrc in a3
|
|
lw t7,str_ulYFracAccumulator(a0) // load YFracAccum into t7 (t7 is not read again in this routine)
|
|
addu a1,a1,t0 // calc src pixel address
|
|
sw a1,SppjSrcScan(sp) // save src pixel address
|
|
subu t9,t2,t1 // calculate XCount
|
|
mult a3,t5 // calc int * DeltaSrc
|
|
subu s5,t4,t9 // calc DstStride = lDeltaDst -XCount
|
|
addu v0,a2,t2 // calc ending dst pixel address
|
|
and v0,v0,3 // calc right DWORD alignment case
|
|
sw v0,SpRightCase(sp) // save
|
|
mflo s4 // s4 = int src stride
|
|
addu a2,a2,t1 // calc left dst pixel address
|
|
and v1,a2,3 // calc left DWORD alignment case
|
|
subu t9,t9,v0 // subtract right alignment pixels from XCount
|
|
li t0,4 // left pixels = (4 - LeftCase) & 0x03
|
|
subu t0,t0,v1 // (4 - LeftCase)
|
|
and t1,t0,3 // (4 - LeftCase) & 0x03
|
|
sw t1,SpLeftCase(sp) // save left byte count
|
|
subu s7,t9,t1 // full DWORD count (in bytes: XCount - right - left)
|
|
lw s6,str_YDstCount(a0) // save Y count
|
|
|
|
|
|
//
|
|
// calc left and right jump table addresses
|
|
//
// Each LeftCase/RightCase entry below is exactly 8 instructions, so the
// entry point is the table base plus 32 bytes per skipped case. An index
// of 3 lands on the instruction immediately following the third case.
//
|
|
|
|
|
|
la t9,LeftCase1 // jump table starting address
|
|
subu v1,v1,1 // jump = 32 * ((Left - 1) & 0x03)
|
|
and v1,v1,3 // (Left-1) & 0x03
|
|
sll v1,v1,5 // 32 * (8 instructions)
|
|
addu t9,t9,v1 // t9 = left jump dest
|
|
|
|
|
|
la t8,RightCase1 // jump table starting address
|
|
li t0,3 // 3
|
|
subu t0,t0,v0 // 3 - Right
|
|
sll t0,t0,5 // 32 * (3 - right)
|
|
addu t8,t8,t0 // t8 = right jump dest
|
|
|
|
|
|
//
|
|
// can 2 scan lines be drawn from 1 src scan:
|
|
//
|
|
// YDstToSrcInt (s4) must be zero, YCount (s6) must be at least 2, and
|
|
// the fraction add to YFracAccum must not cause a carry
|
|
//
|
|
|
|
|
|
LoopTop:
|
|
|
|
|
|
lw t0,str_ulYDstToSrcFracCeil(a0) // frac part of dda
|
|
lw t1,str_ulYFracAccumulator(a0) // load y error accum
|
|
addu t2,t1,t0 // calc new error term
|
|
sltu t5,t2,t1 // was there a carry (save for end in t5)
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
// PRESERVE ORDERING and delay slots
|
|
//
// The sw instructions sit in the delay slots of branch-likely
// instructions, so each store is intended to take effect only when its
// branch to SingleLoop is taken.
//
|
|
|
|
|
|
.set noreorder
|
|
|
|
|
|
or v0,t5,s4 // v0 = carry | int stride, must be zero in order to do 2 scan lines at once
|
|
bnel v0,zero,SingleLoop
|
|
sw t2,str_ulYFracAccumulator(a0) // save yAccum only if branch taken
|
|
|
|
|
|
srl v0,s6,1 // s6/2, must not be zero for 2 loop case
|
|
beql v0,zero,SingleLoop // branch to single loop
|
|
sw t2,str_ulYFracAccumulator(a0) // save yAccum only if branch taken
|
|
|
|
|
|
.set reorder
|
|
|
|
|
|
//
|
|
// There was no carry from error term, add 1 extra frac term for the
|
|
// extra scan line that will be drawn (t2 is current accum)
|
|
//
|
|
|
|
|
|
addu v0,t2,t0 // v0 = YAccum + YFrac
|
|
sltu t5,v0,t2 // save carry in t5 (for end of scan line)
|
|
sw v0,str_ulYFracAccumulator(a0) // save yAccum
|
|
|
|
|
|
//
|
|
// Double loop, write 2 destination scan lines from 1 source
|
|
//
|
|
// load left case and X DDA variables
|
|
//
|
|
|
|
|
|
lw v1,SpLeftCase(sp)
|
|
lw t4,str_lDeltaDst(a0)
|
|
lw t0,str_ulXDstToSrcIntCeil(a0)
|
|
lw t1,str_ulXDstToSrcFracCeil(a0)
|
|
lw t2,str_ulXFracAccumulator(a0)
|
|
|
|
|
|
addu t4,t4,a2 // next scan line dst address
|
|
beq v1,zero,20f // no left edge bytes, go straight to DWORD section
|
|
10:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // carry out of frac add (sum wrapped below addend)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
sb v0,0(t4) // store byte on next scan line
|
|
addu a2,a2,1 // increment dst
|
|
addu t4,t4,1 // increment dst
|
|
subu v1,v1,1 // dec left byte count
|
|
move t2,t3 // save accum
|
|
bne v1,zero,10b
|
|
|
|
|
|
20:
|
|
|
|
|
|
addu v0,a2,s7 // add DWORD count to current address (a2)
|
|
beq a2,v0,DualDwordLoopEnd // make sure at least 1 DWORD needs to be stored
|
|
|
|
|
|
DualDwordAlignedLoop:
|
|
|
|
|
|
//
// Four DDA steps per iteration; t2 and t3 alternate as accumulator and
// temporary so no register move is needed between steps.
//
lbu s0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry (t3 is accum, t2 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s1,0(a1) // get src byte
|
|
addu t2,t1,t3 // accumulate 1 more frac part
|
|
sltu s8,t2,t1 // fake carry (t2 is accum, t3 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s2,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry (t3 is accum, t2 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s3,0(a1) // get src byte
|
|
addu t2,t1,t3 // accumulate 1 more frac part
|
|
sltu s8,t2,t1 // fake carry (t2 is accum, t3 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
sll s3,s3,24 // build s3 | s2 | s1 | s0 DWORD
|
|
sll s2,s2,16 // build s3 | s2 | s1 | s0 DWORD
|
|
sll s1,s1,8 // build s3 | s2 | s1 | s0 DWORD
|
|
|
|
|
|
or s0,s0,s1 // combine DWORD
|
|
or s0,s0,s2
|
|
or s0,s0,s3
|
|
|
|
|
|
sw s0,0(a2) // store
|
|
sw s0,0(t4) // store to next scan line
|
|
addu a2,a2,4 // inc pjDst
|
|
addu t4,t4,4 // inc pjDst + lDeltaDst
|
|
bne a2,v0,DualDwordAlignedLoop
|
|
|
|
|
|
//
|
|
// right edge case
|
|
//
|
|
|
|
|
|
DualDwordLoopEnd:
|
|
|
|
|
|
lw v1,SpRightCase(sp) // get right byte count
|
|
|
|
|
|
beq v1,zero,20f // no right edge bytes
|
|
|
|
|
|
10:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
sb v0,0(t4) // store byte on next scan line
|
|
addu a2,a2,1 // increment dst
|
|
addu t4,t4,1 // increment dst
|
|
subu v1,v1,1 // decrement byte count
|
|
move t2,t3 // save accum
|
|
bne v1,zero,10b
|
|
|
|
|
|
20:
|
|
|
|
|
|
//
|
|
// reduce YCount by one extra
|
|
//
|
|
// increment dst 1 extra scan line (replace pjDst[a2] with t4)
|
|
// jump to end of scan line
|
|
//
|
|
|
|
|
|
|
|
|
|
subu s6,s6,1 // YCount--
|
|
move a2,t4 // continue from the end of the second dst scan
|
|
beq zero,zero,ScanLineComplete // unconditional branch
|
|
|
|
|
|
SingleLoop:
|
|
|
|
|
|
//
|
|
// load X DDA variables
|
|
//
|
|
|
|
|
|
lw t0,str_ulXDstToSrcIntCeil(a0)
|
|
lw t1,str_ulXDstToSrcFracCeil(a0)
|
|
|
|
|
|
//
|
|
// jump into left alignment table
|
|
//
|
|
|
|
|
|
lw t2,str_ulXFracAccumulator(a0)
|
|
j t9 // enter LeftCase table at the precomputed offset
|
|
|
|
|
|
//
|
|
// calculated jump table for left alignment cases, .set noreorder required!
|
|
//
// Every case must remain exactly 8 instructions: the entry address in t9
// was computed as LeftCase1 + 32 * index. Execution falls through from
// the entered case into the cases after it.
//
|
|
|
|
|
|
.set noreorder
|
|
|
|
|
|
LeftCase1:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
LeftCase2:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
LeftCase3:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
addu v0,a2,s7 // add DWORD count to current address (a2); table index 3 enters here
|
|
|
|
|
|
|
|
|
|
.set reorder
|
|
|
|
|
|
beq v0,a2,DwordLoopEnd // make sure at least 1 DWORD needs written
|
|
|
|
|
|
DwordAlignedLoop:
|
|
|
|
|
|
lbu s0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry (t3 is accum, t2 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s1,0(a1) // get src byte
|
|
addu t2,t1,t3 // accumulate 1 more frac part
|
|
sltu s8,t2,t1 // fake carry (t2 is accum, t3 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s2,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry (t3 is accum, t2 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
lbu s3,0(a1) // get src byte
|
|
addu t2,t1,t3 // accumulate 1 more frac part
|
|
sltu s8,t2,t1 // fake carry (t2 is accum, t3 is temp)
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
|
|
|
|
sll s3,s3,24 // build s3 | s2 | s1 | s0 DWORD
|
|
sll s2,s2,16 // build s3 | s2 | s1 | s0 DWORD
|
|
sll s1,s1,8 // build s3 | s2 | s1 | s0 DWORD
|
|
|
|
|
|
or s0,s0,s1 // combine DWORD
|
|
or s0,s0,s2
|
|
or s0,s0,s3
|
|
|
|
|
|
sw s0,0(a2) // store
|
|
addu a2,a2,4 // inc a2
|
|
bne a2,v0,DwordAlignedLoop
|
|
|
|
|
|
//
|
|
// jump table to right case
|
|
//
|
|
|
|
|
|
DwordLoopEnd:
|
|
|
|
|
|
|
|
|
|
j t8 // enter RightCase table at the precomputed offset
|
|
|
|
|
|
|
|
|
|
//
|
|
// must set noreorder to preserve jump table
|
|
//
// Same layout rule as the left table: 8 instructions per case, entry
// address t8 = RightCase1 + 32 * (3 - right byte count). An offset of 96
// lands directly on ScanLineComplete.
//
|
|
|
|
|
|
|
|
|
|
.set noreorder
|
|
|
|
|
|
RightCase1:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
RightCase2:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
RightCase3:
|
|
|
|
|
|
lbu v0,0(a1) // get src byte
|
|
addu t3,t1,t2 // accumulate 1 more frac part
|
|
sltu s8,t3,t1 // fake carry
|
|
addu a1,a1,t0 // add int portion to a1
|
|
addu a1,a1,s8 // add carry
|
|
sb v0,0(a2) // store byte
|
|
addu a2,a2,1 // increment dst
|
|
move t2,t3 // save accum
|
|
|
|
|
|
ScanLineComplete:
|
|
|
|
|
|
//
|
|
// run Y DDA calculations and add src and dst scan line strides
|
|
//
|
|
// the Y accumulator carry was pre-calculated into t5
|
|
//
// NOTE(review): the lw/addu pair below is still inside the noreorder
// region and uses a1 in the instruction right after the load; this
// presumably relies on hardware load interlocks - confirm for the target.
//
|
|
|
|
|
|
lw a1,SppjSrcScan(sp) // load src start scan address
|
|
addu a1,a1,s4 // pjSrcScan + Int portion of stride
|
|
|
|
|
|
.set reorder
|
|
|
|
|
|
|
|
|
|
beq t5,zero,10f // check pre-calculated error term carry
|
|
|
|
|
|
addu a1,a1,a3 // add in 1 extra scan
|
|
|
|
|
|
10:
|
|
//
|
|
// save new starting scan line address
|
|
//
|
|
|
|
|
|
sw a1,SppjSrcScan(sp) // save pjSrcScan for next loop
|
|
|
|
|
|
//
|
|
// dec y count, inc dst address
|
|
//
|
|
|
|
|
|
subu s6,s6,1 // YCount--
|
|
addu a2,a2,s5 // pjDst + DstStride, start of next dst line
|
|
bne s6,zero,LoopTop
|
|
|
|
|
|
//
|
|
// restore saved registers and return
|
|
//
|
|
|
|
|
|
lw s0,SpS0(sp)
|
|
lw s1,SpS1(sp)
|
|
lw s2,SpS2(sp)
|
|
lw s3,SpS3(sp)
|
|
lw s4,SpS4(sp)
|
|
lw s5,SpS5(sp)
|
|
lw s6,SpS6(sp)
|
|
lw s7,SpS7(sp)
|
|
lw s8,SpS8(sp)
|
|
addu sp,sp,SpFrameLength
|
|
|
|
|
|
j ra
|
|
|
|
|
|
.end vDirectStretch8
|