Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2108 lines
80 KiB

// TITLE("Glyph expansion from 1bpp to 8bpp")
//++
//
// Copyright (c) 1994 Microsoft Corporation
//
// Module Name:
//
//
// Abstract:
//
// Expand a 1bpp buffer to 8bpp. Both opaque and transparent mode
//
//
// Author:
//
// Mark Enstrom (marke) 28-July-1994
//
// Environment:
//
// User mode.
//
// Revision History:
//
//--
#include "ksmips.h"
#include "gdimips.h"
.extern gTextLeftMask 4*8*2
.extern gTextRightMask 4*8*2
SBTTL("vSrcOpaqCopyS1D8_64")
//++
//
// VOID
// vSrcOpaqCopyS1D8_64(
// PBYTE pjSrcIn,
// LONG SrcLeft,
// LONG DeltaSrcIn,
// PBYTE pjDstIn,
// LONG DstLeft,
// LONG DstRight,
// LONG DeltaDstIn,
// LONG cy,
// ULONG uF,
// ULONG uB,
// SURFACE *pS
// );
//
// Routine Description:
//
// Opaque text expansion of a 1BPP buffer to 8Bpp destination
//
// Arguments:
//
// a0 - pjSrcIn - pointer to start of first src scan line
// a1 - SrcLeft - left (starting) src pixel
// a2 - DeltaSrcIn - src Scan line stride
// a3 - pjDstIn - pointer to start of first dst scan line
// DstLeft - left (starting) dst pixel
// DstRight - right(ending) dst pixel
// DeltaDstIn - dst scan line stride
// cy - Number of scan lines to copy
// uF - Foreground color
// uB - Background color
// pS - pointer to destination SURFACE
//
//
// Return Value:
//
// None
//
//--
.struct 0
OpExpTable: .space 32*4
OpS0: .space 4
OpS1: .space 4
OpS2: .space 4
OpS3: .space 4
OpS4: .space 4
.space 4 * 3
OpFrameLength:
OppjSrcIn: .space 4
OpSrcLeft: .space 4
OpDeltaSrcIn: .space 4
OppjDstIn: .space 4
OpDstLeft: .space 4
OpDstRight: .space 4
OpDeltaDstIn: .space 4
Opcy: .space 4
OpuF: .space 4
OpuB: .space 4
OpupS: .space 4
NESTED_ENTRY(vSrcOpaqCopyS1D8_64, OpFrameLength, zero)
subu sp,sp,OpFrameLength
sw s0,OpS0(sp)
sw s1,OpS1(sp)
sw s2,OpS2(sp)
sw s3,OpS3(sp)
sw s4,OpS4(sp)
PROLOGUE_END
//
// save params
//
sw a0,OppjSrcIn(sp) // save param
sw a1,OpSrcLeft(sp) // save param
sw a2,OpDeltaSrcIn(sp) // save param
sw a3,OppjDstIn(sp) // save param
//
// NOTE: (sp) points to a 16 (quadword aligned) ULONG text expansion table
//
//
// build color table:
// build a DWORD of Background pixels to start and store it
//
lbu v0,OpuF(sp) // load foreground color
lbu v1,OpuB(sp) // load background color
sll t0,v1,8 // jb00
or t0,v1,t0 // jbjb
sll t1,t0,16 // jbjb0000
or t0,t0,t1 // 0 0 0 0
sw t0,0(sp) // store 0
//
// now continually shift the 32 bit value left, and either or
// it Fg or Bg into the new right-most position. Note: 1BB pixel values
// are stored BIG-endian, so they need to be reversed
//
sll t0,t0,8
or t0,t0,v0 // 0 0 0 1
sw t0,8*8(sp) // store 1
sll t0,t0,8
or t0,t0,v1 // 0 0 1 0
sw t0,4*8(sp) // store 2
sll t0,t0,8
or t0,t0,v0 // 0 1 0 1
sw t0,10*8(sp) // store 5
sll t0,t0,8
or t0,t0,v1 // 1 0 1 0
sw t0,5*8(sp) // store 10
sll t0,t0,8
or t0,t0,v1 // 0 1 0 0
sw t0,2*8(sp) // store 4
sll t0,t0,8
or t0,t0,v0 // 1 0 0 1
sw t0,9*8(sp) // store 9
sll t0,t0,8
or t0,t0,v0 // 0 0 1 1
sw t0,12*8(sp) // store 3
sll t0,t0,8
or t0,t0,v0 // 0 1 1 1
sw t0,14*8(sp) // store 7
sll t0,t0,8
or t0,t0,v0 // 1 1 1 1
sw t0,15*8(sp) // store 15
sll t0,t0,8
or t0,t0,v1 // 1 1 1 0
sw t0, 7*8(sp) // store 14
sll t0,t0,8
or t0,t0,v0 // 1 1 0 1
sw t0,11*8(sp) // store 13
sll t0,t0,8
or t0,t0,v0 // 1 0 1 1
sw t0,13*8(sp) // store 11
sll t0,t0,8
or t0,t0,v1 // 0 1 1 0
sw t0,6*8(sp) // store 6
sll t0,t0,8
or t0,t0,v1 // 1 1 0 0
sw t0,3*8(sp) // store 12
sll t0,t0,8
or t0,t0,v1 // 1 0 0 0
sw t0,1*8(sp) // store 8
//
// perform the expansion in three pieces. First do the DWORD aligned
// middle. Next the start alignment, finally the ending alignment.
// The temporary 1Bpp buffer was generated so that each src byte
// willl expand to an even DWORD boundary.
//
// LeftAln = ((DstLeft + 7) & ~0x07);
// RightAln = ( DstRight & ~0x07);
//
lw t0,OpDstLeft(sp) // load DstLeft
lw t1,OpDstRight(sp) // load DstRight
addu t2,t0,7 // DstLeft + 7
li t8,-8 // ~0x07 = -8
and t2,t2,t8 // LeftAln = ((DstLeft + 7) & ~0x07)
and t3,t1,t8 // RightAln = ( DstRight & ~0x07)
//
// ending address offsets.
// EndOffset is the number of bytes from pjDst to pjDstEnd
// EndOffset4 is the number of 4 DWORDS blocks in EndOffset * 16
// EndOffset16 is the number of 16 DWORD blocks in EndOffset * 64
//
subu t5,t3,t2 // EndOffset = RightAln - LeftAln
li t8,-16 // ~0x0F
li t9,-64 // ~0x3F
and t6,t5,t8 // EndOffset4 = EndOffset & ~0x0F
and t7,t5,t9 // EndOffset8 = EndOffset & ~0x3F
//
// calculate src and dst address and dstEndY
//
lw t8,Opcy(sp) // cy
lw t9,OpDeltaDstIn(sp) // DeltaDstIn
addu a3,a3,t2 // pjDst = pjDstIn + LeftAln
addu a1,a1,7 // SrcLeft+7
mult t8,t9 // start mul for pjDstEndY = pjDst + cy * DeltaDstIn
srl a1,a1,3 // (SrcLeft+7) >> 3 = byte offset for src
addu a0,a0,a1 // pjSrc = pjSrcIn + (SrcLeft+7) >> 3;
srl t8,t5,3 // DeltaSrc = DeltaSrcIn - (EndOffset >> 3);
subu t8,a2,t8 // DeltaSrc = DeltaSrcIn - (EndOffset >> 3);
subu t9,t9,t5 // DeltaDst = DeltaDstIn - EndOffset
mflo a1 // cy * DeltaDstIn
addu a1,a3,a1 // pjDstEndY = pjDst + cy * DeltaDstIn,
// endinf scan line address
//
// if RightAln is greater than LeftAln, then The src text expansion covers
// at least 1 whole quadword. This is the requirement of this loop. If not,
// deal with the narrow blt below
//
slt t0,t2,t3 // skip main loop if RightAln <= LeftAln
beq t0,zero,Opaq8Partial
//
// Main loop register usage
//
// a0: pjSrc t0: pjDstEnd4 sp: TextTable t8: DeltaSrc
// a1: pjDstEndY t1: pjDstEnd16 t5: EndOffset t9: DeltaDst
// a2: pjDstEnd t2: t6: EndOffset4
// a3: pjDst t3: t7: EndOffset16
//
Opaq8MainLoop:
//
// if the scan line is QW aligned, use 64 bit stores, else use 32 bit stores.
// This alignment could change on a scan line basis because DeltaDst in only
// gaurenteed to be dword aligned. The 64 bit store loop is used because
// direct frame buffer output is always QW aligned.
//
and a2,a3,4
beq a2,zero,Opaq8QWMainLoop
//
// init scan line check addresses
//
addu a2,a3,t5 // pjDstEnd = pjDst + EndOffset
addu t0,a3,t6 // pjDstEnd4 = pjDst + EndOffset4
addu t1,a3,t7 // pjDstEnd8 = pjDst + EndOffset16
//
// 8 DWORD loop
//
beq a3,t1,20f
10:
lbu v0,0(a0) // c0 = *(pjSrc)
lbu v1,1(a0) // c1 = *(pjSrc+1)
lbu s0,2(a0) // c2 = *(pjSrc+2)
lbu s1,3(a0) // c3 = *(pjSrc+3)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f0,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f2,0(s2)
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f6,0(s2)
srl s2,s0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f8,0(s2)
and s2,s0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f10,0(s2)
srl s2,s1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f12,0(s2)
and s2,s1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f14,0(s2)
swc1 f0 ,0x00(a3) // store results
swc1 f2 ,0x04(a3) // store results
swc1 f4 ,0x08(a3) // store results
swc1 f6 ,0x0c(a3) // store results
swc1 f8 ,0x10(a3) // store results
swc1 f10 ,0x14(a3) // store results
swc1 f12 ,0x18(a3) // store results
swc1 f14 ,0x1c(a3) // store results
//
// load second 4 bytes
//
lbu v0,4(a0) // c0 = *(pjSrc+4)
lbu v1,5(a0) // c1 = *(pjSrc+5)
lbu s0,6(a0) // c2 = *(pjSrc+6)
lbu s1,7(a0) // c3 = *(pjSrc+7)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f0,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f2,0(s2)
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f6,0(s2)
srl s2,s0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make QWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f8,0(s2)
and s2,s0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f10,0(s2)
srl s2,s1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f12,0(s2)
and s2,s1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f14,0(s2)
//
// Store results, this will allow fastest video memory
// stores on MIPS "JAZZ" platform. This sequence must
// execute in order
//
.set noreorder
swc1 f0 ,0x20(a3) // store results
swc1 f2 ,0x24(a3) // store results
swc1 f4 ,0x28(a3) // store results
swc1 f6 ,0x2c(a3) // store results
swc1 f8 ,0x30(a3) // store results
swc1 f10,0x34(a3) // store results
swc1 f12,0x38(a3) // store results
swc1 f14,0x3c(a3) // store results
.set reorder
addu a3,a3,0x40 // pjDst += 64
addu a0,a0,8 // pjSrc += 8
bne a3,t1,10b
20:
//
// 4 DWORD loop
//
beq a3,t0,40f
30:
lbu v0,0(a0) // c0 = *(pjSrc)
lbu v1,1(a0) // c1 = *(pjSrc+1)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f0,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f2,0(s2)
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f6,0(s2)
//
// This sequence must execute in order
//
.set noreorder
swc1 f0,0x00(a3) // store results
swc1 f2,0x04(a3) // store results
swc1 f4,0x08(a3) // store results
swc1 f6,0x0c(a3) // store results
.set reorder
addu a3,a3,0x10 // pjDst += 16
addu a0,a0,2 // pjSrc += 2
bne a3,t0,30b
40:
//
// 2 DWORD loop
//
beq a3,a2,60f
50:
lbu v0,0(a0) // c0 = *(pjSrc)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lwc1 f0,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
lwc1 f2,0(s2)
swc1 f0,0x00(a3) // store results
swc1 f2,0x04(a3) // store results
addu a3,a3,0x08 // pjDst += 8
addu a0,a0,1 // pjSrc += 1
bne a3,a2,50b // loop till done
60:
//
// end of scan line, add stride to src and dst then check for end condition
//
addu a3,a3,t9 // pjDst += DeltaDst
addu a0,a0,t8 // pjSrc += DeltaSrc
bne a3,a1,Opaq8MainLoop // continue
//
// done, go to alignmend edge cases
//
beq zero,zero,Opaq8Partial // Done with main, go to start and end cases
Opaq8QWMainLoop:
//
// Destination is quadword aligned, use 64 bit stores
//
addu a2,a3,t5 // pjDstEnd = pjDst + EndOffset
addu t0,a3,t6 // pjDstEnd4 = pjDst + EndOffset4
addu t1,a3,t7 // pjDstEnd8 = pjDst + EndOffset16
//
// 8 DWORD loop
//
beq a3,t1,20f
10:
lbu v0,0(a0) // c0 = *(pjSrc)
lbu v1,1(a0) // c1 = *(pjSrc+1)
lbu s0,2(a0) // c2 = *(pjSrc+2)
lbu s1,3(a0) // c3 = *(pjSrc+3)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2) // lower dword
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2) // upper dword
dmtc1 t4,f0 // move to 64 bit f register
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qword offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f2 // move to 64 bit f register
srl s2,s0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,s0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f4 // move to 64 bit f register
srl s2,s1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,s1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f6 // move to 64 bit f register
//
// load second 4 bytes
//
lbu v0,4(a0) // c0 = *(pjSrc+4)
lbu v1,5(a0) // c1 = *(pjSrc+5)
lbu s0,6(a0) // c2 = *(pjSrc+6)
lbu s1,7(a0) // c3 = *(pjSrc+7)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3,(s2)
dmtc1 t4,f8 // move to 64 bit f register
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f10 // move to 64 bit f register
srl s2,s0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,s0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f12 // move to 64 bit f register
srl s2,s1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make DWORD offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,s1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f14 // move to 64 bit f register
//
// Store results, this will allow fastest video memory
// stores on MIPS "JAZZ" platform. This sequence must
// execute in order.
//
.set noreorder
sdc1 f0 ,0x00(a3) // store results
sdc1 f2 ,0x08(a3) // store results
sdc1 f4 ,0x10(a3) // store results
sdc1 f6 ,0x18(a3) // store results
sdc1 f8 ,0x20(a3) // store results
sdc1 f10,0x28(a3) // store results
sdc1 f12,0x30(a3) // store results
sdc1 f14,0x38(a3) // store results
.set reorder
addu a3,a3,0x40 // pjDst += 64
addu a0,a0,8 // pjSrc += 8
bne a3,t1,10b // loop till done
20:
//
// 4 DWORD loop
//
beq a3,t0,40f
30:
lbu v0,0(a0) // c0 = *(pjSrc)
lbu v1,1(a0) // c1 = *(pjSrc+1)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qword offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f0 // move to 64 bit f register
srl s2,v1,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qword offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v1,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f2 // move to 64 bit f register
sdc1 f0,0x00(a3) // store results
sdc1 f2,0x08(a3) // store results
addu a3,a3,0x10 // pjDst += 16
addu a0,a0,2 // pjSrc += 2
bne a3,t0,30b // loop till done
40:
//
// 2 DWORD loop
//
beq a3,a2,60f
50:
lbu v0,0(a0) // c0 = *(pjSrc)
srl s2,v0,4 // TextExpTable[c0 >> 4] , c0 >> 4
sll s2,s2,3 // make qword offset
addu s2,s2,sp // offset from base of table
lw t4,0(s2)
and s2,v0,0x0f // c0 & 0x0f
sll s2,s2,3 // qword offset
addu s2,s2,sp // offset from base
ldl t4,3(s2)
dmtc1 t4,f0 // move to 64 bit f register
sdc1 f0,0x00(a3) // store results
addu a3,a3,0x08 // pjDst += 8
addu a0,a0,1 // pjSrc += 1
bne a3,a2,50b // loop till done
60:
//
// end of scan line, add stride to src and dst then check for end condition
//
addu a3,a3,t9 // pjDst += DeltaDst
addu a0,a0,t8 // pjSrc += DeltaSrc
bne a3,a1,Opaq8MainLoop // continue
//
// partial QWORD start and end
//
Opaq8Partial:
lw a0,OpDstLeft(sp) // left edge
lw a1,OpDstRight(sp) // right edge
and a2,a0,7 // LeftAln = DstLeft & 0x07
and a3,a1,7 // RightAln = DstRight & 0x07
//
// do we have left alignment?
//
li t2,-8 // 0xFFFFFFFF8 mask
beq a2,zero,100f // if LeftAln == 0, skip
lw t6,OpSrcLeft(sp) // Left Src Edge
lw s0,OppjSrcIn(sp) // Src asddress
lw s1,OppjDstIn(sp) // Src asddress
lw t9,Opcy(sp) // cy
lw t8,OpDeltaDstIn(sp) // Delta Dst
lw t7,OpDeltaSrcIn(sp) // Delta Src
//
// pjSrc = pjSrcIn + (SrcLeft >> 3)
// pjDst = pjDstIn + (DstLeft & ~0x07)
// pjDstEndY = pjDst + cy * DeltaDstIn
//
mult t9,t8 // cy * DeltaDstIn
srl t6,t6,3 // SrcLeft >> 3
addu s0,s0,t6 // pjSrcIn + (SrcLeft >> 3)
and t5,a0,t2 // DstLeft & ~0x07
and t2,a1,t2 // DstRight & ~0x07
addu s1,s1,t5 // pjDstIn + (DstLeft & ~0x07)
mflo t9 // t9 = cy * DeltaDstIn
addu s2,s1,t9 // s2 = pjDstEndY = pjDst + cy * DeltaDstIn
//
// determine if left and right are in same quadword
//
bne t5,t2,50f // in ne, go to left case
//
// combined right and left edge in same quadword
//
// determine edge masks for DWORD 0
//
la v0,gTextLeftMask
la v1,gTextRightMask
sll t2,a2,3 // left edge 2-dword offset
addu t6,t2,v0 // table address
lw a0,0(t6) // Left Mask 0
sll t2,a3,3 // right edge 2-dword offset
addu t6,t2,v1 // table address
lw a1,0(t6) // Right Mask 0
sll t2,a2,3 // left edge 2-dword offset
addu t6,t2,v0 // table address
lw a2,4(t6) // Left Mask 1
sll t2,a3,3 // right edge 2-dword offset
addu t6,t2,v1 // table address
lw a3,4(t6) // Right Mask 1
and a0,a0,a1 // mask0 = Left0 & Right0
nor a1,a0,0 // ~mask0
and a2,a2,a3 // mask1 = Left1 & Right1
nor a3,a2,0 // ~mask1
//
// variables all initialized, ready for expansion loop
//
Opaq8SinleQWLoop:
lbu v0,0(s0) // get src byte
lw t2,0(s1) // dest 0,1
lw t3,4(s1) // dest 0,1
srl v1,v0,4 // isolate first (high) nibble
sll v1,v1,3 // qword offset
addu v1,v1,sp // offset in text expansion table
lw t0,0(v1) // t0 = text expansion for nibble 0
and v0,v0,0x0f // isolate second (low) nibble
sll v0,v0,3 // qword offset
addu v0,v0,sp // add offset to base of table
lw t1,0(v0) // t1 = Text expansion for nibble 1
and t2,t2,a1 // dest0 & ~mask0
and t3,t3,a3 // dest1 & ~mask1
and t0,t0,a0 // src0 & mask0
and t1,t1,a2 // src1 & mask1
or t0,t0,t2 // (src0 & mask0) | (dest0 & ~mask0)
or t1,t1,t3 // (src1 & mask1) | (dest1 & ~mask1)
sw t0,0(s1) // re-load f0 with dest0
sw t1,4(s1) // re-load f1 with dest1
addu s1,s1,t8 // next dest scan line
addu s0,s0,t7 // inc src address to next scan line
bne s1,s2,Opaq8SinleQWLoop // loop till done
//
// done:
//
beql zero,zero,200f
50:
//
// do LeftAln edge, 2 cases:
//
// 1,2,3: lwr,swr dest 0, lw,sw dest 1
// 4,5,6,7: lwr,swr dest 1
//
//
slt s4,a2,4 // if LeftAln < 4
beq s4,zero,60f // case 4,5,6,7
//
// LeftAln Case 1,2,3: need one partial DWORD at psDst + LeftAln
// and one full DWORD at pjDst+4
//
Opaq8Left123Loop:
lbu v0,0(s0) // load src byte
addu s0,s0,t7 // pjSrc += DeltaSrc
srl v1,v0,4 // isolate first nibble
sll v1,v1,3 // qword index
addu v1,v1,sp // table offset
addu v1,v1,a2 // left aln offset for lwr
lwr t0,0(v1) // get shifted text expansion data
and v0,v0,0x0f // isolate second nibble
sll v0,v0,3 // qword index
addu v0,v0,sp // table lookup
lw t1,0(v0) // get text exp data
addu t2,s1,a2 // pjDst + LeftAln
sw t1,4(s1) // store full DWORD
addu s1,s1,t8 // next dest scan line
swr t0,0(t2) // store shifted (t0 precalculated from old s1)
bne s1,s2,Opaq8Left123Loop
//
// goto right edge case
//
beql zero,zero,100f
60:
//
// case 4,5,6,7
//
subu t6,a2,4 // LeftAln-4: offset for loading text exp shifted
Opaq8Left567Loop:
lbu v0,0(s0) // load src byte
addu s0,s0,t7 // pjSrc += DeltaSrc
and v0,v0,0x0f // isolate second nibble
sll v0,v0,3 // qword index
addu v0,v0,sp // table lookup
addu v0,v0,t6 // lwr offset
lwr t1,0(v0) // get text exp data
addu t2,s1,a2 // pjDst + LeftAln
addu s1,s1,t8 // next dest scan line
swr t1,0(t2) // store partial DWORD
bne s1,s2,Opaq8Left567Loop // loop till done
100:
//
// do we have to do right alignment?
//
// a0 = DstLeft a2 = DstLeft & 0x07 = LeftAln
// a1 = DstRight a3 = DstRight & 0x07 = RightAln
//
//
// if RightAln == 0, no right edge alignment is needed
//
li t2,-8 // load 0xfffffff8 mask
beq a3,zero,200f
//
// must do right edge, load needed params amd calc base addresses
//
lw t6,OpSrcLeft(sp) // Left Src Edge
lw s0,OppjSrcIn(sp) // Src asddress
lw s1,OppjDstIn(sp) // Src asddress
lw t9,Opcy(sp) // cy
lw t8,OpDeltaDstIn(sp) // Delta Dst
lw t7,OpDeltaSrcIn(sp) // Delta Src
//
// pjDst = pjDstIn + (DstRight & ~0x07)
// pjDstEndY = pjDst + cy * DeltaDstIn
// pjSrc = pjSrcIn + ((SrcLeft + (DstRight - DstLeft)) >> 3)
//
mult t9,t8 // cy * DeltaDstIn
and t2,a1,t2 // DstRight & ~0x07
addu s1,s1,t2 // pjDstIn + (DstRight & ~0x07)
mflo t9 // t9 = cy * DeltaDstIn
addu s2,s1,t9 // s2 = pjDstEndY = pjDst + cy * DeltaDstIn
subu t2,a1,a0 // DstRight - DstLeft (cx)
addu t6,t6,t2 // SrcLeft + cx
srl t6,t6,3 // (SrcLeft + cx) >> 3
addu s0,s0,t6 // pjSrcIn + ((SrcLeft +cx) >> 3)
//
// three right edge cases based on RightAln (a3)
//
// 1,2,3,4: lwl,swl
// 5,6,7 lw,sw lwl,swl
//
slt s4,a3,5 // case 1,2,3,4
subu a3,a3,1
beq s4,zero,110f // not less than 5
//
// offset for lwl,swl
//
//
// case 1,2,3
//
Opaq8Right123Loop:
lbu v0,0(s0) // load src byte
addu s0,s0,t7 // pjSrc += DeltaSrc
srl v0,v0,4 // isolate first nibble
sll v0,v0,3 // qword index
addu v0,v0,sp // table lookup
addu v0,v0,a3 // lwl offset
lwl t1,0(v0) // get text exp data
addu t2,s1,a3 // pjDst + LeftAln
addu s1,s1,t8 // next dest scan line
swl t1,0(t2) // store partial DWORD
bne s1,s2,Opaq8Right123Loop
//
// done
//
beql zero,zero,200f
110:
//
// case 5,6,7: Store bytes 567 based on ending alignment
//
subu t2,a3,4 // 4,5,6 -> 0,1,2 for lwl offset
// from text exp table
Opaq8Right567Loop:
lbu v0,0(s0) // load src byte
addu s0,s0,t7 // pjSrc += DeltaSrc
srl v1,v0,4 // isolate first nibble
sll v1,v1,3 // qword index
addu v1,v1,sp // table lookup
lw v1,0(v1) // get text exp data
and v0,v0,0x0f // isolate second nibble
sll v0,v0,3 // qword index
addu v0,v0,sp // table lookup
addu v0,v0,t2 // lwl offset
lwl t1,0(v0) // get text exp data
sw v1,0(s1)
addu t3,s1,a3 // pjDst + RightAln
addu s1,s1,t8 // next dest scan line
swl t1,0(t3) // store partial qword
bne s1,s2,Opaq8Right567Loop // loop till done
200:
//
// restore saveed registers and stack
//
lw s0,OpS0(sp)
lw s1,OpS1(sp)
lw s2,OpS2(sp)
lw s3,OpS3(sp)
lw s4,OpS4(sp)
addu sp,sp,OpFrameLength
j ra
.end vSrcOpaqCopyS1D8
SBTTL("vSrcTranCopyS1D8")
//++
//
// VOID
// vSrcTranCopyS1D8(
// PBYTE pjSrcIn,
// LONG SrcLeft,
// LONG DeltaSrcIn,
// PBYTE pjDstIn,
// LONG DstLeft,
// LONG DstRight,
// LONG DeltaDstIn,
// LONG cy,
// ULONG uF,
// ULONG uB,
// SURFACE *pS
// );
//
// Routine Description:
//
// This routine is called to display a complete glyph Buffer. The src pixels
// set to one will cause the Foreground color to be written to the dst. Src pixels
// that are "0" will not be copied.
//
// Arguments:
//
// a0 - pjSrcIn - pointer to start of first src scan line
// a1 - SrcLeft - left (starting) src pixel
// a2 - DeltaSrcIn - src Scan line stride
// a3 - pjDstIn - pointer to start of first dst scan line
// DstLeft - left (starting) dst pixel
// DstRight - right(ending) dst pixel
// DeltaDstIn - dst scan line stride
// cy - Number of scan lines to copy
// uF - Foreground color
// uB - Background color
// pS - pointer to destination SURFACE
//
//
// Return Value:
//
// None.
//
//--
.struct 0
TrS0: .space 4
TrS1: .space 4
TrS2: .space 4
.space 4
TrFrameLength:
TrpjSrcIn: .space 4
TrSrcLeft: .space 4
TrDeltaSrcIn: .space 4
TrpjDstIn: .space 4
TrDstLeft: .space 4
TrDstRight: .space 4
TrDeltaDstIn: .space 4
Trcy: .space 4
TruF: .space 4
TruB: .space 4
TrpS: .space 4
NESTED_ENTRY(vSrcTranCopyS1D8, TrFrameLength, zero)
subu sp,sp,TrFrameLength
sw s0,TrS0(sp)
sw s1,TrS1(sp)
sw s2,TrS2(sp)
PROLOGUE_END
//
// This routine does left edge clipping using a mask generated
// from the left edge case (cxStart & 0x07). The case where the blt
// starts and ends in the same scan line is also handled by combining
// a start and end mask into a single mask. The right edge is handled
// by a special loop that only writes pixels that are left of the
// right edge
//
//
// save call parametrs
//
sw a0,TrpjSrcIn(sp)
sw a1,TrSrcLeft(sp)
sw a2,TrDeltaSrcIn(sp)
sw a3,TrpjDstIn(sp)
//
// build foreground lw from byte
//
lbu a1,TruF(sp)
sll t0,a1,8 // 00 00 fg 00
or a1,a1,t0 // 00 00 fg fg
sll t0,a1,16 // fg fg 00 00
or t0,t0,a1 // fg fg fg fg
//
// calculate left and right edge cases, and pixel count
//
lw t1,TrDstLeft(sp) // DstLeft
lw t2,TrDstRight(sp) // DstRight
lw t7,TrSrcLeft(sp) // xSrcStart
subu a2,t2,t1 // cx = DstRight - DstLeft
addu t8,t7,a2 // SrcRight = SrcLeft + cx
srl t4,t7,3 // xSrcStart >> 3
srl t1,t8,3 // xSrcEnd >> 3
li t2,0xff // build load mask for first src byte
and t7,t7,0x07 // xSrcStart & 0x07
srl v0,t2,t7 // 0xFF >> (xSrcStart & 0x07) = start mask
and t8,t8,0x07 // xSrcEnd & 0x07
or s2,t8,zero // s2 = (xSrcEnd & 0x07), save for end aln
//
// if (xSrcStart >> 3) == (xSrcEnd >> 3) then this blt
// starts and stops in the same quadword, jump to end strip case
//
beq t4,t1,50f // if not equal, skip
//
// subtract partial right edge (xSrcEnd & 0x07) from cx,
// do this part after main loop.
//
subu a2,a2,t8 // cx -= (xSrcEnd & 0x07)
//
// Load Loop variables
//
// a0 pjDst
// a1 pjSrc
// a2 cx
// a3 cy
// t3 DeltaDst
// s1 DeltaSrc
// t8 DstLeft
// t1 Dispatch base 0
// t5 Dispatch base 1
//
lw t3,TrDeltaDstIn(sp) // get the scan line stride in bytes
lw a0,TrpjDstIn(sp) // get Dst pointer
lw a1,TrpjSrcIn(sp) // get Src pointer
lw a3,Trcy(sp) // Src height
lw s1,TrDeltaSrcIn(sp) // src stride in bytes
lw t8,TrDstLeft(sp) // xDstStart
//
// drawing is always aligned
//
// if start is not aligned,and the
// src pixel with start mask, and
// and start address with 0xFFFFFFF8
//
la t1,60f // get base high dispatch address
la t5,80f // get base low dispatch address
//
// compute starting src and dst address
//
addu a1,a1,t4 // pjSrc = pjSrcStart + (xSrcStart >> 3)
li t9,-8 // load 0xfffffff8 mask
and t8,t8,t9 // (xDstStart & ~0x07)
addu a0,a0,t8 // pjDst = pjDst + (xDstStart & ~0x07)
//
// compute number of Src bytes, = (cx + (xSrcStart & 0x07) + 7) /8
//
addu t2,a2,t7 // Tmpcx = cx + (xSrcStart & 0x07)
addu t2,t2,7 // round the bitmap span in bytes
mult a3,t3 // compute offset to end of drawing
srl t2,t2,3 // compute bitmap span in bytes = Tmpcx/8
sll t4,t2,3 // compute draw span in bytes
subu t3,t3,t4 // compute draw stride in bytes
subu t6,s1,t2 // compute src stride in bytes
mflo a3 // get offset to end of drawing
addu a3,a3,a0 // compute ending address of drawing
//
// restore src and mask
//
or t8,v0,zero // resotore and mask
//
// Set the current draw and bitmap base addresses, and begin drawing the
// next scan line.
//
.set noreorder
.set noat
addu t4,t2,a1 // compute ending bitmap address
//
// A glyph scan line is processed four bits at a time. A dispatch is executed into
// an array of code fragments that actually draw the pixels on the display.
//
//
// The fisrt source byte may represent a partial value, mask with
// starting alignment (sSrcStart & 0x07)
//
10: lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
and v0,v0,t8 // mask off src pixels not wanted
beq zero,v0,30f // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
20: lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30f // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
30: bne a1,t4,20b // if ne, not end of glyph
addu a0,a0,8 // advance to next draw point
addu a0,a0,t3 // compute next scanline address
40: addu a1,a1,t6 // compute next src scanline address
bne a0,a3,10b // if eq, no more pixels to draw
addu t4,t2,a1 // compute ending bitmap address
//
// Set start mask to 0xFF, since the end case is the strip
// following a block of 1 or more quadwords.
//
li v0,0xff // start mask = ff
50:
//
// check for end strip to draw
//
beq s2,zero,EndvSrcTranCopyS1D8
nop
//
// must do end strip of s2 pixels, load params
//
lw t8,TrSrcLeft(sp) // xSrcStart
lw t9,TrDstLeft(sp) // xDstStart
lw a2,TrDstRight(sp) // xDstEnd
lw a0,TrpjDstIn(sp) // get Dst pointer
lw a1,TrpjSrcIn(sp) // get Src pointer
lw a3,Trcy(sp) // Src height
lw s1,TrDeltaSrcIn(sp) // src scan line stride in bytes
lw t3,TrDeltaDstIn(sp) // get the Dst scan line stride in bytes
subu a2,a2,t9 // cx = xDstEnd - xDstStart
addu t8,t8,a2 // xSrcEnd = xSrcStart + cx
//
// starting src address = pjSrc + (xSrcEnd >> 3)
//
srl t1,t8,3 // (xSrcEnd >> 3)
addu a1,a1,t1 // pjSrc + (xSrcEnd >> 3)
//
// starting dst address = pjDst + xDstStart + (cx - s2),
// calc ending dst address = pjDst + (cy * DeltaDst)
//
mult a3,t3 // cy * DeltaDst
subu a2,a2,s2 // cx - s2
addu a2,a2,t9 // xDstStart + (cx - s2)
addu a0,a0,a2 // pjDst = pjDst + xDstStart + (cx - s2)
mflo a3 // cy * DeltaHeight
addu a3,a3,a0 // pjDstEnd = pjDst + cy * DeltaHeight
//
// build jump table for masking pixels,
// jump to check last n pixels 4 * (7 - (xSrcEnd & 0x07))
//
li t8,7 //
subu t8,t8,s2 // 7 - (xSrcEnd & 0x07)
sll t8,t8,4 // 4 instructions (16 bytes)
la v1,100f // byte 7
addu v1,v1,t8 // jump table address
//
// loop until pjDst = pjDstEnd:
//
// Load byte
// store foreground color to each byte set
//
51:
lbu t1,0(a1) // load next src byte
addu a1,a1,s1 // inc src address
and t1,t1,v0 // start mask
j v1 // jump into table
nop
100:
// byte 6
and t5,t1,0x02
beq t5,zero,53f
nop
sb t0,6(a0)
53:
// byte 5
and t5,t1,0x04
beq t5,zero,54f
nop
sb t0,5(a0)
54:
// byte 4
and t5,t1,0x08
beq t5,zero,55f
nop
sb t0,4(a0)
55:
// byte 3
and t5,t1,0x10
beq t5,zero,56f
nop
sb t0,3(a0)
56:
// byte 2
and t5,t1,0x20
beq t5,zero,57f
nop
sb t0,2(a0)
57:
// byte 1
and t5,t1,0x40
beq t5,zero,58f
nop
sb t0,1(a0)
58:
// byte 0
and t5,t1,0x80
beq t5,zero,59f
nop
sb t0,0(a0)
59:
addu a0,a0,t3 // pjDst += DeltaDst
bne a0,a3,51b // while pjDst != pjDstEnd
nop
EndvSrcTranCopyS1D8:
.set reorder
.set at
lw s0,TrS0(sp) // save s0
lw s1,TrS1(sp) // save s1
lw s2,TrS2(sp) // save s2
addu sp,sp,TrFrameLength // restore stack
j ra // return
//
// The following code is arranged as 16, four instruction blocks. The block
// of code that is chosen for execution is determined from the high order
// glyph nibble. These glyph nibbles are always aligned.
//
// The glyph nibbles are encoded in big endian order and therefore the pixels
// that are stored are the reverse of the big endian bits within the nibble.
//
.align 4
.set noreorder
.set noat
60: // reference label
//
// Pattern 0000
//
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
nop //
61:
//
// Pattern 0001 -> 1000
//
sb t0,3(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
62:
//
// Pattern 0010 -> 0100
//
sb t0,2(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
63:
//
// Pattern 0011 -> 1100
//
sh t0,2(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
64:
//
// Pattern 0100 -> 0010
//
sb t0,1(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
65:
//
// Pattern 0101 -> 1010
//
sb t0,1(a0) // store pixel
sb t0,3(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
66:
//
// Pattern 0110 -> 0110
//
sb t0,1(a0) // store pixel
sb t0,2(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
67:
//
// Pattern 0111 -> 1110
//
swr t0,1(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
68:
//
// Pattern 1000 -> 0001
//
sb t0,0(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
69:
//
// Pattern 1001 -> 1001
//
sb t0,0(a0) // store pixel
sb t0,3(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
70:
//
// Pattern 1010 -> 0101
//
sb t0,0(a0) // store pixel
sb t0,2(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
71:
//
// Pattern 1011 -> 1101
//
sb t0,0(a0) // store pixel
sh t0,2(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
72:
//
// Pattern 1100 -> 0011
//
sh t0,0(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
73:
//
// Pattern 1101 -> 1011
//
sh t0,0(a0) // store pixels
sb t0,3(a0) // store pixel
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
74:
//
//
// Pattern 1110 -> 0111
//
swl t0,2(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
75:
//
// Pattern 1111 -> 1111
//
sw t0,0(a0) // store pixels
and v1,v0,0xf << 6 // isolate low order nibble
addu v1,v1,t5 // compute dispatch address
j v1 // dispatch to pixel store routine
addu a0,a0,4 // advance to next draw point
nop // fill
nop //
nop //
.set at
.set reorder
//
// The following code is arranged as 16, 16 instruction blocks. The block
// of code that is chosen for execution is determined from the low order
// glyph nibble and the two low its of the draw address.
//
// The glyph nibbles are encoded in big endian order and therefore the pixels
// that are stored are the reverse of the big endian bits within the nibble.
//
.set noreorder
.set noat
80: // reference label
//
// Pattern 0000
//
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
nop
81: // reference label
//
// Pattern 0001 -> 1000
//
sb t0,3(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
82: // reference label
//
// Pattern 0010 -> 0100
//
sb t0,2(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop //
83: // reference label
//
// Pattern 0011 -> 1100
//
sh t0,2(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop //
84: // reference label
//
// Pattern 0100 -> 0010
//
sb t0,1(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop //
85: // reference label
//
// Pattern 0101 -> 1010
//
sb t0,1(a0) // store pixel
sb t0,3(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
86: // reference label
//
// Pattern 0110 -> 0110
//
sb t0,1(a0) // store pixel
sb t0,2(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
87: // reference label
//
// Pattern 0111 -> 1110
//
swr t0,1(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
88: // reference label
//
// Pattern 1000 -> 0001
//
sb t0,0(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
89: // reference label
//
// Pattern 1001 -> 1001
//
sb t0,0(a0) // store pixel
sb t0,3(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
90: // reference label
//
// Pattern 1010 -> 0101
//
sb t0,0(a0) // store pixel
sb t0,2(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
91: // reference label
//
// Pattern 1011 -> 1101
//
sb t0,0(a0) // store pixel
sh t0,2(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
92: // reference label
//
// Pattern 1100 -> 0011
//
sh t0,0(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
93: // reference label
//
// Pattern 1101 -> 1011
//
sh t0,0(a0) // store pixels
sb t0,3(a0) // store pixel
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
94: // reference label
//
// Pattern 1110 -> 0111
//
swl t0,2(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
95: // reference label
//
// Pattern 1111 -> 1111
//
sw t0,0(a0) // store pixels
addu a0,a0,4 // advance to next draw point
beql a1,t4,40b // if eq then end of scan line
addu a0,a0,t3 // compute next scanline address
lbu v0,0(a1) // get next byte of glyph
addu a1,a1,1 // advance to next glyph byte
beq zero,v0,30b // if eq, no glyph bits to draw
sll v1,v0,7 - 6 // shift high nibble into position
and v1,v1,0xf << 5 // isolate low order nibble
addu v1,v1,t1 // compute dispatch address
j v1 // dispatch to pixel store routine
sll v0,v0,6 // shift next nibble into position
nop
nop
nop
nop
.set at
.set reorder
.end vSrcTranCopyS1D8