//      TITLE("Glyph expansion from 1bpp to 8bpp")
//++
//
// Copyright (c) 1994  Microsoft Corporation
//
// Module Name:
//
//
// Abstract:
//
//    Expand a 1bpp buffer to 8bpp. Both opaque and transparent mode
//
//
// Author:
//
//    Mark Enstrom (marke) 28-July-1994
//
// Environment:
//
//    User mode.
//
// Revision History:
//
//--

#include "ksmips.h"
#include "gdimips.h"

        .extern gTextLeftMask  4*8*2
        .extern gTextRightMask 4*8*2

        SBTTL("vSrcOpaqCopyS1D8_64")
//++
//
// VOID
// vSrcOpaqCopyS1D8_64(
//     PBYTE   pjSrcIn,
//     LONG    SrcLeft,
//     LONG    DeltaSrcIn,
//     PBYTE   pjDstIn,
//     LONG    DstLeft,
//     LONG    DstRight,
//     LONG    DeltaDstIn,
//     LONG    cy,
//     ULONG   uF,
//     ULONG   uB,
//     SURFACE *pS
// );
//
// Routine Description:
//
//    Opaque text expansion of a 1BPP buffer to 8Bpp destination.
//
//    A 16 entry table that maps every 4 bit source pattern to four
//    destination pixels is built at the bottom of the stack frame.  The
//    blt is then done in up to three passes:
//
//      1. the run of whole 8 pixel groups between the rounded-up left
//         edge and the rounded-down right edge (Opaq8MainLoop /
//         Opaq8QWMainLoop),
//      2. the partial 8 pixel group at the left edge,
//      3. the partial 8 pixel group at the right edge.
//
//    When both edges fall in the same 8 pixel group, passes 2 and 3 are
//    replaced by one masked read-modify-write loop (Opaq8SinleQWLoop).
//
// Arguments:
//
//    a0 - pjSrcIn    - pointer to start of first src scan line
//    a1 - SrcLeft    - left (starting) src pixel
//    a2 - DeltaSrcIn - src Scan line stride
//    a3 - pjDstIn    - pointer to start of first dst scan line
//         DstLeft    - left (starting) dst pixel      (stack)
//         DstRight   - right(ending) dst pixel        (stack)
//         DeltaDstIn - dst scan line stride           (stack)
//         cy         - Number of scan lines to copy   (stack)
//         uF         - Foreground color               (stack, low byte used)
//         uB         - Background color               (stack, low byte used)
//         pS         - pointer to destination SURFACE (stack, not read here)
//
//    NOTE(review): every scan line loop below tests its end condition
//    after the body, so cy == 0 is not handled; presumably the caller
//    guarantees cy > 0 - confirm.
//
// Return Value:
//
//    None
//
//--

//
// Stack frame layout.
//
// OpExpTable sits at offset 0 so that (sp) is the table base.  Entries
// are spaced 8 bytes apart (16 entries * 8 bytes = 32*4) and only the
// first DWORD of each entry is written.
//
// s3 is saved and restored although this routine does not otherwise
// reference it.
//
// The names following OpFrameLength address the caller's argument area.
//

        .struct 0
OpExpTable:     .space  32*4
OpS0:           .space  4
OpS1:           .space  4
OpS2:           .space  4
OpS3:           .space  4
OpS4:           .space  4
                .space  4 * 3
OpFrameLength:
OppjSrcIn:      .space  4
OpSrcLeft:      .space  4
OpDeltaSrcIn:   .space  4
OppjDstIn:      .space  4
OpDstLeft:      .space  4
OpDstRight:     .space  4
OpDeltaDstIn:   .space  4
Opcy:           .space  4
OpuF:           .space  4
OpuB:           .space  4
OpupS:          .space  4

        NESTED_ENTRY(vSrcOpaqCopyS1D8_64, OpFrameLength, zero)

        subu    sp,sp,OpFrameLength
        sw      s0,OpS0(sp)
        sw      s1,OpS1(sp)
        sw      s2,OpS2(sp)
        sw      s3,OpS3(sp)
        sw      s4,OpS4(sp)

        PROLOGUE_END

//
// save register params in the argument area so the edge passes can
// reload them after the main loop has reused a0-a3
//

        sw      a0,OppjSrcIn(sp)        // save param
        sw      a1,OpSrcLeft(sp)        // save param
        sw      a2,OpDeltaSrcIn(sp)     // save param
        sw      a3,OppjDstIn(sp)        // save param

//
// NOTE: (sp) points to the 16 entry text expansion table, one ULONG
//       per 8 byte slot
//

//
// build color table:
//      build a DWORD of Background pixels to start and store it
//

        lbu     v0,OpuF(sp)             // load foreground color
        lbu     v1,OpuB(sp)             // load background color
        sll     t0,v1,8                 // 00 00 bg 00
        or      t0,v1,t0                // 00 00 bg bg
        sll     t1,t0,16                // bg bg 00 00
        or      t0,t0,t1                // bg bg bg bg
        sw      t0,0(sp)                // pattern 0000 -> slot 0

//
// Now continually shift the 32 bit value left by one pixel and or
// either Fg or Bg into the new right-most position.  The four bit
// pattern shown on each line is the pixel content of t0 (1 = Fg),
// most significant byte first.
//
// Note: 1BPP pixel values are stored BIG-endian, so they need to be
// reversed: a pattern is stored at the table slot whose index is the
// bit reversal of that pattern (e.g. pattern 0001 goes to slot 8).
//

        sll     t0,t0,8
        or      t0,t0,v0                // 0 0 0 1
        sw      t0,8*8(sp)              // pattern 1  -> slot 8

        sll     t0,t0,8
        or      t0,t0,v1                // 0 0 1 0
        sw      t0,4*8(sp)              // pattern 2  -> slot 4

        sll     t0,t0,8
        or      t0,t0,v0                // 0 1 0 1
        sw      t0,10*8(sp)             // pattern 5  -> slot 10

        sll     t0,t0,8
        or      t0,t0,v1                // 1 0 1 0
        sw      t0,5*8(sp)              // pattern 10 -> slot 5

        sll     t0,t0,8
        or      t0,t0,v1                // 0 1 0 0
        sw      t0,2*8(sp)              // pattern 4  -> slot 2

        sll     t0,t0,8
        or      t0,t0,v0                // 1 0 0 1
        sw      t0,9*8(sp)              // pattern 9  -> slot 9

        sll     t0,t0,8
        or      t0,t0,v0                // 0 0 1 1
        sw      t0,12*8(sp)             // pattern 3  -> slot 12

        sll     t0,t0,8
        or      t0,t0,v0                // 0 1 1 1
        sw      t0,14*8(sp)             // pattern 7  -> slot 14

        sll     t0,t0,8
        or      t0,t0,v0                // 1 1 1 1
        sw      t0,15*8(sp)             // pattern 15 -> slot 15

        sll     t0,t0,8
        or      t0,t0,v1                // 1 1 1 0
        sw      t0,7*8(sp)              // pattern 14 -> slot 7

        sll     t0,t0,8
        or      t0,t0,v0                // 1 1 0 1
        sw      t0,11*8(sp)             // pattern 13 -> slot 11

        sll     t0,t0,8
        or      t0,t0,v0                // 1 0 1 1
        sw      t0,13*8(sp)             // pattern 11 -> slot 13

        sll     t0,t0,8
        or      t0,t0,v1                // 0 1 1 0
        sw      t0,6*8(sp)              // pattern 6  -> slot 6

        sll     t0,t0,8
        or      t0,t0,v1                // 1 1 0 0
        sw      t0,3*8(sp)              // pattern 12 -> slot 3

        sll     t0,t0,8
        or      t0,t0,v1                // 1 0 0 0
        sw      t0,1*8(sp)              // pattern 8  -> slot 1

//
// perform the expansion in three pieces. First do the aligned
// middle. Next the start alignment, finally the ending alignment.
// The temporary 1Bpp buffer was generated so that each src byte
// will expand to an even DWORD boundary.
//
//  LeftAln  = ((DstLeft + 7) & ~0x07);
//  RightAln = ( DstRight     & ~0x07);
//

        lw      t0,OpDstLeft(sp)        // load DstLeft
        lw      t1,OpDstRight(sp)       // load DstRight
        addu    t2,t0,7                 // DstLeft + 7
        li      t8,-8                   // ~0x07 = -8
        and     t2,t2,t8                // LeftAln  = ((DstLeft + 7) & ~0x07)
        and     t3,t1,t8                // RightAln = ( DstRight     & ~0x07)

//
// ending address offsets.
//  EndOffset   is the number of bytes from pjDst to pjDstEnd
//  EndOffset4  is EndOffset rounded down to a multiple of 16 bytes
//              (whole 4 DWORD blocks)
//  EndOffset16 is EndOffset rounded down to a multiple of 64 bytes
//              (whole 16 DWORD blocks)
//

        subu    t5,t3,t2                // EndOffset = RightAln - LeftAln
        li      t8,-16                  // ~0x0F
        li      t9,-64                  // ~0x3F
        and     t6,t5,t8                // EndOffset4  = EndOffset & ~0x0F
        and     t7,t5,t9                // EndOffset16 = EndOffset & ~0x3F

//
// calculate src and dst address and dstEndY
//

        lw      t8,Opcy(sp)             // cy
        lw      t9,OpDeltaDstIn(sp)     // DeltaDstIn
        addu    a3,a3,t2                // pjDst = pjDstIn + LeftAln
        addu    a1,a1,7                 // SrcLeft+7
        mult    t8,t9                   // start mul for cy * DeltaDstIn
        srl     a1,a1,3                 // (SrcLeft+7) >> 3 = byte offset for src
        addu    a0,a0,a1                // pjSrc = pjSrcIn + ((SrcLeft+7) >> 3)
        srl     t8,t5,3                 // EndOffset >> 3 = src bytes per scan
        subu    t8,a2,t8                // DeltaSrc = DeltaSrcIn - (EndOffset >> 3)
        subu    t9,t9,t5                // DeltaDst = DeltaDstIn - EndOffset
        mflo    a1                      // cy * DeltaDstIn
        addu    a1,a3,a1                // pjDstEndY = pjDst + cy * DeltaDstIn,
                                        // ending scan line address

//
// if RightAln is greater than LeftAln, then the src text expansion covers
// at least 1 whole quadword. This is the requirement of this loop. If not,
// deal with the narrow blt below
//

        slt     t0,t2,t3                // t0 = (LeftAln < RightAln)
        beq     t0,zero,Opaq8Partial    // skip main loop if RightAln <= LeftAln

//
// Main loop register usage
//
//  a0: pjSrc       t0: pjDstEnd4     sp: TextTable     t8: DeltaSrc
//  a1: pjDstEndY   t1: pjDstEnd16    t5: EndOffset     t9: DeltaDst
//  a2: pjDstEnd    t4: 64 bit temp   t6: EndOffset4
//  a3: pjDst       s2: table temp    t7: EndOffset16
//
//  v0,v1,s0,s1 hold source bytes; f0-f14 (even) hold expanded pixels.
//

Opaq8MainLoop:

//
// if the scan line is QW aligned, use 64 bit stores, else use 32 bit stores.
// This alignment could change on a scan line basis because DeltaDst is only
// guaranteed to be dword aligned. The 64 bit store loop is used because
// direct frame buffer output is always QW aligned.
//

        and     a2,a3,4                 // bit 2 of pjDst set -> not QW aligned
        beq     a2,zero,Opaq8QWMainLoop

//
// init scan line check addresses
//

        addu    a2,a3,t5                // pjDstEnd   = pjDst + EndOffset
        addu    t0,a3,t6                // pjDstEnd4  = pjDst + EndOffset4
        addu    t1,a3,t7                // pjDstEnd16 = pjDst + EndOffset16

//
// 16 DWORD loop: 8 src bytes -> 64 dst bytes per iteration
//

        beq     a3,t1,20f
10:
        lbu     v0,0(a0)                // c0 = *(pjSrc)
        lbu     v1,1(a0)                // c1 = *(pjSrc+1)
        lbu     s0,2(a0)                // c2 = *(pjSrc+2)
        lbu     s1,3(a0)                // c3 = *(pjSrc+3)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f0,0(s2)                // TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f2,0(s2)                // TextExpTable[c0 & 0x0f]

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f4,0(s2)                // TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f6,0(s2)                // TextExpTable[c1 & 0x0f]

        srl     s2,s0,4                 // c2 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f8,0(s2)                // TextExpTable[c2 >> 4]
        and     s2,s0,0x0f              // c2 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f10,0(s2)               // TextExpTable[c2 & 0x0f]

        srl     s2,s1,4                 // c3 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f12,0(s2)               // TextExpTable[c3 >> 4]
        and     s2,s1,0x0f              // c3 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f14,0(s2)               // TextExpTable[c3 & 0x0f]

        swc1    f0,0x00(a3)             // store results
        swc1    f2,0x04(a3)             // store results
        swc1    f4,0x08(a3)             // store results
        swc1    f6,0x0c(a3)             // store results
        swc1    f8,0x10(a3)             // store results
        swc1    f10,0x14(a3)            // store results
        swc1    f12,0x18(a3)            // store results
        swc1    f14,0x1c(a3)            // store results

//
// load second 4 bytes
//

        lbu     v0,4(a0)                // c0 = *(pjSrc+4)
        lbu     v1,5(a0)                // c1 = *(pjSrc+5)
        lbu     s0,6(a0)                // c2 = *(pjSrc+6)
        lbu     s1,7(a0)                // c3 = *(pjSrc+7)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f0,0(s2)                // TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f2,0(s2)                // TextExpTable[c0 & 0x0f]

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f4,0(s2)                // TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f6,0(s2)                // TextExpTable[c1 & 0x0f]

        srl     s2,s0,4                 // c2 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f8,0(s2)                // TextExpTable[c2 >> 4]
        and     s2,s0,0x0f              // c2 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f10,0(s2)               // TextExpTable[c2 & 0x0f]

        srl     s2,s1,4                 // c3 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f12,0(s2)               // TextExpTable[c3 >> 4]
        and     s2,s1,0x0f              // c3 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f14,0(s2)               // TextExpTable[c3 & 0x0f]

//
// Store results, this will allow fastest video memory
// stores on MIPS "JAZZ" platform. This sequence must
// execute in order
//

        .set    noreorder
        swc1    f0,0x20(a3)             // store results
        swc1    f2,0x24(a3)             // store results
        swc1    f4,0x28(a3)             // store results
        swc1    f6,0x2c(a3)             // store results
        swc1    f8,0x30(a3)             // store results
        swc1    f10,0x34(a3)            // store results
        swc1    f12,0x38(a3)            // store results
        swc1    f14,0x3c(a3)            // store results
        .set    reorder

        addu    a3,a3,0x40              // pjDst += 64
        addu    a0,a0,8                 // pjSrc += 8
        bne     a3,t1,10b               // loop till pjDstEnd16
20:

//
// 4 DWORD loop: 2 src bytes -> 16 dst bytes per iteration
//

        beq     a3,t0,40f
30:
        lbu     v0,0(a0)                // c0 = *(pjSrc)
        lbu     v1,1(a0)                // c1 = *(pjSrc+1)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f0,0(s2)                // TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f2,0(s2)                // TextExpTable[c0 & 0x0f]

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f4,0(s2)                // TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f6,0(s2)                // TextExpTable[c1 & 0x0f]

//
// This sequence must execute in order
//

        .set    noreorder
        swc1    f0,0x00(a3)             // store results
        swc1    f2,0x04(a3)             // store results
        swc1    f4,0x08(a3)             // store results
        swc1    f6,0x0c(a3)             // store results
        .set    reorder

        addu    a3,a3,0x10              // pjDst += 16
        addu    a0,a0,2                 // pjSrc += 2
        bne     a3,t0,30b               // loop till pjDstEnd4
40:

//
// 2 DWORD loop: 1 src byte -> 8 dst bytes per iteration
//

        beq     a3,a2,60f
50:
        lbu     v0,0(a0)                // c0 = *(pjSrc)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make QWORD offset
        addu    s2,s2,sp                // offset from base of table
        lwc1    f0,0(s2)                // TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        lwc1    f2,0(s2)                // TextExpTable[c0 & 0x0f]

        swc1    f0,0x00(a3)             // store results
        swc1    f2,0x04(a3)             // store results

        addu    a3,a3,0x08              // pjDst += 8
        addu    a0,a0,1                 // pjSrc += 1
        bne     a3,a2,50b               // loop till done
60:

//
// end of scan line, add stride to src and dst then check for end condition
//

        addu    a3,a3,t9                // pjDst += DeltaDst
        addu    a0,a0,t8                // pjSrc += DeltaSrc
        bne     a3,a1,Opaq8MainLoop     // continue

//
// done, go to alignment edge cases
//

        beq     zero,zero,Opaq8Partial  // Done with main, go to start and end cases

Opaq8QWMainLoop:

//
// Destination is quadword aligned, use 64 bit stores.
//
// Each 64 bit value is assembled in t4: lw fetches the high nibble's
// table entry as the lower dword, then ldl at offset 3 of the low
// nibble's entry supplies the upper dword, and dmtc1 moves the pair to
// an FP register for a single sdc1.
// NOTE(review): the lw/ldl pairing relies on little-endian byte order
// and a 64 bit capable CPU - confirm against the target.
//

        addu    a2,a3,t5                // pjDstEnd   = pjDst + EndOffset
        addu    t0,a3,t6                // pjDstEnd4  = pjDst + EndOffset4
        addu    t1,a3,t7                // pjDstEnd16 = pjDst + EndOffset16

//
// 16 DWORD loop: 8 src bytes -> 64 dst bytes per iteration
//

        beq     a3,t1,20f
10:
        lbu     v0,0(a0)                // c0 = *(pjSrc)
        lbu     v1,1(a0)                // c1 = *(pjSrc+1)
        lbu     s0,2(a0)                // c2 = *(pjSrc+2)
        lbu     s1,3(a0)                // c3 = *(pjSrc+3)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c0 & 0x0f]
        dmtc1   t4,f0                   // move to 64 bit f register

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c1 & 0x0f]
        dmtc1   t4,f2                   // move to 64 bit f register

        srl     s2,s0,4                 // c2 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c2 >> 4]
        and     s2,s0,0x0f              // c2 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c2 & 0x0f]
        dmtc1   t4,f4                   // move to 64 bit f register

        srl     s2,s1,4                 // c3 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c3 >> 4]
        and     s2,s1,0x0f              // c3 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c3 & 0x0f]
        dmtc1   t4,f6                   // move to 64 bit f register

//
// load second 4 bytes
//

        lbu     v0,4(a0)                // c0 = *(pjSrc+4)
        lbu     v1,5(a0)                // c1 = *(pjSrc+5)
        lbu     s0,6(a0)                // c2 = *(pjSrc+6)
        lbu     s1,7(a0)                // c3 = *(pjSrc+7)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c0 & 0x0f]
        dmtc1   t4,f8                   // move to 64 bit f register

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c1 & 0x0f]
        dmtc1   t4,f10                  // move to 64 bit f register

        srl     s2,s0,4                 // c2 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c2 >> 4]
        and     s2,s0,0x0f              // c2 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c2 & 0x0f]
        dmtc1   t4,f12                  // move to 64 bit f register

        srl     s2,s1,4                 // c3 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c3 >> 4]
        and     s2,s1,0x0f              // c3 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c3 & 0x0f]
        dmtc1   t4,f14                  // move to 64 bit f register

//
// Store results, this will allow fastest video memory
// stores on MIPS "JAZZ" platform. This sequence must
// execute in order.
//

        .set    noreorder
        sdc1    f0,0x00(a3)             // store results
        sdc1    f2,0x08(a3)             // store results
        sdc1    f4,0x10(a3)             // store results
        sdc1    f6,0x18(a3)             // store results
        sdc1    f8,0x20(a3)             // store results
        sdc1    f10,0x28(a3)            // store results
        sdc1    f12,0x30(a3)            // store results
        sdc1    f14,0x38(a3)            // store results
        .set    reorder

        addu    a3,a3,0x40              // pjDst += 64
        addu    a0,a0,8                 // pjSrc += 8
        bne     a3,t1,10b               // loop till done
20:

//
// 4 DWORD loop: 2 src bytes -> 16 dst bytes per iteration
//

        beq     a3,t0,40f
30:
        lbu     v0,0(a0)                // c0 = *(pjSrc)
        lbu     v1,1(a0)                // c1 = *(pjSrc+1)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c0 & 0x0f]
        dmtc1   t4,f0                   // move to 64 bit f register

        srl     s2,v1,4                 // c1 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c1 >> 4]
        and     s2,v1,0x0f              // c1 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c1 & 0x0f]
        dmtc1   t4,f2                   // move to 64 bit f register

        sdc1    f0,0x00(a3)             // store results
        sdc1    f2,0x08(a3)             // store results

        addu    a3,a3,0x10              // pjDst += 16
        addu    a0,a0,2                 // pjSrc += 2
        bne     a3,t0,30b               // loop till done
40:

//
// 2 DWORD loop: 1 src byte -> 8 dst bytes per iteration
//

        beq     a3,a2,60f
50:
        lbu     v0,0(a0)                // c0 = *(pjSrc)

        srl     s2,v0,4                 // c0 >> 4
        sll     s2,s2,3                 // make qword offset
        addu    s2,s2,sp                // offset from base of table
        lw      t4,0(s2)                // lower dword = TextExpTable[c0 >> 4]
        and     s2,v0,0x0f              // c0 & 0x0f
        sll     s2,s2,3                 // qword offset
        addu    s2,s2,sp                // offset from base
        ldl     t4,3(s2)                // upper dword = TextExpTable[c0 & 0x0f]
        dmtc1   t4,f0                   // move to 64 bit f register

        sdc1    f0,0x00(a3)             // store results

        addu    a3,a3,0x08              // pjDst += 8
        addu    a0,a0,1                 // pjSrc += 1
        bne     a3,a2,50b               // loop till done
60:

//
// end of scan line, add stride to src and dst then check for end condition
//

        addu    a3,a3,t9                // pjDst += DeltaDst
        addu    a0,a0,t8                // pjSrc += DeltaSrc
        bne     a3,a1,Opaq8MainLoop     // continue

//
// partial QWORD start and end
//
// From here on LeftAln / RightAln mean the pixel offset of each edge
// inside its 8 pixel group (edge & 0x07), not the rounded values used
// by the main loop.
//

Opaq8Partial:

        lw      a0,OpDstLeft(sp)        // left edge
        lw      a1,OpDstRight(sp)       // right edge
        and     a2,a0,7                 // LeftAln  = DstLeft  & 0x07
        and     a3,a1,7                 // RightAln = DstRight & 0x07

//
// do we have left alignment?
//

        li      t2,-8                   // 0xFFFFFFF8 mask
        beq     a2,zero,100f            // if LeftAln == 0, skip

        lw      t6,OpSrcLeft(sp)        // Left Src Edge
        lw      s0,OppjSrcIn(sp)        // Src address
        lw      s1,OppjDstIn(sp)        // Dst address
        lw      t9,Opcy(sp)             // cy
        lw      t8,OpDeltaDstIn(sp)     // Delta Dst
        lw      t7,OpDeltaSrcIn(sp)     // Delta Src

//
// pjSrc     = pjSrcIn + (SrcLeft >> 3)
// pjDst     = pjDstIn + (DstLeft & ~0x07)
// pjDstEndY = pjDst + cy * DeltaDstIn
//

        mult    t9,t8                   // cy * DeltaDstIn
        srl     t6,t6,3                 // SrcLeft >> 3
        addu    s0,s0,t6                // pjSrcIn + (SrcLeft >> 3)
        and     t5,a0,t2                // DstLeft  & ~0x07
        and     t2,a1,t2                // DstRight & ~0x07
        addu    s1,s1,t5                // pjDstIn + (DstLeft & ~0x07)
        mflo    t9                      // t9 = cy * DeltaDstIn
        addu    s2,s1,t9                // s2 = pjDstEndY = pjDst + cy * DeltaDstIn

//
// determine if left and right are in same quadword
//

        bne     t5,t2,50f               // if ne, go to left case

//
// combined right and left edge in same quadword
//
// determine edge masks for DWORD 0 and DWORD 1; each mask table entry
// is two DWORDs, indexed by the edge offset (0..7)
//

        la      v0,gTextLeftMask
        la      v1,gTextRightMask

        sll     t2,a2,3                 // left edge 2-dword offset
        addu    t6,t2,v0                // table address
        lw      a0,0(t6)                // Left Mask 0

        sll     t2,a3,3                 // right edge 2-dword offset
        addu    t6,t2,v1                // table address
        lw      a1,0(t6)                // Right Mask 0

        sll     t2,a2,3                 // left edge 2-dword offset
        addu    t6,t2,v0                // table address
        lw      a2,4(t6)                // Left Mask 1

        sll     t2,a3,3                 // right edge 2-dword offset
        addu    t6,t2,v1                // table address
        lw      a3,4(t6)                // Right Mask 1

        and     a0,a0,a1                // mask0 = Left0 & Right0
        nor     a1,a0,0                 // ~mask0
        and     a2,a2,a3                // mask1 = Left1 & Right1
        nor     a3,a2,0                 // ~mask1

//
// variables all initialized, ready for expansion loop
//
//  s0: pjSrc   s1: pjDst   s2: pjDstEndY   t7: DeltaSrcIn   t8: DeltaDstIn
//  a0: mask0   a1: ~mask0  a2: mask1       a3: ~mask1
//

Opaq8SinleQWLoop:

        lbu     v0,0(s0)                // get src byte
        lw      t2,0(s1)                // dest 0
        lw      t3,4(s1)                // dest 1
        srl     v1,v0,4                 // isolate first (high) nibble
        sll     v1,v1,3                 // qword offset
        addu    v1,v1,sp                // offset in text expansion table
        lw      t0,0(v1)                // t0 = text expansion for nibble 0
        and     v0,v0,0x0f              // isolate second (low) nibble
        sll     v0,v0,3                 // qword offset
        addu    v0,v0,sp                // add offset to base of table
        lw      t1,0(v0)                // t1 = text expansion for nibble 1
        and     t2,t2,a1                // dest0 & ~mask0
        and     t3,t3,a3                // dest1 & ~mask1
        and     t0,t0,a0                // src0 & mask0
        and     t1,t1,a2                // src1 & mask1
        or      t0,t0,t2                // (src0 & mask0) | (dest0 & ~mask0)
        or      t1,t1,t3                // (src1 & mask1) | (dest1 & ~mask1)
        sw      t0,0(s1)                // write back merged dest0
        sw      t1,4(s1)                // write back merged dest1
        addu    s1,s1,t8                // next dest scan line
        addu    s0,s0,t7                // inc src address to next scan line
        bne     s1,s2,Opaq8SinleQWLoop  // loop till done

//
// done: both edges handled, nothing left to do
//

        beql    zero,zero,200f

50:

//
// do LeftAln edge, 2 cases:
//
//  1,2,3:      lwr,swr dest 0, lw,sw dest 1
//  4,5,6,7:    lwr,swr dest 1
//

        slt     s4,a2,4                 // s4 = (LeftAln < 4)
        beq     s4,zero,60f             // case 4,5,6,7

//
// LeftAln Case 1,2,3: need one partial DWORD at pjDst + LeftAln
// and one full DWORD at pjDst+4
//

Opaq8Left123Loop:

        lbu     v0,0(s0)                // load src byte
        addu    s0,s0,t7                // pjSrc += DeltaSrcIn
        srl     v1,v0,4                 // isolate first nibble
        sll     v1,v1,3                 // qword index
        addu    v1,v1,sp                // table offset
        addu    v1,v1,a2                // left aln offset for lwr
        lwr     t0,0(v1)                // get shifted text expansion data
        and     v0,v0,0x0f              // isolate second nibble
        sll     v0,v0,3                 // qword index
        addu    v0,v0,sp                // table lookup
        lw      t1,0(v0)                // get text exp data
        addu    t2,s1,a2                // pjDst + LeftAln
        sw      t1,4(s1)                // store full DWORD
        addu    s1,s1,t8                // next dest scan line
        swr     t0,0(t2)                // store partial DWORD (t2 computed from old s1)
        bne     s1,s2,Opaq8Left123Loop

//
// goto right edge case
//

        beql    zero,zero,100f

60:

//
// case 4,5,6,7: only (part of) the second DWORD is written
//

        subu    t6,a2,4                 // LeftAln-4: offset for loading text exp shifted

Opaq8Left567Loop:

        lbu     v0,0(s0)                // load src byte
        addu    s0,s0,t7                // pjSrc += DeltaSrcIn
        and     v0,v0,0x0f              // isolate second nibble
        sll     v0,v0,3                 // qword index
        addu    v0,v0,sp                // table lookup
        addu    v0,v0,t6                // lwr offset
        lwr     t1,0(v0)                // get text exp data
        addu    t2,s1,a2                // pjDst + LeftAln
        addu    s1,s1,t8                // next dest scan line
        swr     t1,0(t2)                // store partial DWORD
        bne     s1,s2,Opaq8Left567Loop  // loop till done

100:

//
// do we have to do right alignment?
//
//  a0 = DstLeft      a2 = DstLeft  & 0x07 = LeftAln
//  a1 = DstRight     a3 = DstRight & 0x07 = RightAln
//

//
// if RightAln == 0, no right edge alignment is needed
//

        li      t2,-8                   // load 0xfffffff8 mask
        beq     a3,zero,200f

//
// must do right edge, load needed params and calc base addresses
//

        lw      t6,OpSrcLeft(sp)        // Left Src Edge
        lw      s0,OppjSrcIn(sp)        // Src address
        lw      s1,OppjDstIn(sp)        // Dst address
        lw      t9,Opcy(sp)             // cy
        lw      t8,OpDeltaDstIn(sp)     // Delta Dst
        lw      t7,OpDeltaSrcIn(sp)     // Delta Src

//
// pjDst     = pjDstIn + (DstRight & ~0x07)
// pjDstEndY = pjDst + cy * DeltaDstIn
// pjSrc     = pjSrcIn + ((SrcLeft + (DstRight - DstLeft)) >> 3)
//

        mult    t9,t8                   // cy * DeltaDstIn
        and     t2,a1,t2                // DstRight & ~0x07
        addu    s1,s1,t2                // pjDstIn + (DstRight & ~0x07)
        mflo    t9                      // t9 = cy * DeltaDstIn
        addu    s2,s1,t9                // s2 = pjDstEndY = pjDst + cy * DeltaDstIn

        subu    t2,a1,a0                // DstRight - DstLeft (cx)
        addu    t6,t6,t2                // SrcLeft + cx
        srl     t6,t6,3                 // (SrcLeft + cx) >> 3
        addu    s0,s0,t6                // pjSrcIn + ((SrcLeft + cx) >> 3)

//
// two right edge cases based on RightAln (a3)
//
//  1,2,3,4:    lwl,swl
//  5,6,7:      lw,sw   lwl,swl
//
// a3 is decremented so that it addresses the last byte to be written
// (the offset needed by lwl/swl)
//

        slt     s4,a3,5                 // s4 = (RightAln < 5): case 1,2,3,4
        subu    a3,a3,1                 // RightAln - 1 = offset of last pixel
        beq     s4,zero,110f            // not less than 5

//
// case 1,2,3,4: partial (or full) first DWORD only
//

Opaq8Right123Loop:

        lbu     v0,0(s0)                // load src byte
        addu    s0,s0,t7                // pjSrc += DeltaSrcIn
        srl     v0,v0,4                 // isolate first nibble
        sll     v0,v0,3                 // qword index
        addu    v0,v0,sp                // table lookup
        addu    v0,v0,a3                // lwl offset
        lwl     t1,0(v0)                // get text exp data
        addu    t2,s1,a3                // pjDst + RightAln - 1
        addu    s1,s1,t8                // next dest scan line
        swl     t1,0(t2)                // store partial DWORD
        bne     s1,s2,Opaq8Right123Loop

//
// done
//

        beql    zero,zero,200f

110:

//
// case 5,6,7: store full first DWORD, then bytes 4..(RightAln-1) of
// the second DWORD
//

        subu    t2,a3,4                 // 4,5,6 -> 0,1,2 for lwl offset
                                        // from text exp table

Opaq8Right567Loop:

        lbu     v0,0(s0)                // load src byte
        addu    s0,s0,t7                // pjSrc += DeltaSrcIn
        srl     v1,v0,4                 // isolate first nibble
        sll     v1,v1,3                 // qword index
        addu    v1,v1,sp                // table lookup
        lw      v1,0(v1)                // get text exp data
        and     v0,v0,0x0f              // isolate second nibble
        sll     v0,v0,3                 // qword index
        addu    v0,v0,sp                // table lookup
        addu    v0,v0,t2                // lwl offset
        lwl     t1,0(v0)                // get text exp data
        sw      v1,0(s1)                // store full first DWORD
        addu    t3,s1,a3                // pjDst + RightAln - 1
        addu    s1,s1,t8                // next dest scan line
        swl     t1,0(t3)                // store partial second DWORD
        bne     s1,s2,Opaq8Right567Loop // loop till done

200:

//
// restore saved registers and stack
//

        lw      s0,OpS0(sp)
        lw      s1,OpS1(sp)
        lw      s2,OpS2(sp)
        lw      s3,OpS3(sp)
        lw      s4,OpS4(sp)

        addu    sp,sp,OpFrameLength
        j       ra

        .end    vSrcOpaqCopyS1D8_64

        SBTTL("vSrcTranCopyS1D8")
//++
//
// VOID
// vSrcTranCopyS1D8(
//     PBYTE   pjSrcIn,
//     LONG    SrcLeft,
//     LONG    DeltaSrcIn,
//     PBYTE   pjDstIn,
//     LONG    DstLeft,
//     LONG    DstRight,
//     LONG    DeltaDstIn,
//     LONG    cy,
//     ULONG   uF,
//     ULONG   uB,
//     SURFACE *pS
// );
//
// Routine Description:
//
//    This routine is called to display a complete glyph Buffer. The src pixels
//    set to one will cause the Foreground color to be written to the dst. Src pixels
//    that are "0" will not be copied.
//
// Arguments:
//
//    a0 - pjSrcIn    - pointer to start of first src scan line
//    a1 - SrcLeft    - left (starting) src pixel
//    a2 - DeltaSrcIn - src Scan line stride
//    a3 - pjDstIn    - pointer to start of first dst scan line
//         DstLeft    - left (starting) dst pixel
//         DstRight   - right(ending) dst pixel
//         DeltaDstIn - dst scan line stride
//         cy         - Number of scan lines to copy
//         uF         - Foreground color
//         uB         - Background color
//         pS         - pointer to destination SURFACE
//
//
// Return Value:
//
//    None.
//
//--

        .struct 0
TrS0:           .space  4
TrS1:           .space  4
TrS2:           .space  4
                .space  4
TrFrameLength:
TrpjSrcIn:      .space  4
TrSrcLeft:      .space  4
TrDeltaSrcIn:   .space  4
TrpjDstIn:      .space  4
TrDstLeft:      .space  4
TrDstRight:     .space  4
TrDeltaDstIn:   .space  4
Trcy:           .space  4
TruF:           .space  4
TruB:           .space  4
TrpS:           .space  4

        NESTED_ENTRY(vSrcTranCopyS1D8, TrFrameLength, zero)

        subu    sp,sp,TrFrameLength
        sw      s0,TrS0(sp)
        sw      s1,TrS1(sp)
        sw      s2,TrS2(sp)

        PROLOGUE_END

//
// This routine does left edge clipping using a mask generated
// from the left edge case (cxStart & 0x07).
The case where the blt // starts and ends in the same scan line is also handled by combining // a start and end mask into a single mask. The right edge is handled // by a special loop that only writes pixels that are left of the // right edge // // // save call parametrs // sw a0,TrpjSrcIn(sp) sw a1,TrSrcLeft(sp) sw a2,TrDeltaSrcIn(sp) sw a3,TrpjDstIn(sp) // // build foreground lw from byte // lbu a1,TruF(sp) sll t0,a1,8 // 00 00 fg 00 or a1,a1,t0 // 00 00 fg fg sll t0,a1,16 // fg fg 00 00 or t0,t0,a1 // fg fg fg fg // // calculate left and right edge cases, and pixel count // lw t1,TrDstLeft(sp) // DstLeft lw t2,TrDstRight(sp) // DstRight lw t7,TrSrcLeft(sp) // xSrcStart subu a2,t2,t1 // cx = DstRight - DstLeft addu t8,t7,a2 // SrcRight = SrcLeft + cx srl t4,t7,3 // xSrcStart >> 3 srl t1,t8,3 // xSrcEnd >> 3 li t2,0xff // build load mask for first src byte and t7,t7,0x07 // xSrcStart & 0x07 srl v0,t2,t7 // 0xFF >> (xSrcStart & 0x07) = start mask and t8,t8,0x07 // xSrcEnd & 0x07 or s2,t8,zero // s2 = (xSrcEnd & 0x07), save for end aln // // if (xSrcStart >> 3) == (xSrcEnd >> 3) then this blt // starts and stops in the same quadword, jump to end strip case // beq t4,t1,50f // if not equal, skip // // subtract partial right edge (xSrcEnd & 0x07) from cx, // do this part after main loop. 
// subu a2,a2,t8 // cx -= (xSrcEnd & 0x07) // // Load Loop variables // // a0 pjDst // a1 pjSrc // a2 cx // a3 cy // t3 DeltaDst // s1 DeltaSrc // t8 DstLeft // t1 Dispatch base 0 // t5 Dispatch base 1 // lw t3,TrDeltaDstIn(sp) // get the scan line stride in bytes lw a0,TrpjDstIn(sp) // get Dst pointer lw a1,TrpjSrcIn(sp) // get Src pointer lw a3,Trcy(sp) // Src height lw s1,TrDeltaSrcIn(sp) // src stride in bytes lw t8,TrDstLeft(sp) // xDstStart // // drawing is always aligned // // if start is not aligned,and the // src pixel with start mask, and // and start address with 0xFFFFFFF8 // la t1,60f // get base high dispatch address la t5,80f // get base low dispatch address // // compute starting src and dst address // addu a1,a1,t4 // pjSrc = pjSrcStart + (xSrcStart >> 3) li t9,-8 // load 0xfffffff8 mask and t8,t8,t9 // (xDstStart & ~0x07) addu a0,a0,t8 // pjDst = pjDst + (xDstStart & ~0x07) // // compute number of Src bytes, = (cx + (xSrcStart & 0x07) + 7) /8 // addu t2,a2,t7 // Tmpcx = cx + (xSrcStart & 0x07) addu t2,t2,7 // round the bitmap span in bytes mult a3,t3 // compute offset to end of drawing srl t2,t2,3 // compute bitmap span in bytes = Tmpcx/8 sll t4,t2,3 // compute draw span in bytes subu t3,t3,t4 // compute draw stride in bytes subu t6,s1,t2 // compute src stride in bytes mflo a3 // get offset to end of drawing addu a3,a3,a0 // compute ending address of drawing // // restore src and mask // or t8,v0,zero // resotore and mask // // Set the current draw and bitmap base addresses, and begin drawing the // next scan line. // .set noreorder .set noat addu t4,t2,a1 // compute ending bitmap address // // A glyph scan line is processed four bits at a time. A dispatch is executed into // an array of code fragments that actually draw the pixels on the display. 
// // // The fisrt source byte may represent a partial value, mask with // starting alignment (sSrcStart & 0x07) // 10: lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte and v0,v0,t8 // mask off src pixels not wanted beq zero,v0,30f // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position 20: lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30f // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position 30: bne a1,t4,20b // if ne, not end of glyph addu a0,a0,8 // advance to next draw point addu a0,a0,t3 // compute next scanline address 40: addu a1,a1,t6 // compute next src scanline address bne a0,a3,10b // if eq, no more pixels to draw addu t4,t2,a1 // compute ending bitmap address // // Set start mask to 0xFF, since the end case is the strip // following a block of 1 or more quadwords. 
// li v0,0xff // start mask = ff 50: // // check for end strip to draw // beq s2,zero,EndvSrcTranCopyS1D8 nop // // must do end strip of s2 pixels, load params // lw t8,TrSrcLeft(sp) // xSrcStart lw t9,TrDstLeft(sp) // xDstStart lw a2,TrDstRight(sp) // xDstEnd lw a0,TrpjDstIn(sp) // get Dst pointer lw a1,TrpjSrcIn(sp) // get Src pointer lw a3,Trcy(sp) // Src height lw s1,TrDeltaSrcIn(sp) // src scan line stride in bytes lw t3,TrDeltaDstIn(sp) // get the Dst scan line stride in bytes subu a2,a2,t9 // cx = xDstEnd - xDstStart addu t8,t8,a2 // xSrcEnd = xSrcStart + cx // // starting src address = pjSrc + (xSrcEnd >> 3) // srl t1,t8,3 // (xSrcEnd >> 3) addu a1,a1,t1 // pjSrc + (xSrcEnd >> 3) // // starting dst address = pjDst + xDstStart + (cx - s2), // calc ending dst address = pjDst + (cy * DeltaDst) // mult a3,t3 // cy * DeltaDst subu a2,a2,s2 // cx - s2 addu a2,a2,t9 // xDstStart + (cx - s2) addu a0,a0,a2 // pjDst = pjDst + xDstStart + (cx - s2) mflo a3 // cy * DeltaHeight addu a3,a3,a0 // pjDstEnd = pjDst + cy * DeltaHeight // // build jump table for masking pixels, // jump to check last n pixels 4 * (7 - (xSrcEnd & 0x07)) // li t8,7 // subu t8,t8,s2 // 7 - (xSrcEnd & 0x07) sll t8,t8,4 // 4 instructions (16 bytes) la v1,100f // byte 7 addu v1,v1,t8 // jump table address // // loop until pjDst = pjDstEnd: // // Load byte // store foreground color to each byte set // 51: lbu t1,0(a1) // load next src byte addu a1,a1,s1 // inc src address and t1,t1,v0 // start mask j v1 // jump into table nop 100: // byte 6 and t5,t1,0x02 beq t5,zero,53f nop sb t0,6(a0) 53: // byte 5 and t5,t1,0x04 beq t5,zero,54f nop sb t0,5(a0) 54: // byte 4 and t5,t1,0x08 beq t5,zero,55f nop sb t0,4(a0) 55: // byte 3 and t5,t1,0x10 beq t5,zero,56f nop sb t0,3(a0) 56: // byte 2 and t5,t1,0x20 beq t5,zero,57f nop sb t0,2(a0) 57: // byte 1 and t5,t1,0x40 beq t5,zero,58f nop sb t0,1(a0) 58: // byte 0 and t5,t1,0x80 beq t5,zero,59f nop sb t0,0(a0) 59: addu a0,a0,t3 // pjDst += DeltaDst bne a0,a3,51b // 
while pjDst != pjDstEnd nop EndvSrcTranCopyS1D8: .set reorder .set at lw s0,TrS0(sp) // save s0 lw s1,TrS1(sp) // save s1 lw s2,TrS2(sp) // save s2 addu sp,sp,TrFrameLength // restore stack j ra // return // // The following code is arranged as 16, four instruction blocks. The block // of code that is chosen for execution is determined from the high order // glyph nibble. These glyph nibbles are always aligned. // // The glyph nibbles are encoded in big endian order and therefore the pixels // that are stored are the reverse of the big endian bits within the nibble. // .align 4 .set noreorder .set noat 60: // reference label // // Pattern 0000 // and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // nop // 61: // // Pattern 0001 -> 1000 // sb t0,3(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 62: // // Pattern 0010 -> 0100 // sb t0,2(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 63: // // Pattern 0011 -> 1100 // sh t0,2(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 64: // // Pattern 0100 -> 0010 // sb t0,1(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 65: // // Pattern 0101 -> 1010 // sb t0,1(a0) // store pixel sb t0,3(a0) // store pixel and v1,v0,0xf 
<< 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 66: // // Pattern 0110 -> 0110 // sb t0,1(a0) // store pixel sb t0,2(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 67: // // Pattern 0111 -> 1110 // swr t0,1(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 68: // // Pattern 1000 -> 0001 // sb t0,0(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 69: // // Pattern 1001 -> 1001 // sb t0,0(a0) // store pixel sb t0,3(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 70: // // Pattern 1010 -> 0101 // sb t0,0(a0) // store pixel sb t0,2(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 71: // // Pattern 1011 -> 1101 // sb t0,0(a0) // store pixel sh t0,2(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 72: // // Pattern 1100 -> 0011 // sh t0,0(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store 
routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 73: // // Pattern 1101 -> 1011 // sh t0,0(a0) // store pixels sb t0,3(a0) // store pixel and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // 74: // // // Pattern 1110 -> 0111 // swl t0,2(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // 75: // // Pattern 1111 -> 1111 // sw t0,0(a0) // store pixels and v1,v0,0xf << 6 // isolate low order nibble addu v1,v1,t5 // compute dispatch address j v1 // dispatch to pixel store routine addu a0,a0,4 // advance to next draw point nop // fill nop // nop // .set at .set reorder // // The following code is arranged as 16, 16 instruction blocks. The block // of code that is chosen for execution is determined from the low order // glyph nibble and the two low its of the draw address. // // The glyph nibbles are encoded in big endian order and therefore the pixels // that are stored are the reverse of the big endian bits within the nibble. 
// .set noreorder .set noat 80: // reference label // // Pattern 0000 // addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop nop 81: // reference label // // Pattern 0001 -> 1000 // sb t0,3(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop 82: // reference label // // Pattern 0010 -> 0100 // sb t0,2(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop // 83: // reference label // // Pattern 0011 -> 1100 // sh t0,2(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // 
get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop // 84: // reference label // // Pattern 0100 -> 0010 // sb t0,1(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop // 85: // reference label // // Pattern 0101 -> 1010 // sb t0,1(a0) // store pixel sb t0,3(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 86: // reference label // // Pattern 0110 -> 0110 // sb t0,1(a0) // store pixel sb t0,2(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and 
v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 87: // reference label // // Pattern 0111 -> 1110 // swr t0,1(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop 88: // reference label // // Pattern 1000 -> 0001 // sb t0,0(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop 89: // reference label // // Pattern 1001 -> 1001 // sb t0,0(a0) // store pixel sb t0,3(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 90: // reference label 
// // Pattern 1010 -> 0101 // sb t0,0(a0) // store pixel sb t0,2(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 91: // reference label // // Pattern 1011 -> 1101 // sb t0,0(a0) // store pixel sh t0,2(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 92: // reference label // // Pattern 1100 -> 0011 // sh t0,0(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop 93: // reference label // // Pattern 1101 -> 1011 // sh t0,0(a0) // store pixels sb t0,3(a0) // store pixel addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu 
a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop 94: // reference label // // Pattern 1110 -> 0111 // swl t0,2(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop 95: // reference label // // Pattern 1111 -> 1111 // sw t0,0(a0) // store pixels addu a0,a0,4 // advance to next draw point beql a1,t4,40b // if eq then end of scan line addu a0,a0,t3 // compute next scanline address lbu v0,0(a1) // get next byte of glyph addu a1,a1,1 // advance to next glyph byte beq zero,v0,30b // if eq, no glyph bits to draw sll v1,v0,7 - 6 // shift high nibble into position and v1,v1,0xf << 5 // isolate low order nibble addu v1,v1,t1 // compute dispatch address j v1 // dispatch to pixel store routine sll v0,v0,6 // shift next nibble into position nop nop nop nop .set at .set reorder .end vSrcTranCopyS1D8