//      TITLE("Pattern Tiler")
//++
//
// Copyright (c) 1992  Microsoft Corporation
//
// Module Name:
//
//    tiler.s
//
// Abstract:
//
//    This module implements code to copy a pattern to a target surface.
//
//    N.B. The code is written to optimally write to a frame buffer display
//         surface. This means there is an occasional movement of data to
//         floating point registers so that 8-byte writes to the display
//         can be performed.
//
//    NOTE(review): the code in this file performs its 8-byte accesses with
//         64-bit integer instructions (dsll/dsrl/ld/sd); no floating point
//         register is referenced. The N.B. above may describe an earlier
//         revision - confirm.
//
//    NOTE(review): none of the routines checks for a zero fill count. Every
//         loop terminates by comparing the advancing target address for
//         equality with a precomputed ending address, so callers are
//         presumably required to pass ff_culFill > 0 - confirm.
//
// Author:
//
//    Donald Sidoroff (donalds) 2-Feb-1992
//
// Rewritten by:
//
//    David N. Cutler (davec) 4-May-1992
//
// Environment:
//
//    User mode only.
//
// Revision History:
//
//--

#include "ksmips.h"
#include "gdimips.h"

        .extern Gdip64bitDisabled 4

        SBTTL("rop P, Aligned")
//++
//
// VOID
// vFetchAndCopy (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one scan line of an aligned pattern.
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(vCopyPattern)

        ALTERNATE_ENTRY(vFetchAndCopy)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,10f             // if ne, pattern is not 8 bytes
        lw      v0,0(t5)                // get low part of 8-byte pattern
        lw      v1,4(t5)                // get high part of 8-byte pattern
        beq     zero,t2,CopyPattern     // if eq, zero offset value
        lw      v1,0(t1)                // nonzero offset - high part wraps to
                                        //  the first longword of the pattern
        b       CopyPattern             // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. The loop below is assembled noreorder; the instruction following each
//      branch is its delay slot and executes whether or not the branch is
//      taken.
//

        .set    noreorder
        .set    noat
10:     lw      v0,0(t5)                // get 4-byte pattern value
        addu    t3,t3,t1                // compute ending pattern address
20:     addu    t0,t0,4                 // advance target pointer
        sw      v0,-4(t0)               // store pattern in target
        beq     t0,t4,30f               // if eq, end of target
        addu    t5,t5,4                 // (delay slot) advance pattern address
        subu    t6,t5,t3                // check if at end of pattern
        bne     zero,t6,20b             // if ne, not at end of pattern
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        move    t5,t1                   // set starting pattern address
        b       20b                     //
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        .set    at
        .set    reorder

30:     j       ra                      // return

        SBTTL("rop P, Unaligned")
//++
//
// VOID
// vFetchShiftAndCopy (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one line of an unaligned pattern
//    using rop (P).
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        ALTERNATE_ENTRY(vFetchShiftAndCopy)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,10f             // if ne, pattern is not 8 bytes

//
// NOTE(review): the high part is fetched with lwr at offset 4 and lwl at
// offset -1 rather than the usual n / n+3 pairing; this appears intended to
// wrap the high half around within the 8-byte pattern - confirm.
//

        lwr     v0,0(t5)                // get low part of 8-byte pattern
        lwl     v0,3(t5)                //
        lwr     v1,4(t5)                // get high part of 8-byte pattern
        lwl     v1,3 - 4(t5)            //
        b       CopyPattern             // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// In this loop t2 holds the current byte offset within the pattern and t3
// holds the pattern width; the offset is reduced by the width whenever it
// reaches or passes the end of the pattern.
//

        .set    noreorder
        .set    noat
10:     lwr     v0,0(t5)                // get low bytes of pattern
        lwl     v0,3(t5)                // get high bytes of pattern
        addu    t0,t0,4                 // advance target pointer
        sw      v0,-4(t0)               // store pattern in target
        beq     t0,t4,20f               // if eq, end of target
        addu    t2,t2,4                 // (delay slot) advance pattern offset
        subu    t6,t2,t3                // check if at end of pattern
        bltz    t6,10b                  // if ltz, not at end of pattern
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        move    t2,t6                   // set wrapped offset in pattern
        b       10b                     //
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        .set    at
        .set    reorder

20:     j       ra                      // return

        SBTTL("rop Pn, Aligned")
//++
//
// VOID
// vFetchNotAndCopy (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one line of an aligned pattern, storing
//    the complement of the pattern (rop Pn).
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        ALTERNATE_ENTRY(vFetchNotAndCopy)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,20f             // if ne, pattern is not 8 bytes
        lw      v0,0(t5)                // get low part of 8-byte pattern
        lw      v1,4(t5)                // get high part of 8-byte pattern
        beq     zero,t2,10f             // if eq, zero offset value
        lw      v1,0(t1)                // nonzero offset - high part wraps to
                                        //  the first longword of the pattern
10:     nor     v0,v0,zero              // complement pattern
        nor     v1,v1,zero              //
        b       CopyPattern             // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//

        .set    noreorder
        .set    noat
20:     lw      v0,0(t5)                // get 4-byte pattern value
        addu    t3,t3,t1                // compute ending pattern address
30:     addu    t0,t0,4                 // advance target pointer
        nor     v0,v0,zero              // complement pattern
        sw      v0,-4(t0)               // store pattern in target
        beq     t0,t4,40f               // if eq, end of target
        addu    t5,t5,4                 // (delay slot) advance pattern address
        subu    t6,t5,t3                // check if at end of pattern
        bne     zero,t6,30b             // if ne, not at end of pattern
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        move    t5,t1                   // set starting pattern address
        b       30b                     //
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        .set    at
        .set    reorder

40:     j       ra                      // return

        SBTTL("rop Pn, Unaligned")
//++
//
// VOID
// vFetchShiftNotAndCopy (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one line of an unaligned pattern
//    using rop (Pn).
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        ALTERNATE_ENTRY(vFetchShiftNotAndCopy)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,10f             // if ne, pattern is not 8 bytes

//
// NOTE(review): same lwr 4 / lwl -1 fetch of the high part as in
// vFetchShiftAndCopy; it appears to wrap the high half around within the
// 8-byte pattern - confirm.
//

        lwr     v0,0(t5)                // get low part of 8-byte pattern
        lwl     v0,3(t5)                //
        lwr     v1,4(t5)                // get high part of 8-byte pattern
        lwl     v1,3 - 4(t5)            //
        nor     v0,v0,zero              // complement pattern
        nor     v1,v1,zero              //
        b       CopyPattern             // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//

        .set    noreorder
        .set    noat
10:     lwr     v0,0(t5)                // get low bytes of pattern
        lwl     v0,3(t5)                // get high bytes of pattern
        addu    t0,t0,4                 // advance target pointer
        nor     v0,v0,zero              // complement pattern
        sw      v0,-4(t0)               // store pattern in target
        beq     t0,t4,20f               // if eq, end of target
        addu    t2,t2,4                 // (delay slot) advance pattern offset
        subu    t6,t2,t3                // check if at end of pattern
        bltz    t6,10b                  // if ltz, not at end of pattern
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        move    t2,t6                   // set wrapped offset in pattern
        b       10b                     //
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        .set    at
        .set    reorder

20:     j       ra                      // return

        SBTTL("Copy Pattern")
//++
//
// Routine Description:
//
//    This routine contains common code for copying an 8-byte pattern to
//    a target surface.
//
// Arguments:
//
//    a1 - Supplies the size of the fill in bytes.
//    v0 and v1 - Supplies the 8-byte pattern to copy.
//    t0 - Supplies the starting target surface address.
//    t4 - Supplies the ending target surface address.
//
// Return Value:
//
//    None.
//
//--

CopyPattern:                            //

//
// If the fill size is not an even multiple of 8 bytes, then move one
// longword and swap the pattern value.
//

        and     t8,a1,0x4               // check if even multiple of 8 bytes
        beq     zero,t8,10f             // if eq, even multiple of 8 bytes
        sw      v0,0(t0)                // store low 4 bytes of pattern
        addu    t0,t0,4                 // advance target address
        subu    a1,a1,4                 // reduce size of fill operation
        beq     zero,a1,200f            // if eq, no more to move
        move    t8,v0                   // swap 8-byte pattern value
        move    v0,v1                   //
        move    v1,t8                   //

//
// Many system platforms do not support 64 bit access to video memory. For
// these platforms, data is moved 32-bits at a time.
//

10:     lbu     t7,Gdip64bitDisabled    // get 64-bit disable flag
        bne     zero,t7,140f            // if ne, 64-bit access is disabled

//
// If the target buffer is 8-byte aligned, then move the pattern value to
// the target 32 bytes at a time by moving any intervening 8-byte blocks
// first. Otherwise, move a single longword, move any intervening 8-byte
// blocks, move 32-byte blocks, and then move a single longword at the end.
//

        and     t8,t0,0x4               // isolate target alignment bits
        bne     zero,t8,70f             // if ne, target not aligned

//
// Move 8-byte pattern value to target 32 bytes at a time.
//
// The two pattern longwords are first packed into the 64-bit register v0
// with the original v0 in bits <31:0> and v1 in bits <63:32>. The 8, 16, or
// 24 residual bytes (fill size modulo 32) are stored first, at negative
// offsets from the already advanced target address.
//
// N.B. beql/bnel are branch-likely instructions; their delay slot is
//      executed only when the branch is taken.
//

        .set    noreorder
        .set    noat
        dsll    v0,v0,32                // merge 8 bytes of pattern
        dsrl    v0,v0,32                //
        dsll    v1,v1,32                //
        or      v0,v0,v1                //
        and     t8,a1,0x18              // check if even multiple of 32 bytes
        beq     zero,t8,30f             // if eq, even multiple of 32 bytes
        subu    t4,t4,32                // (delay slot) compute ending segment address
        subu    a1,a1,t8                // reduce size of fill operation
        beq     zero,a1,40f             // if eq, only alignment part to move
        addu    t0,t0,t8                // (delay slot) advance target address
        xor     t8,t8,0x18              // check if 24 bytes need to be moved
        beql    zero,t8,20f             // if eq, 24 bytes to move
        sd      v0,-24(t0)              // (taken only) store first 8 bytes of 24 bytes
        and     t8,t8,0x10              // check if 8 bytes to move
        bnel    zero,t8,30f             // if ne, only 8 bytes to move
        sd      v0,-8(t0)               // (taken only) store 8-bytes of pattern
20:     sd      v0,-16(t0)              // store last 16 bytes of 16 or 24 bytes
        sd      v0,-8(t0)               //
30:     sd      v0,0(t0)                // store 8 byte pattern value 4 times
        sd      v0,8(t0)                //
        sd      v0,16(t0)               //
        sd      v0,24(t0)               //
        bne     t0,t4,30b               // if ne, more to move
        addu    t0,t0,32                // (delay slot) advance target address
        .set    at
        .set    reorder

        j       ra                      // return

//
// The fill contains no 32-byte block; move only the 8, 16, or 24 residual
// bytes.
//

        .set    noreorder
        .set    noat
40:     xor     t8,t8,0x18              // check if 24 bytes need to be moved
        beql    zero,t8,50f             // if eq, 24 bytes to move
        sd      v0,-24(t0)              // (taken only) store first 8 bytes of 24 bytes
        and     t8,t8,0x10              // check if 8 bytes to move
        bnel    zero,t8,60f             // if ne, only 8 bytes to move
        sd      v0,-8(t0)               // (taken only) store 8-bytes of pattern
50:     sd      v0,-16(t0)              // store last 16 bytes of 16 or 24 bytes
        sd      v0,-8(t0)               //
        .set    at
        .set    reorder

60:     j       ra                      // return

//
// Align the target to an 8-byte boundary, move any intervening 8-byte blocks,
// move the pattern to the target 32 bytes at a time, and move the remaining
// longword at the end.
//
// Because the leading longword (v0) is stored separately, the packed 64-bit
// value is built in v1 with the original v1 in bits <31:0> and v0 in bits
// <63:32>. The fill size is reduced by 8 to account for both the leading and
// the trailing longword.
//

70:     sw      v0,0(t0)                // store low 4 bytes of pattern
        addu    t0,t0,4                 // advance target address
        subu    a1,a1,8                 // reduce size of fill
        beq     zero,a1,120f            // if eq, nothing in the middle

        .set    noreorder
        .set    noat
        dsll    v1,v1,32                // merge 8 bytes of pattern
        dsrl    v1,v1,32                //
        dsll    v0,v0,32                //
        or      v1,v0,v1                //
        and     t8,a1,0x18              // check if even multiple of 32 bytes
        beq     zero,t8,90f             // if eq, even multiple of 32 bytes
        subu    t4,t4,32 + 4            // (delay slot) compute ending segment address
        subu    a1,a1,t8                // reduce size of fill operation
        beq     zero,a1,100f            // if eq, only alignment part to move
        addu    t0,t0,t8                // (delay slot) advance target address
        xor     t8,t8,0x18              // check if 24 bytes need to be moved
        beql    zero,t8,80f             // if eq, 24 bytes to move
        sd      v1,-24(t0)              // (taken only) store first 8 bytes of 24 bytes
        and     t8,t8,0x10              // check if 8 bytes to move
        bnel    zero,t8,90f             // if ne, only 8 bytes to move
        sd      v1,-8(t0)               // (taken only) store 8-bytes of pattern
80:     sd      v1,-16(t0)              // store last 16 bytes of 16 or 24 bytes
        sd      v1,-8(t0)               //
90:     sd      v1,0(t0)                // store 8 byte pattern value 4 times
        sd      v1,8(t0)                //
        sd      v1,16(t0)               //
        sd      v1,24(t0)               //
        bne     t0,t4,90b               // if ne, more to move
        addu    t0,t0,32                // (delay slot) advance target address
        .set    at
        .set    reorder

        sw      v1,0(t0)                // store high bytes of pattern
        j       ra                      // return

//
// The middle of the fill contains no 32-byte block; move only the 8, 16, or
// 24 residual bytes followed by the trailing longword.
//

        .set    noreorder
        .set    noat
100:    xor     t8,t8,0x18              // check if 24 bytes need to be moved
        beql    zero,t8,110f            // if eq, 24 bytes to move
        sd      v1,-24(t0)              // (taken only) store first 8 bytes of 24 bytes
        and     t8,t8,0x10              // check if 8 bytes to move
        bnel    zero,t8,120f            // if ne, only 8 bytes to move
        sd      v1,-8(t0)               // (taken only) store 8-bytes of pattern
110:    sd      v1,-16(t0)              // store last 16 bytes of 16 or 24 bytes
        sd      v1,-8(t0)               //
        .set    at
        .set    reorder

120:    sw      v1,0(t0)                // store high 4 bytes of pattern
        j       ra                      // return

//
// Move 8-byte pattern value to target 8 bytes at a time using 32-bit
// operations.
//
// N.B. The subtract in the delay slot at 140 executes on both paths, so the
//      16-byte loop at 160 subtracts a further 8 to end 16 bytes before the
//      ending target address.
//

        .set    noreorder
        .set    noat
140:    and     t8,a1,0x8               // check if even multiple of 16 bytes
        beq     zero,t8,160f            // if eq, even multiple of 16 bytes
        subu    t4,t4,8                 // (delay slot) compute ending segment address
150:    sw      v0,0(t0)                // store 8-byte pattern value
        sw      v1,4(t0)                //
        bne     t0,t4,150b              // if ne, more to move
        addu    t0,t0,8                 // (delay slot) advance target address
        .set    at
        .set    reorder

        j       ra                      // return

//
// Move 8-byte pattern value to target 16 bytes at a time using 32-bit
// operations.
//

        .set    noreorder
        .set    noat
160:    subu    t4,t4,8                 // compute ending segment address
170:    sw      v0,0(t0)                // store 8-byte pattern value
        sw      v1,4(t0)                //
        sw      v0,8(t0)                // store 8-byte pattern value
        sw      v1,12(t0)               //
        bne     t0,t4,170b              // if ne, more to move
        addu    t0,t0,16                // (delay slot) advance target address
        .set    at
        .set    reorder

200:    j       ra                      // return

        .end    vCopyPattern

        SBTTL("rop DPx, Aligned")
//++
//
// VOID
// vFetchAndMerge (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one line of an aligned pattern, merging
//    it with the target by exclusive or (rop DPx).
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(vMergePattern)

        ALTERNATE_ENTRY(vFetchAndMerge)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,10f             // if ne, pattern is not 8 bytes
        lw      v0,0(t5)                // get low part of 8-byte pattern
        lw      v1,4(t5)                // get high part of 8-byte pattern
        beq     zero,t2,MergePattern    // if eq, zero offset value
        lw      v1,0(t1)                // nonzero offset - high part wraps to
                                        //  the first longword of the pattern
        b       MergePattern            // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//

        .set    noreorder
        .set    noat
10:     lw      v0,0(t5)                // get 4-byte pattern value
        addu    t3,t3,t1                // compute ending pattern address
20:     lw      v1,0(t0)                // get 4-byte target value
        addu    t0,t0,4                 // advance target pointer
        xor     v0,v1,v0                // compute exclusive or with pattern
        sw      v0,-4(t0)               // store merged value in target
        beq     t0,t4,30f               // if eq, end of target
        addu    t5,t5,4                 // (delay slot) advance pattern address
        subu    t6,t5,t3                // check if at end of pattern
        bne     zero,t6,20b             // if ne, not at end of pattern
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        move    t5,t1                   // set starting pattern address
        b       20b                     //
        lw      v0,0(t5)                // (delay slot) get 4-byte pattern value
        .set    at
        .set    reorder

30:     j       ra                      // return

        SBTTL("rop DPx, Unaligned")
//++
//
// VOID
// vFetchShiftAndMerge (
//    IN PFETCHFRAME pff
//    )
//
// Routine Description:
//
//    This routine repeatedly tiles one line of an unaligned pattern
//    using rop (DPx).
//
// Arguments:
//
//    pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
//    None.
//
//--

        ALTERNATE_ENTRY(vFetchShiftAndMerge)

        lw      t0,ff_pvTrg(a0)         // get starting target address
        lw      t1,ff_pvPat(a0)         // get base pattern address
        lw      t2,ff_xPat(a0)          // get pattern offset in bytes
        lw      t3,ff_cxPat(a0)         // get pattern width (used as a byte count)
        lw      t4,ff_culFill(a0)       // get fill count in longwords
        sll     a1,t4,2                 // compute size of fill in bytes
        addu    t4,a1,t0                // compute ending target address
        addu    t5,t2,t1                // compute current pattern address
        subu    v0,t3,8                 // check if pattern is exactly 8 bytes
        bne     zero,v0,10f             // if ne, pattern is not 8 bytes

//
// NOTE(review): same lwr 4 / lwl -1 fetch of the high part as in
// vFetchShiftAndCopy; it appears to wrap the high half around within the
// 8-byte pattern - confirm.
//

        lwr     v0,0(t5)                // get low part of 8-byte pattern
        lwl     v0,3(t5)                //
        lwr     v1,4(t5)                // get high part of 8-byte pattern
        lwl     v1,3 - 4(t5)            //
        b       MergePattern            // finish in common code

//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//

        .set    noreorder
        .set    noat
10:     lw      v1,0(t0)                // get 4-byte target value
        lwr     v0,0(t5)                // get low bytes of pattern
        lwl     v0,3(t5)                // get high bytes of pattern
        addu    t0,t0,4                 // advance target pointer
        xor     v0,v1,v0                // compute exclusive or with pattern
        sw      v0,-4(t0)               // store merged value in target
        beq     t0,t4,20f               // if eq, end of target
        addu    t2,t2,4                 // (delay slot) advance pattern offset
        subu    t6,t2,t3                // check if at end of pattern
        bltz    t6,10b                  // if ltz, not at end of pattern
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        move    t2,t6                   // set wrapped offset in pattern
        b       10b                     //
        addu    t5,t2,t1                // (delay slot) compute address of pattern
        .set    at
        .set    reorder

20:     j       ra                      // return

        SBTTL("Merge Pattern")
//++
//
// Routine Description:
//
//    This routine contains common code for merging an 8-byte pattern to
//    a target surface.
//
// Arguments:
//
//    a1 - Supplies the size of the fill in bytes.
//    v0 and v1 - Supplies the 8-byte pattern to merge.
//    t0 - Supplies the starting target surface address.
//    t4 - Supplies the ending target surface address.
//
//    N.B. t1, t2, t3, and t5 are used as scratch registers by the 32-byte
//         merge loops and do not retain the pattern values loaded by the
//         entry points.
//
// Return Value:
//
//    None.
//
//--

MergePattern:                           //

//
// If the fill size is not an even multiple of 8 bytes, then merge one
// longword and swap the pattern value.
//

        and     t8,a1,0x4               // check if even multiple of 8 bytes
        beq     zero,t8,10f             // if eq, even multiple of 8 bytes
        lw      t6,0(t0)                // get 4-byte target value
        addu    t0,t0,4                 // advance target address
        xor     t6,t6,v0                // compute exclusive or with pattern
        sw      t6,-4(t0)               // store low 4 bytes of merged value
        subu    a1,a1,4                 // reduce size of fill operation
        beq     zero,a1,160f            // if eq, no more to move
        move    t8,v0                   // swap 8-byte pattern value
        move    v0,v1                   //
        move    v1,t8                   //

//
// Many system platforms do not support 64 bit access to video memory. For
// these platforms, data is moved 32-bits at a time.
//

10:     lbu     t7,Gdip64bitDisabled    // get 64-bit disable flag
        bne     zero,t7,110f            // if ne, 64-bit access is disabled

//
// If the target buffer is 8-byte aligned, then merge the pattern value with
// the target 8 bytes at a time. Otherwise, merge a single longword, merge any
// intervening 8-byte blocks, and then merge a single longword at the end.
//

        and     t8,t0,0x4               // isolate target alignment bits
        bne     zero,t8,30f             // if ne, target alignment problem

//
// Merge 8-byte pattern value with target.
//
// The pattern is packed into the 64-bit register v0 with the original v0 in
// bits <31:0> and v1 in bits <63:32>. a2 receives the number of bytes that
// lie in whole 32-byte blocks and a1 the residual byte count.
//

        .set    noreorder
        .set    noat
        dsll    v0,v0,32                // merge 8 bytes of pattern
        dsrl    v0,v0,32                //
        dsll    v1,v1,32                //
        or      v0,v0,v1                //
        and     a2,a1,32 - 1            // isolate residual number of bytes
        subu    a2,a1,a2                // compute size of 32-byte blocks in bytes
        beq     zero,a2,17f             // if eq, no 32-byte block to merge
        subu    a1,a1,a2                // (delay slot) compute residual number of bytes
        addu    a2,a2,t0                // compute ending segment address
        subu    a2,a2,32                //

//
// Merge 8-byte pattern value with target 32 bytes at a time.
//

13:     ld      t1,0(t0)                // get 8-byte target values
        ld      t2,8(t0)                //
        ld      t3,16(t0)               //
        ld      t5,24(t0)               //
        xor     t1,t1,v0                // compute exclusive or with pattern
        xor     t2,t2,v0                //
        xor     t3,t3,v0                //
        xor     t5,t5,v0                //
        sd      t1,0(t0)                // store 8-byte merged values
        sd      t2,8(t0)                //
        sd      t3,16(t0)               //
        sd      t5,24(t0)               //
        bne     t0,a2,13b               // if ne, more to move
        addu    t0,t0,32                // (delay slot) advance target address
        .set    at
        .set    reorder

        beq     zero,a1,160f            // if eq, no residual 8-byte blocks

//
// Merge 8-byte pattern value with target 8 bytes at a time.
//

        .set    noreorder
        .set    noat
17:     subu    t4,t4,8                 // compute ending segment address
20:     ld      t1,0(t0)                // get 8-byte target value
        xor     t1,t1,v0                // compute exclusive or with pattern
        sd      t1,0(t0)                // store 8-byte merged value
        bne     t0,t4,20b               // if ne, more to move
        addu    t0,t0,8                 // (delay slot) advance target address
        .set    at
        .set    reorder

        j       ra                      // return

//
// Align the target to an 8-byte boundary, merge any intervening 8-byte blocks,
// and merge the remaining longword at the end.
//
// The fill size is reduced by 8 to account for both the leading and the
// trailing longword.
//

30:     lw      t6,0(t0)                // get 4-byte target value
        addu    t0,t0,4                 // advance target address
        xor     t6,t6,v0                // compute exclusive or with pattern
        sw      t6,-4(t0)               // store low 4 bytes of merged value
        subu    a1,a1,8                 // reduce size of fill
        beq     zero,a1,50f             // if eq, nothing in the middle

//
// Merge 8-byte pattern value with target.
//
// Because the leading longword (v0) was merged separately, the packed 64-bit
// value is built in v1 with the original v1 in bits <31:0> and v0 in bits
// <63:32>.
//

        .set    noreorder
        .set    noat
        dsll    v1,v1,32                // merge 8 bytes of pattern
        dsrl    v1,v1,32                //
        dsll    v0,v0,32                //
        or      v1,v0,v1                //
        and     a2,a1,32 - 1            // isolate residual number of bytes
        subu    a2,a1,a2                // compute size of 32-byte blocks in bytes
        beq     zero,a2,37f             // if eq, no 32-byte block to merge
        subu    a1,a1,a2                // (delay slot) compute residual number of bytes
        addu    a2,a2,t0                // compute ending segment address
        subu    a2,a2,32                //

//
// Merge 8-byte pattern value with target 32 bytes at a time.
//

33:     ld      t1,0(t0)                // get 8-byte target values
        ld      t2,8(t0)                //
        ld      t3,16(t0)               //
        ld      t5,24(t0)               //
        xor     t1,t1,v1                // compute exclusive or with pattern
        xor     t2,t2,v1                //
        xor     t3,t3,v1                //
        xor     t5,t5,v1                //
        sd      t1,0(t0)                // store 8-byte merged values
        sd      t2,8(t0)                //
        sd      t3,16(t0)               //
        sd      t5,24(t0)               //
        bne     t0,a2,33b               // if ne, more to move
        addu    t0,t0,32                // (delay slot) advance target address
        .set    at
        .set    reorder

        beq     zero,a1,50f             // if eq, no residual 8-byte blocks

//
// Merge 8-byte pattern value with target 8 bytes at a time.
//
// The ending segment address is 12 bytes before the ending target address:
// 8 for the last 8-byte block plus 4 for the trailing longword.
//

        .set    noreorder
        .set    noat
37:     subu    t4,t4,12                // compute ending segment address
40:     ld      t1,0(t0)                // get 8-byte target value
        xor     t1,t1,v1                // compute exclusive or with pattern
        sd      t1,0(t0)                // store 8-byte merged value
        bne     t0,t4,40b               // if ne, more to move
        addu    t0,t0,8                 // (delay slot) advance target address
        .set    at
        .set    reorder

50:     lw      t6,0(t0)                // get 4-byte target value
        xor     t6,t6,v1                // compute exclusive or with pattern
        sw      t6,0(t0)                // store high bytes of merged value
        j       ra                      // return

//
// Merge 8-byte pattern value with target using 32-bit operations.
//

        .set    noreorder
        .set    noat
110:    subu    t4,t4,8                 // compute ending segment address
120:    lw      t6,0(t0)                // get 8-byte target value
        lw      t7,4(t0)                //
        xor     t6,t6,v0                // compute exclusive or with pattern
        xor     t7,t7,v1                //
        sw      t6,0(t0)                // store 8-byte merged value
        sw      t7,4(t0)                //
        bne     t0,t4,120b              // if ne, more to move
        addu    t0,t0,8                 // (delay slot) advance target address
        .set    at
        .set    reorder

160:    j       ra                      // return

        .end    vMergePattern