mirror of https://github.com/lianthony/NT4.0
// TITLE("Pattern Tiler")
//++
//
// Copyright (c) 1992 Microsoft Corporation
//
// Module Name:
//
//    tiler.s
//
// Abstract:
//
//    This module implements code to copy a pattern to a target surface.
//
//    N.B. The code is written to optimally write to a frame buffer display
//         surface. This means there is an occasional movement of data to
//         floating point registers so that 8-byte writes to the display
//         can be performed.
//
// Author:
//
//    Donald Sidoroff (donalds) 2-Feb-1992
//
// Rewritten by:
//
//    David N. Cutler (davec) 4-May-1992
//
// Environment:
//
//    User mode only.
//
// Revision History:
//
//--

#include "ksmips.h"
#include "gdimips.h"

        .extern Gdip64bitDisabled 4
|
SBTTL("rop P, Aligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchAndCopy (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one scan line of an aligned pattern.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(vCopyPattern)
|
|
|
|
ALTERNATE_ENTRY(vFetchAndCopy)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,10f // if ne, pattern is not 8 bytes
|
|
lw v0,0(t5) // get low part of 8-byte pattern
|
|
lw v1,4(t5) // get high part of 8-byte pattern
|
|
beq zero,t2,CopyPattern // if eq, zero offset value
|
|
lw v1,0(t1) // get high part of 8-byte pattern
|
|
b CopyPattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
10: lw v0,0(t5) // get 4-byte pattern value
|
|
addu t3,t3,t1 // compute ending pattern address
|
|
20: addu t0,t0,4 // advance target pointer
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,30f // if eq, end of target
|
|
addu t5,t5,4 // advance pixel offset
|
|
subu t6,t5,t3 // check if at end of pattern
|
|
bne zero,t6,20b // if ne, not at end of pattern
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
move t5,t1 // set starting pattern addres
|
|
b 20b //
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
.set at
|
|
.set reorder
|
|
|
|
30: j ra // return
|
|
|
|
SBTTL("rop P, Unaligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchShiftAndCopy (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one line of an unaligned pattern
|
|
// using rop (P).
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
ALTERNATE_ENTRY(vFetchShiftAndCopy)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,10f // if ne, pattern is not 8 bytes
|
|
lwr v0,0(t5) // get low part of 8-byte pattern
|
|
lwl v0,3(t5) //
|
|
lwr v1,4(t5) // get high part of 8-byte pattern
|
|
lwl v1,3 - 4(t5) //
|
|
b CopyPattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
10: lwr v0,0(t5) // get low bytes of pattern
|
|
lwl v0,3(t5) // get high bytes of pattern
|
|
addu t0,t0,4 // advance target pointer
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,20f // if eq, end of target
|
|
addu t2,t2,4 // advance pixel offset
|
|
subu t6,t2,t3 // check if at end of pattern
|
|
bltz t6,10b // if ltz, not at end of pattern
|
|
addu t5,t2,t1 // compute address of pattern
|
|
move t2,t6 // set offset in pattern
|
|
b 10b //
|
|
addu t5,t2,t1 // compute address of pattern
|
|
.set at
|
|
.set reorder
|
|
|
|
20: j ra // return
|
|
|
|
SBTTL("rop Pn, Aligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchNotAndCopy (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one line of an aligned pattern.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
ALTERNATE_ENTRY(vFetchNotAndCopy)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,20f // if ne, pattern is not 8 bytes
|
|
lw v0,0(t5) // get low part of 8-byte pattern
|
|
lw v1,4(t5) // get high part of 8-byte pattern
|
|
beq zero,t2,10f // if eq, zero offset value
|
|
lw v1,0(t1) // get high part of 8-byte pattern
|
|
10: nor v0,v0,zero // complement pattern
|
|
nor v1,v1,zero //
|
|
b CopyPattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
20: lw v0,0(t5) // get 4-byte pattern value
|
|
addu t3,t3,t1 // compute ending pattern address
|
|
30: addu t0,t0,4 // advance target pointer
|
|
nor v0,v0,zero // complement pattern
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,40f // if eq, end of target
|
|
addu t5,t5,4 // advance pattern address
|
|
subu t6,t5,t3 // check if at end of pattern
|
|
bne zero,t6,30b // if ne, not at end of pattern
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
move t5,t1 // set starting pattern address
|
|
b 30b //
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
.set at
|
|
.set reorder
|
|
|
|
40: j ra // return
|
|
|
|
SBTTL("rop Pn, Unaligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchShiftNotAndCopy (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one line of an unaligned pattern
|
|
// using rop (Pn).
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
ALTERNATE_ENTRY(vFetchShiftNotAndCopy)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,10f // if ne, pattern is not 8 bytes
|
|
lwr v0,0(t5) // get low part of 8-byte pattern
|
|
lwl v0,3(t5) //
|
|
lwr v1,4(t5) // get high part of 8-byte pattern
|
|
lwl v1,3 - 4(t5) //
|
|
nor v0,v0,zero // complement pattern
|
|
nor v1,v1,zero //
|
|
b CopyPattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
10: lwr v0,0(t5) // get low bytes of pattern
|
|
lwl v0,3(t5) // get high bytes of pattern
|
|
addu t0,t0,4 // advance target pointer
|
|
nor v0,v0,zero // complement pattern
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,20f // if eq, end of target
|
|
addu t2,t2,4 // advance pixel offset
|
|
subu t6,t2,t3 // check if at end of pattern
|
|
bltz t6,10b // if ltz, not at end of pattern
|
|
addu t5,t2,t1 // compute address of pattern
|
|
move t2,t6 // set offset in pattern
|
|
b 10b //
|
|
addu t5,t2,t1 // compute address of pattern
|
|
.set at
|
|
.set reorder
|
|
|
|
20: j ra // return
|
|
|
|
SBTTL("Copy Pattern")
|
|
//++
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine contains common code for copying an 8-byte pattern to
|
|
// a target surface.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// a1 - Supplies the size of the fill in bytes.
|
|
// v0 and v1 - Supplies the 8-byte pattern to copy.
|
|
// t0 - Supplies the starting target surface address.
|
|
// t4 - Supplies the ending target surface address.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
CopyPattern: //
|
|
|
|
//
|
|
// If the fill size is not an even multiple of 8 bytes, then move one
|
|
// longword and swap the pattern value.
|
|
//
|
|
|
|
and t8,a1,0x4 // check if even multiple of 8 bytes
|
|
beq zero,t8,10f // if eq, even multiple of 8 bytes
|
|
sw v0,0(t0) // store low 4 bytes of pattern
|
|
addu t0,t0,4 // advance target address
|
|
subu a1,a1,4 // reduce size of fill operation
|
|
beq zero,a1,200f // if eq, no more to move
|
|
move t8,v0 // swap 8-byte pattern value
|
|
move v0,v1 //
|
|
move v1,t8 //
|
|
|
|
//
|
|
// Many system platforms do not support 64 bit access to video memory. For
|
|
// these platforms, data is moved 32-bits at a time.
|
|
//
|
|
|
|
10: lbu t7,Gdip64bitDisabled // get 64-bit disable flag
|
|
bne zero,t7,140f // if eq, 64-bit access is disabled
|
|
|
|
//
|
|
// If the target buffer is 8-byte aligned, then move the pattern value to
|
|
// the target 32 bytes at a time by moving any intervening 8-byte blocks
|
|
// first. Otherwise, move a single longword, move any intervening 8-byte
|
|
// blocks, move 32-byte blocks, and then move a single longword at the end.
|
|
//
|
|
|
|
|
|
and t8,t0,0x4 // isolate target alignment bits
|
|
bne zero,t8,70f // if ne, target not aligned
|
|
|
|
//
|
|
// Move 8-byte pattern value to target 32 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
dsll v0,v0,32 // merge 8 bytes of pattern
|
|
dsrl v0,v0,32 //
|
|
dsll v1,v1,32 //
|
|
or v0,v0,v1 //
|
|
and t8,a1,0x18 // check if even multiple of 32 bytes
|
|
beq zero,t8,30f // if eq, even multiple of 32 bytes
|
|
subu t4,t4,32 // compute ending segment address
|
|
subu a1,a1,t8 // reduce size of fill operation
|
|
beq zero,a1,40f // if eq, only alignment part to move
|
|
addu t0,t0,t8 // advance target address
|
|
xor t8,t8,0x18 // check if 24 bytes need to be moved
|
|
beql zero,t8,20f // if eq, 24 bytes to move
|
|
sd v0,-24(t0) // store first 8 bytes of 24 bytes
|
|
and t8,t8,0x10 // check if 8 bytes to move
|
|
bnel zero,t8,30f // if ne, only 8 bytes to move
|
|
sd v0,-8(t0) // store 8-bytes of pattern
|
|
20: sd v0,-16(t0) // store last 16 bytes of 16 or 24 bytes
|
|
sd v0,-8(t0) //
|
|
30: sd v0,0(t0) // store 8 byte pattern value 4 times
|
|
sd v0,8(t0) //
|
|
sd v0,16(t0) //
|
|
sd v0,24(t0) //
|
|
bne t0,t4,30b // if ne, more to move
|
|
addu t0,t0,32 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
j ra // return
|
|
|
|
.set noreorder
|
|
.set noat
|
|
40: xor t8,t8,0x18 // check if 24 bytes need to be moved
|
|
beql zero,t8,50f // if eq, 24 bytes to move
|
|
sd v0,-24(t0) // store first 8 bytes of 24 bytes
|
|
and t8,t8,0x10 // check if 8 bytes to move
|
|
bnel zero,t8,60f // if ne, only 8 bytes to move
|
|
sd v0,-8(t0) // store 8-bytes of pattern
|
|
50: sd v0,-16(t0) // store last 16 bytes of 16 or 24 bytes
|
|
sd v0,-8(t0) //
|
|
.set at
|
|
.set reorder
|
|
|
|
60: j ra // return
|
|
|
|
//
|
|
// Align the target to an 8-byte boundary, move any intervening 8-byte blocks,
|
|
// move the pattern to the target 32 bytes at a time, and move the remaining
|
|
// longword at the end.
|
|
//
|
|
|
|
70: sw v0,0(t0) // store low 4 bytes of pattern
|
|
addu t0,t0,4 // advance target address
|
|
subu a1,a1,8 // reduce size of fill
|
|
beq zero,a1,120f // if eq, nothing in the middle
|
|
|
|
.set noreorder
|
|
.set noat
|
|
dsll v1,v1,32 // merge 8 bytes of pattern
|
|
dsrl v1,v1,32 //
|
|
dsll v0,v0,32 //
|
|
or v1,v0,v1 //
|
|
and t8,a1,0x18 // check if even multiple of 32 bytes
|
|
beq zero,t8,90f // if eq, even multiple of 32 bytes
|
|
subu t4,t4,32 + 4 // compute ending segment address
|
|
subu a1,a1,t8 // reduce size of fill operation
|
|
beq zero,a1,100f // if eq, only alignment part to move
|
|
addu t0,t0,t8 // advance target address
|
|
xor t8,t8,0x18 // check if 24 bytes need to be moved
|
|
beql zero,t8,80f // if eq, 24 bytes to move
|
|
sd v1,-24(t0) // store first 8 bytes of 24 bytes
|
|
and t8,t8,0x10 // check if 8 bytes to move
|
|
bnel zero,t8,90f // if ne, only 8 bytes to move
|
|
sd v1,-8(t0) // store 8-bytes of pattern
|
|
80: sd v1,-16(t0) // store last 16 bytes of 16 or 24 bytes
|
|
sd v1,-8(t0) //
|
|
90: sd v1,0(t0) // store 8 byte pattern value 4 times
|
|
sd v1,8(t0) //
|
|
sd v1,16(t0) //
|
|
sd v1,24(t0) //
|
|
bne t0,t4,90b // if ne, more to move
|
|
addu t0,t0,32 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
sw v1,0(t0) // store high bytes of pattern
|
|
j ra // return
|
|
|
|
.set noreorder
|
|
.set noat
|
|
100: xor t8,t8,0x18 // check if 24 bytes need to be moved
|
|
beql zero,t8,110f // if eq, 24 bytes to move
|
|
sd v1,-24(t0) // store first 8 bytes of 24 bytes
|
|
and t8,t8,0x10 // check if 8 bytes to move
|
|
bnel zero,t8,120f // if ne, only 8 bytes to move
|
|
sd v1,-8(t0) // store 8-bytes of pattern
|
|
110: sd v1,-16(t0) // store last 16 bytes of 16 or 24 bytes
|
|
sd v1,-8(t0) //
|
|
.set at
|
|
.set reorder
|
|
|
|
120: sw v1,0(t0) // store high 4 bytes of pattern
|
|
j ra // return
|
|
|
|
//
|
|
// Move 8-byte pattern value to target 8 bytes at a time using 32-bit
|
|
// operations.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
140: and t8,a1,0x8 // check if even multiple of 8 bytes
|
|
beq zero,t8,160f // if eq, even multiple of 8 bytes
|
|
subu t4,t4,8 // compute ending segment address
|
|
150: sw v0,0(t0) // store 8-byte pattern value
|
|
sw v1,4(t0) //
|
|
bne t0,t4,150b // if ne, more to move
|
|
addu t0,t0,8 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
j ra // return
|
|
|
|
//
|
|
// Move 8-byte pattern value to target 16 bytes at a time using 32-bit
|
|
// operations.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
160: subu t4,t4,8 // compute ending segment address
|
|
170: sw v0,0(t0) // store 8-byte pattern value
|
|
sw v1,4(t0) //
|
|
sw v0,8(t0) // store 8-byte pattern value
|
|
sw v1,12(t0) //
|
|
bne t0,t4,170b // if ne, more to move
|
|
addu t0,t0,16 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
200: j ra // return
|
|
|
|
.end vCopyPattern
|
|
|
|
SBTTL("rop DPx, Aligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchAndMerge (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one line of an aligned pattern.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(vMergePattern)
|
|
|
|
ALTERNATE_ENTRY(vFetchAndMerge)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,10f // if ne, pattern is not 8 bytes
|
|
lw v0,0(t5) // get low part of 8-byte pattern
|
|
lw v1,4(t5) // get high part of 8-byte pattern
|
|
beq zero,t2,MergePattern // if eq, zero offset value
|
|
lw v1,0(t1) // get high part of 8-byte pattern
|
|
b MergePattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
10: lw v0,0(t5) // get 4-byte pattern value
|
|
addu t3,t3,t1 // compute ending pattern address
|
|
20: lw v1,0(t0) // get 4-byte target value
|
|
addu t0,t0,4 // advance target pointer
|
|
xor v0,v1,v0 // compute exclusive or with pattern
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,30f // if eq, end of target
|
|
addu t5,t5,4 // advance pixel offset
|
|
subu t6,t5,t3 // check if at end of pattern
|
|
bne zero,t6,20b // if ne, not at end of pattern
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
move t5,t1 // set starting pattern address
|
|
b 20b //
|
|
lw v0,0(t5) // get 4-byte pattern value
|
|
.set at
|
|
.set reorder
|
|
|
|
30: j ra // return
|
|
|
|
SBTTL("rop DPx, Unaligned")
|
|
//++
|
|
//
|
|
// VOID
|
|
// vFetchShiftAndMerge (
|
|
// IN PFETCHFRAME pff
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine repeatedly tiles one line of an unaligned pattern
|
|
// using rop (DPx).
|
|
//
|
|
// Arguments:
|
|
//
|
|
// pff (a0) - Supplies a pointer to a fetch frame.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
ALTERNATE_ENTRY(vFetchShiftAndMerge)
|
|
|
|
lw t0,ff_pvTrg(a0) // get starting target address
|
|
lw t1,ff_pvPat(a0) // get base pattern address
|
|
lw t2,ff_xPat(a0) // get pattern offset in bytes
|
|
lw t3,ff_cxPat(a0) // get pattern width in pixels
|
|
lw t4,ff_culFill(a0) // compute ending target address
|
|
sll a1,t4,2 //
|
|
addu t4,a1,t0 //
|
|
addu t5,t2,t1 // compute current pattern address
|
|
subu v0,t3,8 // check if pattern is exactly 8 bytes
|
|
bne zero,v0,10f // if ne, pattern is not 8 bytes
|
|
lwr v0,0(t5) // get low part of 8-byte pattern
|
|
lwl v0,3(t5) //
|
|
lwr v1,4(t5) // get high part of 8-byte pattern
|
|
lwl v1,3 - 4(t5) //
|
|
b MergePattern // finish in common code
|
|
|
|
//
|
|
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
10: lw v1,0(t0) // get 4-byte target value
|
|
lwr v0,0(t5) // get low bytes of pattern
|
|
lwl v0,3(t5) // get high bytes of pattern
|
|
addu t0,t0,4 // advance target pointer
|
|
xor v0,v1,v0 // compute exclusive or with pattern
|
|
sw v0,-4(t0) // store pattern in target
|
|
beq t0,t4,20f // if eq, end of target
|
|
addu t2,t2,4 // advance pixel offset
|
|
subu t6,t2,t3 // check if at end of pattern
|
|
bltz t6,10b // if ltz, not at end of pattern
|
|
addu t5,t2,t1 // compute address of pattern
|
|
move t2,t6 // set offset in pattern
|
|
b 10b //
|
|
addu t5,t2,t1 // compute address of pattern
|
|
.set at
|
|
.set reorder
|
|
|
|
20: j ra // return
|
|
|
|
SBTTL("Merge Pattern")
|
|
//++
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// This routine contains common code for merging an 8-byte pattern to
|
|
// a target surface.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// v0 and v1 - Supplies the 8-byte pattern to copy.
|
|
// t0 - Supplies the starting target surface address.
|
|
// t4 - Supplies the ending target surface address.
|
|
//
|
|
// Return Value:
|
|
//
|
|
// None.
|
|
//
|
|
//--
|
|
|
|
MergePattern: //
|
|
|
|
//
|
|
// If the fill size is not an even multiple of 8 bytes, then merge one
|
|
// longword and swap the pattern value.
|
|
//
|
|
|
|
and t8,a1,0x4 // check if even multiple of 8 bytes
|
|
beq zero,t8,10f // if eq, even multiple of 8 bytes
|
|
lw t6,0(t0) // get 4-byte target value
|
|
addu t0,t0,4 // advance target address
|
|
xor t6,t6,v0 // compute exclusive or with pattern
|
|
sw t6,-4(t0) // store low 4 bytes of pattern
|
|
subu a1,a1,4 // reduce size of fill operation
|
|
beq zero,a1,160f // if eq, no more to move
|
|
move t8,v0 // swap 8-byte pattern value
|
|
move v0,v1 //
|
|
move v1,t8 //
|
|
|
|
|
|
//
|
|
// Many system platforms do not support 64 bit access to video memory. For
|
|
// these platforms, data is moved 32-bits at a time.
|
|
//
|
|
|
|
10: lbu t7,Gdip64bitDisabled // get 64-bit disable flag
|
|
bne zero,t7,110f // if eq, 64-bit access is disabled
|
|
|
|
//
|
|
// If the target buffer is 8-byte aligned, then merge the pattern value with
|
|
// the target 8 bytes at a time. Otherwise, merge a single longword, merge any
|
|
// intervening 8-byte blocks, and then merge a single longword at the end.
|
|
//
|
|
|
|
and t8,t0,0x4 // isolate target alignment bits
|
|
bne zero,t8,30f // if ne, target alignment problem
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
dsll v0,v0,32 // merge 8 bytes of pattern
|
|
dsrl v0,v0,32 //
|
|
dsll v1,v1,32 //
|
|
or v0,v0,v1 //
|
|
and a2,a1,32 - 1 // isolate residual number of bytes
|
|
subu a2,a1,a2 // compute 32-byte block count
|
|
beq zero,a2,17f // if eq, no 32-byte block to merge
|
|
subu a1,a1,a2 // compute residual number of bytes
|
|
addu a2,a2,t0 // compute ending segment address
|
|
subu a2,a2,32 //
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target 32 bytes at a time.
|
|
//
|
|
|
|
13: ld t1,0(t0) // get 8-byte target values
|
|
ld t2,8(t0) //
|
|
ld t3,16(t0) //
|
|
ld t5,24(t0) //
|
|
xor t1,t1,v0 // compute exclusive or with pattern
|
|
xor t2,t2,v0 //
|
|
xor t3,t3,v0 //
|
|
xor t5,t5,v0 //
|
|
sd t1,0(t0) // store 8-byte pattern values
|
|
sd t2,8(t0) //
|
|
sd t3,16(t0) //
|
|
sd t5,24(t0) //
|
|
bne t0,a2,13b // if ne, more to move
|
|
addu t0,t0,32 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
beq zero,a1,160f // if eq, no residual 8-byte blocks
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
17: subu t4,t4,8 // compute ending segment address
|
|
20: ld t1,0(t0) // get 8-byte target value
|
|
xor t1,t1,v0 // compute exclusive or with pattern
|
|
sd t1,0(t0) // store 8-byte pattern value
|
|
bne t0,t4,20b // if ne, more to move
|
|
addu t0,t0,8 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
j ra // return
|
|
|
|
//
|
|
// Align the target to an 8-byte boundary, merge any intervening 8-byte blocks,
|
|
// and merge the remaining longword at the end.
|
|
//
|
|
|
|
30: lw t6,0(t0) // get 4-byte target value
|
|
addu t0,t0,4 // advance target address
|
|
xor t6,t6,v0 // compute exclusive or with pattern
|
|
sw t6,-4(t0) // store low 4 bytes of pattern
|
|
subu a1,a1,8 // reduce size of fill
|
|
beq zero,a1,50f // if eq, nothing in the middle
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
dsll v1,v1,32 // merge 8 bytes of pattern
|
|
dsrl v1,v1,32 //
|
|
dsll v0,v0,32 //
|
|
or v1,v0,v1 //
|
|
and a2,a1,32 - 1 // isolate residual number of bytes
|
|
subu a2,a1,a2 // compute 32-byte block count
|
|
beq zero,a2,37f // if eq, no 32-byte block to merge
|
|
subu a1,a1,a2 // compute residual number of bytes
|
|
addu a2,a2,t0 // compute ending segment address
|
|
subu a2,a2,32 //
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target 32 bytes at a time.
|
|
//
|
|
|
|
33: ld t1,0(t0) // get 8-byte target values
|
|
ld t2,8(t0) //
|
|
ld t3,16(t0) //
|
|
ld t5,24(t0) //
|
|
xor t1,t1,v1 // compute exclusive or with pattern
|
|
xor t2,t2,v1 //
|
|
xor t3,t3,v1 //
|
|
xor t5,t5,v1 //
|
|
sd t1,0(t0) // store 8-byte pattern values
|
|
sd t2,8(t0) //
|
|
sd t3,16(t0) //
|
|
sd t5,24(t0) //
|
|
bne t0,a2,33b // if ne, more to move
|
|
addu t0,t0,32 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
beq zero,a1,50f // if eq, no residual 8-byte blocks
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target 8 bytes at a time.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
37: subu t4,t4,12 // compute ending segment address
|
|
40: ld t1,0(t0) // get 8-byte target value
|
|
xor t1,t1,v1 // compute exclusive or with pattern
|
|
sd t1,0(t0) // store 8-byte pattern value
|
|
bne t0,t4,40b // if ne, more to move
|
|
addu t0,t0,8 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
50: lw t6,0(t0) // get 4-byte target value
|
|
xor t6,t6,v1 // compute exclusive or with pattern
|
|
sw t6,0(t0) // store high bytes of pattern
|
|
j ra // return
|
|
|
|
//
|
|
// Merge 8-byte pattern value with target using 32-bit operations.
|
|
//
|
|
|
|
.set noreorder
|
|
.set noat
|
|
110: subu t4,t4,8 // compute ending segment address
|
|
120: lw t6,0(t0) // get 8-byte target value
|
|
lw t7,4(t0) //
|
|
xor t6,t6,v0 // compute exclusive or with pattern
|
|
xor t7,t7,v1 //
|
|
sw t6,0(t0) // store 8-byte pattern value
|
|
sw t7,4(t0) //
|
|
bne t0,t4,120b // if ne, more to move
|
|
addu t0,t0,8 // advance target address
|
|
.set at
|
|
.set reorder
|
|
|
|
160: j ra // return
|
|
|
|
.end vMergePattern
|