Windows NT 4.0 source code leak
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

848 lines
32 KiB

// TITLE("Pattern Tiler")
//++
//
// Copyright (c) 1992 Microsoft Corporation
//
// Module Name:
//
// tiler.s
//
// Abstract:
//
// This module implements code to copy a pattern to a target surface.
//
// N.B. The code is written to optimally write to a frame buffer display
// surface. This means there is an occasional movement of data to
// floating point registers so that 8-byte writes to the display
// can be performed.
//
// Author:
//
// Donald Sidoroff (donalds) 2-Feb-1992
//
// Rewritten by:
//
// David N. Cutler (davec) 4-May-1992
//
// Environment:
//
// User mode only.
//
// Revision History:
//
//--
#include "ksmips.h"
#include "gdimips.h"
.extern Gdip64bitDisabled 4
SBTTL("rop P, Aligned")
//++
//
// VOID
// vFetchAndCopy (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one scan line of an aligned pattern.
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(vCopyPattern)
ALTERNATE_ENTRY(vFetchAndCopy)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get fill count in longwords
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,10f // if ne, pattern is not 8 bytes
lw v0,0(t5) // get low part of 8-byte pattern
lw v1,4(t5) // get high part of 8-byte pattern
beq zero,t2,CopyPattern // if eq, zero offset value
lw v1,0(t1) // nonzero offset - high word wraps to pattern base
b CopyPattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
10: lw v0,0(t5) // get 4-byte pattern value
addu t3,t3,t1 // compute ending pattern address
20: addu t0,t0,4 // advance target pointer
sw v0,-4(t0) // store pattern in target
beq t0,t4,30f // if eq, end of target
addu t5,t5,4 // advance pixel offset (delay slot)
subu t6,t5,t3 // check if at end of pattern
bne zero,t6,20b // if ne, not at end of pattern
lw v0,0(t5) // get 4-byte pattern value (delay slot)
move t5,t1 // wrap to starting pattern address
b 20b //
lw v0,0(t5) // reload first pattern word (delay slot)
.set at
.set reorder
30: j ra // return
SBTTL("rop P, Unaligned")
//++
//
// VOID
// vFetchShiftAndCopy (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one line of an unaligned pattern
// using rop (P).
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
ALTERNATE_ENTRY(vFetchShiftAndCopy)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get pattern width in pixels
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,10f // if ne, pattern is not 8 bytes
lwr v0,0(t5) // get low part of 8-byte pattern
lwl v0,3(t5) // (unaligned load via lwr/lwl pair)
lwr v1,4(t5) // get high part of 8-byte pattern
lwl v1,3 - 4(t5) // high bytes wrap back to start of pattern
b CopyPattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
10: lwr v0,0(t5) // get low bytes of pattern
lwl v0,3(t5) // get high bytes of pattern
addu t0,t0,4 // advance target pointer
sw v0,-4(t0) // store pattern in target
beq t0,t4,20f // if eq, end of target
addu t2,t2,4 // advance pixel offset (delay slot)
subu t6,t2,t3 // check if at end of pattern
bltz t6,10b // if ltz, not at end of pattern
addu t5,t2,t1 // compute address of pattern (delay slot)
move t2,t6 // wrap - set residual offset in pattern
b 10b //
addu t5,t2,t1 // compute address of pattern (delay slot)
.set at
.set reorder
20: j ra // return
SBTTL("rop Pn, Aligned")
//++
//
// VOID
// vFetchNotAndCopy (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one line of an aligned pattern.
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
ALTERNATE_ENTRY(vFetchNotAndCopy)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get pattern width in pixels
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,20f // if ne, pattern is not 8 bytes
lw v0,0(t5) // get low part of 8-byte pattern
lw v1,4(t5) // get high part of 8-byte pattern
beq zero,t2,10f // if eq, zero offset value
lw v1,0(t1) // nonzero offset - high word wraps to pattern base
10: nor v0,v0,zero // complement pattern (rop Pn)
nor v1,v1,zero //
b CopyPattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
20: lw v0,0(t5) // get 4-byte pattern value
addu t3,t3,t1 // compute ending pattern address
30: addu t0,t0,4 // advance target pointer
nor v0,v0,zero // complement pattern
sw v0,-4(t0) // store pattern in target
beq t0,t4,40f // if eq, end of target
addu t5,t5,4 // advance pattern address (delay slot)
subu t6,t5,t3 // check if at end of pattern
bne zero,t6,30b // if ne, not at end of pattern
lw v0,0(t5) // get 4-byte pattern value (delay slot)
move t5,t1 // wrap to starting pattern address
b 30b //
lw v0,0(t5) // reload first pattern word (delay slot)
.set at
.set reorder
40: j ra // return
SBTTL("rop Pn, Unaligned")
//++
//
// VOID
// vFetchShiftNotAndCopy (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one line of an unaligned pattern
// using rop (Pn).
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
ALTERNATE_ENTRY(vFetchShiftNotAndCopy)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get pattern width in pixels
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,10f // if ne, pattern is not 8 bytes
lwr v0,0(t5) // get low part of 8-byte pattern
lwl v0,3(t5) // (unaligned load via lwr/lwl pair)
lwr v1,4(t5) // get high part of 8-byte pattern
lwl v1,3 - 4(t5) // high bytes wrap back to start of pattern
nor v0,v0,zero // complement pattern (rop Pn)
nor v1,v1,zero //
b CopyPattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
10: lwr v0,0(t5) // get low bytes of pattern
lwl v0,3(t5) // get high bytes of pattern
addu t0,t0,4 // advance target pointer
nor v0,v0,zero // complement pattern
sw v0,-4(t0) // store pattern in target
beq t0,t4,20f // if eq, end of target
addu t2,t2,4 // advance pixel offset (delay slot)
subu t6,t2,t3 // check if at end of pattern
bltz t6,10b // if ltz, not at end of pattern
addu t5,t2,t1 // compute address of pattern (delay slot)
move t2,t6 // wrap - set residual offset in pattern
b 10b //
addu t5,t2,t1 // compute address of pattern (delay slot)
.set at
.set reorder
20: j ra // return
SBTTL("Copy Pattern")
//++
//
// Routine Description:
//
// This routine contains common code for copying an 8-byte pattern to
// a target surface.
//
// Arguments:
//
// a1 - Supplies the size of the fill in bytes.
// v0 and v1 - Supplies the 8-byte pattern to copy (low word in v0,
// high word in v1).
// t0 - Supplies the starting target surface address.
// t4 - Supplies the ending target surface address.
//
// Return Value:
//
// None.
//
//--
CopyPattern: //
//
// If the fill size is not an even multiple of 8 bytes, then move one
// longword and swap the pattern value (so the 8-byte pattern stays
// phase-aligned with the target).
//
and t8,a1,0x4 // check if even multiple of 8 bytes
beq zero,t8,10f // if eq, even multiple of 8 bytes
sw v0,0(t0) // store low 4 bytes of pattern
addu t0,t0,4 // advance target address
subu a1,a1,4 // reduce size of fill operation
beq zero,a1,200f // if eq, no more to move
move t8,v0 // swap 8-byte pattern value
move v0,v1 //
move v1,t8 //
//
// Many system platforms do not support 64 bit access to video memory. For
// these platforms, data is moved 32-bits at a time.
//
10: lbu t7,Gdip64bitDisabled // get 64-bit disable flag
bne zero,t7,140f // if ne, 64-bit access is disabled
//
// If the target buffer is 8-byte aligned, then move the pattern value to
// the target 32 bytes at a time by moving any intervening 8-byte blocks
// first. Otherwise, move a single longword, move any intervening 8-byte
// blocks, move 32-byte blocks, and then move a single longword at the end.
//
and t8,t0,0x4 // isolate target alignment bits
bne zero,t8,70f // if ne, target not aligned
//
// Move 8-byte pattern value to target 32 bytes at a time.
//
// N.B. noreorder is in effect - instructions after branches are delay
// slots. beql/bnel are branch-likely: their delay-slot store executes
// only when the branch is taken.
//
.set noreorder
.set noat
dsll v0,v0,32 // pack 8-byte pattern into 64-bit v0
dsrl v0,v0,32 // (zero-extend low word)
dsll v1,v1,32 // (shift high word into place)
or v0,v0,v1 //
and t8,a1,0x18 // check if even multiple of 32 bytes
beq zero,t8,30f // if eq, even multiple of 32 bytes
subu t4,t4,32 // compute ending segment address
subu a1,a1,t8 // reduce size of fill operation
beq zero,a1,40f // if eq, only alignment part to move
addu t0,t0,t8 // advance target address
xor t8,t8,0x18 // check if 24 bytes need to be moved
beql zero,t8,20f // if eq, 24 bytes to move
sd v0,-24(t0) // store first 8 bytes of 24 bytes
and t8,t8,0x10 // check if 8 bytes to move
bnel zero,t8,30f // if ne, only 8 bytes to move
sd v0,-8(t0) // store 8-bytes of pattern
20: sd v0,-16(t0) // store last 16 bytes of 16 or 24 bytes
sd v0,-8(t0) //
30: sd v0,0(t0) // store 8 byte pattern value 4 times
sd v0,8(t0) //
sd v0,16(t0) //
sd v0,24(t0) //
bne t0,t4,30b // if ne, more to move
addu t0,t0,32 // advance target address (delay slot)
.set at
.set reorder
j ra // return
.set noreorder
.set noat
40: xor t8,t8,0x18 // check if 24 bytes need to be moved
beql zero,t8,50f // if eq, 24 bytes to move
sd v0,-24(t0) // store first 8 bytes of 24 bytes
and t8,t8,0x10 // check if 8 bytes to move
bnel zero,t8,60f // if ne, only 8 bytes to move
sd v0,-8(t0) // store 8-bytes of pattern
50: sd v0,-16(t0) // store last 16 bytes of 16 or 24 bytes
sd v0,-8(t0) //
.set at
.set reorder
60: j ra // return
//
// Align the target to an 8-byte boundary, move any intervening 8-byte blocks,
// move the pattern to the target 32 bytes at a time, and move the remaining
// longword at the end.
//
70: sw v0,0(t0) // store low 4 bytes of pattern
addu t0,t0,4 // advance target address
subu a1,a1,8 // reduce size of fill
beq zero,a1,120f // if eq, nothing in the middle
.set noreorder
.set noat
dsll v1,v1,32 // pack 8-byte pattern into 64-bit v1
dsrl v1,v1,32 // (zero-extend high word, now low)
dsll v0,v0,32 // (shift low word into place)
or v1,v0,v1 //
and t8,a1,0x18 // check if even multiple of 32 bytes
beq zero,t8,90f // if eq, even multiple of 32 bytes
subu t4,t4,32 + 4 // compute ending segment address (4 extra for alignment word)
subu a1,a1,t8 // reduce size of fill operation
beq zero,a1,100f // if eq, only alignment part to move
addu t0,t0,t8 // advance target address
xor t8,t8,0x18 // check if 24 bytes need to be moved
beql zero,t8,80f // if eq, 24 bytes to move
sd v1,-24(t0) // store first 8 bytes of 24 bytes
and t8,t8,0x10 // check if 8 bytes to move
bnel zero,t8,90f // if ne, only 8 bytes to move
sd v1,-8(t0) // store 8-bytes of pattern
80: sd v1,-16(t0) // store last 16 bytes of 16 or 24 bytes
sd v1,-8(t0) //
90: sd v1,0(t0) // store 8 byte pattern value 4 times
sd v1,8(t0) //
sd v1,16(t0) //
sd v1,24(t0) //
bne t0,t4,90b // if ne, more to move
addu t0,t0,32 // advance target address (delay slot)
.set at
.set reorder
sw v1,0(t0) // store high bytes of pattern
j ra // return
.set noreorder
.set noat
100: xor t8,t8,0x18 // check if 24 bytes need to be moved
beql zero,t8,110f // if eq, 24 bytes to move
sd v1,-24(t0) // store first 8 bytes of 24 bytes
and t8,t8,0x10 // check if 8 bytes to move
bnel zero,t8,120f // if ne, only 8 bytes to move
sd v1,-8(t0) // store 8-bytes of pattern
110: sd v1,-16(t0) // store last 16 bytes of 16 or 24 bytes
sd v1,-8(t0) //
.set at
.set reorder
120: sw v1,0(t0) // store high 4 bytes of pattern
j ra // return
//
// Move 8-byte pattern value to target 8 bytes at a time using 32-bit
// operations.
//
.set noreorder
.set noat
140: and t8,a1,0x8 // check if even multiple of 16 bytes
beq zero,t8,160f // if eq, even multiple of 16 bytes
subu t4,t4,8 // compute ending segment address
150: sw v0,0(t0) // store 8-byte pattern value
sw v1,4(t0) //
bne t0,t4,150b // if ne, more to move
addu t0,t0,8 // advance target address (delay slot)
.set at
.set reorder
j ra // return
//
// Move 8-byte pattern value to target 16 bytes at a time using 32-bit
// operations.
//
.set noreorder
.set noat
160: subu t4,t4,8 // compute ending segment address
170: sw v0,0(t0) // store 8-byte pattern value
sw v1,4(t0) //
sw v0,8(t0) // store 8-byte pattern value
sw v1,12(t0) //
bne t0,t4,170b // if ne, more to move
addu t0,t0,16 // advance target address (delay slot)
.set at
.set reorder
200: j ra // return
.end vCopyPattern
SBTTL("rop DPx, Aligned")
//++
//
// VOID
// vFetchAndMerge (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one line of an aligned pattern.
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(vMergePattern)
ALTERNATE_ENTRY(vFetchAndMerge)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get pattern width in pixels
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,10f // if ne, pattern is not 8 bytes
lw v0,0(t5) // get low part of 8-byte pattern
lw v1,4(t5) // get high part of 8-byte pattern
beq zero,t2,MergePattern // if eq, zero offset value
lw v1,0(t1) // nonzero offset - high word wraps to pattern base
b MergePattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
10: lw v0,0(t5) // get 4-byte pattern value
addu t3,t3,t1 // compute ending pattern address
20: lw v1,0(t0) // get 4-byte target value
addu t0,t0,4 // advance target pointer
xor v0,v1,v0 // compute exclusive or with pattern (rop DPx)
sw v0,-4(t0) // store pattern in target
beq t0,t4,30f // if eq, end of target
addu t5,t5,4 // advance pixel offset (delay slot)
subu t6,t5,t3 // check if at end of pattern
bne zero,t6,20b // if ne, not at end of pattern
lw v0,0(t5) // get 4-byte pattern value (delay slot)
move t5,t1 // wrap to starting pattern address
b 20b //
lw v0,0(t5) // reload first pattern word (delay slot)
.set at
.set reorder
30: j ra // return
SBTTL("rop DPx, Unaligned")
//++
//
// VOID
// vFetchShiftAndMerge (
// IN PFETCHFRAME pff
// )
//
// Routine Description:
//
// This routine repeatedly tiles one line of an unaligned pattern
// using rop (DPx).
//
// Arguments:
//
// pff (a0) - Supplies a pointer to a fetch frame.
//
// Return Value:
//
// None.
//
//--
ALTERNATE_ENTRY(vFetchShiftAndMerge)
lw t0,ff_pvTrg(a0) // get starting target address
lw t1,ff_pvPat(a0) // get base pattern address
lw t2,ff_xPat(a0) // get pattern offset in bytes
lw t3,ff_cxPat(a0) // get pattern width in pixels
lw t4,ff_culFill(a0) // compute ending target address
sll a1,t4,2 // a1 = fill size in bytes
addu t4,a1,t0 // t4 = ending target address
addu t5,t2,t1 // compute current pattern address
subu v0,t3,8 // check if pattern is exactly 8 bytes
bne zero,v0,10f // if ne, pattern is not 8 bytes
lwr v0,0(t5) // get low part of 8-byte pattern
lwl v0,3(t5) // (unaligned load via lwr/lwl pair)
lwr v1,4(t5) // get high part of 8-byte pattern
lwl v1,3 - 4(t5) // high bytes wrap back to start of pattern
b MergePattern // finish in common code
//
// The pattern is not 8 bytes in width or cannot be moved 8 bytes at a time.
//
// N.B. noreorder is in effect below - the instruction immediately after
// each branch is in the branch delay slot and executes whether or not
// the branch is taken.
//
.set noreorder
.set noat
10: lw v1,0(t0) // get 4-byte target value
lwr v0,0(t5) // get low bytes of pattern
lwl v0,3(t5) // get high bytes of pattern
addu t0,t0,4 // advance target pointer
xor v0,v1,v0 // compute exclusive or with pattern (rop DPx)
sw v0,-4(t0) // store pattern in target
beq t0,t4,20f // if eq, end of target
addu t2,t2,4 // advance pixel offset (delay slot)
subu t6,t2,t3 // check if at end of pattern
bltz t6,10b // if ltz, not at end of pattern
addu t5,t2,t1 // compute address of pattern (delay slot)
move t2,t6 // wrap - set residual offset in pattern
b 10b //
addu t5,t2,t1 // compute address of pattern (delay slot)
.set at
.set reorder
20: j ra // return
SBTTL("Merge Pattern")
//++
//
// Routine Description:
//
// This routine contains common code for merging (exclusive or-ing) an
// 8-byte pattern with a target surface.
//
// Arguments:
//
// a1 - Supplies the size of the fill in bytes.
// v0 and v1 - Supplies the 8-byte pattern to copy (low word in v0,
// high word in v1).
// t0 - Supplies the starting target surface address.
// t4 - Supplies the ending target surface address.
//
// Return Value:
//
// None.
//
//--
MergePattern: //
//
// If the fill size is not an even multiple of 8 bytes, then merge one
// longword and swap the pattern value (so the 8-byte pattern stays
// phase-aligned with the target).
//
and t8,a1,0x4 // check if even multiple of 8 bytes
beq zero,t8,10f // if eq, even multiple of 8 bytes
lw t6,0(t0) // get 4-byte target value
addu t0,t0,4 // advance target address
xor t6,t6,v0 // compute exclusive or with pattern
sw t6,-4(t0) // store low 4 bytes of pattern
subu a1,a1,4 // reduce size of fill operation
beq zero,a1,160f // if eq, no more to move
move t8,v0 // swap 8-byte pattern value
move v0,v1 //
move v1,t8 //
//
// Many system platforms do not support 64 bit access to video memory. For
// these platforms, data is moved 32-bits at a time.
//
10: lbu t7,Gdip64bitDisabled // get 64-bit disable flag
bne zero,t7,110f // if ne, 64-bit access is disabled
//
// If the target buffer is 8-byte aligned, then merge the pattern value with
// the target 8 bytes at a time. Otherwise, merge a single longword, merge any
// intervening 8-byte blocks, and then merge a single longword at the end.
//
and t8,t0,0x4 // isolate target alignment bits
bne zero,t8,30f // if ne, target alignment problem
//
// Merge 8-byte pattern value with target.
//
// N.B. noreorder is in effect - the instruction immediately after each
// branch is in the branch delay slot and executes whether or not the
// branch is taken.
//
.set noreorder
.set noat
dsll v0,v0,32 // pack 8-byte pattern into 64-bit v0
dsrl v0,v0,32 // (zero-extend low word)
dsll v1,v1,32 // (shift high word into place)
or v0,v0,v1 //
and a2,a1,32 - 1 // isolate residual number of bytes
subu a2,a1,a2 // compute 32-byte block count
beq zero,a2,17f // if eq, no 32-byte block to merge
subu a1,a1,a2 // compute residual number of bytes (delay slot)
addu a2,a2,t0 // compute ending segment address
subu a2,a2,32 //
//
// Merge 8-byte pattern value with target 32 bytes at a time.
//
13: ld t1,0(t0) // get 8-byte target values
ld t2,8(t0) //
ld t3,16(t0) //
ld t5,24(t0) //
xor t1,t1,v0 // compute exclusive or with pattern
xor t2,t2,v0 //
xor t3,t3,v0 //
xor t5,t5,v0 //
sd t1,0(t0) // store 8-byte pattern values
sd t2,8(t0) //
sd t3,16(t0) //
sd t5,24(t0) //
bne t0,a2,13b // if ne, more to move
addu t0,t0,32 // advance target address (delay slot)
.set at
.set reorder
beq zero,a1,160f // if eq, no residual 8-byte blocks
//
// Merge 8-byte pattern value with target 8 bytes at a time.
//
.set noreorder
.set noat
17: subu t4,t4,8 // compute ending segment address
20: ld t1,0(t0) // get 8-byte target value
xor t1,t1,v0 // compute exclusive or with pattern
sd t1,0(t0) // store 8-byte pattern value
bne t0,t4,20b // if ne, more to move
addu t0,t0,8 // advance target address (delay slot)
.set at
.set reorder
j ra // return
//
// Align the target to an 8-byte boundary, merge any intervening 8-byte blocks,
// and merge the remaining longword at the end.
//
30: lw t6,0(t0) // get 4-byte target value
addu t0,t0,4 // advance target address
xor t6,t6,v0 // compute exclusive or with pattern
sw t6,-4(t0) // store low 4 bytes of pattern
subu a1,a1,8 // reduce size of fill
beq zero,a1,50f // if eq, nothing in the middle
//
// Merge 8-byte pattern value with target.
//
.set noreorder
.set noat
dsll v1,v1,32 // pack 8-byte pattern into 64-bit v1
dsrl v1,v1,32 // (zero-extend high word, now low)
dsll v0,v0,32 // (shift low word into place)
or v1,v0,v1 //
and a2,a1,32 - 1 // isolate residual number of bytes
subu a2,a1,a2 // compute 32-byte block count
beq zero,a2,37f // if eq, no 32-byte block to merge
subu a1,a1,a2 // compute residual number of bytes (delay slot)
addu a2,a2,t0 // compute ending segment address
subu a2,a2,32 //
//
// Merge 8-byte pattern value with target 32 bytes at a time.
//
33: ld t1,0(t0) // get 8-byte target values
ld t2,8(t0) //
ld t3,16(t0) //
ld t5,24(t0) //
xor t1,t1,v1 // compute exclusive or with pattern
xor t2,t2,v1 //
xor t3,t3,v1 //
xor t5,t5,v1 //
sd t1,0(t0) // store 8-byte pattern values
sd t2,8(t0) //
sd t3,16(t0) //
sd t5,24(t0) //
bne t0,a2,33b // if ne, more to move
addu t0,t0,32 // advance target address (delay slot)
.set at
.set reorder
beq zero,a1,50f // if eq, no residual 8-byte blocks
//
// Merge 8-byte pattern value with target 8 bytes at a time.
//
.set noreorder
.set noat
37: subu t4,t4,12 // compute ending segment address (8 loop bias + 4 alignment word)
40: ld t1,0(t0) // get 8-byte target value
xor t1,t1,v1 // compute exclusive or with pattern
sd t1,0(t0) // store 8-byte pattern value
bne t0,t4,40b // if ne, more to move
addu t0,t0,8 // advance target address (delay slot)
.set at
.set reorder
50: lw t6,0(t0) // get 4-byte target value
xor t6,t6,v1 // compute exclusive or with pattern
sw t6,0(t0) // store high bytes of pattern
j ra // return
//
// Merge 8-byte pattern value with target using 32-bit operations.
//
.set noreorder
.set noat
110: subu t4,t4,8 // compute ending segment address
120: lw t6,0(t0) // get 8-byte target value
lw t7,4(t0) //
xor t6,t6,v0 // compute exclusive or with pattern
xor t7,t7,v1 //
sw t6,0(t0) // store 8-byte pattern value
sw t7,4(t0) //
bne t0,t4,120b // if ne, more to move
addu t0,t0,8 // advance target address (delay slot)
.set at
.set reorder
160: j ra // return
.end vMergePattern