mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
137 lines
5.9 KiB
137 lines
5.9 KiB
#++
|
|
# Copyright 1991, 1994, Digital Equipment Corporation
|
|
#
|
|
# ots_zero(char *dstptr, long dstlen)
|
|
#
|
|
# Zero dstlen bytes of memory at *dstptr
|
|
#
|
|
# Special conventions: No stack space, r16-r17 and r27-r28 ONLY,
|
|
# no linkage pointer required.
|
|
# (Warning: The auto-loader potentially takes some regs across
|
|
# the call if this is being used in a shared lib. environment.)
|
|
#
|
|
# This is a GEM support routine for zeroing a region of memory. It is
|
|
# basically identical to BSD's bzero, though it has limited register
|
|
# conventions to allow it to work better with compiled code. (Note that
|
|
# this is just a stripped down version of ots_fill.)
|
|
#
|
|
# This is optimized for extremely high performance both for small and
|
|
# large blocks. In order to reduce overhead for small cases, they are
|
|
# retired as quickly as possible; more case analysis is reserved
|
|
# for cases which will do more.
|
|
#
|
|
# This version of OTS_ZERO provides longword granularity for Alpha.
|
|
#
|
|
# 012 30 Aug 1994 WBN Longword granularity version based on
|
|
# OTS_ZERO_ALPHA.M64 edit 011.
|
|
#--
|
|
|
|
#include "ots_defs.hs"
|
|
|
|
# r16 = dst
|
|
# r17 = len
|
|
# destroys r16-r17, r27-r28
|
|
|
|
# _OtsZero -- clear r17 bytes of memory starting at the address in r16.
#
# Entry:   r16 = dst, r17 = len; return address in r26.
# Writes:  r16, r17, r27 and r28 only; sp is never referenced.
# Method:  a partial LW/QW at either end is loaded, the bytes to be
#          cleared are masked off with mskql/mskqh, and the result is
#          stored back; whole quadwords in between are stored from r31.
.globl _OtsZero
|
|
.ent _OtsZero
|
|
_OtsZero:
|
|
# r28 is used explicitly below, and the instruction order is kept as written.
.set noat
|
|
.set noreorder
|
|
.frame sp,0,r26
|
|
.prologue 0
|
|
beq r17, done # No memory refs if len=0
|
|
subq r17, 4, r28 # Length-4
|
|
and r16, 3, r27 # Dst alignment (0-3)
|
|
andnot r16, 3, r16 # LW aligned dst pointer
|
|
addq r27, r28, r17 # Alignment + length - 4
|
|
bge r28, geq4 # Lengths >= 4 may not need load
|
|
# Length is 1-3 here: the bytes lie inside one LW or straddle two LWs.
ldl r28, (r16) # Load first LW of dst
|
|
bgt r17, double # Skip if it crosses to next LW
|
|
addq r17, 4, r17 # Find endpoint within LW
|
|
mskql r28, r27, r27 # Clear from startpoint thru 7
|
|
mskqh r28, r17, r28 # Clear from 0 to endpoint
|
|
or r28, r27, r27 # Combine dest parts
|
|
stl r27, (r16) # Store merged LW back
|
|
ret r31, (r26)
|
|
|
|
# Length 1-3 spanning two LWs.
# r17 = alignment + length - 4, the endpoint within the second LW.
double: mskql r28, r27, r28 # Clear from startpoint in first LW
|
|
ldl r27, 4(r16) # Load second LW of dst
|
|
stl r28, (r16) # Store first LW
|
|
mskqh r27, r17, r27 # Clear up to endpoint in second LW
|
|
stl r27, 4(r16) # Store second LW
|
|
ret r31, (r26)
|
|
|
|
# Come here if length to be zeroed is >= 4.
|
|
# r16-> dst aligned to LW
|
|
# r17 = alignment + length - 4
|
|
# r27 = dst alignment within LW
|
|
# r28 = length-4
|
|
|
|
#.align quad
|
|
|
|
geq4: and r16, 4, r28 # Which LW in QW to store first?
|
|
beq r17, simple # Go handle single aligned LW
|
|
bne r28, longs # Dst in upper LW of QW: start with a LW store
|
|
# Dst LW is the lower half of a QW (r16 is QW aligned here), so the
# first piece can be handled with QW loads and stores.
quad: subq r17, 4, r17 # Does dest end in first QW?
|
|
blt r17, shortq # Ends within first QW
|
|
beq r27, wh_qw # Store a whole QW
|
|
ldq r28, (r16) # Load first QW of dest
|
|
mskql r28, r27, r27 # Clear from startpoint
|
|
wh_qw: stq r27, (r16) # Store first QW of dest (r27 is 0 when branched to)
|
|
br r31, join # Go clear rest of string
|
|
|
|
# Reached when r17 (alignment + length - 4) is zero with length >= 4,
# i.e. alignment 0 and length exactly 4.
simple: stl r31, (r16) # Single aligned LW
|
|
ret r31, (r26)
|
|
|
|
# Dest starts and ends inside one QW.  r17 = alignment + length - 8 is
# negative here; mskqh takes the byte offset from its low 3 bits, which
# equal alignment + length.
shortq: ldq r28, (r16) # Load QW of dest
|
|
mskql r28, r27, r27 # Clear from startpoint thru 7
|
|
mskqh r28, r17, r28 # Clear from 0 up to endpoint
|
|
or r28, r27, r27 # Merge
|
|
stq r27, (r16) # Store
|
|
ret r31, (r26)
|
|
|
|
# Dst LW is the upper half of a QW: store that LW alone.  r16 keeps its
# 4-byte offset; the stq_u/ldq_u at 8(r16) below ignore the low 3 address
# bits and so address the following QW.
longs: beq r27, wh_lw # Store a whole LW
|
|
ldl r28, (r16) # Load first LW of dest
|
|
mskql r28, r27, r27 # Clear from startpoint
|
|
wh_lw: stl r27, (r16) # Store first LW of dest (r27 is 0 when branched to)
|
|
# On arrival r17 = bytes still to clear beyond the piece just stored.
join: subq r17, 32, r17 # At least 4 more quadwords?
|
|
and r17, 24, r27 # How many after multiple of 4?
|
|
bge r17, unroll # Taken branch for long strings
|
|
# Fewer than 4 whole QWs remain: r27 = 0/8/16/24 bytes of whole QWs.
short: and r17, 7, r17 # How many odd bytes?
|
|
beq r27, last # Skip if no more whole QWs
|
|
stq_u r31, 8(r16) # Clear one...
|
|
subq r27, 16, r27 # Map 8/16/24 to -8/0/8
|
|
addq r16, 8, r16 # Update dest pointer
|
|
blt r27, last # Skip if no more whole QWs
|
|
#stall
|
|
stq_u r31, 8(r16) # Clear two...
|
|
addq r16, 8, r16 # Update dest pointer
|
|
nop # NOTE(review): presumably scheduling filler
|
|
beq r27, last # Skip if no more whole QWs
|
|
stq_u r31, 8(r16) # Clear three...
|
|
addq r16, 8, r16 # Update dest pointer
|
|
# Tail: r17 = 0-7 odd bytes at the start of the QW addressed by 8(r16).
last: beq r17, done # Finished if no odd bytes
|
|
ldq_u r27, 8(r16) # Load last QW of dst
|
|
subq r17, 4, r28 # More than a LW left?
|
|
andnot r16, 7, r16 # Clean pointer for STL
|
|
mskqh r27, r17, r27 # Clear up to endpoint
|
|
bgt r28, lastq # Go store a QW
|
|
stl r27, 8(r16) # LW store for last piece
|
|
done: ret r31, (r26) # Shared return (also for len=0, no odd bytes)
|
|
|
|
lastq: stq r27, 8(r16) # QW store for last piece
|
|
ret r31, (r26)
|
|
|
|
# Main loop: 32 bytes per pass.  r17 was pre-decremented by 32 at join,
# so it stays >= 0 while at least 4 more QWs remain.  r27 is not
# changed in the loop and is consumed at short.
unroll: stq_u r31, 8(r16) # Store 4 QWs per iteration
|
|
stq_u r31, 16(r16)
|
|
stq_u r31, 24(r16)
|
|
subq r17, 32, r17 # Decrement remaining count
|
|
stq_u r31, 32(r16)
|
|
addq r16, 32, r16 # Update dest pointer
|
|
bge r17, unroll # Repeat until done
|
|
br r31, short # Then handle leftovers
|
|
|
|
|
|
|
# Restore the assembler settings changed at entry.
.set at
|
|
.set reorder
|
|
.end _OtsZero
|