mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
702 lines
22 KiB
702 lines
22 KiB
#++
|
|
#
|
|
# Copyright (c) 1993 by
|
|
# Digital Equipment Corporation, Maynard, MA
|
|
#
|
|
# This software is furnished under a license and may be used and copied
|
|
# only in accordance with the terms of such license and with the
|
|
# inclusion of the above copyright notice. This software or any other
|
|
# copies thereof may not be provided or otherwise made available to any
|
|
# other person. No title to and ownership of the software is hereby
|
|
# transferred.
|
|
#
|
|
# The information in this software is subject to change without notice
|
|
# and should not be construed as a commitment by Digital Equipment
|
|
# Corporation.
|
|
#
|
|
# Digital assumes no responsibility for the use or reliability of its
|
|
# software on equipment which is not supplied by Digital.
|
|
#
|
|
|
|
# Facility:
|
|
#
|
|
# GEM/OTS - GEM compiler system support library
|
|
#
|
|
# Abstract:
|
|
#
|
|
# OTS character string support, Alpha version
|
|
# This module provides support for string index, search, and verify.
|
|
#
|
|
# Authors:
|
|
#
|
|
# Bill Noyce
|
|
# Kent Glossop
|
|
#
|
|
# long ots_index(const char *str, long strlen, const char *pat, long patlen);
|
|
#
|
|
# Searches a string for a substring
|
|
# returns r0=zero-based position if found, or -1 if not.
|
|
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
|
|
#
|
|
# long ots_search(const char *str, long strlen, const char *cset, long csetlen);
|
|
#
|
|
# Searches a string for any character in a set of characters
|
|
# returns r0=zero-based position if found, or -1 if not.
|
|
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
|
|
#
|
|
# long ots_search_char(const char *str, long strlen, char pat);
|
|
# (also known as ots_index_char)
|
|
#
|
|
# Searches a string for a single pattern character
|
|
# returns r0=zero-based position if found, or -1 if not.
|
|
# Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
|
|
# (Note: GEM presumes r19 is also killed)
|
|
#
|
|
# long ots_search_mask(const char *str, long strlen, const char maskvec[], int mask)
|
|
#
|
|
# Searches a string until a character matching at least one bit
|
|
# in a mask is found in a table (similar to a VAX SCANC instruction.)
|
|
# returns r0=zero-based position if found, or -1 if not.
|
|
# Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
|
|
#
|
|
# long ots_verify(char *str, long strlen, char *cset, long csetlen);
|
|
#
|
|
# Verifies a string against a set of characters
|
|
# returns r0=zero-based position for mismatch, or -1 if all validate.
|
|
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
|
|
#
|
|
# long ots_verify_char(char *str, long strlen, char pat);
|
|
#
|
|
# Verifies a string against a single character
|
|
# returns r0=zero-based position for mismatch, or -1 if all validate.
|
|
# Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
|
|
# (Note: GEM presumes r19 is also killed)
|
|
#
|
|
# long ots_verify_mask(const char *str, long strlen, const char maskvec[], int mask)
|
|
#
|
|
# Verifies a string until a character not matching at least one bit
|
|
# in a mask is found in a table (similar to a VAX SPANC instruction.)
|
|
# returns r0=zero-based position of the first non-matching character, or -1 if all match.
|
|
# Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
|
|
#
|
|
# Special conventions for all:
|
|
# No stack space
|
|
# No linkage pointer required.
|
|
# (Warning: The auto-loader potentially takes some regs across
|
|
# the call if this is being used in a shared lib. environment.)
|
|
#
|
|
# Modification history:
|
|
#
|
|
# 006 28 May 1992 WBN Initial version, replacing BLISS -005
|
|
#
|
|
# 007 22 Sep 1992 KDG Add case-sensitive names
|
|
#
|
|
# 008 14 Nov 1992 KDG - Merge modules together (allows index/search/verify
|
|
# to use the single-character versions w/o calls)
|
|
# - initial multi-character index/search/verify
|
|
#
|
|
# 009 4 Dec 1992 KDG Fix bgt that should have been bge (GEM_BUGS #2091)
|
|
#
|
|
# 010 26 Jan 1993 KDG Add underscore
|
|
#
|
|
# All of the routines other than the single character search/verify could
|
|
# be significantly improved at some point in the future
|
|
#--
|
|
|
|
#include "ots_defs.hs"
|
|
|
|
# "Package"
|
|
#
|
|
# _OtsLocation is the single enclosing procedure (.ent here, .end at the
# bottom of the file); every string routine below is declared as an
# alternate entry point (.aent) inside it.
.globl _OtsLocation
|
|
.ent _OtsLocation
|
|
_OtsLocation:
|
|
# The routines below name r28 explicitly as a scratch register, so the
# assembler is told not to use it as its own temporary.
.set noat
|
|
# The instruction sequences below are written out in their intended order
# (see the pairing/alignment nops); the assembler must not rearrange them.
.set noreorder
|
|
|
|
# ots_index
#   Brute-force substring search: the pattern is compared byte by byte
#   at every candidate start position of the string.  (Only marginally
#   better than compiled code; a tuned version would compare up to 8
#   bytes at a time, particularly for patterns of 8 characters or fewer.)
#
#   On entry:
#       r16 -> string               r17 = string length
#       r18 -> pattern              r19 = pattern length
#   On exit:
#       r0 = zero-based position of the first occurrence,
#            0 for an empty pattern, or -1 if there is no occurrence
#
#   Register use:
#       r0  - candidate start positions not yet tried, minus one
#       r1  - pattern bytes still to compare at the current position
#       r16 - current candidate start (advanced after each mismatch)
#       r20 - string cursor         r21 - string byte / compare result
#       r22 - pattern cursor        r23 - pattern byte
#       r27, r28 - not used here
#
        .globl  _OtsStringIndex
        .aent   _OtsStringIndex
_OtsStringIndex:
        .frame  sp,0,r26

        cmpeq   r19, 1, r20             # r20 = 1 when the pattern is one byte long
        beq     r19, ix_empty_pattern   # an empty pattern matches at position 0
        subq    r17, r19, r0            # r0 = (number of start positions) - 1
        bne     r20, search_single      # one-byte pattern: use the character search
        blt     r0, ix_not_found        # pattern longer than string: no positions

# Try the pattern at the start position held in r16.
ix_try_position:
        mov     r16, r20                # string cursor = candidate start
        mov     r18, r22                # pattern cursor = start of pattern
        mov     r19, r1                 # bytes to compare = pattern length

# Compare one string byte with one pattern byte.
ix_compare_byte:
        ldq_u   r21, (r20)              # quadword holding the string byte
        ldq_u   r23, (r22)              # quadword holding the pattern byte
        extbl   r21, r20, r21           # isolate the string byte
        extbl   r23, r22, r23           # isolate the pattern byte
        lda     r20, 1(r20)             # step both cursors
        lda     r22, 1(r22)
        subq    r1, 1, r1               # one fewer byte to compare
        cmpeq   r21, r23, r21           # r21 = 1 if the bytes are equal
        beq     r21, ix_next_position   # different: give up on this position
        bgt     r1, ix_compare_byte     # equal and pattern not exhausted: go on

# Every pattern byte matched: position = (strlen - patlen) - remaining.
        subq    r17, r19, r1
        subq    r1, r0, r0
        ret     r31, (r26)

# Mismatch: move to the next start position if one is left.
ix_next_position:
        subq    r0, 1, r0               # one fewer position remaining
        lda     r16, 1(r16)             # candidate start moves up one byte
        bge     r0, ix_try_position

ix_not_found:
        lda     r0, -1(r31)             # no occurrence: return -1
        ret     r31, (r26)

ix_empty_pattern:
        clr     r0                      # empty pattern: return 0
        ret     r31, (r26)
|
|
|
|
# ots_search
#   Finds the first character of the string that is a member of the
#   character set.
#
#   R16 -> string
#   R17 = length
#   R18 -> character set
#   R19 = character set length
#   result in R0: -1 if no string character is in the set (this includes
#                 an empty string or an empty set), otherwise the position
#                 of the first such character, in the range 0..length-1
#   destroys R0-R1, R16-R23, R27-R28
#
#   A one-character set is handed to search_single (the character search).
#
#   This routine could definitely be improved.  (It should only
#   be necessary to go to memory for every 8th character for both
#   the string and the character set, and for character sets
#   <= 8 characters, it should be possible to simply keep the
#   set in a register while the string is being processed.)
#
        .globl  _OtsStringSearch
        .aent   _OtsStringSearch
_OtsStringSearch:
        .frame  sp,0,r26

        cmpeq   r19, 1, r0              # check for single-character search, clear r0 otherwise
        ble     r19, s_retm1            # return -1 if no characters in the match set
        bne     r0, search_single       # single character search
        # An empty string has nothing to find.  Without this guard the loop
        # below would read str[0] and could report a match at position 0.
        # (This instruction occupies the slot of a former padding nop, so
        # the position of the loop that follows is unchanged.)
        ble     r17, s_retm1            # return -1 if the string is empty

# outer loop: r0 = position of the string byte being tested
s_outlp:
        ldq_u   r20, (r16)              # load qw containing source byte
        lda     r22, -1(r18)            # initialize character set pointer (biased by -1)
        mov     r19, r1                 # initialize character set length counter
        extbl   r20, r16, r20           # extract the source byte to match

# core brute-force matching loop: compare the byte with each set member
s_matlp:
        ldq_u   r23, 1(r22)             # load qw containing character set byte
        lda     r22, 1(r22)             # bump character set pointer
        subq    r1, 1, r1               # decrement remaining cset length
        extbl   r23, r22, r23           # extract character set byte
        xor     r20, r23, r21           # match?
        beq     r21, s_match            # if match, we're done (r0 = position)
        bgt     r1, s_matlp             # more set members to try for this byte?

# byte is not in the set - advance to the next byte if there is one
        lda     r16, 1(r16)             # bump source pointer
        addq    r0, 1, r0               # increment position
        subq    r17, 1, r17             # decrement remaining string length
        bgt     r17, s_outlp            # if bytes remain, test the next one
s_retm1:
        lda     r0, -1(r31)             # if not, return -1
s_match:
        ret     r31, (r26)
|
|
|
|
# search_single: entry used by the index and search routines when the
# pattern / character set is exactly one byte long.  r18 points at that
# byte on entry; it is fetched into r18 (r19 is scratch) and control falls
# through into the character search with r16 = string, r17 = length.
search_single:
|
|
ldq_u r19, (r18) # load the quadword containing the byte
|
|
extbl r19, r18, r18 # extract the byte of interest
|
|
# and fall through to the character search rtn
|
|
|
|
# ots_search_char (ots_index_char)
|
|
# r16 -> string
|
|
# r17 = length
|
|
# r18 = character to find
|
|
# result in r0: -1 if not found, or position in range 0..length-1
|
|
# destroys r16-r18, r27-r28
|
|
#
# Method: the character is replicated into all 8 bytes of r18 and XORed
# with a quadword of the string, so matching bytes become zero; cmpbge of
# r31 (zero) against that result then yields one mask bit per matching
# byte.  A string of 8 bytes or fewer is gathered into a single quadword
# (string bytes at the high end); longer strings are handled from sc_long
# on, two quadwords ("A" and "B") per loop iteration.
|
|
.globl _OtsStringSearchChar
|
|
.aent _OtsStringSearchChar
|
|
_OtsStringSearchChar:
|
|
.globl _OtsStringIndexChar
|
|
.aent _OtsStringIndexChar
|
|
_OtsStringIndexChar:
|
|
.frame sp,0,r26
|
|
search_char:
|
|
sll r18, 8, r28 # Replicate char in the quadword...
|
|
beq r17, sc_fail # Quick exit if length=0
|
|
|
|
ldq_u r27, (r16) # First quadword of string
|
|
addq r16, r17, r0 # Point to end of string
|
|
|
|
subq r17, 8, r17 # Length > 8?  (r17 = length - 8 from here on)
|
|
or r18, r28, r18 # ... (char now in bytes 0,1)
|
|
|
|
sll r18, 16, r28 # ...
|
|
bgt r17, sc_long # Skip if length > 8
|
|
|
|
ldq_u r16, -1(r0) # Last quadword of string
|
|
extql r27, r0, r27 # Position string at high end of QW
|
|
|
|
or r18, r28, r18 # ... (char now in bytes 0-3)
|
|
sll r18, 32, r28 # ...
|
|
|
|
extqh r16, r0, r16 # Position string at high end of QW
|
|
or r18, r28, r18 # Pattern fills a quadword
|
|
|
|
or r27, r16, r27 # String fills a quadword
|
|
xor r27, r18, r27 # Diff betw. string and pattern
|
|
|
|
cmpbge r31, r27, r27 # Set 1's where string=pattern
|
|
subq r31, r17, r17 # Compute 8 - length
|
|
|
|
srl r27, r17, r27 # Shift off bits not part of string
|
|
clr r0 # Set return value
|
|
|
|
and r27, 0xF, r28 # One of first 4 characters?
|
|
blbs r27, sc_done # Return 0 if first char matched
|
|
|
|
subq r27, 1, r0 # Flip the first '1' bit
|
|
beq r28, sc_geq_4 # Skip if no match in first 4
|
|
|
|
andnot r27, r0, r0 # Make one-bit mask of first match
|
|
srl r0, 2, r0 # Map 2/4/8 -> 0/1/2
|
|
|
|
# stall
|
|
|
|
addq r0, 1, r0 # Bump by 1
|
|
ret r31, (r26) # return
|
|
|
|
sc_geq_4:
|
|
andnot r27, r0, r28 # Make one-bit mask of first match
|
|
beq r27, sc_done # Return -1 if there were none (r0 = r27-1 = -1 here)
|
|
|
|
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
|
|
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
|
|
|
|
addq r27, 4, r0 # Bump by 4
|
|
subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
|
|
|
|
sc_done:ret r31, (r26)
|
|
|
|
# Enter here if string length > 8.
|
|
# R16 -> start of string
|
|
# R17 = length - 8
|
|
# R18 = fill in bytes 0,1
|
|
# R27 = 1st QW of string
|
|
# R28 = fill in bytes 2,3
|
|
|
|
#.odd
|
|
sc_long:or r18, r28, r18 # R18 has pattern in low 4 bytes
|
|
|
|
sll r18, 32, r28 # ...
|
|
and r16, 7, r0 # Where in QW did we start?
|
|
|
|
or r18, r28, r18 # Pattern fills a QW
|
|
ldq_u r28, 8(r16) # Get next QW (string B)
|
|
|
|
xor r27, r18, r27 # Diff Betw. string and pattern
|
|
cmpbge r31, r27, r27 # Set 1's where string=pattern
|
|
|
|
addq r17, r0, r17 # Remaining length after 1st QW
|
|
srl r27, r0, r27 # Discard bits preceding string
|
|
|
|
subq r17, 16, r17 # More than two QW's to go?
|
|
sll r27, r0, r27 # Reposition like other bits
|
|
|
|
subq r17, r0, r0 # Remember start point to compute len
|
|
ble r17, sc_bottom # Skip the loop if 2 QW's or less
|
|
|
|
sc_loop:xor r28, r18, r28 # Diff betw string B and pattern
|
|
bne r27, sc_done_a # Exit if a match in string A
|
|
|
|
cmpbge r31, r28, r28 # 1's where string B = pattern
|
|
ldq_u r27, 16(r16) # Load string A
|
|
|
|
subq r17, 16, r17 # Decrement remaining length
|
|
bne r28, sc_done_b # Exit if a match in string B
|
|
|
|
ldq_u r28, 24(r16) # Load string B
|
|
addq r16, 16, r16 # Increment pointer
|
|
|
|
xor r27, r18, r27 # Diff betw string A and pattern
|
|
cmpbge r31, r27, r27 # 1's where string A = pattern
|
|
|
|
bgt r17, sc_loop # Repeat if more than 2 QW's left
|
|
|
|
nop #.align quad
|
|
|
|
# At most two quadwords remain: r27 = match mask for A, r28 = raw QW B.
sc_bottom:
|
|
bne r27, sc_done_a # Exit if a match in string A
|
|
addq r17, 8, r27 # More than 1 QW left?
|
|
|
|
xor r28, r18, r28 # Diff betw string B and pattern
|
|
ble r27, sc_last # Skip if this is last QW
|
|
|
|
cmpbge r31, r28, r27 # 1's where string B = pattern
|
|
ldq_u r28, 16(r16) # Load string A
|
|
|
|
subq r17, 8, r17 # Adjust len for final return
|
|
bne r27, sc_done_a # Exit if a match in string B
|
|
|
|
addq r17, 8, r27 # Ensure -7 <= (r27=len-8) <= 0
|
|
xor r28, r18, r28 # Diff betw string A and pattern
|
|
|
|
sc_last:mskqh r27, r27, r27 # Nonzero in bytes beyond string
|
|
subq r17, 8, r17 # Adjust len for final return
|
|
|
|
or r28, r27, r28 # Zeros only for matches within string
|
|
cmpbge r31, r28, r27 # Where are the matches?
|
|
|
|
bne r27, sc_done_a # Compute index if a match found
|
|
sc_fail:lda r0, -1(r31) # Else return -1
|
|
|
|
ret r31, (r26)
|
|
|
|
nop #.align 8
|
|
|
|
# Match found in a "B" quadword: make r17/r27 look like the "A" case.
sc_done_b:
|
|
addq r17, 8, r17 # Adjust length
|
|
mov r28, r27 # Put mask where it's expected
|
|
|
|
# r27 = match mask for the quadword holding the first match;
# r0 - r17 = index of that quadword's byte 0 relative to the string start.
sc_done_a:
|
|
subq r0, r17, r0 # (start - remaining) = base index
|
|
blbs r27, sc_exit # Return R0 if first char matched
|
|
|
|
and r27, 0xF, r16 # One of first 4 characters?
|
|
subq r27, 1, r28 # Flip the first '1' bit
|
|
|
|
andnot r27, r28, r28 # Make one-bit mask of first match
|
|
beq r16, sc_geq_4x # Skip if no match in first 4
|
|
|
|
srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
|
|
addq r0, 1, r0 # Bump by 1
|
|
|
|
addq r0, r28, r0 # Add byte offset
|
|
sc_exit:ret r31, (r26) # return
|
|
|
|
sc_geq_4x:
|
|
addq r0, 4, r0 # Bump by 4
|
|
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
|
|
|
|
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
|
|
addq r0, r27, r0 # Add 0/1/2/4
|
|
|
|
subq r0, r28, r0 # and correct (offset 8 -> 7)
|
|
ret r31, (r26)
|
|
|
|
# ots_search_mask
#   Walks the string one byte at a time.  Each byte is used as an index
#   into the table at r18, and the table entry is ANDed with the mask in
#   r19; the walk stops at the first byte whose entry shares a bit with
#   the mask.
#   (Could be tailored by loading a longword or a quadword at a time and
#   doing the table lookups on the characters largely in parallel.)
#
#   On entry:
#       r16 -> string               r17 = string length
#       r18 -> lookup table         r19 = mask
#   On exit:
#       r0 = position of the first byte whose table entry has a mask
#            bit set, or -1 if there is none (or r17 <= 0)
#   Scratch: r20 (table entry), r21 (string byte / table address)
#
        .globl  _OtsStringSearchMask
        .aent   _OtsStringSearchMask
_OtsStringSearchMask:
        .frame  sp,0,r26

        lda     r16, -1(r16)            # cursor runs one byte behind the byte tested
        nop                             # should be lnop (unop) or fnop to dual issue
        lda     r0, -1(r31)             # position starts at -1 (also the "none" result)
        ble     r17, smk_exit           # nothing to scan: return -1

# slow way - ~14 cycles/byte
smk_next:
        ldq_u   r21, 1(r16)             # quadword holding the next string byte
        lda     r16, 1(r16)             # advance the cursor onto that byte
        addq    r0, 1, r0               # r0 = its position
        subq    r17, 1, r17             # one fewer byte left
        extbl   r21, r16, r21           # isolate the string byte
        addq    r21, r18, r21           # address of its table entry
        ldq_u   r20, (r21)              # quadword holding the table entry
        extbl   r20, r21, r20           # isolate the table entry
        and     r20, r19, r20           # any mask bit set in the entry?
        bne     r20, smk_exit           # yes: r0 already holds the answer
        bne     r17, smk_next           # no: continue while bytes remain
        lda     r0, -1(r31)             # string exhausted without a hit
smk_exit:
        ret     r31, (r26)
|
|
|
|
# ots_verify
#   Finds the first character of the string that is NOT a member of the
#   character set.
#
#   R16 -> string
#   R17 = length
#   R18 -> character set
#   R19 = character set length
#   result in R0: -1 if all matched (always the case for an empty string),
#                 or position of the first mismatch in range 0..length-1
#                 (0 when the set is empty and the string is not)
#   destroys R0-R1, R16-R23, R27-R28
#
#   A one-character set is handed to verify_single (the character verify).
#
#   This routine could definitely be improved.  (It should only
#   be necessary to go to memory for every 8th character for both
#   the string and the character set, and for character sets
#   <= 8 characters, it should be possible to simply keep the
#   set in a register while the string is being processed.)
#
        .globl  _OtsStringVerify
        .aent   _OtsStringVerify
_OtsStringVerify:
        .frame  sp,0,r26

        cmpeq   r19, 1, r0              # check for single-character verify, clear r0 otherwise
        # An empty string has no character that can fail to verify.  Without
        # this guard the loop below would read str[0] and could report a
        # mismatch at position 0.  (Four instructions still precede the loop,
        # as before: this check takes the place of a former padding nop.)
        ble     r17, v_retm1            # return -1 if the string is empty
        ble     r19, v_ret0             # return 0 if no characters in the match set
        bne     r0, verify_single       # single character verify

# outer loop: r0 = position of the string byte being verified
v_outlp:
        ldq_u   r20, (r16)              # load qw containing source byte
        lda     r22, -1(r18)            # initialize character set pointer (biased by -1)
        mov     r19, r1                 # initialize character set length counter
        extbl   r20, r16, r20           # extract the source byte to match

# core brute-force matching loop: compare the byte with each set member
v_matlp:
        ldq_u   r23, 1(r22)             # load qw containing character set byte
        lda     r22, 1(r22)             # bump character set pointer
        subq    r1, 1, r1               # decrement remaining cset length
        extbl   r23, r22, r23           # extract character set byte
        xor     r20, r23, r21           # match?
        beq     r21, v_match            # if match, move to the next character
        bgt     r1, v_matlp             # more set members to try for this byte?

# if we made it through the whole character set, this is a mismatch:
# return the current position (r0)
v_ret0:
        ret     r31, (r26)

# match at current position - advance to next if more positions
v_match:
        lda     r16, 1(r16)             # bump source pointer
        addq    r0, 1, r0               # increment position
        subq    r17, 1, r17             # decrement remaining string length
        bgt     r17, v_outlp            # if bytes remain, verify the next one
v_retm1:
        lda     r0, -1(r31)             # if everything verified, return -1
        ret     r31, (r26)
|
|
|
|
# verify_single: entry used by the verify routine when the character set
# is exactly one byte long.  r18 points at that byte on entry; it is
# fetched into r18 (r19 is scratch) and control falls through into the
# character verify with r16 = string, r17 = length.
verify_single:
|
|
ldq_u r19, (r18) # load the quadword containing the byte
|
|
extbl r19, r18, r18 # extract the byte of interest
|
|
# and fall through to the character verify rtn
|
|
|
|
# ots_verify_char
|
|
# R16 -> string
|
|
# R17 = length
|
|
# R18 = character to check
|
|
# result in R0: -1 if all matched, or position in range 0..length-1
|
|
# destroys R16-R18, R27-R28
|
|
#
# Method: the character is replicated into all 8 bytes of r18 and XORed
# with a quadword of the string, so any nonzero byte of the result is a
# difference.  A string of 8 bytes or fewer is gathered into a single
# quadword; longer strings are handled from vc_long on, two quadwords
# ("A" and "B") per loop iteration.  cmpbge of r31 (zero) against the
# difference gives one mask bit per byte that matched; the first clear bit
# is the first mismatch.
|
|
.globl _OtsStringVerifyChar
|
|
.aent _OtsStringVerifyChar
|
|
_OtsStringVerifyChar:
|
|
.frame sp,0,r26
|
|
|
|
sll r18, 8, r28 # Replicate char in the quadword...
|
|
beq r17, vc_fail # Quick exit if length=0 (returns -1)
|
|
|
|
ldq_u r27, (r16) # First quadword of string
|
|
addq r16, r17, r0 # Point to end of string
|
|
|
|
subq r17, 8, r17 # Length > 8?  (r17 = length - 8 from here on)
|
|
or r18, r28, r18 # ... (char now in bytes 0,1)
|
|
|
|
sll r18, 16, r28 # ...
|
|
bgt r17, vc_long # Skip if length > 8
|
|
|
|
ldq_u r16, -1(r0) # Last quadword of string
|
|
extql r27, r0, r27 # Position string at high end of QW
|
|
|
|
or r18, r28, r18 # ... (char now in bytes 0-3)
|
|
sll r18, 32, r28 # ...
|
|
|
|
extqh r16, r0, r16 # Position string at high end of QW
|
|
or r18, r28, r18 # Pattern fills a quadword
|
|
|
|
or r27, r16, r27 # String fills a quadword
|
|
xor r27, r18, r18 # Diff betw. string and pattern
|
|
|
|
subq r31, r17, r17 # 8 - length
|
|
extql r18, r17, r28 # Shift off bytes preceding string
|
|
|
|
lda r0, -1(r31) # Prepare to return -1 for all matched
|
|
cmpbge r31, r28, r27 # Set 1's where string=pattern
|
|
|
|
addl r28, 0, r18 # Is first LW all zero?
|
|
beq r28, vc_done # Quick exit if all matched
|
|
|
|
addq r27, 1, r28 # Flip the first '0' bit
|
|
beq r18, vc_geq_4 # No diffs in first longword
|
|
|
|
andnot r28, r27, r28 # Make one-bit mask of first diff
|
|
srl r28, 2, r0 # Map 1/2/4/8 -> 0/0/1/2
|
|
|
|
and r27, 1, r27 # 1 if first character matched
|
|
addq r0, r27, r0 # Bump by 1 if so
|
|
|
|
ret r31, (r26) # return
|
|
|
|
nop #.align 8
|
|
|
|
vc_geq_4:
|
|
andnot r28, r27, r28 # Make one-bit mask of first diff
|
|
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
|
|
|
|
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
|
|
addq r27, 4, r0 # Bump by 4
|
|
|
|
subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
|
|
vc_done:ret r31, (r26)
|
|
|
|
# Enter here if string length > 8.
|
|
# R16 -> start of string
|
|
# R17 = length - 8
|
|
# R18 = fill in bytes 0,1
|
|
# R27 = 1st QW of string
|
|
# R28 = fill in bytes 2,3
|
|
|
|
#.align 8
|
|
vc_long:and r16, 7, r0 # Where in QW did we start?
|
|
or r18, r28, r18 # R18 has pattern in low 4 bytes
|
|
|
|
sll r18, 32, r28 # ...
|
|
addq r17, r0, r17 # Remaining length after 1st QW
|
|
|
|
or r18, r28, r18 # Pattern fills a QW
|
|
ldq_u r28, 8(r16) # Get next QW (string B)
|
|
|
|
xor r27, r18, r27 # Diff Betw. string and pattern
|
|
mskqh r27, r0, r27 # Discard diffs before string
|
|
|
|
subq r17, 16, r17 # More than two QW's to go?
|
|
subq r17, r0, r0 # Remember start point to compute len
|
|
|
|
ble r17, vc_bottom # Skip the loop if 2 QW's or less
|
|
vc_loop:bne r27, vc_done_a # Exit if a diff in string A
|
|
|
|
ldq_u r27, 16(r16) # Load string A
|
|
xor r28, r18, r28 # Diff betw string B and pattern
|
|
|
|
subq r17, 16, r17 # Decrement remaining length
|
|
bne r28, vc_done_b # Exit if a diff in string B
|
|
|
|
ldq_u r28, 24(r16) # Load string B
|
|
addq r16, 16, r16 # Increment pointer
|
|
|
|
xor r27, r18, r27 # Diff betw string A and pattern
|
|
bgt r17, vc_loop # Repeat if more than 2 QW's left
|
|
|
|
# At most two quadwords remain: r27 = diff for A, r28 = raw QW B.
vc_bottom:
|
|
bne r27, vc_done_a # Exit if a diff in string A
|
|
addq r17, 8, r17 # More than 1 QW left?
|
|
|
|
xor r28, r18, r27 # Diff betw string B and pattern
|
|
ble r17, vc_last # Skip if this is last QW
|
|
|
|
subq r17, 16, r17 # Adjust len for final return
|
|
bne r27, vc_done_a # Exit if a diff in string B
|
|
|
|
ldq_u r28, 16(r16) # Load string A
|
|
addq r17, 8, r17 # Ensure -7 <= (r17=len-8) <= 0
|
|
|
|
nop
|
|
xor r28, r18, r27 # Diff betw string A and pattern
|
|
|
|
vc_last:mskqh r17, r17, r28 # -1 in bytes beyond string
|
|
subq r17, 16, r17 # Adjust len for final return
|
|
|
|
andnot r27, r28, r27 # Nonzeros only for diffs within string
|
|
bne r27, vc_done_a # Compute index if a diff found
|
|
|
|
vc_fail:lda r0, -1(r31) # Else return -1
|
|
ret r31, (r26)
|
|
|
|
# Diff found in a "B" quadword: make r17/r27 look like the "A" case.
vc_done_b:
|
|
addq r17, 8, r17 # Adjust length
|
|
mov r28, r27 # Put difference where it's expected
|
|
|
|
# r27 = difference quadword holding the first mismatch;
# r0 - r17 = index of that quadword's byte 0 relative to the string start.
vc_done_a:
|
|
cmpbge r31, r27, r28 # 1's where they match
|
|
subq r0, r17, r0 # (start - remaining) = base index
|
|
|
|
addl r27, 0, r16 # First longword all zero?
|
|
blbc r28, vc_exit # Return R0 if first char different
|
|
|
|
addq r28, 1, r27 # Flip the first '0' bit
|
|
beq r16, vc_geq_4x # Skip if no diff in first 4
|
|
|
|
andnot r27, r28, r28 # Make one-bit mask of first diff
|
|
srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
|
|
|
|
addq r0, 1, r0 # Bump by 1
|
|
addq r0, r28, r0 # Add byte offset
|
|
|
|
vc_exit:ret r31, (r26) # return
|
|
|
|
vc_geq_4x:
|
|
andnot r27, r28, r28 # Make one-bit mask of first diff
|
|
|
|
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
|
|
addq r0, 4, r0 # Bump by 4
|
|
|
|
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
|
|
addq r0, r27, r0 # Add 0/1/2/4
|
|
|
|
subq r0, r28, r0 # and correct (offset 8 -> 7)
|
|
ret r31, (r26)
|
|
|
|
# ots_verify_mask
#   Walks the string one byte at a time.  Each byte is used as an index
#   into the table at r18, and the table entry is ANDed with the mask in
#   r19; the walk stops at the first byte whose entry shares NO bit with
#   the mask.
#   (Could be tailored by loading a longword or a quadword at a time and
#   doing the table lookups on the characters largely in parallel.)
#
#   On entry:
#       r16 -> string               r17 = string length
#       r18 -> lookup table         r19 = mask
#   On exit:
#       r0 = position of the first byte whose table entry has no mask
#            bit set, or -1 if every byte passes (or r17 <= 0)
#   Scratch: r20 (table entry), r21 (string byte / table address)
#
        .globl  _OtsStringVerifyMask
        .aent   _OtsStringVerifyMask
_OtsStringVerifyMask:
        .frame  sp,0,r26

        lda     r16, -1(r16)            # cursor runs one byte behind the byte tested
        nop                             # should be lnop (unop) or fnop to dual issue
        lda     r0, -1(r31)             # position starts at -1 (also the "all pass" result)
        ble     r17, vmk_exit           # nothing to verify: return -1

# slow way - ~14 cycles/byte
vmk_next:
        ldq_u   r21, 1(r16)             # quadword holding the next string byte
        lda     r16, 1(r16)             # advance the cursor onto that byte
        addq    r0, 1, r0               # r0 = its position
        subq    r17, 1, r17             # one fewer byte left
        extbl   r21, r16, r21           # isolate the string byte
        addq    r21, r18, r21           # address of its table entry
        ldq_u   r20, (r21)              # quadword holding the table entry
        extbl   r20, r21, r20           # isolate the table entry
        and     r20, r19, r20           # any mask bit set in the entry?
        beq     r20, vmk_exit           # none: r0 already holds the mismatch position
        bne     r17, vmk_next           # passed: continue while bytes remain
        lda     r0, -1(r31)             # every byte passed
vmk_exit:
        ret     r31, (r26)
|
|
|
|
# Undo the ".set noat" / ".set noreorder" issued at the top of the package
# and close the enclosing _OtsLocation procedure opened there with .ent.
.set at
|
|
.set reorder
|
|
.end _OtsLocation
|