Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

702 lines
22 KiB

#++
#
# Copyright (c) 1993 by
# Digital Equipment Corporation, Maynard, MA
#
# This software is furnished under a license and may be used and copied
# only in accordance with the terms of such license and with the
# inclusion of the above copyright notice. This software or any other
# copies thereof may not be provided or otherwise made available to any
# other person. No title to and ownership of the software is hereby
# transferred.
#
# The information in this software is subject to change without notice
# and should not be construed as a commitment by Digital Equipment
# Corporation.
#
# Digital assumes no responsibility for the use or reliability of its
# software on equipment which is not supplied by Digital.
#
# Facility:
#
# GEM/OTS - GEM compiler system support library
#
# Abstract:
#
# OTS character string support, Alpha version
# This module provides support for string index, search, and verify.
#
# Authors:
#
# Bill Noyce
# Kent Glossop
#
# long ots_index(const char *str, long strlen, const char *pat, long patlen);
#
# Searches a string for a substring
# returns r0=zero-based position if found, or -1 if not.
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
#
# long ots_search(const char *str, long strlen, const char *cset, long csetlen);
#
# Searches a string for any character in a set of characters
# returns r0=zero-based position if found, or -1 if not.
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
#
# long ots_search_char(const char *str, long strlen, char pat);
# (also known as ots_index_char)
#
# Searches a string for a single pattern character
# returns r0=zero-based position if found, or -1 if not.
# Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
# (Note: GEM presumes r19 is also killed)
#
# long ots_search_mask(const char *str, long strlen, const char maskvec[], int mask)
#
# Searches a string until a character matching at least one bit
# in a mask is found in a table (similar to a VAX SCANC instruction.)
# returns r0=zero-based position if found, or -1 if not.
# Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
#
# long ots_verify(char *str, long strlen, char *cset, long csetlen);
#
# Verifies a string against a set of characters
# returns r0=zero-based position for mismatch, or -1 if all validate.
# Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
#
# long ots_verify_char(char *str, long strlen, char pat);
#
# Verifies a string against a single character
# returns r0=zero-based position for mismatch, or -1 if every character matches.
# Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
# (Note: GEM presumes r19 is also killed)
#
# long ots_verify_mask(const char *str, long strlen, const char maskvec[], int mask)
#
# Verifies a string until a character not matching at least one bit
# in a mask is found in a table (similar to a VAX SPANC instruction.)
# returns r0=zero-based position if found, or -1 if not.
# Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
#
# Special conventions for all:
# No stack space
# No linkage pointer required.
# (Warning: The auto-loader potentially takes some regs across
# the call if this is being used in a shared lib. environment.)
#
# Modification history:
#
# 006 28 May 1992 WBN Initial version, replacing BLISS -005
#
# 007 22 Sep 1992 KDG Add case-sensitive names
#
# 008 14 Nov 1992 KDG - Merge modules together (allows index/search/verify
# to use the single-character versions w/o calls)
# - initial multi-character index/search/verify
#
# 009 4 Dec 1992 KDG Fix bgt that should have been bge (GEM_BUGS #2091)
#
# 010 26 Jan 1993 KDG Add underscore
#
# All of the routines other than the single character search/verify could
# be significantly improved at some point in the future
#--
#include "ots_defs.hs"
# "Package"
#
# Umbrella procedure: the string routines that follow are alternate entry
# points (.aent) into this single procedure, which is closed by the .end
# directive at the bottom of the file.
.globl _OtsLocation
.ent _OtsLocation
_OtsLocation:
# The routines below name r28 explicitly and lay their instructions out by
# hand, so the assembler temporary and instruction reordering are turned off.
.set noat
.set noreorder
# ots_index
# This is currently a primitive brute-force string index (only marginally
# better than the original compiled code. Should be tailored to compare
# up to 8 at a time, particularly for patterns <= 8 characters.)
# register use
# r0 - remaining match positions counter (-1)
# r1 - loop counter [rlen]
# r16 - source pointer (incremented on each match)
# r17 - source length
# r18 - pattern pointer
# r19 - pattern length
# r20 - loop source pointer [rsp]
# r21 - loop source temp [rs]
# r22 - loop pattern pointer [rpp]
# r23 - loop pattern temp [rp]
# r27 - available
# r28 - available
.globl _OtsStringIndex
.aent _OtsStringIndex
_OtsStringIndex:
.frame sp,0,r26
# Entry: r16 -> string, r17 = string length, r18 -> pattern, r19 = pattern length
# Exit:  r0 = zero-based offset of the first occurrence of the pattern, or -1
# A pattern length <= 0 is treated as the empty pattern (match at offset 0),
# consistent with the <= 0 tests used by the search and verify entry points.
# The "no match positions" test is made before the single-character dispatch
# so that a non-positive string length never reaches the character scanner.
cmpeq r19, 1, r20 # check for single-character index
ble r19, i_ret0 # pattern length <= 0 always matches @0
subq r17, r19, r0 # number of match positions - 1
blt r0, i_retm1 # return -1 if pattern is longer than the string
bne r20, search_single # single character index
# outer loop - one iteration per candidate start position in the string
i_outlp:
lda r20, -1(r16) # initialize source pointer (biased by -1)
lda r22, -1(r18) # initialize pattern pointer (biased by -1)
mov r19, r1 # initialize length counter
# core brute-force matching loop - compares one byte per iteration
i_matlp:
ldq_u r21, 1(r20) # load qw containing source byte
lda r20, 1(r20) # bump source pointer
ldq_u r23, 1(r22) # load qw containing pattern byte
lda r22, 1(r22) # bump pattern pointer
subq r1, 1, r1 # decrement length
extbl r21, r20, r21 # extract source byte
extbl r23, r22, r23 # extract pattern byte
xor r21, r23, r21 # match?
bne r21, i_mismat # if not, try pattern at next position
bgt r1, i_matlp # continue matching pattern at current position?
# matched - every pattern byte compared equal at the current position
i_ret:
subq r17, r19, r1 # number of match positions - 1
subq r1, r0, r0 # actual position = total - remaining
ret r31, (r26)
# mismatch at current position - advance to next if more positions
i_mismat:
subq r0, 1, r0 # decrement match positions
lda r16, 1(r16) # set r16 to next match position
bge r0, i_outlp # if remaining positions, attempt match
i_retm1:
lda r0, -1(r31) # return -1
ret r31, (r26)
i_ret0: clr r0 # empty pattern: report a match at offset 0
ret r31, (r26)
# ots_search
# R16 -> string
# R17 = length
# R18 -> character set
# R19 = character set length
# result in R0: -1 if no character matched, or position in range 0..length-1
# destroys R0-R1, R16-R23, R27-R28
#
# This routine could definitely be improved. (It should only
# be necessary to go to memory for every 8th character for both
# the string and the character set, and for character sets
# <= 8 characters, it should be possible to simply keep the
# set in a register while the string is being processed.)
#
.globl _OtsStringSearch
.aent _OtsStringSearch
_OtsStringSearch:
.frame sp,0,r26
# Entry: r16 -> string, r17 = string length, r18 -> character set, r19 = set length
# Exit:  r0 = zero-based position of the first string character that occurs
#        in the set, or -1 if there is none.
# A string length <= 0 returns -1 before any string byte is loaded; without
# that guard the loop below would examine one byte of a zero-length string
# and could report a match at position 0.
cmpeq r19, 1, r0 # check for single-character search, clear r0 otherwise
ble r19, s_retm1 # return -1 if no characters in the match set
ble r17, s_retm1 # return -1 if there is no string to search
bne r0, search_single # single character search
# outer loop - one iteration per string character; r0 is the current position
s_outlp:
ldq_u r20, (r16) # load qw containing source byte
lda r22, -1(r18) # initialize character set pointer (biased by -1)
mov r19, r1 # initialize character set length counter
extbl r20, r16, r20 # extract the source byte to match
# core brute-force matching loop - compares against one set byte per iteration
s_matlp:
ldq_u r23, 1(r22) # load qw containing character set byte
lda r22, 1(r22) # bump character set pointer
subq r1, 1, r1 # decrement remaining cset length
extbl r23, r22, r23 # extract character set byte
xor r20, r23, r21 # match?
beq r21, s_match # if match, we're done (r0 already holds the position)
bgt r1, s_matlp # more set characters to try at the current position?
# no match at current position - advance to next if more positions
lda r16, 1(r16) # bump source pointer
addq r0, 1, r0 # increment position
subq r17, 1, r17 # decrement match count
bgt r17, s_outlp # if remaining positions, attempt match
s_retm1:lda r0, -1(r31) # if not, return -1
s_match:ret r31, (r26)
# search_single: entered from the index and search routines when the
# pattern / character set is exactly one byte long. Fetches that byte
# through the pointer in r18 (r19 is used as scratch) and falls into the
# single-character search with the byte itself in r18.
search_single:
ldq_u r19, (r18) # load the quadword containing the byte
extbl r19, r18, r18 # extract the byte of interest
# and fall through to the character search rtn
# ots_search_char (ots_index_char)
# r16 -> string
# r17 = length
# r18 = character to find
# result in r0: -1 if not found, or position in range 0..length-1
# destroys r16-r18, r27-r28
#
# Method: the character is replicated into all 8 bytes of r18, each string
# quadword is XORed with it, and cmpbge against zero yields one bit per
# byte that is set where the string byte equals the character. The lowest
# set bit is then converted to a byte index.
#
.globl _OtsStringSearchChar
.aent _OtsStringSearchChar
_OtsStringSearchChar:
.globl _OtsStringIndexChar
.aent _OtsStringIndexChar
_OtsStringIndexChar:
.frame sp,0,r26
search_char:
sll r18, 8, r28 # Replicate char in the quadword...
beq r17, sc_fail # Quick exit if length=0
ldq_u r27, (r16) # First quadword of string
addq r16, r17, r0 # Point to end of string
subq r17, 8, r17 # Length > 8?
or r18, r28, r18 # ...
sll r18, 16, r28 # ...
bgt r17, sc_long # Skip if length > 8
# Short string (length 1..8): merge first and last quadwords into one
ldq_u r16, -1(r0) # Last quadword of string
extql r27, r0, r27 # Position string at high end of QW
or r18, r28, r18 # ...
sll r18, 32, r28 # ...
extqh r16, r0, r16 # Position string at high end of QW
or r18, r28, r18 # Pattern fills a quadword
or r27, r16, r27 # String fills a quadword
xor r27, r18, r27 # Diff betw. string and pattern
cmpbge r31, r27, r27 # Set 1's where string=pattern
subq r31, r17, r17 # Compute 8 - length
srl r27, r17, r27 # Shift off bits not part of string
clr r0 # Set return value
and r27, 0xF, r28 # One of first 4 characters?
blbs r27, sc_done # Return 0 if first char matched
subq r27, 1, r0 # Flip the first '1' bit (also -1 if no bits set)
beq r28, sc_geq_4 # Skip if no match in first 4
andnot r27, r0, r0 # Make one-bit mask of first match
srl r0, 2, r0 # Map 2/4/8 -> 0/1/2
# stall
addq r0, 1, r0 # Bump by 1
ret r31, (r26) # return
sc_geq_4:
andnot r27, r0, r28 # Make one-bit mask of first match
beq r27, sc_done # Return -1 if there were none
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
addq r27, 4, r0 # Bump by 4
subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
sc_done:ret r31, (r26)
# Enter here if string length > 8.
# R16 -> start of string
# R17 = length - 8
# R18 = fill in bytes 0,1
# R27 = 1st QW of string
# R28 = fill in bytes 2,3
#.odd
sc_long:or r18, r28, r18 # R18 has pattern in low 4 bytes
sll r18, 32, r28 # ...
and r16, 7, r0 # Where in QW did we start?
or r18, r28, r18 # Pattern fills a QW
ldq_u r28, 8(r16) # Get next QW (string B)
xor r27, r18, r27 # Diff Betw. string and pattern
cmpbge r31, r27, r27 # Set 1's where string=pattern
addq r17, r0, r17 # Remaining length after 1st QW
srl r27, r0, r27 # Discard bits preceding string
subq r17, 16, r17 # More than two QW's to go?
sll r27, r0, r27 # Reposition like other bits
subq r17, r0, r0 # Remember start point to compute len
ble r17, sc_bottom # Skip the loop if 2 QW's or less
# Main loop: two quadwords (string A in r27, string B in r28) per iteration
sc_loop:xor r28, r18, r28 # Diff betw string B and pattern
bne r27, sc_done_a # Exit if a match in string A
cmpbge r31, r28, r28 # 1's where string B = pattern
ldq_u r27, 16(r16) # Load string A
subq r17, 16, r17 # Decrement remaining length
bne r28, sc_done_b # Exit if a match in string B
ldq_u r28, 24(r16) # Load string B
addq r16, 16, r16 # Increment pointer
xor r27, r18, r27 # Diff betw string A and pattern
cmpbge r31, r27, r27 # 1's where string A = pattern
bgt r17, sc_loop # Repeat if more than 2 QW's left
nop #.align quad
# At most two quadwords remain; the final one may be only partly in the string
sc_bottom:
bne r27, sc_done_a # Exit if a match in string A
addq r17, 8, r27 # More than 1 QW left?
xor r28, r18, r28 # Diff betw string B and pattern
ble r27, sc_last # Skip if this is last QW
cmpbge r31, r28, r27 # 1's where string B = pattern
ldq_u r28, 16(r16) # Load string A
subq r17, 8, r17 # Adjust len for final return
bne r27, sc_done_a # Exit if a match in string B
addq r17, 8, r27 # Ensure -7 <= (r27=len-8) <= 0
xor r28, r18, r28 # Diff betw string A and pattern
sc_last:mskqh r27, r27, r27 # Nonzero in bytes beyond string
subq r17, 8, r17 # Adjust len for final return
or r28, r27, r28 # Zeros only for matches within string
cmpbge r31, r28, r27 # Where are the matches?
bne r27, sc_done_a # Compute index if a match found
sc_fail:lda r0, -1(r31) # Else return -1
ret r31, (r26)
nop #.align 8
# Convert a match mask into the final index.
# sc_done_b: mask is in r28 (string B); sc_done_a: mask is in r27.
sc_done_b:
addq r17, 8, r17 # Adjust length
mov r28, r27 # Put mask where it's expected
sc_done_a:
subq r0, r17, r0 # (start - remaining) = base index
blbs r27, sc_exit # Return R0 if first char matched
and r27, 0xF, r16 # One of first 4 characters?
subq r27, 1, r28 # Flip the first '1' bit
andnot r27, r28, r28 # Make one-bit mask of first match
beq r16, sc_geq_4x # Skip if no match in first 4
srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
addq r0, 1, r0 # Bump by 1
addq r0, r28, r0 # Add byte offset
sc_exit:ret r31, (r26) # return
sc_geq_4x:
addq r0, 4, r0 # Bump by 4
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
addq r0, r27, r0 # Add 0/1/2/4
subq r0, r28, r0 # and correct
ret r31, (r26)
# ots_search_mask
# This routine could be tailored by loading a longword or
# a quadword at a time and doing table lookups on the
# characters largely in parallel.
#
.globl _OtsStringSearchMask
.aent _OtsStringSearchMask
_OtsStringSearchMask:
.frame sp,0,r26
# Entry: r16 -> string, r17 = length, r18 -> mask table (one byte per
#        character value), r19 = mask of bits to test
# Exit:  r0 = zero-based position of the first character whose table byte
#        has at least one bit in common with r19, or -1 if there is none
# Writes r0, r16, r17, r20 and r21.
lda r16, -1(r16) # bias initial address for better loop code
nop # should be lnop (unop) or fnop to dual issue
lda r0, -1(r31) # initialize position to -1
ble r17, sm_ret # return -1 if source len is zero
# slow way - ~14 cycles/byte
sm_loop:
ldq_u r21, 1(r16) # load qw containing the byte
lda r16, 1(r16) # bump pointer
addq r0, 1, r0 # bump position
subq r17, 1, r17 # decrement the length
extbl r21, r16, r21 # extract the byte
addq r21, r18, r21 # address of the byte's entry in the table
ldq_u r20, (r21) # load qw from table containing lookup
extbl r20, r21, r20 # extract table byte
and r20, r19, r20 # check if any bits in the mask match
beq r17, sm_end # if last character, handle specially
beq r20, sm_loop # if no match, go do the loop again
sm_ret:
ret r31, (r26) # match found (r0 = position), or empty string (r0 = -1)
sm_end: lda r21, -1(r31) # get -1
cmoveq r20, r21, r0 # -1 if last char didn't match
ret r31, (r26)
# ots_verify
# R16 -> string
# R17 = length
# R18 -> character set
# R19 = character set length
# result in R0: -1 if all matched, or position in range 0..length-1
# destroys R0-R1, R16-R23, R27-R28
#
# This routine could definitely be improved. (It should only
# be necessary to go to memory for every 8th character for both
# the string and the character set, and for character sets
# <= 8 characters, it should be possible to simply keep the
# set in a register while the string is being processed.)
#
.globl _OtsStringVerify
.aent _OtsStringVerify
_OtsStringVerify:
.frame sp,0,r26
# Entry: r16 -> string, r17 = string length, r18 -> character set, r19 = set length
# Exit:  r0 = zero-based position of the first string character that is NOT
#        in the set, or -1 if every character is in the set.
# A string length <= 0 has nothing to mismatch, so it returns -1 before any
# string byte is loaded; without that guard the loop below would examine one
# byte of a zero-length string and could report a mismatch at position 0.
cmpeq r19, 1, r0 # check for single-character verify, clear r0 otherwise
ble r17, v_retm1 # return -1 if there is no string to verify
ble r19, v_ret0 # return 0 if no characters in the match set
bne r0, verify_single # single character verify
# outer loop - one iteration per string character; r0 is the current position
v_outlp:
ldq_u r20, (r16) # load qw containing source byte
lda r22, -1(r18) # initialize character set pointer (biased by -1)
mov r19, r1 # initialize character set length counter
extbl r20, r16, r20 # extract the source byte to match
# core brute-force matching loop - compares against one set byte per iteration
v_matlp:
ldq_u r23, 1(r22) # load qw containing character set byte
lda r22, 1(r22) # bump character set pointer
subq r1, 1, r1 # decrement remaining cset length
extbl r23, r22, r23 # extract character set byte
xor r20, r23, r21 # match?
beq r21, v_match # if match, move to the next character
bgt r1, v_matlp # more set characters to try at the current position?
# if we made it through the whole character set, this is a mismatch
v_ret0: ret r31, (r26) # r0 = position of the mismatch
v_match: # match at current position - advance to next if more positions
lda r16, 1(r16) # bump source pointer
addq r0, 1, r0 # increment position
subq r17, 1, r17 # decrement match count
bgt r17, v_outlp # if remaining positions, attempt match
v_retm1:lda r0, -1(r31) # if everything verified, return -1
ret r31, (r26)
# verify_single: entered from the verify routine when the character set is
# exactly one byte long. Fetches that byte through the pointer in r18 (r19
# is used as scratch) and falls into the single-character verify with the
# byte itself in r18.
verify_single:
ldq_u r19, (r18) # load the quadword containing the byte
extbl r19, r18, r18 # extract the byte of interest
# and fall through to the character verify rtn
# ots_verify_char
# R16 -> string
# R17 = length
# R18 = character to check
# result in R0: -1 if all matched, or position in range 0..length-1
# destroys R16-R18, R27-R28
#
# Method: the character is replicated into all 8 bytes of r18 and each
# string quadword is XORed with it, so a nonzero byte marks a difference.
# cmpbge against zero gives one bit per byte that is set where the bytes
# are equal; the lowest CLEAR bit is converted to the mismatch index.
#
.globl _OtsStringVerifyChar
.aent _OtsStringVerifyChar
_OtsStringVerifyChar:
.frame sp,0,r26
sll r18, 8, r28 # Replicate char in the quadword...
beq r17, vc_fail # Quick exit if length=0
ldq_u r27, (r16) # First quadword of string
addq r16, r17, r0 # Point to end of string
subq r17, 8, r17 # Length > 8?
or r18, r28, r18 # ...
sll r18, 16, r28 # ...
bgt r17, vc_long # Skip if length > 8
# Short string (length 1..8): merge first and last quadwords into one
ldq_u r16, -1(r0) # Last quadword of string
extql r27, r0, r27 # Position string at high end of QW
or r18, r28, r18 # ...
sll r18, 32, r28 # ...
extqh r16, r0, r16 # Position string at high end of QW
or r18, r28, r18 # Pattern fills a quadword
or r27, r16, r27 # String fills a quadword
xor r27, r18, r18 # Diff betw. string and pattern
subq r31, r17, r17 # 8 - length
extql r18, r17, r28 # Shift off bytes preceding string
lda r0, -1(r31) # Prepare to return -1 for all matched
cmpbge r31, r28, r27 # Set 1's where string=pattern
addl r28, 0, r18 # Is first LW all zero?
beq r28, vc_done # Quick exit if all matched
addq r27, 1, r28 # Flip the first '0' bit
beq r18, vc_geq_4 # No diffs in first longword
andnot r28, r27, r28 # Make one-bit mask of first diff
srl r28, 2, r0 # Map 1/2/4/8 -> 0/0/1/2
and r27, 1, r27 # 1 if first character matched
addq r0, r27, r0 # Bump by 1 if so
ret r31, (r26) # return
nop #.align 8
vc_geq_4:
andnot r28, r27, r28 # Make one-bit mask of first diff
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
addq r27, 4, r0 # Bump by 4
subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
vc_done:ret r31, (r26)
# Enter here if string length > 8.
# R16 -> start of string
# R17 = length - 8
# R18 = fill in bytes 0,1
# R27 = 1st QW of string
# R28 = fill in bytes 2,3
#.align 8
vc_long:and r16, 7, r0 # Where in QW did we start?
or r18, r28, r18 # R18 has pattern in low 4 bytes
sll r18, 32, r28 # ...
addq r17, r0, r17 # Remaining length after 1st QW
or r18, r28, r18 # Pattern fills a QW
ldq_u r28, 8(r16) # Get next QW (string B)
xor r27, r18, r27 # Diff Betw. string and pattern
mskqh r27, r0, r27 # Discard diffs before string
subq r17, 16, r17 # More than two QW's to go?
subq r17, r0, r0 # Remember start point to compute len
ble r17, vc_bottom # Skip the loop if 2 QW's or less
# Main loop: two quadwords (string A in r27, string B in r28) per iteration
vc_loop:bne r27, vc_done_a # Exit if a diff in string A
ldq_u r27, 16(r16) # Load string A
xor r28, r18, r28 # Diff betw string B and pattern
subq r17, 16, r17 # Decrement remaining length
bne r28, vc_done_b # Exit if a diff in string B
ldq_u r28, 24(r16) # Load string B
addq r16, 16, r16 # Increment pointer
xor r27, r18, r27 # Diff betw string A and pattern
bgt r17, vc_loop # Repeat if more than 2 QW's left
# At most two quadwords remain; the final one may be only partly in the string
vc_bottom:
bne r27, vc_done_a # Exit if a diff in string A
addq r17, 8, r17 # More than 1 QW left?
xor r28, r18, r27 # Diff betw string B and pattern
ble r17, vc_last # Skip if this is last QW
subq r17, 16, r17 # Adjust len for final return
bne r27, vc_done_a # Exit if a diff in string B
ldq_u r28, 16(r16) # Load string A
addq r17, 8, r17 # Ensure -7 <= (r17=len-8) <= 0
nop
xor r28, r18, r27 # Diff betw string A and pattern
vc_last:mskqh r17, r17, r28 # -1 in bytes beyond string
subq r17, 16, r17 # Adjust len for final return
andnot r27, r28, r27 # Nonzeros only for diffs within string
bne r27, vc_done_a # Compute index if a diff found
vc_fail:lda r0, -1(r31) # Else return -1
ret r31, (r26)
# Convert a difference quadword into the final index.
# vc_done_b: differences are in r28 (string B); vc_done_a: they are in r27.
vc_done_b:
addq r17, 8, r17 # Adjust length
mov r28, r27 # Put difference where it's expected
vc_done_a:
cmpbge r31, r27, r28 # 1's where they match
subq r0, r17, r0 # (start - remaining) = base index
addl r27, 0, r16 # First longword all zero?
blbc r28, vc_exit # Return R0 if first char different
addq r28, 1, r27 # Flip the first '0' bit
beq r16, vc_geq_4x # Skip if no diff in first 4
andnot r27, r28, r28 # Make one-bit mask of first diff
srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
addq r0, 1, r0 # Bump by 1
addq r0, r28, r0 # Add byte offset
vc_exit:ret r31, (r26) # return
vc_geq_4x:
andnot r27, r28, r28 # Make one-bit mask of first diff
srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
addq r0, 4, r0 # Bump by 4
srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
addq r0, r27, r0 # Add 0/1/2/4
subq r0, r28, r0 # and correct
ret r31, (r26)
# ots_verify_mask
# This routine could be tailored by loading a longword or
# a quadword at a time and doing table lookups on the
# characters largely in parallel.
#
.globl _OtsStringVerifyMask
.aent _OtsStringVerifyMask
_OtsStringVerifyMask:
.frame sp,0,r26
# Entry: r16 -> string, r17 = length, r18 -> mask table (one byte per
#        character value), r19 = mask of bits to test
# Exit:  r0 = zero-based position of the first character whose table byte
#        has NO bit in common with r19, or -1 if every character has one
# Writes r0, r16, r17, r20 and r21.
lda r16, -1(r16) # bias initial address for better loop code
nop # should be lnop (unop) or fnop to dual issue
lda r0, -1(r31) # initialize position to -1
ble r17, vm_ret # return -1 if source len is zero
# slow way - ~14 cycles/byte
vm_loop:
ldq_u r21, 1(r16) # load qw containing the byte
lda r16, 1(r16) # bump pointer
addq r0, 1, r0 # bump position
subq r17, 1, r17 # decrement the length
extbl r21, r16, r21 # extract the byte
addq r21, r18, r21 # address of the byte's entry in the table
ldq_u r20, (r21) # load qw from table containing lookup
extbl r20, r21, r20 # extract table byte
and r20, r19, r20 # check if any bits in the mask match
beq r17, vm_end # if last character, handle specially
bne r20, vm_loop # if match, go do the loop again
vm_ret:
ret r31, (r26) # mismatch found (r0 = position), or empty string (r0 = -1)
vm_end: lda r21, -1(r31) # get -1
cmovne r20, r21, r0 # -1 if last char matched
ret r31, (r26)
# Re-enable the assembler temporary and instruction reordering that were
# switched off at the top of the procedure, then close _OtsLocation.
.set at
.set reorder
.end _OtsLocation