Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

702 lines
22 KiB

4 years ago
  1. #++
  2. #
  3. # Copyright (c) 1993 by
  4. # Digital Equipment Corporation, Maynard, MA
  5. #
  6. # This software is furnished under a license and may be used and copied
  7. # only in accordance with the terms of such license and with the
  8. # inclusion of the above copyright notice. This software or any other
  9. # copies thereof may not be provided or otherwise made available to any
  10. # other person. No title to and ownership of the software is hereby
  11. # transferred.
  12. #
  13. # The information in this software is subject to change without notice
  14. # and should not be construed as a commitment by Digital Equipment
  15. # Corporation.
  16. #
  17. # Digital assumes no responsibility for the use or reliability of its
  18. # software on equipment which is not supplied by Digital.
  19. #
  20. # Facility:
  21. #
  22. # GEM/OTS - GEM compiler system support library
  23. #
  24. # Abstract:
  25. #
  26. # OTS character string support, Alpha version
  27. # This module provides support for string index, search, and verify.
  28. #
  29. # Authors:
  30. #
  31. # Bill Noyce
  32. # Kent Glossop
  33. #
  34. # long ots_index(const char *str, long strlen, const char *pat, long patlen);
  35. #
  36. # Searches a string for a substring
  37. # returns r0=zero-based position if found, or -1 if not.
  38. # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
  39. #
  40. # long ots_search(const char *str, long strlen, const char *cset, long csetlen);
  41. #
  42. # Searches a string for any character in a set of characters
  43. # returns r0=zero-based position if found, or -1 if not.
  44. # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
  45. #
  46. # long ots_search_char(const char *str, long strlen, char pat);
  47. # (also known as ots_index_char)
  48. #
  49. # Searches a string for a single pattern character
  50. # returns r0=zero-based position if found, or -1 if not.
  51. # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
  52. # (Note: GEM presumes r19 is also killed)
  53. #
  54. # long ots_search_mask(const char *str, long strlen, const char maskvec[], int mask)
  55. #
  56. # Searches a string until a character matching at least one bit
  57. # in a mask is found in a table (similar to a VAX SCANC instruction.)
  58. # returns r0=zero-based position if found, or -1 if not.
  59. # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
  60. #
  61. # long ots_verify(char *str, long strlen, char *cset, long csetlen);
  62. #
  63. # Verifies a string against a set of characters
  64. # returns r0=zero-based position for mismatch, or -1 if all validate.
  65. # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
  66. #
  67. # long ots_verify_char(char *str, long strlen, char pat);
  68. #
  69. # Verifies a string against a single character
  70. # returns r0=zero-based position for mismatch, or -1 if all validate.
  71. # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
  72. # (Note: GEM presumes r19 is also killed)
  73. #
  74. # long ots_verify_mask(const char *str, long strlen, const char maskvec[], int mask)
  75. #
  76. # Verifies a string until a character not matching at least one bit
  77. # in a mask is found in a table (similar to a VAX SPANC instruction.)
  78. # returns r0=zero-based position if found, or -1 if not.
  79. # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
  80. #
  81. # Special conventions for all:
  82. # No stack space
  83. # No linkage pointer required.
  84. # (Warning: The auto-loader potentially takes some regs across
  85. # the call if this is being used in a shared lib. environment.)
  86. #
  87. # Modification history:
  88. #
  89. # 006 28 May 1992 WBN Initial version, replacing BLISS -005
  90. #
  91. # 007 22 Sep 1992 KDG Add case-sensitive names
  92. #
  93. # 008 14 Nov 1992 KDG - Merge modules together (allows index/search/verify
  94. # to use the single-character versions w/o calls)
  95. # - initial multi-character index/search/verify
  96. #
  97. # 009 4 Dec 1992 KDG Fix bgt that should have been bge (GEM_BUGS #2091)
  98. #
  99. # 010 26 Jan 1993 KDG Add underscore
  100. #
  101. # All of the routines other than the single character search/verify could
  102. # be significantly improved at some point in the future
  103. #--
  104. #include "ots_defs.hs"
  105. # "Package"
  # A single procedure, _OtsLocation, encloses every routine in this file.
  # Each public routine is declared inside it with .aent, and the
  # multi-character routines branch directly into the single-character ones
  # (search_single / verify_single) rather than calling them.
  106. #
  107. .globl _OtsLocation
  108. .ent _OtsLocation
  109. _OtsLocation:
  # The routines below name r28 explicitly as a scratch register, and their
  # instruction order appears to be hand-scheduled (see the stall / dual-issue
  # notes in the comments), hence the two .set directives.
  110. .set noat
  111. .set noreorder
  112. # ots_index - find the first occurrence of a pattern within a string
  113. # This is currently a primitive brute-force string index (only marginally
  114. # better than the original compiled code). It should be tailored to compare
  115. # up to 8 at a time, particularly for patterns <= 8 characters.
  116. # register use
  117. # r0 - remaining match positions counter (-1)
  118. # r1 - loop counter [rlen]
  119. # r16 - source pointer (incremented on each mismatch)
  120. # r17 - source length
  121. # r18 - pattern pointer
  122. # r19 - pattern length
  123. # r20 - loop source pointer [rsp]
  124. # r21 - loop source temp [rs]
  125. # r22 - loop pattern pointer [rpp]
  126. # r23 - loop pattern temp [rp]
  127. # r27 - available
  128. # r28 - available
  # result in r0: 0 for a zero-length pattern, -1 when the pattern does not
  # occur, otherwise the zero-based position of the first occurrence.
  # A one-byte pattern is handed to search_single (single-character search).
  129. .globl _OtsStringIndex
  130. .aent _OtsStringIndex
  131. _OtsStringIndex:
  # no stack frame is used; control returns through r26
  132. .frame sp,0,r26
  133. cmpeq r19, 1, r20 # check for single-character index (r20 = 1 if so)
  134. beq r19, i_ret0 # pattern length 0 always matches @0
  135. subq r17, r19, r0 # number of match positions - 1
  136. bne r20, search_single # single character index
  137. blt r0, i_retm1 # return -1 if no match positions (pattern longer than string)
  138. # outer loop - one iteration per candidate start position (r16)
  139. i_outlp:
  140. lda r20, -1(r16) # initialize source pointer (biased by -1 for the pre-increment below)
  141. lda r22, -1(r18) # initialize pattern pointer (biased by -1 likewise)
  142. mov r19, r1 # initialize length counter
  143. # core brute-force matching loop
  144. i_matlp:
  145. ldq_u r21, 1(r20) # load qw containing source byte
  146. lda r20, 1(r20) # bump source pointer
  147. ldq_u r23, 1(r22) # load qw containing pattern byte
  148. lda r22, 1(r22) # bump pattern pointer
  149. subq r1, 1, r1 # decrement length
  150. extbl r21, r20, r21 # extract source byte
  151. extbl r23, r22, r23 # extract pattern byte
  152. xor r21, r23, r21 # match? (zero when the two bytes are equal)
  153. bne r21, i_mismat # if not, try pattern at next position
  154. bgt r1, i_matlp # continue matching pattern at current position?
  155. # matched - every pattern byte compared equal at this start position
  156. i_ret:
  157. subq r17, r19, r1 # number of match positions - 1
  158. subq r1, r0, r0 # actual position = initial counter - remaining counter
  159. ret r31, (r26)
  160. # mismatch at current position - advance to next if more positions
  161. i_mismat:
  162. subq r0, 1, r0 # decrement match positions
  163. lda r16, 1(r16) # set r16 to next match position
  164. bge r0, i_outlp # if remaining positions, attempt match
  165. i_retm1:
  166. lda r0, -1(r31) # return -1
  167. ret r31, (r26)
  168. i_ret0: clr r0 # zero-length pattern: return position 0
  169. ret r31, (r26)
  170. # ots_search - find the first string character that belongs to a character set
  171. # R16 -> string
  172. # R17 = length
  173. # R18 -> character set
  174. # R19 = character set length
  175. # result in R0: -1 if no character is in the set (including an empty string or empty set), or position in range 0..length-1
  176. # destroys R0-R1, R16-R23, R27-R28
  177. #
  178. # This routine could definitely be improved. (It should only
  179. # be necessary to go to memory for every 8th character for both
  180. # the string and the character set, and for character sets
  181. # <= 8 characters, it should be possible to simply keep the
  182. # set in a register while the string is being processed.)
  183. #
  184. .globl _OtsStringSearch
  185. .aent _OtsStringSearch
  186. _OtsStringSearch:
  # no stack frame is used; control returns through r26
  187. .frame sp,0,r26
  188. cmpeq r19, 1, r0 # check for single-character search, clear r0 otherwise
  189. ble r19, s_retm1 # return -1 if no characters in the match set
  190. bne r0, search_single # single character search (that path tests the length itself)
  # The loop below examines a string byte before it looks at the remaining
  # length, so an empty string must be rejected here; otherwise a byte outside
  # the string could be reported as a match at position 0.  This test occupies
  # the slot of the former padding nop, so the code size is unchanged.
  191. ble r17, s_retm1 # return -1 if the string is empty
  192. # outer loop - one iteration per string character; r0 = its position
  193. s_outlp:
  194. ldq_u r20, (r16) # load qw containing source byte
  195. lda r22, -1(r18) # initialize character set pointer (biased by -1)
  196. mov r19, r1 # initialize character set length counter
  197. extbl r20, r16, r20 # extract the source byte to match
  198. # core brute-force matching loop
  199. s_matlp:
  200. ldq_u r23, 1(r22) # load qw containing character set byte
  201. lda r22, 1(r22) # bump character set pointer
  202. subq r1, 1, r1 # decrement remaining cset length
  203. extbl r23, r22, r23 # extract character set byte
  204. xor r20, r23, r21 # match? (zero when the two bytes are equal)
  205. beq r21, s_match # if match, we're done (r0 = position)
  206. bgt r1, s_matlp # more characters in the set to try at this position?
  207. # no match at current position - advance to next if more positions
  208. lda r16, 1(r16) # bump source pointer
  209. addq r0, 1, r0 # increment position
  210. subq r17, 1, r17 # decrement match count
  211. bgt r17, s_outlp # if remaining positions, attempt match
  212. s_retm1:lda r0, -1(r31) # if not, return -1
  213. s_match:ret r31, (r26)
  214. search_single:
  # Reached from _OtsStringIndex and _OtsStringSearch when the pattern or
  # character set is exactly one byte long.  On entry r18 points at that byte;
  # on exit r18 holds the byte itself (zero-extended by extbl), which is what
  # the character search routine below takes as its third argument.
  215. ldq_u r19, (r18) # load the quadword containing the byte (overwrites r19)
  216. extbl r19, r18, r18 # extract the byte of interest
  217. # and fall through to the character search rtn
  218. # ots_search_char (ots_index_char) - find the first occurrence of one character
  219. # r16 -> string
  220. # r17 = length
  221. # r18 = character to find
  222. # result in r0: -1 if not found, or position in range 0..length-1
  223. # destroys r16-r18, r27-r28
  224. #
  # Method: the character is replicated into all 8 bytes of r18, each string
  # quadword is XORed with it, and cmpbge against zero turns the zero bytes
  # (the matches) into a bit mask; the lowest set bit gives the position.
  # NOTE(review): the replication uses shift/or only, so it presumes r18 has
  # no bits set above bit 7 on entry - confirm for direct callers.
  225. .globl _OtsStringSearchChar
  226. .aent _OtsStringSearchChar
  227. _OtsStringSearchChar:
  228. .globl _OtsStringIndexChar
  229. .aent _OtsStringIndexChar
  230. _OtsStringIndexChar:
  # no stack frame is used; control returns through r26
  231. .frame sp,0,r26
  232. search_char:
  233. sll r18, 8, r28 # Replicate char in the quadword...
  234. beq r17, sc_fail # Quick exit if length=0
  235. ldq_u r27, (r16) # First quadword of string
  236. addq r16, r17, r0 # Point to end of string
  237. subq r17, 8, r17 # Length > 8?
  238. or r18, r28, r18 # ... (char now in bytes 0,1)
  239. sll r18, 16, r28 # ...
  240. bgt r17, sc_long # Skip if length > 8
  241. ldq_u r16, -1(r0) # Last quadword of string
  242. extql r27, r0, r27 # Position string at high end of QW
  243. or r18, r28, r18 # ... (char now in bytes 0-3)
  244. sll r18, 32, r28 # ...
  245. extqh r16, r0, r16 # Position string at high end of QW
  246. or r18, r28, r18 # Pattern fills a quadword
  247. or r27, r16, r27 # String fills a quadword
  248. xor r27, r18, r27 # Diff betw. string and pattern
  249. cmpbge r31, r27, r27 # Set 1's where string=pattern
  250. subq r31, r17, r17 # Compute 8 - length
  251. srl r27, r17, r27 # Shift off bits not part of string
  252. clr r0 # Set return value
  253. and r27, 0xF, r28 # One of first 4 characters?
  254. blbs r27, sc_done # Return 0 if first char matched
  255. subq r27, 1, r0 # Flip the first '1' bit (gives -1 when the mask is zero)
  256. beq r28, sc_geq_4 # Skip if no match in first 4
  257. andnot r27, r0, r0 # Make one-bit mask of first match
  258. srl r0, 2, r0 # Map 2/4/8 -> 0/1/2
  259. # stall
  260. addq r0, 1, r0 # Bump by 1
  261. ret r31, (r26) # return
  262. sc_geq_4:
  263. andnot r27, r0, r28 # Make one-bit mask of first match
  264. beq r27, sc_done # Return -1 if there were none (r0 is already -1 here)
  265. srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
  266. srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
  267. addq r27, 4, r0 # Bump by 4
  268. subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
  269. sc_done:ret r31, (r26)
  270. # Enter here if string length > 8.
  271. # R16 -> start of string
  272. # R17 = length - 8
  273. # R18 = fill in bytes 0,1
  274. # R27 = 1st QW of string
  275. # R28 = fill in bytes 2,3
  276. #.odd
  277. sc_long:or r18, r28, r18 # R18 has pattern in low 4 bytes
  278. sll r18, 32, r28 # ...
  279. and r16, 7, r0 # Where in QW did we start?
  280. or r18, r28, r18 # Pattern fills a QW
  281. ldq_u r28, 8(r16) # Get next QW (string B)
  282. xor r27, r18, r27 # Diff Betw. string and pattern
  283. cmpbge r31, r27, r27 # Set 1's where string=pattern
  284. addq r17, r0, r17 # Remaining length after 1st QW
  285. srl r27, r0, r27 # Discard bits preceding string
  286. subq r17, 16, r17 # More than two QW's to go?
  287. sll r27, r0, r27 # Reposition like other bits
  288. subq r17, r0, r0 # Remember start point to compute len
  289. ble r17, sc_bottom # Skip the loop if 2 QW's or less
  # main loop: two quadwords (A in r27, B in r28) are examined per iteration
  290. sc_loop:xor r28, r18, r28 # Diff betw string B and pattern
  291. bne r27, sc_done_a # Exit if a match in string A
  292. cmpbge r31, r28, r28 # 1's where string B = pattern
  293. ldq_u r27, 16(r16) # Load string A
  294. subq r17, 16, r17 # Decrement remaining length
  295. bne r28, sc_done_b # Exit if a match in string B
  296. ldq_u r28, 24(r16) # Load string B
  297. addq r16, 16, r16 # Increment pointer
  298. xor r27, r18, r27 # Diff betw string A and pattern
  299. cmpbge r31, r27, r27 # 1's where string A = pattern
  300. bgt r17, sc_loop # Repeat if more than 2 QW's left
  301. nop #.align quad
  302. sc_bottom:
  303. bne r27, sc_done_a # Exit if a match in string A
  304. addq r17, 8, r27 # More than 1 QW left?
  305. xor r28, r18, r28 # Diff betw string B and pattern
  306. ble r27, sc_last # Skip if this is last QW
  307. cmpbge r31, r28, r27 # 1's where string B = pattern
  308. ldq_u r28, 16(r16) # Load string A
  309. subq r17, 8, r17 # Adjust len for final return
  310. bne r27, sc_done_a # Exit if a match in string B
  311. addq r17, 8, r27 # Ensure -7 <= (r27=len-8) <= 0
  312. xor r28, r18, r28 # Diff betw string A and pattern
  313. sc_last:mskqh r27, r27, r27 # Nonzero in bytes beyond string
  314. subq r17, 8, r17 # Adjust len for final return
  315. or r28, r27, r28 # Zeros only for matches within string
  316. cmpbge r31, r28, r27 # Where are the matches?
  317. bne r27, sc_done_a # Compute index if a match found
  318. sc_fail:lda r0, -1(r31) # Else return -1
  319. ret r31, (r26)
  320. nop #.align 8
  321. sc_done_b:
  322. addq r17, 8, r17 # Adjust length
  323. mov r28, r27 # Put mask where it's expected
  # r27 = match mask of the quadword holding the first match; convert the
  # lowest set bit into a byte offset and add it to the base index
  324. sc_done_a:
  325. subq r0, r17, r0 # (start - remaining) = base index
  326. blbs r27, sc_exit # Return R0 if first char matched
  327. and r27, 0xF, r16 # One of first 4 characters?
  328. subq r27, 1, r28 # Flip the first '1' bit
  329. andnot r27, r28, r28 # Make one-bit mask of first match
  330. beq r16, sc_geq_4x # Skip if no match in first 4
  331. srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
  332. addq r0, 1, r0 # Bump by 1
  333. addq r0, r28, r0 # Add byte offset
  334. sc_exit:ret r31, (r26) # return
  335. sc_geq_4x:
  336. addq r0, 4, r0 # Bump by 4
  337. srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
  338. srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
  339. addq r0, r27, r0 # Add 0/1/2/4
  340. subq r0, r28, r0 # and correct (the 4 becomes 3)
  341. ret r31, (r26)
  342. # ots_search_mask - find the first character whose table entry shares a bit with the mask
  343. # This routine could be tailored by loading a longword or
  344. # a quadword at a time and doing table lookups on the
  345. # characters largely in parallel.
  346. #
  # r16 -> string
  # r17 = length
  # r18 -> byte table indexed by character value
  # r19 = mask
  # result in r0: -1 if no character qualifies (or length <= 0), otherwise the
  # zero-based position of the first character with (table[char] & mask) != 0
  347. .globl _OtsStringSearchMask
  348. .aent _OtsStringSearchMask
  349. _OtsStringSearchMask:
  # no stack frame is used; control returns through r26
  350. .frame sp,0,r26
  351. lda r16, -1(r16) # bias initial address for better loop code
  352. nop # should be lnop (unop) or fnop to dual issue
  353. lda r0, -1(r31) # initialize position to -1
  354. ble r17, sm_ret # return -1 if source len is zero (or negative)
  355. # slow way - ~14 cycles/byte
  356. sm_loop:
  357. ldq_u r21, 1(r16) # load qw containing the byte
  358. lda r16, 1(r16) # bump pointer
  359. addq r0, 1, r0 # bump position
  360. subq r17, 1, r17 # decrement the length
  361. extbl r21, r16, r21 # extract the byte
  362. addq r21, r18, r21 # get the address of the byte's table entry
  363. ldq_u r20, (r21) # load qw from table containing lookup
  364. extbl r20, r21, r20 # extract table byte
  365. and r20, r19, r20 # check if any bits in the mask match
  366. beq r17, sm_end # if last character, handle specially
  367. beq r20, sm_loop # if no match, go do the loop again
  368. sm_ret:
  369. ret r31, (r26) # match found (r0 = position), or early exit with r0 = -1
  370. sm_end: lda r21, -1(r31) # get -1
  371. cmoveq r20, r21, r0 # -1 if last char didn't match
  372. ret r31, (r26)
  373. # ots_verify - find the first string character that is NOT in a character set
  374. # R16 -> string
  375. # R17 = length
  376. # R18 -> character set
  377. # R19 = character set length
  378. # result in R0: -1 if all matched (including an empty string), or position in range 0..length-1
  379. # destroys R0-R1, R16-R23, R27-R28
  380. #
  381. # This routine could definitely be improved. (It should only
  382. # be necessary to go to memory for every 8th character for both
  383. # the string and the character set, and for character sets
  384. # <= 8 characters, it should be possible to simply keep the
  385. # set in a register while the string is being processed.)
  386. #
  387. .globl _OtsStringVerify
  388. .aent _OtsStringVerify
  389. _OtsStringVerify:
  # no stack frame is used; control returns through r26
  390. .frame sp,0,r26
  391. cmpeq r19, 1, r0 # check for single-character verify, clear r0 otherwise
  # The loop below examines a string byte before it looks at the remaining
  # length, so an empty string must be rejected first; otherwise a byte outside
  # the string could be reported as a mismatch at position 0.  The test comes
  # before the empty-set test so that an empty string always yields -1.  It
  # occupies the slot of the former padding nop, so the code size is unchanged.
  392. ble r17, v_retm1 # return -1 if the string is empty: nothing can mismatch
  393. ble r19, v_ret0 # return 0 if no characters in the match set
  394. bne r0, verify_single # single character verify
  395. # outer loop - one iteration per string character; r0 = its position
  396. v_outlp:
  397. ldq_u r20, (r16) # load qw containing source byte
  398. lda r22, -1(r18) # initialize character set pointer (biased by -1)
  399. mov r19, r1 # initialize character set length counter
  400. extbl r20, r16, r20 # extract the source byte to match
  401. # core brute-force matching loop
  402. v_matlp:
  403. ldq_u r23, 1(r22) # load qw containing character set byte
  404. lda r22, 1(r22) # bump character set pointer
  405. subq r1, 1, r1 # decrement remaining cset length
  406. extbl r23, r22, r23 # extract character set byte
  407. xor r20, r23, r21 # match? (zero when the two bytes are equal)
  408. beq r21, v_match # if match, move to the next character
  409. bgt r1, v_matlp # more characters in the set to try at this position?
  410. # if we made it through the whole character set, this is a mismatch
  411. v_ret0: ret r31, (r26)
  412. v_match: # match at current position - advance to next if more positions
  413. lda r16, 1(r16) # bump source pointer
  414. addq r0, 1, r0 # increment position
  415. subq r17, 1, r17 # decrement match count
  416. bgt r17, v_outlp # if remaining positions, attempt match
  417. v_retm1:lda r0, -1(r31) # if everything verified, return -1
  418. ret r31, (r26)
  419. verify_single:
  # Reached from _OtsStringVerify when the character set is exactly one byte
  # long.  On entry r18 points at that byte; on exit r18 holds the byte itself
  # (zero-extended by extbl), which is what the character verify routine below
  # takes as its third argument.
  420. ldq_u r19, (r18) # load the quadword containing the byte (overwrites r19)
  421. extbl r19, r18, r18 # extract the byte of interest
  422. # and fall through to the character verify rtn
  423. # ots_verify_char - find the first character that differs from a given character
  424. # R16 -> string
  425. # R17 = length
  426. # R18 = character to check
  427. # result in R0: -1 if all matched, or position in range 0..length-1
  428. # destroys R16-R18, R27-R28
  429. #
  # Method: the character is replicated into all 8 bytes of r18 and XORed with
  # each string quadword; a nonzero byte in the result is a difference.  cmpbge
  # against zero gives a mask of the matching bytes, and the lowest clear bit
  # of that mask gives the position of the first difference.
  # NOTE(review): the replication uses shift/or only, so it presumes r18 has
  # no bits set above bit 7 on entry - confirm for direct callers.
  430. .globl _OtsStringVerifyChar
  431. .aent _OtsStringVerifyChar
  432. _OtsStringVerifyChar:
  # no stack frame is used; control returns through r26
  433. .frame sp,0,r26
  434. sll r18, 8, r28 # Replicate char in the quadword...
  435. beq r17, vc_fail # Quick exit if length=0
  436. ldq_u r27, (r16) # First quadword of string
  437. addq r16, r17, r0 # Point to end of string
  438. subq r17, 8, r17 # Length > 8?
  439. or r18, r28, r18 # ... (char now in bytes 0,1)
  440. sll r18, 16, r28 # ...
  441. bgt r17, vc_long # Skip if length > 8
  442. ldq_u r16, -1(r0) # Last quadword of string
  443. extql r27, r0, r27 # Position string at high end of QW
  444. or r18, r28, r18 # ... (char now in bytes 0-3)
  445. sll r18, 32, r28 # ...
  446. extqh r16, r0, r16 # Position string at high end of QW
  447. or r18, r28, r18 # Pattern fills a quadword
  448. or r27, r16, r27 # String fills a quadword
  449. xor r27, r18, r18 # Diff betw. string and pattern
  450. subq r31, r17, r17 # 8 - length
  451. extql r18, r17, r28 # Shift off bytes preceding string
  452. lda r0, -1(r31) # Prepare to return -1 for all matched
  453. cmpbge r31, r28, r27 # Set 1's where string=pattern
  454. addl r28, 0, r18 # Is first LW all zero?
  455. beq r28, vc_done # Quick exit if all matched
  456. addq r27, 1, r28 # Flip the first '0' bit
  457. beq r18, vc_geq_4 # No diffs in first longword
  458. andnot r28, r27, r28 # Make one-bit mask of first diff
  459. srl r28, 2, r0 # Map 1/2/4/8 -> 0/0/1/2
  460. and r27, 1, r27 # 1 if first character matched
  461. addq r0, r27, r0 # Bump by 1 if so
  462. ret r31, (r26) # return
  463. nop #.align 8
  464. vc_geq_4:
  465. andnot r28, r27, r28 # Make one-bit mask of first diff
  466. srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
  467. srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
  468. addq r27, 4, r0 # Bump by 4
  469. subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
  470. vc_done:ret r31, (r26)
  471. # Enter here if string length > 8.
  472. # R16 -> start of string
  473. # R17 = length - 8
  474. # R18 = fill in bytes 0,1
  475. # R27 = 1st QW of string
  476. # R28 = fill in bytes 2,3
  477. #.align 8
  478. vc_long:and r16, 7, r0 # Where in QW did we start?
  479. or r18, r28, r18 # R18 has pattern in low 4 bytes
  480. sll r18, 32, r28 # ...
  481. addq r17, r0, r17 # Remaining length after 1st QW
  482. or r18, r28, r18 # Pattern fills a QW
  483. ldq_u r28, 8(r16) # Get next QW (string B)
  484. xor r27, r18, r27 # Diff Betw. string and pattern
  485. mskqh r27, r0, r27 # Discard diffs before string
  486. subq r17, 16, r17 # More than two QW's to go?
  487. subq r17, r0, r0 # Remember start point to compute len
  488. ble r17, vc_bottom # Skip the loop if 2 QW's or less
  # main loop: two quadwords (A in r27, B in r28) are examined per iteration
  489. vc_loop:bne r27, vc_done_a # Exit if a diff in string A
  490. ldq_u r27, 16(r16) # Load string A
  491. xor r28, r18, r28 # Diff betw string B and pattern
  492. subq r17, 16, r17 # Decrement remaining length
  493. bne r28, vc_done_b # Exit if a diff in string B
  494. ldq_u r28, 24(r16) # Load string B
  495. addq r16, 16, r16 # Increment pointer
  496. xor r27, r18, r27 # Diff betw string A and pattern
  497. bgt r17, vc_loop # Repeat if more than 2 QW's left
  498. vc_bottom:
  499. bne r27, vc_done_a # Exit if a diff in string A
  500. addq r17, 8, r17 # More than 1 QW left?
  501. xor r28, r18, r27 # Diff betw string B and pattern
  502. ble r17, vc_last # Skip if this is last QW
  503. subq r17, 16, r17 # Adjust len for final return
  504. bne r27, vc_done_a # Exit if a diff in string B
  505. ldq_u r28, 16(r16) # Load string A
  506. addq r17, 8, r17 # Ensure -7 <= (r17=len-8) <= 0
  507. nop
  508. xor r28, r18, r27 # Diff betw string A and pattern
  509. vc_last:mskqh r17, r17, r28 # -1 in bytes beyond string
  510. subq r17, 16, r17 # Adjust len for final return
  511. andnot r27, r28, r27 # Nonzeros only for diffs within string
  512. bne r27, vc_done_a # Compute index if a diff found
  513. vc_fail:lda r0, -1(r31) # Else return -1
  514. ret r31, (r26)
  515. vc_done_b:
  516. addq r17, 8, r17 # Adjust length
  517. mov r28, r27 # Put difference where it's expected
  # r27 = XOR difference of the quadword holding the first diff; convert the
  # lowest non-matching byte into an offset and add it to the base index
  518. vc_done_a:
  519. cmpbge r31, r27, r28 # 1's where they match
  520. subq r0, r17, r0 # (start - remaining) = base index
  521. addl r27, 0, r16 # First longword all zero?
  522. blbc r28, vc_exit # Return R0 if first char different
  523. addq r28, 1, r27 # Flip the first '0' bit
  524. beq r16, vc_geq_4x # Skip if no diff in first 4
  525. andnot r27, r28, r28 # Make one-bit mask of first diff
  526. srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
  527. addq r0, 1, r0 # Bump by 1
  528. addq r0, r28, r0 # Add byte offset
  529. vc_exit:ret r31, (r26) # return
  530. vc_geq_4x:
  531. andnot r27, r28, r28 # Make one-bit mask of first diff
  532. srl r28, 5, r27 # Map 10/20/40/80 -> 0/1/2/4
  533. addq r0, 4, r0 # Bump by 4
  534. srl r28, 7, r28 # Map 10/20/40/80 -> 0/0/0/1
  535. addq r0, r27, r0 # Add 0/1/2/4
  536. subq r0, r28, r0 # and correct (the 4 becomes 3)
  537. ret r31, (r26)
  538. # ots_verify_mask - find the first character whose table entry shares no bit with the mask
  539. # This routine could be tailored by loading a longword or
  540. # a quadword at a time and doing table lookups on the
  541. # characters largely in parallel.
  542. #
  # r16 -> string
  # r17 = length
  # r18 -> byte table indexed by character value
  # r19 = mask
  # result in r0: -1 if every character qualifies (or length <= 0), otherwise
  # the zero-based position of the first character with (table[char] & mask) == 0
  543. .globl _OtsStringVerifyMask
  544. .aent _OtsStringVerifyMask
  545. _OtsStringVerifyMask:
  # no stack frame is used; control returns through r26
  546. .frame sp,0,r26
  547. lda r16, -1(r16) # bias initial address for better loop code
  548. nop # should be lnop (unop) or fnop to dual issue
  549. lda r0, -1(r31) # initialize position to -1
  550. ble r17, vm_ret # return -1 if source len is zero (or negative)
  551. # slow way - ~14 cycles/byte
  552. vm_loop:
  553. ldq_u r21, 1(r16) # load qw containing the byte
  554. lda r16, 1(r16) # bump pointer
  555. addq r0, 1, r0 # bump position
  556. subq r17, 1, r17 # decrement the length
  557. extbl r21, r16, r21 # extract the byte
  558. addq r21, r18, r21 # get the address of the byte's table entry
  559. ldq_u r20, (r21) # load qw from table containing lookup
  560. extbl r20, r21, r20 # extract table byte
  561. and r20, r19, r20 # check if any bits in the mask match
  562. beq r17, vm_end # if last character, handle specially
  563. bne r20, vm_loop # if match, go do the loop again
  564. vm_ret:
  565. ret r31, (r26) # if not a match, we're done (r0 = position), or early exit with r0 = -1
  566. vm_end: lda r21, -1(r31) # get -1
  567. cmovne r20, r21, r0 # -1 if last char matched
  568. ret r31, (r26)
  # Undo the .set noat / .set noreorder issued at the top of the package and
  # close the enclosing _OtsLocation procedure opened there with .ent.
  569. .set at
  570. .set reorder
  571. .end _OtsLocation