Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

161 lines
7.3 KiB

4 years ago
  #++
  # Copyright 1991, 1994, Digital Equipment Corporation
  #
  # ots_fill(char *dstptr, long dstlen, unsigned char fill)
  #
  # Fill dstlen bytes of memory at *dstptr with "fill".
  #
  # Special conventions: No stack space, r16-r19 and r27-r28 ONLY,
  # no linkage pointer required.
  # (Warning: The auto-loader potentially takes some regs across
  # the call if this is being used in a shared library environment.)
  #
  # This is a GEM support routine for filling memory with a specified value,
  # basically identical to the System V routine memset, with the last two
  # parameters reversed. It is optimized for extremely high performance
  # both for small blocks (string padding) and large blocks (memory fill).
  # To reduce overhead for small cases, they are retired as quickly as
  # possible; more case analysis is reserved for cases which will do
  # more work.
  #
  # This version of OTS_FILL provides longword granularity for Alpha.
  #
  # 011 30 Aug 1994 WBN Longword granularity version based on
  #                     OTS_FILL_ALPHA.M64 edit 010.
  #--
  26. #include "ots_defs.hs"
  # _OtsFill register contract
  #   r16  (in)  destination address
  #   r17  (in)  number of bytes to fill
  #   r18  (in)  fill byte
  #   r16-r19 and r27-r28 are overwritten; the routine returns through r26.
        .globl  _OtsFill
        .ent    _OtsFill
_OtsFill:
        .set    noat
        .set    noreorder
        .frame  sp, 0, r26
        .prologue 0

  # Replicate the fill byte across a longword while classifying the request.
        sll     r18, 8, r19             # r19 = fill byte moved up to byte 1
        beq     r17, fl_done            # len == 0: return without touching memory
        subq    r17, 4, r28             # r28 = len - 4
        or      r19, r18, r18           # r18 = fill in bytes 0-1
        sll     r18, 16, r19            # r19 = that pair moved up to bytes 2-3
        and     r16, 3, r27             # r27 = dst offset inside its longword (0-3)
        or      r19, r18, r18           # r18 = fill in bytes 0-3
        andnot  r16, 3, r16             # r16 = dst rounded down to a longword
        addq    r27, r28, r17           # r17 = offset + len - 4
        bge     r28, fl_len_ge4         # len >= 4 takes the larger-case path

  # Fewer than 4 bytes.  r17 > 0 means the fill runs on into the next longword.
        ldl     r28, (r16)              # r28 = first destination longword
        bgt     r17, fl_two_lw          # fill spans two longwords
        addq    r17, 4, r17             # r17 = end offset inside this longword
        xor     r28, r18, r28           # r28 = dest XOR fill
        mskql   r28, r27, r27           # r27 = (dest XOR fill) below the start
        mskqh   r28, r17, r28           # r28 = (dest XOR fill) from the end upward
        xor     r27, r18, r27           # r27 = dest below start, fill from start up
        xor     r28, r27, r27           # second XOR restores dest above the end
        stl     r27, (r16)
        ret     r31, (r26)

  # Fewer than 4 bytes, split across two adjacent longwords.
fl_two_lw:
        mskqh   r18, r27, r19           # r19 = fill from the start offset upward
        mskql   r28, r27, r28           # r28 = first longword bytes below the start
        ldl     r27, 4(r16)             # r27 = second destination longword
        mskql   r18, r17, r18           # r18 = fill below the end offset (r17)
        or      r28, r19, r28           # merge fill into the first longword
        stl     r28, (r16)
        mskqh   r27, r17, r27           # r27 = second longword from the end upward
        or      r27, r18, r27           # merge fill into the second longword
        stl     r27, 4(r16)
        ret     r31, (r26)

  # Length to be filled is >= 4.  On arrival:
  #   r16 = dst rounded down to a longword
  #   r17 = offset + len - 4
  #   r18 = fill in bytes 0-3
  #   r27 = dst offset inside its longword
  #   r28 = len - 4
  # Note: a ".align quad" directive is left disabled at this point.
fl_len_ge4:
        and     r16, 4, r28             # nonzero: dst is in the upper LW of its QW
        beq     r17, fl_one_lw          # r17 == 0: exactly one aligned longword
        sll     r18, 32, r19            # r19 = longword fill moved up to bytes 4-7
        bne     r28, fl_lw_first        # upper longword: store it alone first
fl_qw_first:
        subq    r17, 4, r17             # r17 = offset + len - 8
        or      r18, r19, r18           # r18 = fill in bytes 0-7
        blt     r17, fl_within_qw       # fill ends inside the first quadword
        mskqh   r18, r27, r28           # r28 = fill from the start offset upward
        beq     r27, fl_store_qw        # aligned start: nothing to merge
        ldq     r19, (r16)              # r19 = first destination quadword
        mskql   r19, r27, r19           # keep only dest bytes below the start
        or      r19, r28, r28           # merge kept dest bytes with fill
fl_store_qw:
        stq     r28, (r16)              # write the first quadword
        br      r31, fl_bulk            # go fill the remainder

fl_one_lw:
        stl     r18, (r16)              # one aligned longword of fill
        ret     r31, (r26)

  # Fill starts and ends inside a single quadword.  r17 is negative here; the
  # byte-mask instructions use only its low three bits, which are the end offset.
fl_within_qw:
        ldq     r28, (r16)              # r28 = destination quadword
        xor     r28, r18, r28           # r28 = dest XOR fill
        mskql   r28, r27, r27           # r27 = (dest XOR fill) below the start
        mskqh   r28, r17, r28           # r28 = (dest XOR fill) from the end upward
        xor     r27, r18, r27           # r27 = dest below start, fill from start up
        xor     r28, r27, r27           # second XOR restores dest above the end
        stq     r27, (r16)
        ret     r31, (r26)

  # dst lies in the upper longword of a quadword: store that longword first.
fl_lw_first:
        mskqh   r18, r27, r28           # r28 = longword fill from the start upward
        or      r18, r19, r18           # r18 = fill in bytes 0-7
        beq     r27, fl_store_lw        # aligned start: nothing to merge
        ldl     r19, (r16)              # r19 = first destination longword
        mskql   r19, r27, r19           # keep only dest bytes below the start
        or      r19, r28, r28           # merge kept dest bytes with fill
fl_store_lw:
        stl     r28, (r16)              # write the first longword

  # r17 = bytes still to fill after the unit just stored.  Every later store
  # addresses 8(r16); stq_u/ldq_u ignore the low three address bits, so r16 may
  # still be only longword aligned.
fl_bulk:
        subq    r17, 32, r17            # r17 = remaining - 32
        and     r17, 24, r27            # r27 = 8 * (leftover whole quadwords, 0-3)
        bge     r17, fl_loop32          # 32 or more bytes left: unrolled loop
fl_tail_qws:
        and     r17, 7, r17             # r17 = bytes past the last whole quadword
        beq     r27, fl_tail_bytes      # no whole quadwords left
        stq_u   r18, 8(r16)             # leftover quadword 1
        subq    r27, 16, r27            # map 8/16/24 to -8/0/8
        addq    r16, 8, r16             # advance dst
        blt     r27, fl_tail_bytes      # that was the only one
  # stall
        stq_u   r18, 8(r16)             # leftover quadword 2
        addq    r16, 8, r16             # advance dst
        nop
        beq     r27, fl_tail_bytes      # there were exactly two
        stq_u   r18, 8(r16)             # leftover quadword 3
        addq    r16, 8, r16             # advance dst
fl_tail_bytes:
        beq     r17, fl_done            # no trailing bytes: finished
        ldq_u   r27, 8(r16)             # r27 = last destination quadword
        subq    r17, 4, r28             # r28 > 0 when more than a longword remains
        andnot  r16, 7, r16             # quadword-align r16 for the stl/stq below
        mskql   r18, r17, r18           # r18 = fill below the end offset
  # stall
        mskqh   r27, r17, r27           # r27 = dest from the end offset upward
  # stall
        or      r27, r18, r27           # merge fill with the preserved dest bytes
        bgt     r28, fl_tail_qw         # more than 4 bytes: needs a quadword store
        stl     r27, 8(r16)             # 4 bytes or fewer: longword store only
fl_done:
        ret     r31, (r26)
fl_tail_qw:
        stq     r27, 8(r16)             # quadword store for the last piece
        ret     r31, (r26)

  # Main loop: four quadword stores (32 bytes) per iteration.
fl_loop32:
        stq_u   r18, 8(r16)
        stq_u   r18, 16(r16)
        stq_u   r18, 24(r16)
        subq    r17, 32, r17            # 32 fewer bytes remaining
        stq_u   r18, 32(r16)
        addq    r16, 32, r16            # advance dst
        bge     r17, fl_loop32          # loop while 32 or more bytes remain
        br      r31, fl_tail_qws        # r27 from fl_bulk is still valid here
        .set    at
        .set    reorder
        .end    _OtsFill