Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

137 lines
5.9 KiB

4 years ago
  1. #++
  2. # Copyright 1991, 1994, Digital Equipment Corporation
  3. #
  4. # ots_zero(char *dstptr, long dstlen)
  5. #
  6. # Zero dstlen bytes of memory at *dstptr
  7. #
  8. # Special conventions: No stack space, r16-r17 and r27-r28 ONLY,
  9. # no linkage pointer required.
  10. # (Warning: The auto-loader potentially takes some regs across
  11. # the call if this is being used in a shared lib. environment.)
  12. #
  13. # This is a GEM support routine for zeroing a region of memory. It is
  14. # basically identical to BSD's bzero, though it has limited register
  15. # conventions to allow it to work better with compiled code. (Note that
  16. # this is just a stripped down version of ots_fill.)
  17. #
  18. # This is optimized for extremely high performance both for small and
  19. # large blocks. In order to reduce overhead for small cases, they are
  20. # retired as quickly as possible; more case analysis is reserved
  21. # for cases which will do more.
  22. #
  23. # This version of OTS_ZERO provides longword granularity for Alpha.
  24. #
  25. # 012 30 Aug 1994 WBN Longword granularity version based on
  26. # OTS_ZERO_ALPHA.M64 edit 011.
  27. #--
  28. #include "ots_defs.hs"
  29. # r16 = dst (in: destination pointer, any byte alignment)
  30. # r17 = len (in: number of bytes to zero)
  31. # destroys r16-r17, r27-r28
       #
       # Path overview (LW = 4-byte longword, QW = 8-byte quadword):
       #   len == 0           -> done    return without touching memory
       #   len <  4           -> read-modify-write of one LW, or of two LWs
       #                         (double) when the span crosses a LW boundary
       #   len == 4, aligned  -> simple  one stl of zero
       #   len >= 4 otherwise -> quad (dst in lower LW of its QW) or
       #                         longs (dst in upper LW of its QW) writes the
       #                         leading piece; join/unroll/short store whole
       #                         QWs; last writes the 1-7 trailing bytes.
       #                         shortq covers a span ending in the first QW.
       # Partial pieces are merged with the existing memory contents using
       # mskql/mskqh; a store never covers a LW that the span does not reach.
       #
  32. .globl _OtsZero
  33. .ent _OtsZero
  34. _OtsZero:
  35. .set noat # r28 is used directly as a scratch register below
  36. .set noreorder # Keep the instruction order exactly as written
  37. .frame sp,0,r26 # Zero-size frame; return address is in r26
  38. .prologue 0
  39. beq r17, done # No memory refs if len=0
  40. subq r17, 4, r28 # r28 = length-4 (negative when length < 4)
  41. and r16, 3, r27 # r27 = dst alignment within LW (0-3)
  42. andnot r16, 3, r16 # r16 = LW aligned dst pointer
  43. addq r27, r28, r17 # r17 = alignment + length - 4
  44. bge r28, geq4 # Lengths >= 4 may not need load
       # Length is below 4: zeros must be merged into existing dst data.
  45. ldl r28, (r16) # Load first LW of dst
  46. bgt r17, double # Skip if it crosses to next LW
       # Span lies inside one LW: keep the bytes below the start point
       # and the bytes at or above the end point, zero what is between.
  47. addq r17, 4, r17 # Find endpoint within LW (alignment + length)
  48. mskql r28, r27, r27 # Clear from startpoint thru 7
  49. mskqh r28, r17, r28 # Clear from 0 to endpoint
  50. or r28, r27, r27 # Combine dest parts
  51. stl r27, (r16) # Only the low LW of r27 is written
  52. ret r31, (r26)
       # Span crosses into the next LW. Here r17 = alignment + length - 4,
       # which is the end point measured from the start of the second LW.
  53. double: mskql r28, r27, r28 # Clear from startpoint in first LW
  54. ldl r27, 4(r16) # Load second LW of dst
  55. stl r28, (r16) # Write back first LW with its tail zeroed
  56. mskqh r27, r17, r27 # Clear up to endpoint in second LW
  57. stl r27, 4(r16) # Write back second LW with its head zeroed
  58. ret r31, (r26)
  59. # Come here if length to be zeroed is >= 4.
  60. # r16-> dst aligned to LW
  61. # r17 = alignment + length - 4
  62. # r27 = dst alignment within LW
  63. # r28 = length-4
  64. #.align quad
  65. geq4: and r16, 4, r28 # Which LW in QW to store first?
  66. beq r17, simple # alignment 0 and length 4: single aligned LW
  67. bne r28, longs # dst is in the upper LW of its QW: LW first, then QW stores
       # dst is in the lower LW of its QW, so r16 is QW aligned here.
  68. quad: subq r17, 4, r17 # r17 = alignment + length - 8; does dest end in first QW?
  69. blt r17, shortq # Ends within first QW
  70. beq r27, wh_qw # Store a whole QW (r27 is 0, so it also serves as the zero data)
  71. ldq r28, (r16) # Load first QW of dest
  72. mskql r28, r27, r27 # Clear from startpoint
  73. wh_qw: stq r27, (r16) # Store first QW of dest
  74. br r31, join # Go clear rest of string
  75. simple: stl r31, (r16) # Single aligned LW
  76. ret r31, (r26)
       # Span starts and ends inside the same QW. r17 is negative here; only
       # its low three bits (the end offset within the QW) are used by mskqh.
  77. shortq: ldq r28, (r16) # Load QW of dest
  78. mskql r28, r27, r27 # Clear from startpoint thru 7
  79. mskqh r28, r17, r28 # Clear from 0 up to endpoint
  80. or r28, r27, r27 # Merge
  81. stq r27, (r16) # Store
  82. ret r31, (r26)
       # dst is in the upper LW of its QW (r16 has bit 2 set): write that
       # LW alone, then fall into join with r17 = alignment + length - 4.
  83. longs: beq r27, wh_lw # Store a whole LW (r27 is 0, so it also serves as the zero data)
  84. ldl r28, (r16) # Load first LW of dest
  85. mskql r28, r27, r27 # Clear from startpoint
  86. wh_lw: stl r27, (r16) # Store first LW of dest
       # On entry to join, r17 = bytes still to clear after the leading
       # piece, and the next byte to clear is at the start of the QW that
       # contains 8(r16). r16 may still have bit 2 set (longs path); the
       # stq_u/ldq_u accesses below ignore the low three address bits.
  87. join: subq r17, 32, r17 # At least 4 more quadwords?
  88. and r17, 24, r27 # How many after multiple of 4?
  89. bge r17, unroll # Taken branch for long strings
       # Fewer than 4 whole QWs remain: r27 = 8 * (whole QWs left, 0-3).
       # r17 may be negative here, but only its low bits are examined.
  90. short: and r17, 7, r17 # How many odd bytes?
  91. beq r27, last # Skip if no more whole QWs
  92. stq_u r31, 8(r16) # Clear one...
  93. subq r27, 16, r27 # Map 8/16/24 to -8/0/8
  94. addq r16, 8, r16 # Update dest pointer
  95. blt r27, last # Skip if no more whole QWs
  96. #stall
  97. stq_u r31, 8(r16) # Clear two...
  98. addq r16, 8, r16 # Update dest pointer
  99. nop
  100. beq r27, last # Skip if no more whole QWs
  101. stq_u r31, 8(r16) # Clear three...
  102. addq r16, 8, r16 # Update dest pointer
       # Trailing piece: r17 = 0-7 bytes left at the start of the next QW.
       # Use a LW store when 4 or fewer bytes remain so that the upper LW
       # of that QW is not written.
  103. last: beq r17, done # Finished if no odd bytes
  104. ldq_u r27, 8(r16) # Load last QW of dst
  105. subq r17, 4, r28 # More than a LW left?
  106. andnot r16, 7, r16 # Clean pointer for STL (drop bit 2 left by the longs path)
  107. mskqh r27, r17, r27 # Clear up to endpoint
  108. bgt r28, lastq # Go store a QW
  109. stl r27, 8(r16) # LW store for last piece
  110. done: ret r31, (r26)
  111. lastq: stq r27, 8(r16) # QW store for last piece
  112. ret r31, (r26)
       # Main loop: 32 bytes per pass. r17 holds (bytes remaining - 32), so
       # the loop repeats while at least 4 more whole QWs remain. r27 is
       # not modified here, so its value from join is still valid at short.
  113. unroll: stq_u r31, 8(r16) # Store 4 QWs per iteration
  114. stq_u r31, 16(r16)
  115. stq_u r31, 24(r16)
  116. subq r17, 32, r17 # Decrement remaining count
  117. stq_u r31, 32(r16)
  118. addq r16, 32, r16 # Update dest pointer
  119. bge r17, unroll # Repeat until done
  120. br r31, short # Then handle leftovers
  121. .set at
  122. .set reorder
  123. .end _OtsZero