Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

139 lines
3.4 KiB

  1. // strcpy.s: function to copy the contents of one string to another
  2. // Copyright (c) 2000, Intel Corporation
  3. // All rights reserved.
  4. //
  5. // WARRANTY DISCLAIMER
  6. //
  7. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  8. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  9. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  10. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
  11. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  12. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  13. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  14. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  15. // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
  16. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  17. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  18. //
  19. // Intel Corporation is the author of this code, and requests that all
  20. // problem reports or change requests be submitted to it directly at
  21. // http://developer.intel.com/opensource.
  22. //
  23. .file "strcpy.s"
  24. .section .text
  25. // -- Begin strcpy
      //
      // strcpy: copies bytes from the address in r33 to the address in r32,
      // up to and including the first zero byte. (IA-64 assembly.)
      //
      // Registers as used below:
      //   r32, r33  the two inputs declared by alloc. Both are overwritten
      //             once copying starts (r32 is a load target in both loops,
      //             r33 receives the rotated word in the word loop).
      //   r8        copy of the incoming r32, not written again before the
      //             return. Presumably the return value, confirm against
      //             the calling convention.
      //   r9        load (source) pointer, starts as r33.
      //   r14       store (destination) pointer, starts as r32. Before that
      //             it briefly holds ar.pfs and the alignment test value.
      //   r11       saved copy of pr, written back before each br.ret.
      //   r15       constant 0xffffffff00000000, comparand for pcmp1.eq.
      //   r16       zero-byte mask inside the word loop, zero-byte index in
      //             the tail.
      //   r35, r36  tail scratch: the last word shifted right by 8 and 16.
      //
      // Paths:
      //   .b1_4 / .bw1    word-at-a-time loop, taken when the low two bits of
      //                   both pointers are zero, followed by a tail that
      //                   stores the 0 to 3 leftover bytes and the zero byte.
      //   .b_notaligned   byte-at-a-time loop for every other case.
      //   .natfault1_0    recovery target of chk.s, reloads the word with a
      //                   non-speculative ld4 and rejoins the loop at .bw1.
      //
      // Note: the "*s1" / "*s2" comments in the loop name the load side s1
      // and the store side s2.
      //
  26. .proc strcpy#
  27. .global strcpy#
  28. .align 32
  29. strcpy:
  30. { .mib
  31. alloc r14=ar.pfs,2,6,0,8 // 2 inputs, 6 locals, 0 outputs, 8 rotating registers
  32. mov r11=pr // Save predicate register file (written back before each br.ret)
  33. brp.loop.imp .b1_4, .bw1;; // Put loop backedge target in TAR
  34. } { .mib
  35. // Setup for doing software pipelined loops
  36. or r14=r32,r33 // Merge both pointers so one test covers the low bits of each
  37. mov pr.rot=0x30000 // p16=p17=1
  38. nop.b 0 ;;
  39. } { .mfi
  40. mov r8=r32 // r8 = incoming first argument, left untouched from here on
  41. nop.f 0
  42. and r14=3,r14 // Keep the low two bits of (r32 | r33)
  43. } { .mii
  44. mov r9=r33 // r9 = load pointer
  45. mov ar.ec=0 ;; // Epilog count = 0
  46. cmp4.ne p10,p0=r14,r0 // p10 = at least one pointer is not a multiple of 4
  47. } { .mib
  48. mov r14=r32 // r14 = store pointer
  49. dep r15=1,r0,32,32 // rb = 0xffffffff00000000
  50. (p10) br.spnt .b_notaligned ;; // Not word aligned: copy byte by byte instead
  51. }
      // Word loop. pr.rot above sets only p16 and p17, so the p18
      // instructions are skipped on the first pass and the p19 store on the
      // first two passes, until a loaded word has rotated into r33 / r34.
      // The load runs one word ahead of the zero-byte test and is
      // speculative (ld4.s). The word loaded on the final pass is never
      // checked or stored.
  52. .b1_4:
  53. { .mii
  54. ld4.s r32=[r9],4 // *s1: speculative 4-byte load, r9 += 4. Seen as r33, then r34, on the next two passes
  55. (p18) chk.s r33,.natfault1_0 // Word from the previous pass: branch to redo the load if it was deferred
  56. (p18) pcmp1.eq r16=r33,r15 ;; // r16 !=0 only if a zero byte is found
  57. }
  58. .bw1:
  59. { .mib
  60. (p19) st4 [r14]=r34,4 // *s2=*s1: store the word loaded two passes ago, r14 += 4
  61. (p18) cmp4.eq p17,p0=r16,r0 // p17 = no zero byte in r33, so keep looping
  62. (p17) br.wtop.dptk .b1_4 ;; // Loop back while p17 is set
  63. }
      // Loop exit: r33 holds the word that contains the zero byte, and any
      // earlier word has already been stored by the st4 above. Store the
      // bytes that precede the zero byte one at a time, then the zero byte.
  64. { .mfi
  65. nop.m 0
  66. nop.f 0
  67. czx1.r r16 = r33 // r16 = index of the first zero byte, counted from the low byte
  68. } ;;
  69. { .mfi
  70. cmp.leu p2, p0 = 2, r16 // p2 = index is 2 or more, so byte 1 must be copied
  71. nop.f 0
  72. shr.u r35 = r33, 8 // Low byte of r35 = byte 1 of the word
  73. }
  74. { .mfi
  75. cmp.eq p4, p0 = 3, r16 // p4 = index is 3, so byte 2 must be copied
  76. nop.f 0
  77. cmp.ne p5, p0 = r0, r16 // p5 = index is not 0, so byte 0 must be copied
  78. } ;;
  79. { .mfi
  80. (p5)st1 [r14] = r33, 1 // Store byte 0, r14 += 1
  81. nop.f 0
  82. shr.u r36 = r33, 16 // Low byte of r36 = byte 2 of the word
  83. };;
  84. { .mfi
  85. (p2)st1 [r14] = r35,1 // Store byte 1, r14 += 1
  86. nop.f 0
  87. nop.i 0
  88. } ;;
  89. { .mfi
  90. (p4)st1 [r14] = r36,1 // Store byte 2, r14 += 1
  91. nop.f 0
  92. nop.i 0
  93. };;
  94. { .mib
  95. (p0) st1 [r14] = r0 // Always store the terminating zero byte
  96. nop.i 0
  97. clrrrb // Clear the register rename bases left over from the rotating loop
  98. } ;;
  99. .b1_2:
  100. { .mib
  101. nop.m 0
  102. mov pr=r11,0x1003e // Write predicates back from r11. Mask bits 1-5 select p1-p5, bit 16 selects the rotating predicates
  103. br.ret.sptk.many b0 ;; // Return through b0
  104. }
      // Byte loop for pointers that are not both multiples of 4. The store
      // comes before the test, so the zero byte itself is copied before the
      // loop ends.
  105. .b_notaligned:
  106. { .mmi
  107. ld1 r32=[r9],1 ;; // 2 cycle load causes 1 cycle stall
  108. st1 [r14]=r32,1 // 3 cycles between st1 to avoid flush
  109. cmp4.ne.unc p7,p0=r32,r0 ;; // Extra stop bit to force 3 cycles
  110. } { .mib
  111. nop.m 0
  112. nop.i 0
  113. (p7) br.cond.dptk .b_notaligned ;; // p7 = byte just copied was nonzero, so continue
  114. } { .mib
  115. nop.m 0
  116. mov pr=r11,0x1003e // Same predicate write-back as on the aligned path
  117. br.ret.sptk.many b0 ;; // Return through b0
  118. }
      // Recovery code reached from the chk.s in the word loop.
  119. .natfault1_0:
  120. { .mmi
  121. add r33=-8,r9 ;; // r9 is 8 past this word: its own post-increment plus the ld4.s of the current pass
  122. ld4 r33=[r33] // Redo the load
  123. nop.i 0 ;;
  124. } { .mib
  125. nop.m 0
  126. (p18) pcmp1.eq r16=r33,r15 // r16 !=0 only if a zero byte is found
  127. br.sptk .bw1 ;; // Rejoin the loop at the store / test bundle
  128. }
  129. _2_1_2auto_size == 0x0
  130. // -- End strcpy
  131. .endp strcpy#
  132. // mark_proc_addr_taken strcpy;
  133. // End