You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
216 lines
4.4 KiB
216 lines
4.4 KiB
// memset.s: function to set a number of bytes to a char value - McKinley version
|
|
|
|
// Copyright (C) 2002 Intel Corporation.
|
|
//
|
|
// The information and source code contained herein is the exclusive property
|
|
// of Intel Corporation and may not be disclosed, examined, or
|
|
// reproduced in whole or in part without explicit written authorization from
|
|
// the Company.
|
|
|
|
// Author: Vadim Paretsky
|
|
// Date: February, 2002
|
|
//
|
|
.section .text
|
|
.proc memset#
|
|
.global memset#
|
|
.align 64
|
|
|
|
// memset entry: sets r34 bytes starting at address r32 to the low byte of r33.
// Registers as used by the code in this file:
//   r32 = current destination address, r33 = fill value, r34 = bytes left
//   r25 = fill byte replicated into all 8 bytes (source of the st8/st4/st2 stores)
//   r16 = r32 + r34, one past the last byte to write (used for the tail stores)
//   r21 = r32 & 7, byte offset of the destination within an 8-byte word
//   r8  = copy of the incoming r32; never written again in this file
//         (presumably the return value per the Itanium software conventions)
// Returns immediately when the count is <= 0 (the compare is signed).
memset:
|
|
{ .mii
|
|
and r21 = 7, r32 // r21 = destination & 7
|
|
mux1 r25 = r33, @brcst // replicate the low byte of r33 across all 8 bytes of r25
|
|
add r16 = r32, r34 // r16 = one past the last byte to be written
|
|
} { .mmb
|
|
cmp.ge p9 = 0, r34 // p9 = (0 >= count), signed
|
|
mov r8 = r32 // keep the original destination in r8
|
|
(p9) br.ret.spnt b0 // nothing to store
|
|
;;
|
|
}
|
|
// Load the count thresholds compared against r34 in Is_Aligned/Aligned_Long,
// then divert to Align_Loop if the destination is not on an 8-byte boundary.
|
|
{ .mmi
|
|
mov r27 = 0x88 // 0x80 + 8: count that covers the 8 bytes at dest+0x80
|
|
mov r28 = 0x108 // 0x100 + 8: count that covers the 8 bytes at dest+0x100
|
|
mov r29 = 0x188 // 0x180 + 8: count that covers the 8 bytes at dest+0x180
|
|
} { .mmb
|
|
nop.m 0
|
|
cmp.ne p15 = 0, r21 // p15 = low 3 address bits are non-zero (not 8-byte aligned)
|
|
(p15) br.cond.dpnt Align_Loop // unaligned: store single bytes first
|
|
;;
|
|
}
|
|
|
|
// Is_Aligned: r32 is 8-byte aligned here. Reached by fall-through from the
// entry code when r32 & 7 == 0, or by branch from Align_Loop once its offset
// counter r21 has reached 8.
// Sets up the second store pointer and the first look-ahead address, then
// sends counts below 0x30 straight to Aligned_Short.
Is_Aligned:
|
|
{ .mmi
|
|
add r14 = 0x80, r32 // r14 = dest + 0x80, look-ahead store address used by Aligned_Long
|
|
mov r30 = 0x208 // 0x200 + 8: threshold for the look-ahead store through r26
|
|
add r31 = 8, r32 // r31 = second store pointer, 8 bytes past r32
|
|
} { .mmb
|
|
cmp.ge p7 = r34, r27 // p7 = count >= 0x88 (r27 is loaded at function entry)
|
|
cmp.gt p10 = 0x30, r34 // p10 = count < 0x30
|
|
(p10) br.cond.dpnt Aligned_Short // too short for the unrolled loop
|
|
;;
|
|
}
|
|
|
|
// >= 0x30 (48) bytes goes through a loop
|
|
// Aligned_Long: prologue for Long_loop, reached by fall-through from
// Is_Aligned when at least 0x30 bytes are left.
// Stores the first 8 bytes and, where the count reaches far enough, the first
// 8 bytes at dest+0x80, dest+0x100 and dest+0x180. The first bundle of
// Long_loop advances r32 by 0x10 without storing through it, so these stores
// (together with the [r26] store inside the loop) are what fill the first
// 8 bytes of each 0x80-byte chunk.
// NOTE(review): storing ahead presumably also touches upcoming cache lines
// early - confirm against the McKinley tuning notes.
Aligned_Long:
|
|
{ .mmi
|
|
st8 [r32] = r25 // bytes 0..7; r32 is not advanced here
|
|
(p7) st8 [r14] = r25,0x80 // count >= 0x88: fill dest+0x80..+0x87, then r14 += 0x80
|
|
mov r20 = r34 // r20 = bytes left at the top of the first loop iteration
|
|
} { .mmi
|
|
add r26 = 0x180, r32 // r26 = dest + 0x180
|
|
cmp.ge p8 = r34, r28 // p8 = count >= 0x108 (r28 is loaded at function entry)
|
|
cmp.ge p9 = r34, r29 // p9 = count >= 0x188 (r29 is loaded at function entry)
|
|
;;
|
|
} { .mmi
|
|
(p8) st8 [r14] = r25 // fill dest+0x100..+0x107 (r14 was advanced by the p7 store)
|
|
(p9) st8 [r26] = r25, 0x80 // fill dest+0x180..+0x187, then r26 = dest + 0x200
|
|
cmp.ge p10 = r34, r30 // p10 = count >= 0x208, consumed by the first bundle of Long_loop
|
|
;;
|
|
}
|
|
|
|
.align 64
|
|
// Long_loop: unrolled store loop, up to 0x80 bytes per iteration in eight
// 16-byte steps. Each step stores 8 bytes through r32 and 8 bytes through
// r31 (= r32 + 8), both post-incremented by 0x10.
//   r20 = bytes left at the top of this iteration (constant within it)
//   r34 = running bytes-left count, reduced by 0x10 in every step
//   r21 = r20 - 0x80, bytes left after a full iteration
//   r26 = look-ahead store address, advanced by 0x80 per executed store
// In every step a cmp.le sets one predicate that enables the NEXT step's
// stores and p12 as its complement; p12 leaves for Aligned_Short as soon as
// r20 does not cover another 16 bytes.
Long_loop:
|
|
{ .mmi
|
|
(p10) st8 [r26] = r25, 0x80 // >= 0x208 bytes left: fill the first 8 bytes of the chunk 0x200 ahead
|
|
st8 [r31] = r25, 0x10 // step 1: bytes 8..15 (bytes 0..7 were stored before this iteration)
|
|
cmp.le p15,p12 = 0x20, r20 // p15 = (r20 >= 0x20), p12 = (r20 < 0x20)
|
|
} { .mmb
|
|
add r32 = 0x10, r32 // skip the already-filled first 8 bytes plus the 8 just stored
|
|
add r34 = -0x10, r34 // account for step 1
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p15) st8 [r32] = r25, 0x10 // step 2: bytes 0x10..0x1f
|
|
(p15) st8 [r31] = r25, 0x10
|
|
cmp.le p14,p12 = 0x30, r20 // p14 = (r20 >= 0x30), p12 = (r20 < 0x30)
|
|
} { .mmb
|
|
nop.m 0
|
|
add r34 = -0x10, r34 // account for step 2
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p14) st8 [r32] = r25, 0x10 // step 3: bytes 0x20..0x2f
|
|
(p14) st8 [r31] = r25, 0x10
|
|
cmp.le p15,p12 = 0x40, r20 // p15 = (r20 >= 0x40), p12 = (r20 < 0x40)
|
|
} { .mmb
|
|
nop.m 0
|
|
add r34 = -0x10, r34 // account for step 3
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p15) st8 [r32] = r25, 0x10 // step 4: bytes 0x30..0x3f
|
|
(p15) st8 [r31] = r25, 0x10
|
|
cmp.le p14,p12 = 0x50, r20 // p14 = (r20 >= 0x50), p12 = (r20 < 0x50)
|
|
} { .mmb
|
|
nop.m 0
|
|
add r34 = -0x10, r34 // account for step 4
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p14) st8 [r32] = r25, 0x10 // step 5: bytes 0x40..0x4f
|
|
(p14) st8 [r31] = r25, 0x10
|
|
cmp.le p15,p12 = 0x60, r20 // p15 = (r20 >= 0x60), p12 = (r20 < 0x60)
|
|
} { .mmb
|
|
nop.m 0
|
|
add r34 = -0x10, r34 // account for step 5
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p15) st8 [r32] = r25, 0x10 // step 6: bytes 0x50..0x5f
|
|
(p15) st8 [r31] = r25, 0x10
|
|
cmp.le p14,p12 = 0x70, r20 // p14 = (r20 >= 0x70), p12 = (r20 < 0x70)
|
|
} { .mmb
|
|
add r21 = -0x80, r20 // r21 = bytes left once this whole iteration is done
|
|
add r34 = -0x10, r34 // account for step 6
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p14) st8 [r32] = r25, 0x10 // step 7: bytes 0x60..0x6f
|
|
(p14) st8 [r31] = r25, 0x10
|
|
cmp.le p15,p12 = 0x80, r20 // p15 = (r20 >= 0x80), p12 = (r20 < 0x80)
|
|
} { .mmb
|
|
cmp.ge p10 = r21, r30 // p10 = next iteration starts with >= 0x208 bytes: enables its [r26] store
|
|
add r34 = -0x10, r34 // account for step 7
|
|
(p12) br.cond.dpnt Aligned_Short
|
|
;;
|
|
} { .mmi
|
|
(p15) st8 [r32] = r25, 0x10 // step 8: bytes 0x70..0x7f
|
|
(p15) st8 [r31] = r25, 0x10
|
|
add r34 = -0x10, r34 // account for step 8; r34 now equals r21
|
|
} { .mmb
|
|
mov r20 = r21 // starting count for the next iteration
|
|
cmp.le p13 = 0x30, r21 // p13 = at least 0x30 bytes still left
|
|
(p13) br.sptk.many Long_loop // otherwise fall through to Aligned_Short, which follows the loop
|
|
;;
|
|
|
|
|
|
}
|
|
|
|
//
|
|
// Do partial word stores
|
|
//
|
|
// Aligned_Short: write the last r34 bytes. Expects r32 8-byte aligned,
// r31 = r32 + 8 and r16 = one past the last byte to write. Every branch to
// this label in this file arrives with r34 < 0x30.
// Order of work: return if nothing is left; up to two 16-byte pairs
// (r34 >= 0x10, r34 >= 0x20); one more 8-byte store if bit 3 of r34 is set;
// then 4-, 2- and 1-byte stores selected by bits 2, 1 and 0 of r34, addressed
// backwards from r16 so that the 1-byte store is last in memory, the 2-byte
// store sits before it and the 4-byte store before that.
Aligned_Short:
|
|
{ .mmi
|
|
and r27 = 2, r34 // r27 = bit 1 of the count (non-zero if a 2-byte store is needed)
|
|
nop.m 0
|
|
tbit.nz p6 = r34, 0 // p6 = bit 0 of the count set: a 1-byte store is needed
|
|
} { .mmb
|
|
cmp.le p11 = 0x10, r34 // p11 = at least 16 bytes left
|
|
cmp.eq p10 = 0, r34 // p10 = nothing left
|
|
(p10) br.ret.dpnt b0
|
|
;;
|
|
} { .mmi
|
|
(p11) st8 [r32] = r25, 0x10 // first 16-byte pair
|
|
(p11) st8 [r31] = r25, 0x10
|
|
cmp.le p12 = 0x20, r34 // p12 = at least 32 bytes left
|
|
} { .mmi
|
|
add r17 = -2, r16 // r17 = provisional address of the 2-byte store
|
|
add r18 = -4, r16 // r18 = provisional address of the 4-byte store
|
|
tbit.nz p9 = r34, 3 // p9 = bit 3 of the count set: one more 8-byte store (odd number of st8s)
|
|
;;
|
|
} { .mmi
|
|
(p12) st8 [r32] = r25, 0x10 // second 16-byte pair
|
|
(p12) st8 [r31] = r25, 0x10
|
|
nop.i 0
|
|
} { .mmi
|
|
(p6) add r18 = -1, r18 // a trailing single byte shifts the 4-byte store down by 1
|
|
(p6) add r16 = -1, r16 // r16 = address of the final byte
|
|
cmp.ne p7 = 0, r27 // p7 = bit 1 of the count set: a 2-byte store is needed
|
|
;;
|
|
} { .mmi
|
|
(p9) st8 [r32] = r25 // remaining full 8-byte word
|
|
(p6) st1 [r16] = r25 // final single byte
|
|
tbit.nz p8 = r34, 2 // p8 = bit 2 of the count set: a 4-byte store is needed
|
|
} { .mmi
|
|
(p7) add r18 = -2, r18 // a 2-byte store shifts the 4-byte store down by 2
|
|
(p6) add r17 = -1, r17 // a trailing single byte shifts the 2-byte store down by 1
|
|
nop.i 0
|
|
;;
|
|
} { .mmb
|
|
(p8) st4 [r18] = r25 // 4-byte piece of the tail
|
|
(p7) st2 [r17] = r25 // 2-byte piece of the tail
|
|
br.ret.sptk.many b0
|
|
;;
|
|
}
|
|
|
|
.align 64
|
|
// Align the input pointer to an 8-byte boundary
|
|
// Align_Loop: entered from the function entry when r32 & 7 != 0.
// Stores one byte per pass while counting r21 (initially r32 & 7) up to 8,
// then continues at Is_Aligned. Returns early if the count reaches zero
// before the boundary is reached.
Align_Loop:
|
|
{ .mmi
|
|
st1 [r32] = r33,1 // store the fill byte, then r32 += 1
|
|
add r21 = 1, r21 // advance the offset within the 8-byte word
|
|
add r34 = -1, r34 // one byte fewer to go
|
|
;;
|
|
} { .mmi
|
|
cmp.gt p10 = 8, r21 // p10 = (r21 < 8): boundary not reached yet
|
|
cmp.eq p15 = 0, r34 // p15 = no bytes left
|
|
nop.i 0
|
|
} { .bbb
|
|
|
|
|
|
(p15) br.ret.dpnt b0 // count exhausted: done
|
|
(p10) br.cond.sptk Align_Loop // still unaligned: store another byte
|
|
br.cond.sptk Is_Aligned // r32 is now 8-byte aligned
|
|
;;
|
|
}
|
|
|
|
.endp memset#
|
|
// End
|