|
|
;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
;; Copyright assigned to the Crypto++ project.
;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool chains.
;; Additionally, the inline assembly code produced by GCC and Clang is not that
;; impressive. However, using this code requires NASM and an edit to the GNUmakefile.
;; nasm -f elf32 rdrand.S -DX86 -g -o rdrand-x86.o
;; nasm -f elfx32 rdrand.S -DX32 -g -o rdrand-x32.o
;; nasm -f elf64 rdrand.S -DX64 -g -o rdrand-x64.o
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Naming convention used in rdrand.{h|cpp|asm|S}
;; MSC = Microsoft Compiler (and compatibles)
;; GCC = GNU Compiler (and compatibles)
;; ALL = MSC and GCC (and compatibles)
;; RRA = RDRAND, Assembly
;; RSA = RDSEED, Assembly
;; RRI = RDRAND, Intrinsic
;; RSI = RDSEED, Intrinsic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; C/C++ Function prototypes
;; X86, X32 and X64:
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety);
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Return values
;; Status codes handed back in EAX/RAX by the generator routines.
;; One definition per line; RDRAND and RDSEED use the same values.
%define RDRAND_SUCCESS  1
%define RDRAND_FAILURE  0

%define RDSEED_SUCCESS  1
%define RDSEED_FAILURE  0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X86 or X32    ;; Set via the command line

;;---------------------------------------------------------------------
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes at `ptr` with output from RDRAND.
;;
;;   Arg1, byte* buffer
;;   Arg2, size_t bsize
;;   Arg3, unsigned int safety  - retry budget for the whole call; each
;;                                RDRAND that reports CF=0 consumes one
;;   EAX (out): success (1), failure (0)
;;
;; X86: arguments are read from the stack. EDI and ESI are callee-saved
;;      in the i386 calling convention, so they are pushed on entry and
;;      popped on every exit path.
;; X32: arguments are taken from EDI, ESI and EDX as they arrive.
;;
;; A null buffer succeeds only when bsize is also zero.
;;---------------------------------------------------------------------

global      NASM_RRA_GenerateBlock
section     .text

%ifdef X86
align       8
cpu         486
%else
align       16
%endif

NASM_RRA_GenerateBlock:

%ifdef X86
    ;; Offsets are relative to ESP *after* the two pushes in
    ;; .Load_Arguments: [esp] = saved ESI, [esp+4] = saved EDI,
    ;; [esp+8] = return address, [esp+0Ch] = first argument.
    %define arg1    [esp+0Ch]
    %define arg2    [esp+10h]
    %define arg3    [esp+14h]
    %define MWSIZE  04h         ;; machine word size
%else
    %define MWSIZE  08h         ;; machine word size
%endif

    %define buffer  edi
    %define bsize   esi
    %define safety  edx

%ifdef X86
.Load_Arguments:

    push    edi                 ;; callee-saved, used as `buffer`
    push    esi                 ;; callee-saved, used as `bsize`

    mov     buffer, arg1
    mov     bsize,  arg2
    mov     safety, arg3
%endif

.Validate_Pointer:

    test    buffer, buffer
    jz      .GenerateBlock_PreRet

    ;; Top of While loop
.GenerateBlock_Top:

    ;; Check remaining size
    test    bsize, bsize
    jz      .GenerateBlock_Success

%ifdef X86
.Call_RDRAND_EAX:
%else
.Call_RDRAND_RAX:
    DB      48h                 ;; X32 can use the full register, issue the REX.w prefix
%endif
    ;; The mnemonic is not known to older assemblers, so the opcode is
    ;; emitted as raw bytes. 0F C7 F0 is `rdrand eax` (`rdrand rax`
    ;; when preceded by REX.W).
    DB      0Fh, 0C7h, 0F0h

    ;; If CF=1, the number returned by RDRAND is valid.
    ;; If CF=0, a random number was not available.
    jc      .RDRAND_succeeded

.RDRAND_failed:

    ;; Exit if we've reached the limit
    test    safety, safety
    jz      .GenerateBlock_Failure

    dec     safety
    jmp     .GenerateBlock_Top

.RDRAND_succeeded:

    cmp     bsize, MWSIZE
    jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
    mov     [buffer+4], eax     ;; We can only move 4 at a time
    DB      048h                ;; Combined, these result in
    shr     eax, 32             ;;   `shr rax, 32`
%endif

    mov     [buffer], eax
    add     buffer, MWSIZE      ;; No need for Intel Core 2 slow word workarounds,
    sub     bsize, MWSIZE       ;;   like `lea buffer,[buffer+MWSIZE]` for faster adds

    ;; Continue
    jmp     .GenerateBlock_Top

    ;; 1,2,3 bytes remain for X86
    ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
    ;; Test bit 2 to see if size is at least 4
    test    bsize, 4
    jz      .Bit_2_Not_Set

    mov     [buffer], eax
    add     buffer, 4

    DB      048h                ;; Combined, these result in
    shr     eax, 32             ;;   `shr rax, 32`

.Bit_2_Not_Set:
%endif

    ;; Test bit 1 to see if size is at least 2
    test    bsize, 2
    jz      .Bit_1_Not_Set

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.Bit_1_Not_Set:

    ;; Test bit 0 to see if size is at least 1
    test    bsize, 1
    jz      .GenerateBlock_Success

    mov     [buffer], al

.Bit_0_Not_Set:

    ;; We've hit all the bits
    jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

    ;; Test for success (was the request completely fulfilled?)
    test    bsize, bsize
    jz      .GenerateBlock_Success

.GenerateBlock_Failure:

    mov     eax, RDRAND_FAILURE
    jmp     .GenerateBlock_Return

.GenerateBlock_Success:

    mov     eax, RDRAND_SUCCESS

.GenerateBlock_Return:

%ifdef X86
    ;; Restore the callee-saved registers pushed in .Load_Arguments
    pop     esi
    pop     edi
%endif
    ret

%endif    ;; X86 and X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64    ;; Set via the command line

global      NASM_RRA_GenerateBlock
section     .text
align       16

;;---------------------------------------------------------------------
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; In:   RDI = buffer, RSI = bsize, EDX = safety
;; Out:  RAX = success (1), failure (0)
;;
;; Writes RDRAND output to the buffer eight bytes at a time, then
;; finishes a 1..7 byte tail from a single extra word. `safety` is the
;; number of CF=0 results tolerated across the whole call.
;; A null buffer succeeds only when bsize is zero.
;;---------------------------------------------------------------------
NASM_RRA_GenerateBlock:

    %define MWSIZE  08h         ;; machine word size
    %define buffer  rdi
    %define bsize   rsi
    %define safety  edx

    ;; Arguments are already in registers; nothing to load.

    test    buffer, buffer
    jz      .null_buffer

.next_word:
    ;; Loop head: stop as soon as nothing is left to fill.
    test    bsize, bsize
    jz      .done_ok

.draw:
    ;; `rdrand rax`, emitted as raw bytes (REX.W 0F C7 /6).
    DB      048h, 0Fh, 0C7h, 0F0h
    jc      .have_word          ;; CF=1: RAX holds a valid value

    ;; CF=0: no value was available. Spend one retry or give up.
    test    safety, safety
    jz      .done_fail
    dec     safety
    jmp     .draw               ;; bsize is unchanged and non-zero here

.have_word:
    cmp     bsize, MWSIZE
    jb      .tail

    ;; A full eight bytes still fit.
    mov     [buffer], rax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE
    jmp     .next_word

    ;; 1..7 bytes remain: peel off 4, 2 and 1 byte pieces according
    ;; to the low three bits of bsize.
.tail:
    test    bsize, 4
    jz      .tail_below_4
    mov     [buffer], eax
    shr     rax, 32
    add     buffer, 4

.tail_below_4:
    test    bsize, 2
    jz      .tail_below_2
    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.tail_below_2:
    test    bsize, 1
    jz      .done_ok
    mov     [buffer], al

.done_ok:
    mov     eax, RDRAND_SUCCESS ;; 32-bit write clears the upper half of RAX
    ret

.null_buffer:
    ;; No buffer: the request counts as fulfilled only if it was empty.
    test    bsize, bsize
    jz      .done_ok

.done_fail:
    mov     eax, RDRAND_FAILURE
    ret

%endif    ;; X64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X86 or X32    ;; Set via the command line

;;---------------------------------------------------------------------
;; extern "C" int NASM_RSA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; Fills `size` bytes at `ptr` with output from RDSEED.
;;
;;   Arg1, byte* buffer
;;   Arg2, size_t bsize
;;   Arg3, unsigned int safety  - retry budget for the whole call; each
;;                                RDSEED that reports CF=0 consumes one
;;   EAX (out): success (1), failure (0)
;;
;; X86: arguments are read from the stack. EDI and ESI are callee-saved
;;      in the i386 calling convention, so they are pushed on entry and
;;      popped on every exit path.
;; X32: arguments are taken from EDI, ESI and EDX as they arrive.
;;
;; A null buffer succeeds only when bsize is also zero.
;;---------------------------------------------------------------------

global      NASM_RSA_GenerateBlock
section     .text

%ifdef X86
align       8
cpu         486
%else
align       16
%endif

NASM_RSA_GenerateBlock:

%ifdef X86
    ;; Offsets are relative to ESP *after* the two pushes in
    ;; .Load_Arguments: [esp] = saved ESI, [esp+4] = saved EDI,
    ;; [esp+8] = return address, [esp+0Ch] = first argument.
    %define arg1    [esp+0Ch]
    %define arg2    [esp+10h]
    %define arg3    [esp+14h]
    %define MWSIZE  04h         ;; machine word size
%else
    %define MWSIZE  08h         ;; machine word size
%endif

    %define buffer  edi
    %define bsize   esi
    %define safety  edx

%ifdef X86
.Load_Arguments:

    push    edi                 ;; callee-saved, used as `buffer`
    push    esi                 ;; callee-saved, used as `bsize`

    mov     buffer, arg1
    mov     bsize,  arg2
    mov     safety, arg3
%endif

.Validate_Pointer:

    test    buffer, buffer
    jz      .GenerateBlock_PreRet

    ;; Top of While loop
.GenerateBlock_Top:

    ;; Check remaining size
    test    bsize, bsize
    jz      .GenerateBlock_Success

%ifdef X86
.Call_RDSEED_EAX:
%else
.Call_RDSEED_RAX:
    DB      48h                 ;; X32 can use the full register, issue the REX.w prefix
%endif
    ;; The mnemonic is not known to older assemblers, so the opcode is
    ;; emitted as raw bytes. 0F C7 F8 is `rdseed eax` (`rdseed rax`
    ;; when preceded by REX.W).
    DB      0Fh, 0C7h, 0F8h

    ;; If CF=1, the number returned by RDSEED is valid.
    ;; If CF=0, a random number was not available.
    jc      .RDSEED_succeeded

.RDSEED_failed:

    ;; Exit if we've reached the limit
    test    safety, safety
    jz      .GenerateBlock_Failure

    dec     safety
    jmp     .GenerateBlock_Top

.RDSEED_succeeded:

    cmp     bsize, MWSIZE
    jb      .Partial_Machine_Word

.Full_Machine_Word:

%ifdef X32
    ;; MWSIZE is 8 on X32 but stores go through 32-bit registers, so
    ;; the word is written as two halves. Without this the upper four
    ;; bytes of every full word would be skipped over unwritten.
    mov     [buffer+4], eax     ;; We can only move 4 at a time
    DB      048h                ;; Combined, these result in
    shr     eax, 32             ;;   `shr rax, 32`
%endif

    mov     [buffer], eax
    add     buffer, MWSIZE      ;; No need for Intel Core 2 slow word workarounds,
    sub     bsize, MWSIZE       ;;   like `lea buffer,[buffer+MWSIZE]` for faster adds

    ;; Continue
    jmp     .GenerateBlock_Top

    ;; 1,2,3 bytes remain for X86
    ;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:

%ifdef X32
    ;; Test bit 2 to see if size is at least 4
    test    bsize, 4
    jz      .Bit_2_Not_Set

    mov     [buffer], eax
    add     buffer, 4

    DB      048h                ;; Combined, these result in
    shr     eax, 32             ;;   `shr rax, 32`

.Bit_2_Not_Set:
%endif

    ;; Test bit 1 to see if size is at least 2
    test    bsize, 2
    jz      .Bit_1_Not_Set

    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.Bit_1_Not_Set:

    ;; Test bit 0 to see if size is at least 1
    test    bsize, 1
    jz      .GenerateBlock_Success

    mov     [buffer], al

.Bit_0_Not_Set:

    ;; We've hit all the bits
    jmp     .GenerateBlock_Success

.GenerateBlock_PreRet:

    ;; Test for success (was the request completely fulfilled?)
    test    bsize, bsize
    jz      .GenerateBlock_Success

.GenerateBlock_Failure:

    mov     eax, RDSEED_FAILURE
    jmp     .GenerateBlock_Return

.GenerateBlock_Success:

    mov     eax, RDSEED_SUCCESS

.GenerateBlock_Return:

%ifdef X86
    ;; Restore the callee-saved registers pushed in .Load_Arguments
    pop     esi
    pop     edi
%endif
    ret

%endif    ;; X86 and X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64    ;; Set via the command line

global      NASM_RSA_GenerateBlock
section     .text
align       16

;;---------------------------------------------------------------------
;; extern "C" int NASM_RSA_GenerateBlock(byte* ptr, size_t size,
;;                                       unsigned int safety);
;;
;; In:   RDI = buffer, RSI = bsize, EDX = safety
;; Out:  RAX = success (1), failure (0)
;;
;; Writes RDSEED output to the buffer eight bytes at a time, then
;; finishes a 1..7 byte tail from a single extra word. `safety` is the
;; number of CF=0 results tolerated across the whole call.
;; A null buffer succeeds only when bsize is zero.
;;---------------------------------------------------------------------
NASM_RSA_GenerateBlock:

    %define MWSIZE  08h         ;; machine word size
    %define buffer  rdi
    %define bsize   rsi
    %define safety  edx

    ;; Arguments are already in registers; nothing to load.

    test    buffer, buffer
    jz      .null_buffer

.next_word:
    ;; Loop head: stop as soon as nothing is left to fill.
    test    bsize, bsize
    jz      .done_ok

.draw:
    ;; `rdseed rax`, emitted as raw bytes (REX.W 0F C7 /7).
    DB      048h, 0Fh, 0C7h, 0F8h
    jc      .have_word          ;; CF=1: RAX holds a valid value

    ;; CF=0: no value was available. Spend one retry or give up.
    test    safety, safety
    jz      .done_fail
    dec     safety
    jmp     .draw               ;; bsize is unchanged and non-zero here

.have_word:
    cmp     bsize, MWSIZE
    jb      .tail

    ;; A full eight bytes still fit.
    mov     [buffer], rax
    add     buffer, MWSIZE
    sub     bsize, MWSIZE
    jmp     .next_word

    ;; 1..7 bytes remain: peel off 4, 2 and 1 byte pieces according
    ;; to the low three bits of bsize.
.tail:
    test    bsize, 4
    jz      .tail_below_4
    mov     [buffer], eax
    shr     rax, 32
    add     buffer, 4

.tail_below_4:
    test    bsize, 2
    jz      .tail_below_2
    mov     [buffer], ax
    shr     eax, 16
    add     buffer, 2

.tail_below_2:
    test    bsize, 1
    jz      .done_ok
    mov     [buffer], al

.done_ok:
    mov     eax, RDSEED_SUCCESS ;; 32-bit write clears the upper half of RAX
    ret

.null_buffer:
    ;; No buffer: the request counts as fulfilled only if it was empty.
    test    bsize, bsize
    jz      .done_ok

.done_fail:
    mov     eax, RDSEED_FAILURE
    ret

%endif    ;; X64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|