Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

229 lines
4.6 KiB

/*
** Copyright 1991, 1992, 1993, Silicon Graphics, Inc.
** All Rights Reserved.
**
** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
** the contents of this file may not be disclosed to third parties, copied or
** duplicated in any form, in whole or in part, without the prior written
** permission of Silicon Graphics, Inc.
**
** RESTRICTED RIGHTS LEGEND:
** Use, duplication or disclosure by the Government is subject to restrictions
** as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
** and Computer Software clause at DFARS 252.227-7013, and/or in similar or
** successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
** rights reserved under the Copyright Laws of the United States.
*/
#include "ksmips.h"
#include "glmips.h"
/*
 * Register aliases used by the routines in this file.
 *
 * Several names deliberately map to the same physical register, so a
 * value held under one name is destroyed when its alias is written:
 *   $f2  : THREE,   V0SQUARED, FT1
 *   $f4  : TWELVE,  V1SQUARED, FT2
 *   $f6  : V2SQUARED, FT3
 *   $f8  : LEN,     FT4
 *   $f10 : SEED,    FT5
 *   $f12 : XY,      FT6
 *   $f14 : STEENTH, FT7
 *   $f0  : V2,      FT0
 *   a1   : ISEED,   VIN, SRC
 *   a0   : VOUT,    DST
 */
/* __glNormalize: floating-point constants and Newton-Raphson temporaries. */
#define TWELVE $f4
#define THREE $f2
#define STEENTH $f14
#define XY $f12
#define SEED $f10
/*
 * __glNormalize: integer scratch registers.
 * ISEED receives the integer bit pattern of the seed, ILEN the bit pattern
 * of LEN; CON_0..CON_3 and CON are used to build constants with lui/addu.
 * ISEED is the same register as VIN (a1).
 */
#define ISEED a1
#define ILEN v1
#define CON_0 t1
#define CON_1 v0
#define CON_2 t0
#define CON_3 a2
#define CON a3
/* __glNormalize: sum of squares and the individual squared components. */
#define LEN $f8
#define V2SQUARED $f6
#define V1SQUARED $f4
#define V0SQUARED $f2
/* __glNormalize: the three vector components loaded from VIN. */
#define V2 $f0
#define V1 $f18
#define V0 $f16
/* __glNormalize: pointer arguments (input vector, output vector). */
#define VIN a1
#define VOUT a0
/* __glCopyMatrix: eight even-numbered FP registers used as copy temporaries. */
#define FT7 $f14
#define FT6 $f12
#define FT5 $f10
#define FT4 $f8
#define FT3 $f6
#define FT2 $f4
#define FT1 $f2
#define FT0 $f0
/* __glCopyMatrix: pointer arguments (source, destination). */
#define SRC a1
#define DST a0
#ifdef SGI
// Not used
#ifdef __GL_ASM_COPYMATRIX
/*
 * __glCopyMatrix
 *
 * Copies 64 bytes from the buffer addressed by SRC (a1) to the buffer
 * addressed by DST (a0), as eight 8-byte l.d loads followed by eight
 * 8-byte s.d stores at offsets 0, 8, ..., 56.
 *
 * All eight loads are issued before the first store, so the whole source
 * has been read before any destination byte is written.
 *
 * Overwrites FT0-FT7; a0 and a1 are not modified. Returns with "j ra".
 *
 * NOTE(review): presumably the 64 bytes are a 4x4 matrix of single-precision
 * floats moved two at a time - confirm against the C caller.
 * NOTE(review): no instruction is written after "j ra" in this routine, so
 * it appears to rely on the assembler filling the jump delay slot (reorder
 * mode) - confirm the mode left in effect by LEAF_ENTRY / the headers.
 */
LEAF_ENTRY(__glCopyMatrix)
/* :invars DST = a0, SRC = a1 */
/* WARNING: Data must be aligned on double boundary */
/* Load phase: read the whole 64-byte source into FT0..FT7. */
.loc 1 24
l.d FT0,0(SRC)
.loc 1 25
l.d FT1,8(SRC)
.loc 1 26
l.d FT2,16(SRC)
.loc 1 27
l.d FT3,24(SRC)
.loc 1 28
l.d FT4,32(SRC)
.loc 1 29
l.d FT5,40(SRC)
.loc 1 30
l.d FT6,48(SRC)
.loc 1 31
l.d FT7,56(SRC)
/* Store phase: write FT0..FT7 to the destination at matching offsets. */
.loc 1 32
s.d FT0,0(DST)
.loc 1 33
s.d FT1,8(DST)
.loc 1 34
s.d FT2,16(DST)
.loc 1 35
s.d FT3,24(DST)
.loc 1 36
s.d FT4,32(DST)
.loc 1 37
s.d FT5,40(DST)
.loc 1 38
s.d FT6,48(DST)
.loc 1 39
s.d FT7,56(DST)
/* Return to the caller. */
.loc 1 40
j ra
.end __glCopyMatrix
#endif /* __GL_ASM_COPYMATRIX */
#endif /* SGI */
/************************************************************************/
#ifdef __GL_ASM_NORMALIZE
/*
 * __glNormalize
 *
 * In:   VOUT = a0  address where three single-precision results are stored
 *       VIN  = a1  address from which three single-precision inputs are loaded
 *
 * Computes LEN = v0*v0 + v1*v1 + v2*v2 and stores each input component
 * multiplied by an approximation of 1/sqrt(LEN) at VOUT+0, +4, +8.
 * The approximation is an integer-derived seed refined by two fused
 * Newton-Raphson iterations; no divide or square-root instruction is used.
 *
 * If the exponent field of LEN is all zeros or all ones, three zero words
 * are stored at VOUT instead (the "bogus vector" path at $100).
 *
 * All three inputs are loaded before anything is stored, so the routine
 * reads its whole input before writing any output.
 *
 * Register notes:
 *   - ISEED is a1, the same register as VIN; it is overwritten only after
 *     the last load from VIN.
 *   - THREE ($f2) and TWELVE ($f4) reuse the registers of V0SQUARED and
 *     V1SQUARED; they are written only after LEN has been summed.
 *   - Integer registers written: v0, v1, a1, a2, a3, t0, t1.
 *   - FP registers written: $f0..$f18 (even-numbered).
 *
 * The body is assembled under ".set noreorder": the instruction following
 * each branch/jump is its delay slot and executes regardless of the branch.
 * NOTE(review): there is no matching ".set reorder" before ".end", so any
 * code placed after this routine in the same file would inherit noreorder
 * mode - confirm that is intended.
 */
LEAF_ENTRY(__glNormalize)
.set noreorder
/* :invars VOUT = a0, VIN = a1 */
/* Load the three input components. */
.loc 1 52
l.s V0,0(VIN)
.loc 1 53
l.s V1,4(VIN)
.loc 1 54
l.s V2,8(VIN)
/* Square each component. */
.loc 1 55
mul.s V0SQUARED,V0,V0
.loc 1 56
mul.s V1SQUARED,V1,V1
.loc 1 57
mul.s V2SQUARED,V2,V2
/* LEN = V0^2 + V1^2 + V2^2 (second add uses the two-operand form: LEN += V2SQUARED). */
.loc 1 59
add.s LEN,V0SQUARED,V1SQUARED
.loc 1 60
add.s LEN,V2SQUARED
/*
* This routine calculates a reciprocal square root accurate to well over
* 16 bits using Newton-Raphson approximation.
*
* To calculate the seed, the shift compresses the floating-point
* range just as sqrt() does, and the subtract inverts the range
* like reciprocation does. The constant was chosen by trial-and-error
* to minimize the maximum error of the iterated result for all values
* over the range .5 to 2.
*
* In integer terms: ISEED = 0x5F375A00 - (bits(LEN) >> 1), which is then
* moved back to the FPU and reinterpreted as the float SEED.
*/
.loc 1 71
lui CON_1, 0x5F37
/* ILEN = raw bit pattern of LEN; kept for the exponent check further down. */
.loc 1 72
mfc1 ILEN, LEN
.loc 1 73
addu CON_1, CON_1, 0x5A00
.loc 1 74
srl ISEED, ILEN, 1
.loc 1 75
subu ISEED, CON_1, ISEED
.loc 1 76
mtc1 ISEED, SEED
/*
* The Newton-Raphson iteration to approximate X = 1/sqrt(Y) is:
*
* X[1] = .5*X[0]*(3 - Y*X[0]^2)
*
* A double iteration is:
*
* X[2] = .0625*X[0]*(3 - Y*X[0]^2)*[12 - (Y*X[0]^2)*(3 - Y*X[0]^2)^2]
*
* Abort if LEN overflowed or underflowed, as indicated by its exponent.
*
* Below, Y = LEN and X[0] = SEED. The integer instructions that build the
* constants and perform the exponent test are interleaved with the FP
* multiplies (presumably to hide FP latency - not verifiable from here).
*/
.loc 1 88
lui CON_3, 0x3D80 /* .0625 */
.loc 1 89
addu CON_3, CON_3, 0x29 /* plus a little, since N-R underestimates */
/* XY = Y*X[0] (multiplied by X[0] again below to give Y*X[0]^2). */
.loc 1 90
mul.s XY, LEN, SEED
.loc 1 91
mtc1 CON_3, STEENTH
/* Exponent check: isolate the 8 exponent bits of LEN (mask 0x7F800000). */
.loc 1 92
lui CON, 0x7F80 /* +infinity */
.loc 1 93
and ILEN, ILEN, CON
/* STEENTH = (.0625+)*X[0] */
.loc 1 94
mul.s STEENTH, SEED, STEENTH
/* CON = 1 if the exponent field is below all-ones. */
.loc 1 95
slt CON, ILEN, CON
/* ILEN = 1 if the exponent field is non-zero. */
.loc 1 96
slt ILEN, zero, ILEN
/* XY = Y*X[0]^2 */
.loc 1 97
mul.s XY, XY, SEED
/* ILEN = 1 only when both exponent tests passed. */
.loc 1 98
and ILEN, CON, ILEN
.loc 1 99
lui CON_2, 0x4040 /* 3.0 */
.loc 1 100
mtc1 CON_2, THREE
.loc 1 101
lui CON_0, 0x4140 /* 12.0 */
/* Exponent out of range: go store zeros. */
.loc 1 102
beq ILEN, zero, $100
/* Branch delay slot: executes on both paths. */
.loc 1 103
mtc1 CON_0, TWELVE
/* THREE = 3 - Y*X[0]^2 */
.loc 1 104
sub.s THREE, THREE, XY
/* XY = (Y*X[0]^2)*(3 - Y*X[0]^2) */
.loc 1 105
mul.s XY, XY, THREE
/* STEENTH = (.0625+)*X[0]*(3 - Y*X[0]^2) */
.loc 1 106
mul.s STEENTH, STEENTH, THREE
/* XY = (Y*X[0]^2)*(3 - Y*X[0]^2)^2 */
.loc 1 107
mul.s XY, XY, THREE
/* Apply the first factor (STEENTH) to each component ... */
.loc 1 108
mul.s V0, V0, STEENTH
.loc 1 109
mul.s V1, V1, STEENTH
/* TWELVE = 12 - (Y*X[0]^2)*(3 - Y*X[0]^2)^2 */
.loc 1 110
sub.s TWELVE, TWELVE, XY
.loc 1 111
mul.s V2, V2, STEENTH
/* ... then the second factor (TWELVE), giving component * X[2]. */
.loc 1 112
mul.s V0, V0, TWELVE
.loc 1 113
mul.s V1, V1, TWELVE
.loc 1 114
mul.s V2, V2, TWELVE
/* Store the scaled components and return. */
.loc 1 115
s.s V0,0(VOUT)
.loc 1 116
s.s V1,4(VOUT)
.loc 1 117
j ra
/* Jump delay slot: the third store completes before control returns. */
.loc 1 118
s.s V2,8(VOUT)
/* no square root needed - bogus vector is turned into zeros */
.loc 1 121
$100: sw zero,0(VOUT)
.loc 1 122
sw zero,4(VOUT)
.loc 1 123
j ra
/* Jump delay slot: the third zero store completes before control returns. */
.loc 1 124
sw zero,8(VOUT)
.end __glNormalize
#endif /* __GL_ASM_NORMALIZE */