windows-nt-4.0/private/windows/opengl/server/generic/ppc/matrix.s


								/*

								** Copyright 1991, 1992, 1993, Silicon Graphics, Inc.

								** All Rights Reserved.

								**

								** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics,

								** Inc.; the contents of this file may not be disclosed to third

								** parties, copied or duplicated in any form, in whole or in part,

								** without the prior written permission of Silicon Graphics, Inc.

								**

								** RESTRICTED RIGHTS LEGEND:

								** Use, duplication or disclosure by the Government is subject to

								** restrictions as set forth in subdivision (c)(1)(ii) of the Rights

								** in Technical Data and Computer Software clause at

								** DFARS 252.227-7013, and/or in similar or successor clauses in the

								** FAR, DOD or NASA FAR Supplement. Unpublished - rights reserved

								** under the Copyright Laws of the United States.

								**

								** PowerPC version:

								**

								** Created by: Curtis Fawcett   IBM Corporation

								**

								** Created on: 7-5-94

								**

								*/


								#include "ksppc.h"

								#include "glppc.h"


								#ifdef SGI

								// Not used.

								#ifdef __GL_ASM_COPYMATRIX


									LEAF_ENTRY(__glCopyMatrix)

								//

								// __glCopyMatrix: copy 64 bytes (eight doublewords) from the block

								// addressed by r.4 to the block addressed by r.3.

								//

								// In:    r.3 = destination address

								//        r.4 = source address

								// Uses:  f.0-f.7 as transfer registers

								//

								// All eight loads are issued before the first store.

								// NOTE(review): 64 bytes is presumably a 4x4 matrix of single

								// floats; any fields beyond the first 64 bytes are not copied --

								// confirm against the matrix structure layout.

								//

								/* WARNING:  Data must be aligned on double boundary */

								//

								// Load source doublewords 0..7

								//

									lfd	f.0,0(r.4)

									lfd	f.1,8(r.4)

									lfd	f.2,16(r.4)

									lfd	f.3,24(r.4)

									lfd	f.4,32(r.4)

									lfd	f.5,40(r.4)

									lfd	f.6,48(r.4)

									lfd	f.7,56(r.4)

								//

								// Store them to the destination at the same offsets

								//

									stfd	f.0,0(r.3)

									stfd	f.1,8(r.3)

									stfd	f.2,16(r.3)

									stfd	f.3,24(r.3)

									stfd	f.4,32(r.3)

									stfd	f.5,40(r.3)

									stfd	f.6,48(r.3)

									stfd	f.7,56(r.3)

								//

									LEAF_EXIT(__glCopyMatrix)


								#endif /* __GL_ASM_COPYMATRIX */

								#endif /* SGI */

								//

								//*******************************************************************

								//

								#ifdef __GL_ASM_NORMALIZE


									LEAF_ENTRY(__glNormalize)

								//

								// __glNormalize: scale a 3-component single-precision vector by an

								// approximation of 1/sqrt(V0**2 + V1**2 + V2**2).

								//

								// In:    r.3 = address of the 3-float result.  0(r.3) is also used

								//              as scratch to move values between the integer and

								//              floating-point registers, so it is overwritten

								//              several times before the final result is stored.

								//        r.4 = address of the 3-float input.  All three components

								//              are loaded before the first store to r.3, so the

								//              input and the result may be the same vector.

								// Out:   result[0..2] = scaled input, or three zero words when the

								//        exponent field of LEN is 0 or 0xff (LEN is zero, denormal,

								//        Inf or NaN).

								// Uses:  r.5-r.9, f.0-f.8, cr0, cr1

								//

								// LEN is computed with the single-precision forms (fmuls/fmadds)

								// so that it is rounded to single format: with the overflow and

								// underflow exceptions disabled, an out-of-range sum becomes Inf,

								// zero or a denormal.  stfs does not round or saturate a

								// double-precision value that lies outside the single range, so

								// with double-precision arithmetic the exponent test below could

								// miss an overflowed or underflowed LEN.

								//

									lfs	f.0,0(r.4)		// Get 1st value (V0)

									li	r.5,0x5f37		// Upper half of seed constant

								        slwi    r.5,r.5,16              // Shift into position

									addi	r.5,r.5,0x5a00		// r.5 = 0x5f375a00

									lfs	f.1,4(r.4)		// Get 2nd value (V1)

									lfs	f.2,8(r.4)		// Get 3rd value (V2)

									fmuls	f.7,f.0,f.0		// Get V0 ** 2

									fmadds	f.7,f.1,f.1,f.7		// Get V0**2 + V1**2

									fmadds	f.7,f.2,f.2,f.7		// Get LEN=V0**2+V1**2+V2**2

								//

								// This routine calculates a reciprocal square root accurate to

								// well over 16 bits using Newton-Raphson approximation.

								//

								// To calculate the seed, the shift compresses the floating-point

								// range just as sqrt() does, and the subtract inverts the range

								// like reciprocation does.  The constant was chosen by

								// trial-and-error to minimize the maximum error of the iterated

								// result for all values over the range .5 to 2.

								//

									stfs	f.7,0(r.3)              // Store LEN (already single)

									lwz	r.7,0(r.3)              // Get ILEN = bit image of LEN

									li	r.9,0			// Get constant 0

									srwi	r.6,r.7,1		// ILEN >> 1

									sub	r.6,r.5,r.6		// Seed = 0x5f375a00 - (ILEN>>1)

									stw	r.6,0(r.3)              // Store seed value

								//

								// The Newton-Raphson iteration to approximate X = 1/sqrt(Y) is:

								//

								//	X[1] = .5*X[0]*(3 - Y*X[0]^2)

								//

								// A double iteration is:

								//

								//	X[2] = .0625*X[0]*(3-Y*X[0]^2)*[12-(Y*X[0]^2)*(3-Y*X[0]^2)^2]

								//

								// Abort if LEN overflowed or underflowed, as indicated by exponent.

								//

								// The constants .0625, 3.0 and 12.0 are built as bit images in r.5

								// and passed to the FPU through 0(r.3).  The integer exponent test

								// is interleaved with the first floating-point multiplies.

								//

									li	r.5,0x3d80		// Upper half of ~.0625

								        slwi    r.5,r.5,16              // Shift into position

									lfs	f.8,0(r.3)              // Get seed value as FP

									addi	r.5,r.5,0x29		// r.5 = 0x3d800029 (just above 1/16)

									stw	r.5,0(r.3)              // Store ~.0625 (1/16)

									fmul    f.3,f.7,f.8		// XY = LEN * SEED

									li	r.8,0x7f80		// Upper half of exponent mask

									lfs	f.4,0(r.3)              // Get constant 1/16th

								        slwi    r.8,r.8,16              // r.8 = 0x7f800000 (+inf image)

									and	r.7,r.8,r.7		// Isolate exponent field of LEN

									cmpw	r.7,r.8			// cr0: exponent vs all ones

									cmpw	cr1,r.9,r.7		// cr1: 0 vs exponent

									fmul	f.4,f.4,f.8		// Get SEED * 1/16th

									fmul	f.3,f.3,f.8		// Get XY = LEN * SEED**2

									li	r.5,0x4040		// Get constant 3.0

								        slwi    r.5,r.5,16              // Shift into position

									stw	r.5,0(r.3)              // Store 3.0

									bge-	BadVal			// Exponent all ones: Inf or NaN

									bge-	cr.1,BadVal		// Exponent zero: zero or denormal

									lfs	f.5,0(r.3)              // Get constant 3.0

									li	r.5,0x4140	        // Get constant 12.0

								        slwi    r.5,r.5,16              // Shift into position

									stw	r.5,0(r.3)              // Store 12.0

									fsub	f.5,f.5,f.3		// T = 3.0 - XY

									fmul	f.3,f.3,f.5		// XY * T

									lfs	f.6,0(r.3)              // Get constant 12.0

									fmul	f.4,f.4,f.5             // S = SEED * 1/16 * T

									fmul	f.3,f.3,f.5		// XY * T**2

									fmul	f.0,f.0,f.4		// V0 * S

									fmul	f.1,f.1,f.4		// V1 * S

									fsub	f.6,f.6,f.3		// U = 12.0 - XY * T**2

									fmul	f.2,f.2,f.4		// V2 * S

									fmul	f.0,f.0,f.6		// V0 * S * U

									fmul	f.1,f.1,f.6		// V1 * S * U

									fmul	f.2,f.2,f.6		// V2 * S * U

								//

								// NOTE(review): the three results below are double-precision

								// products, so stfs stores them without rounding to single.

								//

									stfs	f.0,0(r.3)		// Store adjusted V0

									stfs	f.1,4(r.3)		// Store adjusted V1

									stfs	f.2,8(r.3)		// Store adjusted V2

									b	Done			// Jump to return

								//

								// Bad input so store zeros for results

								//

								BadVal:

									stw	r.9,0(r.3)		// Store V0=0

									stw	r.9,4(r.3)		// Store V1=0

									stw	r.9,8(r.3)		// Store V2=0

								//

								// Return to calling program

								//

								Done:


									LEAF_EXIT(__glNormalize)


								#endif /* __GL_ASM_NORMALIZE */