Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2036 lines
69 KiB

;---------------------------Module-Header------------------------------;
; Module Name: lines.asm
;
; Draws a set of connected polylines.
;
; The actual pixel-lighting code is different depending on if the lines
; are styled/unstyled and we're doing an arbitrary ROP or set-style ROP.
;
; Lines are drawn from left to right. So if a line moves from right
; to left, the endpoints are swapped and the line is drawn from left to
; right.
;
; See s3\lines.cxx for a portable version (sans simple clipping).
;
; Copyright (c) 1992 Microsoft Corporation
;-----------------------------------------------------------------------;
.386
.model small,c
assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
assume fs:nothing,gs:nothing
.xlist
include stdcall.inc ;calling convention cmacros
include i386\egavga.inc
include i386\strucs.inc
include i386\driver.inc
include i386\lines.inc
.list
.data
public gaflRoundTable
gaflRoundTable label dword
dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; no flips
dd FL_H_ROUND_DOWN + FL_V_ROUND_DOWN ; D flip
dd FL_H_ROUND_DOWN ; V flip
dd FL_V_ROUND_DOWN ; D & V flip
dd FL_V_ROUND_DOWN ; slope one
dd 0baadf00dh
dd FL_H_ROUND_DOWN ; slope one & V flip
dd 0baadf00dh
.code
;--------------------------------Macro----------------------------------;
; testb ebx, <mask>
;
; Substitutes a byte compare if the mask is entirely in the lo-byte or
; hi-byte (thus saving 3 bytes of code space).
;
;-----------------------------------------------------------------------;
TESTB macro targ,mask,thirdarg
local mask2,delta
ifnb <thirdarg>
.err TESTB mask must be enclosed in brackets!
endif
delta = 0
mask2 = mask
if mask2 AND 0ffff0000h
test targ,mask ; If bit set in hi-word,
exitm ; test entire dword
endif
if mask2 AND 0ff00h
if mask2 AND 0ffh ; If bit set in lo-byte and
test targ,mask ; hi-byte, test entire dword
exitm
endif
mask2 = mask2 SHR 8
delta = 1
endif
ifidni <targ>,<EBX>
if delta
test bh,mask2
else
test bl,mask2
endif
exitm
endif
.err Too bad TESTB doesn't support targets other than ebx!
endm
;---------------------------Public-Routine------------------------------;
; BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls,
; prclClip, apfn[], flStart)
;
; Do all the DDA calculations for lines.
;
; Doing Lines Right
; -----------------
;
; In NT, all lines are given to the device driver in fractional
; coordinates, in a 28.4 fixed point format. The lower 4 bits are
; fractional for sub-pixel positioning.
;
; Note that you CANNOT! just round the coordinates to integers
; and pass the results to your favorite integer Bresenham routine!!
; (Unless, of course, you have such a high resolution device that
; nobody will notice -- not likely for a display device.) The
; fractions give a more accurate rendering of the line -- this is
; important for things like our Bezier curves, which would have 'kinks'
; if the points in its polyline approximation were rounded to integers.
;
; Unfortunately, for fractional lines there is more setup work to do
; a DDA than for integer lines. However, the main loop is exactly
; the same (and can be done entirely with 32 bit math).
;
; If You've Got Hardware That Does Bresenham
; ------------------------------------------
;
; A lot of hardware limits DDA error terms to 'n' bits. With fractional
; coordinates, 4 bits are given to the fractional part, letting
; you draw in hardware only those lines that lie entirely in a 2^(n-4)
; by 2^(n-4) pixel space.
;
; And you still have to correctly draw those lines with coordinates
; outside that space! Remember that the screen is only a viewport
; onto a 28.4 by 28.4 space -- if any part of the line is visible
; you MUST render it precisely, regardless of where the end points lie.
; So even if you do it in software, somewhere you'll have to have a
; 32 bit DDA routine.
;
; Our Implementation
; ------------------
;
; We employ a run length slice algorithm: our DDA calculates the
; number of pixels that are in each row (or 'strip') of pixels.
;
; We've separated the running of the DDA and the drawing of pixels:
; we run the DDA for several iterations and store the results in
; a 'strip' buffer (which are the lengths of consecutive pixel rows of
; the line), then we crank up a 'strip drawer' that will draw all the
; strips in the buffer.
;
; We also employ a 'half-flip' to reduce the number of strip
; iterations we need to do in the DDA and strip drawing loops: when a
; (normalized) line's slope is more than 1/2, we do a final flip
; about the line y = (1/2)x. So now, instead of each strip being
; consecutive horizontal or vertical pixel rows, each strip is composed
; of those pixels aligned in 45 degree rows. So a line like (0, 0) to
; (128, 128) would generate only one strip.
;
; We also always draw only left-to-right.
;
; Style lines may have arbitrary style patterns. We specially
; optimize the default patterns (and call them 'masked' styles).
;
; The DDA Derivation
; ------------------
;
; Here is how I like to think of the DDA calculation.
;
; We employ Knuth's "diamond rule": rendering a one-pixel-wide line
; can be thought of as dragging a one-pixel-wide by one-pixel-high
; diamond along the true line. Pixel centers lie on the integer
; coordinates, and so we light any pixel whose center gets covered
; by the "drag" region (John D. Hobby, Journal of the Association
; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).
;
; We must define which pixel gets lit when the true line falls
; exactly half-way between two pixels. In this case, we follow
; the rule: when two pels are equidistant, the upper or left pel
; is illuminated, unless the slope is exactly one, in which case
; the upper or right pel is illuminated. (So we make the edges
; of the diamond exclusive, except for the top and left vertices,
; which are inclusive, unless we have slope one.)
;
; This metric decides what pixels should be on any line BEFORE it is
; flipped around for our calculation. Having a consistent metric
; this way will let our lines blend nicely with our curves. The
; metric also dictates that we will never have one pixel turned on
; directly above another that's turned on. We will also never have
; a gap; i.e., there will be exactly one pixel turned on for each
; column between the start and end points. All that remains to be
; done is to decide how many pixels should be turned on for each row.
;
; So lines we draw will consist of varying numbers of pixels on
; successive rows, for example:
;
; ******
; *****
; ******
; *****
;
; We'll call each set of pixels on a row a "strip".
;
; (Please remember that our coordinate space has the origin as the
; upper left pixel on the screen; postive y is down and positive x
; is right.)
;
; Device coordinates are specified as fixed point 28.4 numbers,
; where the first 28 bits are the integer coordinate, and the last
; 4 bits are the fraction. So coordinates may be thought of as
; having the form (x, y) = (M/F, N/F) where F is the constant scaling
; factor F = 2^4 = 16, and M and N are 32 bit integers.
;
; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs
; left-to-right and whose slope is in the first octant, and let
; dM = M1 - M0 and dN = N1 - N0. Then dM >= 0, dN >= 0 and dM >= dN.
;
; Since the slope of the line is less than 1, the edges of the
; drag region are created by the top and bottom vertices of the
; diamond. At any given pixel row y of the line, we light those
; pixels whose centers are between the left and right edges.
;
; Let mL(n) denote the line representing the left edge of the drag
; region. On pixel row j, the column of the first pixel to be
; lit is
;
; iL(j) = ceiling( mL(j * F) / F)
;
; Since the line's slope is less than one:
;
; iL(j) = ceiling( mL([j + 1/2] F) / F )
;
; Recall the formula for our line:
;
; n(m) = (dN / dM) (m - M0) + N0
;
; m(n) = (dM / dN) (n - N0) + M0
;
; Since the line's slope is less than one, the line representing
; the left edge of the drag region is the original line offset
; by 1/2 pixel in the y direction:
;
; mL(n) = (dM / dN) (n - F/2 - N0) + M0
;
; From this we can figure out the column of the first pixel that
; will be lit on row j, being careful of rounding (if the left
; edge lands exactly on an integer point, the pixel at that
; point is not lit because of our rounding convention):
;
; iL(j) = floor( mL(j F) / F ) + 1
;
; = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1
;
; = floor( F dM j - F/2 dM - N0 dM + dN M0) / F dN ) + 1
;
; F dM j - [ dM (N0 + F/2) - dN M0 ]
; = floor( ---------------------------------- ) + 1
; F dN
;
; dM j - [ dM (N0 + F/2) - dN M0 ] / F
; = floor( ------------------------------------ ) + 1 (1)
; dN
;
; = floor( (dM j + alpha) / dN ) + 1
;
; where
;
; alpha = - [ dM (N0 + F/2) - dN M0 ] / F
;
; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)
; pixels in row j. Because we are always calculating iL(j) for
; integer quantities of j, we note that the only fractional term
; is constant, and so we can 'throw away' the fractional bits of
; alpha:
;
; beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F ) (2)
;
; so
;
; iL(j) = floor( (dM j + beta) / dN ) + 1 (3)
;
; for integers j.
;
; Note if iR(j) is the line's rightmost pixel on row j, that
; iR(j) = iL(j + 1) - 1.
;
; Similarly, rewriting equation (1) as a function of column i,
; we can determine, given column i, on which pixel row j is the line
; lit:
;
; dN i + [ dM (N0 + F/2) - dN M0 ] / F
; j(i) = ceiling( ------------------------------------ ) - 1
; dM
;
; Floors are easier to compute, so we can rewrite this:
;
; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F
; j(i) = floor( ----------------------------------------------- ) - 1
; dM
;
; dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM
; = floor( ---------------------------------------------------- )
; dM
;
; dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F
; = floor( ---------------------------------------- )
; dM
;
; We can once again wave our hands and throw away the fractional bits
; of the remainder term:
;
; j(i) = floor( (dN i + gamma) / dM ) (4)
;
; where
;
; gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F ) (5)
;
; We now note that
;
; beta = -gamma - 1 = ~gamma (6)
;
; To draw the pixels of the line, we could evaluate (3) on every scan
; line to determine where the strip starts. Of course, we don't want
; to do that because that would involve a multiply and divide for every
; scan. So we do everything incrementally.
;
; We would like to easily compute c , the number of pixels on scan j:
; j
;
; c = iL(j + 1) - iL(j)
; j
;
; = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN) (7)
;
; This may be rewritten as
;
; c = floor(i + r / dN) - floor(i + r / dN) (8)
; j j+1 j+1 j j
;
; where i , i are integers and r < dN, r < dN.
; j j+1 j j+1
;
; Rewriting (7) again:
;
; c = floor(i + r / dN + dM / dN) - floor(i + r / dN)
; j j j j j
;
;
; = floor((r + dM) / dN) - floor(r / dN)
; j j
;
; This may be rewritten as
;
; c = dI + floor((r + dR) / dN) - floor(r / dN)
; j j j
;
; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.
;
; r is the remainder (or "error") term in the DDA loop: r / dN
; j j
; is the exact fraction of a pixel at which the strip ends. To go
; on to the next scan and compute c we need to know r .
; j+1 j+1
;
; So in the main loop of the DDA:
;
; c = dI + floor((r + dR) / dN) and r = (r + dR) % dN
; j j j+1 j
;
; and we know r < dN, r < dN, and dR < dN.
; j j+1
;
; We have derived the DDA only for lines in the first octant; to
; handle other octants we do the common trick of flipping the line
; to the first octant by first making the line left-to-right by
; exchanging the end-points, then flipping about the lines y = 0 and
; y = x, as necessary. We must record the transformation so we can
; undo them later.
;
; We must also be careful of how the flips affect our rounding. If
; to get the line to the first octant we flipped about x = 0, we now
; have to be careful to round a y value of 1/2 up instead of down as
; we would for a line originally in the first octant (recall that
; "In the case where two pels are equidistant, the upper or left
; pel is illuminated...").
;
; To account for this rounding when running the DDA, we shift the line
; (or not) in the y direction by the smallest amount possible. That
; takes care of rounding for the DDA, but we still have to be careful
; about the rounding when determining the first and last pixels to be
; lit in the line.
;
; Determining The First And Last Pixels In The Line
; -------------------------------------------------
;
; Fractional coordinates also make it harder to determine which pixels
; will be the first and last ones in the line. We've already taken
; the fractional coordinates into account in calculating the DDA, but
; the DDA cannot tell us which are the end pixels because it is quite
; happy to calculate pixels on the line from minus infinity to positive
; infinity.
;
; The diamond rule determines the start and end pixels. (Recall that
; the sides are exclusive except for the left and top vertices.)
; This convention can be thought of in another way: there are diamonds
; around the pixels, and wherever the true line crosses a diamond,
; that pel is illuminated.
;
; Consider a line where we've done the flips to the first octant, and the
; floor of the start coordinates is the origin:
;
; +-----------------------> +x
; |
; | 0 1
; | 0123456789abcdef
; |
; | 0 00000000?1111111
; | 1 00000000 1111111
; | 2 0000000 111111
; | 3 000000 11111
; | 4 00000 ** 1111
; | 5 0000 ****1
; | 6 000 1***
; | 7 00 1 ****
; | 8 ? ***
; | 9 22 3 ****
; | a 222 33 ***
; | b 2222 333 ****
; | c 22222 3333 **
; | d 222222 33333
; | e 2222222 333333
; | f 22222222 3333333
; |
; | 2 3
; v
; +y
;
; If the start of the line lands on the diamond around pixel 0 (shown by
; the '0' region here), pixel 0 is the first pel in the line. The same
; is true for the other pels.
;
; A little more work has to be done if the line starts in the
; 'nether-land' between the diamonds (as illustrated by the '*' line):
; the first pel lit is the first diamond crossed by the line (pixel 1 in
; our example). This calculation is determined by the DDA or slope of
; the line.
;
; If the line starts exactly half way between two adjacent pixels
; (denoted here by the '?' spots), the first pixel is determined by our
; round-down convention (and is dependent on the flips done to
; normalize the line).
;
; Last Pel Exclusive
; ------------------
;
; To eliminate repeatedly lit pels between continuous connected lines,
; we employ a last-pel exclusive convention: if the line ends exactly on
; the diamond around a pel, that pel is not lit. (This eliminates the
; checks we had in the old code to see if we were re-lighting pels.)
;
; The Half Flip
; -------------
;
; To make our run length algorithm more efficient, we employ a "half
; flip". If after normalizing to the first octant, the slope is more
; than 1/2, we subtract the y coordinate from the x coordinate. This
; has the effect of reflecting the coordinates through the line of slope
; 1/2. Note that the diagonal gets mapped into the x-axis after a half
; flip.
;
; How Many Bits Do We Need, Anyway?
; ---------------------------------
;
; Note that if the line is visible on your screen, you must light up
; exactly the correct pixels, no matter where in the 28.4 x 28.4 device
; space the end points of the line lie (meaning you must handle 32 bit
; DDAs, you can certainly have optimized cases for lesser DDAs).
;
; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we
; calculate gamma from (5), we know that 0 <= M0, N0 < F. And we
; are in the first octant, so dM >= dN. Then we know that gamma can
; be in the range [(-1/2)dM, (3/2)dM]. The DDI guarantees us that
; valid lines will have dM and dN values at most 31 bits (unsigned)
; of significance. So gamma requires 33 bits of significance (we store
; this as a 64 bit number for convenience).
;
; When running through the DDA loop, r + dR can have a value in the
; j
; range 0 <= r < 2 dN; thus the result must be a 32 bit unsigned value.
; j
;
; Testing Lines
; -------------
;
; To be NT compliant, a display driver must exactly adhere to GIQ,
; which means that for any given line, the driver must light exactly
; the same pels as does GDI. This can be tested using the Guiman tool
; provided elsewhere in the DDK, and 'ZTest', which draws random lines
; on the screen and to a bitmap, and compares the results.
;
; If You've Got Line Hardware
; ---------------------------
;
; If your hardware already adheres to GIQ, you're all set. Otherwise
; you'll want to look at the S3 sample code and read the following:
;
; 1) You'll want to special case integer-only lines, since they require
; less processing time and are more common (CAD programs will probably
; only ever give integer lines). GDI does not provide a flag saying
; that all lines in a path are integer lines; consequently, you will
; have to explicitly check every line.
;
; 2) You are required to correctly draw any line in the 28.4 device
; space that intersects the viewport. If you have less than 32 bits
; of significance in the hardware for the Bresenham terms, extremely
; long lines would overflow the hardware. For such (rare) cases, you
; can fall back to strip-drawing code, of which there is a C version in
; the S3's lines.cxx (or if your display is a frame buffer, fall back
; to the engine).
;
; 3) If you can explicitly set the Bresenham terms in your hardware, you
; can draw non-integer lines using the hardware. If your hardware has
; 'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)
; pels long (4 bits are required for the fractional part, and one bit is
; used as a sign bit). Note that integer lines don't require the 4
; fractional bits, so if you special case them as in 1), you can do
; integer lines that are up to 2^(n - 1) pels long. See the S3's
; fastline.asm for an example.
;
;-----------------------------------------------------------------------;
cProc bLines,36,< \
uses esi edi ebx, \
ppdev: ptr, \
pptfxFirst: ptr, \
pptfxBuf: ptr, \
prun: ptr, \
cptfx: dword, \
pls: ptr, \
prclClip: ptr, \
apfn: ptr, \
flStart: dword >
; ppdev: Surface data
; pptfxFirst: Start point of first line
; pptfxBuf: All subsequent points
; prun: Array of runs if doing complex clipping
; cptfx: Number of points in pptfxBuf (i.e., # lines)
; pls: Line state
; prclClip: Clip rectangle if doing simple clipping
; apfn: Pointer to table of strip drawers
; flStart: Flags for all lines
local cPelsAfterThisBank: dword ; For bank switching
local cStripsInNextRun: dword ; For bank switching
local pptfxBufEnd: ptr ; Last point in pptfxBuf
local M0: dword ; Normalized x0 in device coords
local dM: dword ; Delta-x in device coords
local N0: dword ; Normalized y0 in device coords
local dN: dword ; Delta-y in device coords
local fl: dword ; Flags for current line
local x: dword ; Normalized start pixel x-coord
local y: dword ; Normalized start pixel y-coord
local eqGamma_lo: dword ; Upper 32 bits of Gamma
local eqGamma_hi: dword ; Lower 32 bits of Gamma
local x0: dword ; Start pixel x-offset
local y0: dword ; Start pixel y-offset
local ulSlopeOneAdjustment: dword ; Special offset if line of slope 1
local cStylePels: dword ; # of pixels in line (before clip)
local xStart: dword ; Start pixel x-offset before clip
local pfn: ptr ; Pointer to strip drawing function
local cPels: dword ; # pixels to be drawn (after clip)
local i: dword ; # pixels in strip
local r: dword ; Remainder (or "error") term
local d_I: dword ; Delta-I
local d_R: dword ; Delta-R
local plStripEnd: ptr ; Last strip in buffer
local ptlStart[size POINTL]: byte ; Unnormalized start coord
local dN_Original: dword ; dN before half-flip
local xClipLeft: dword ; Left side of clip rectangle
local xClipRight: dword ; Right side of clip rectangle
local strip[size STRIPS]: byte ; Our strip buffer
; Do some initializing:
mov esi, pls
mov ecx, cptfx
mov edx, pptfxBuf
lea eax, [edx + ecx * (size POINTL) - (size POINTL)]
mov pptfxBufEnd, eax ; pptfxBufEnd is inclusive of end point
mov eax, [esi].LS_chAndXor ; copy chAndXor from LINESTATE to STRIPS
mov strip.ST_chAndXor, eax ; buffer
mov eax, [edx].ptl_x ; Load up end point (M1, N1)
mov edi, [edx].ptl_y
mov edx, pptfxFirst ; Load up start point (M0, N0)
mov esi, [edx].ptl_x
mov ecx, [edx].ptl_y
mov ebx, flStart
;-----------------------------------------------------------------------;
; Flip to the first octant. ;
;-----------------------------------------------------------------------;
; Register state: esi = M0
; ecx = N0
; eax = dM (M1)
; edi = dN (N1)
; ebx = fl
; Make sure we go left to right:
public the_main_loop
the_main_loop::
cmp esi, eax
jle short is_left_to_right ; skip if M0 <= M1
xchg esi, eax ; swap M0, M1
xchg ecx, edi ; swap N0, N1
or ebx, FL_FLIP_H
is_left_to_right:
; Compute the deltas, remembering that the DDI says we should get
; deltas less than 2^31. If we get more, we ensure we don't crash
; later on by simply skipping the line:
sub eax, esi ; eax = dM
jo next_line ; dM must be less than 2^31
sub edi, ecx ; edi = dN
jo next_line ; dN must be less than 2^31
jge short is_top_to_bottom ; skip if dN >= 0
neg ecx ; N0 = -N0
neg edi ; N1 = -N1
or ebx, FL_FLIP_V
is_top_to_bottom:
cmp edi, eax
jb short done_flips ; skip if dN < dM
jne short slope_more_than_one
; We must special case slopes of one (because of our rounding convention):
or ebx, FL_FLIP_SLOPE_ONE
jmp short done_flips
slope_more_than_one:
xchg eax, edi ; swap dM, dN
xchg esi, ecx ; swap M0, N0
or ebx, FL_FLIP_D
done_flips:
mov edx, ebx
and edx, FL_ROUND_MASK
.errnz FL_ROUND_SHIFT - 2
or ebx, [gaflRoundTable + edx] ; get our rounding flags
mov dM, eax ; save some info
mov dN, edi
mov fl, ebx
; We're going to shift our origin so that it's at the closest integer
; coordinate to the left/above our fractional start point (it makes
; the math quicker):
mov edx, esi ; x = LFLOOR(M0)
sar edx, FLOG2
mov x, edx
mov edx, ecx ; y = LFLOOR(N0)
sar edx, FLOG2
mov y, edx
;-----------------------------------------------------------------------;
; Compute the fractional remainder term ;
;-----------------------------------------------------------------------;
; By shifting the origin we've contrived to eliminate the integer
; portion of our fractional start point, giving us start point
; fractional coordinates in the range [0, F - 1]:
and esi, F - 1 ; M0 = FXFRAC(M0)
and ecx, F - 1 ; N0 = FXFRAC(N0)
; We now compute Gamma:
mov M0, esi ; save M0, N0 for later
mov N0, ecx
lea edx, [ecx + F/2]
mul edx ; [edx:eax] = dM * (N0 + F/2)
xchg eax, edi
mov ecx, edx ; [ecx:edi] = dM * (N0 + F/2)
; (we just nuked N0)
mul esi ; [edx:eax] = dN * M0
; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown
.errnz FL_V_ROUND_DOWN - 8000h
ror bh, 8
sbb edi, eax
sbb ecx, edx
shrd edi, ecx, FLOG2
sar ecx, FLOG2 ; gamma = [ecx:edi] >>= 4
mov eqGamma_hi, ecx
mov eqGamma_lo, edi
mov eax, N0
; Register state:
; eax = N0
; ebx = fl
; ecx = eqGamma_hi
; edx = garbage
; esi = M0
; edi = eqGamma_lo
testb ebx, FL_FLIP_H
jnz line_runs_right_to_left
;-----------------------------------------------------------------------;
; Figure out which pixels are at the ends of a left-to-right line. ;
; --------> ;
;-----------------------------------------------------------------------;
public line_runs_left_to_right
line_runs_left_to_right::
or esi, esi
jz short LtoR_check_slope_one
; skip ahead if M0 == 0
; (in that case, x0 = 0 which is to be
; kept in esi, and is already
; conventiently zero)
or eax, eax
jnz short LtoR_N0_not_zero
.errnz FL_H_ROUND_DOWN - 80h
ror bl, 8
sbb esi, -F/2
shr esi, FLOG2
jmp short LtoR_check_slope_one
; esi = x0 = rounded M0
LtoR_N0_not_zero:
sub eax, F/2
sbb edx, edx
xor eax, edx
sub eax, edx
cmp esi, eax
sbb esi, esi
inc esi ; esi = x0 = (abs(N0 - F/2) <= M0)
public LtoR_check_slope_one
LtoR_check_slope_one::
mov ulSlopeOneAdjustment, 0
mov eax, ebx
and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
cmp eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
jne short LtoR_compute_y0_from_x0
; We have to special case lines that are exactly of slope 1 or -1:
;
; if (M1 > 0) AMD (N1 == M1 + 8)
;
mov eax, N0
add eax, dN
and eax, F - 1 ; eax = N1
mov edx, M0
add edx, dM
and edx, F - 1 ; edx = M1
jz short LtoR_slope_one_check_start_point
add edx, F/2 ; M1 + 8
cmp edx, eax ; cmp N1, M1 + 8
jne short LtoR_slope_one_check_start_point
mov ulSlopeOneAdjustment, -1
LtoR_slope_one_check_start_point:
;
; if (M0 > 0) AMD (N0 == M0 + 8)
;
mov eax, M0
or eax, eax
jz short LtoR_compute_y0_from_x0
add eax, F/2
cmp eax, N0 ; cmp M0 + 8, N0
jne short LtoR_compute_y0_from_x0
xor esi, esi ; x0 = 0
LtoR_compute_y0_from_x0:
; ecx = eqGamma_hi
; esi = x0
; edi = eqGamma_lo
mov eax, dN
mov edx, dM
mov x0, esi
mov y0, 0
cmp ecx, 0
jl short LtoR_compute_x1
neg esi
and esi, eax
sub edx, esi
cmp edi, edx
mov edx, dM
jb short LtoR_compute_x1 ; Bug fix: Must be unsigned!
mov y0, 1 ; y0 = floor((dN * x0 + eqGamma) / dM)
LtoR_compute_x1:
; Register state:
; eax = dN
; ebx = fl
; ecx = garbage
; edx = dM
; esi = garbage
; edi = garbage
mov esi, M0
add esi, edx
mov ecx, esi
shr esi, FLOG2
dec esi ; x1 = ((M0 + dM) >> 4) - 1
add esi, ulSlopeOneAdjustment
and ecx, F-1 ; M1 = (M0 + dM) & 15
jz done_first_pel_last_pel
add eax, N0
and eax, F-1 ; N1 = (N0 + dN) & 15
jnz short LtoR_N1_not_zero
.errnz FL_H_ROUND_DOWN - 80h
ror bl, 8
sbb ecx, -F/2
shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
add esi, ecx
jmp done_first_pel_last_pel
LtoR_N1_not_zero:
sub eax, F/2
sbb edx, edx
xor eax, edx
sub eax, edx
cmp eax, ecx
jg done_first_pel_last_pel
inc esi
jmp done_first_pel_last_pel
;-----------------------------------------------------------------------;
; Figure out which pixels are at the ends of a right-to-left line. ;
; <-------- ;
;-----------------------------------------------------------------------;
; Compute x0:
public line_runs_right_to_left
line_runs_right_to_left::
mov x0, 1 ; x0 = 1
or eax, eax
jnz short RtoL_N0_not_zero
xor edx, edx ; ulDelta = 0
.errnz FL_H_ROUND_DOWN - 80h
ror bl, 8
sbb esi, -F/2
shr esi, FLOG2 ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)
jz short RtoL_check_slope_one
mov x0, 2
mov edx, dN
jmp short RtoL_check_slope_one
RtoL_N0_not_zero:
sub eax, F/2
sbb edx, edx
xor eax, edx
sub eax, edx
add eax, esi ; eax = ABS(N0 - F/2) + M0
xor edx, edx ; ulDelta = 0
cmp eax, F
jle short RtoL_check_slope_one
mov x0, 2 ; x0 = 2
mov edx, dN ; ulDelta = dN
public RtoL_check_slope_one
RtoL_check_slope_one::
mov ulSlopeOneAdjustment, 0
mov eax, ebx
and eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN
cmp eax, FL_FLIP_SLOPE_ONE
jne short RtoL_compute_y0_from_x0
; We have to special case lines that are exactly of slope 1 or -1:
;
; if ((N1 > 0) && (M1 == N1 + 8))
;
mov eax, N0
add eax, dN
and eax, F - 1 ; eax = N1
jz short RtoL_slope_one_check_start_point
mov esi, M0
add esi, dM
and esi, F - 1 ; esi = M1
add eax, F/2 ; N1 + 8
cmp esi, eax ; cmp M1, N1 + 8
jne short RtoL_slope_one_check_start_point
mov ulSlopeOneAdjustment, 1
RtoL_slope_one_check_start_point:
;
; if ((N0 > 0) && (M0 == N0 + 8))
;
mov eax,N0 ; eax = N0
or eax,eax ; check for N0 == 0
jz short RtoL_compute_y0_from_x0
mov esi, M0 ; esi = M0
add eax, F/2 ; N0 + 8
cmp eax, esi ; cmp M0 , N0 + 8
jne short RtoL_compute_y0_from_x0
mov x0, 2 ; x0 = 2
mov edx, dN ; ulDelta = dN
RtoL_compute_y0_from_x0:
; eax = garbage
; ebx = fl
; ecx = eqGamma_hi
; edx = ulDelta
; esi = garbage
; edi = eqGamma_lo
mov eax, dN ; eax = dN
mov y0, 0 ; y0 = 0
add edi, edx
adc ecx, 0 ; eqGamma += ulDelta
; NOTE: Setting flags here!
mov edx, dM ; edx = dM
jl short RtoL_compute_x1 ; NOTE: Looking at the flags here!
jg short RtoL_y0_is_2
lea ecx, [edx + edx]
sub ecx, eax ; ecx = 2 * dM - dN
cmp edi, ecx
jae short RtoL_y0_is_2 ; Bug fix: Must be unsigned!
sub ecx, edx ; ecx = dM - dN
cmp edi, ecx
jb short RtoL_compute_x1 ; Bug fix: Must be unsigned!
mov y0, 1
jmp short RtoL_compute_x1
RtoL_y0_is_2:
mov y0, 2
RtoL_compute_x1:
; Register state:
; eax = dN
; ebx = fl
; ecx = garbage
; edx = dM
; esi = garbage
; edi = garbage
mov esi, M0
add esi, edx
mov ecx, esi
shr esi, FLOG2 ; x1 = (M0 + dM) >> 4
add esi, ulSlopeOneAdjustment
and ecx, F-1 ; M1 = (M0 + dM) & 15
add eax, N0
and eax, F-1 ; N1 = (N0 + dN) & 15
jnz short RtoL_N1_not_zero
.errnz FL_H_ROUND_DOWN - 80h
ror bl, 8
sbb ecx, -F/2
shr ecx, FLOG2 ; ecx = LROUND(M1, fl & FL_ROUND_DOWN)
add esi, ecx
jmp done_first_pel_last_pel
RtoL_N1_not_zero:
sub eax, F/2
sbb edx, edx
xor eax, edx
sub eax, edx
add eax, ecx ; eax = ABS(N1 - F/2) + M1
cmp eax, F+1
sbb esi, -1
done_first_pel_last_pel:
; Register state:
; eax = garbage
; ebx = fl
; ecx = garbage
; edx = garbage
; esi = x1
; edi = garbage
mov ecx, x0
lea edx, [esi + 1]
sub edx, ecx ; edx = x1 - x0 + 1
jle next_line
mov cStylePels, edx
mov xStart, ecx
;-----------------------------------------------------------------------;
; See if clipping or styling needs to be done. ;
;-----------------------------------------------------------------------;
testb ebx, FL_CLIP
jnz do_some_clipping
; Register state:
; eax = garbage
; ebx = fl
; ecx = x0 (stack variable correct too)
; edx = garbage
; esi = x1
; edi = garbage
done_clipping:
mov eax, y0
sub esi, ecx
inc esi ; esi = cPels = x1 - x0 + 1
mov cPels, esi
mov esi, ppdev
add ecx, x ; ecx = ptlStart.ptl_x
add eax, y ; eax = ptlStart.ptl_y
mov esi, [esi].pdev_lNextScan ; we'll compute the sign of lNextScan
testb ebx, FL_FLIP_D
jz short do_v_unflip
xchg ecx, eax
do_v_unflip:
testb ebx, FL_FLIP_V
jz short done_unflips
neg eax
neg esi
done_unflips:
mov strip.ST_lNextScan, esi ; lNextScan now right for y-direction
testb ebx, FL_STYLED
jnz do_some_styling
done_styling:
lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
mov plStripEnd, edx
mov cPelsAfterThisBank, 0
mov cStripsInNextRun, 7fffffffh
;-----------------------------------------------------------------------;
; Do banking setup. ;
;-----------------------------------------------------------------------;
public bank_setup
bank_setup::
; Register state:
; eax = ptlStart.ptl_y
; ebx = fl
; ecx = ptlStart.ptl_x
; edx = garbage
; esi = garbage
; edi = garbage
mov esi, ppdev
cmp eax, [esi].pdev_rcl1WindowClip.yTop
jl short bank_get_initial_bank ; ptlStart.y < rcl1WindowClip.yTop
cmp eax, [esi].pdev_rcl1WindowClip.yBottom
jl short bank_got_initial_bank ; ptlStart.y < rcl1WindowClip.yBot
bank_get_initial_bank:
mov ptlStart.ptl_y, eax ; Save ptlStart.ptl_y
mov edi, ecx ; Save ptlStart.ptl_x
.errnz JustifyTop
.errnz JustifyBottom - 1
.errnz FL_FLIP_V - 8
mov ecx, ebx ; JustifyTop if line goes down,
shr ecx, 3 ; JustifyBottom if line goes up
and ecx, 1
bank_justified:
ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
<esi, eax, ecx>
mov eax, ptlStart.ptl_y
mov ecx, edi
bank_got_initial_bank:
testb ebx, FL_FLIP_D
jz short bank_major_x
bank_major_y:
testb ebx, FL_FLIP_V
jz short bank_major_y_down
bank_major_y_up:
lea edi, [eax + 1]
sub edi, [esi].pdev_rcl1WindowClip.yTop
jmp short bank_done_y_major
bank_major_y_down:
mov edi, [esi].pdev_rcl1WindowClip.yBottom
sub edi, eax
bank_done_y_major:
mov esi, cPels
sub esi, edi ; edi = cPelsInBank
mov cPelsAfterThisBank, esi
jle short done_bank_setup
mov cPels, edi
jmp short done_bank_setup
bank_major_x:
mov edi, dN
shr edi, FLOG2
add edi, y
; We're guessing at the y-position of the end pixel (it's too much work
; to compute the actual value) to see if the line spans more than one
; bank. We have to add at least a slop value of '3' because the actual
; start pixel may be may 2 off from 'y' because of end-pixel exclusiveness,
; and we have to add 1 more because we're taking the floor of (dN / F), to
; account for rounding:
add edi, 3 ; yEnd = edi = y + LFLOOR(dN) + 3
testb ebx, FL_FLIP_V
jz short bank_major_x_down
bank_major_x_up:
mov edx, 1
sub edx, [esi].pdev_rcl1WindowClip.yTop ; edx = -yNextBankStart
cmp edi, edx
lea edx, [edx + eax] ; edx = cStripsInNextRun
jl short bank_major_x_done
; Line may go over bank boundary, so don't do a half flip:
or ebx, FL_DONT_DO_HALF_FLIP
jmp short bank_major_x_done
bank_major_x_down:
mov esi, [esi].pdev_rcl1WindowClip.yBottom ; esi = yNextBankStart
mov edx, esi
sub edx, eax ; edx = cStripsInNextRun
cmp edi, esi
jl short bank_major_x_done
or ebx, FL_DONT_DO_HALF_FLIP
bank_major_x_done:
sub edx, STRIP_MAX
mov cStripsInNextRun, edx
jge short done_bank_setup
lea edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)]
mov plStripEnd, edx
done_bank_setup:
;-----------------------------------------------------------------------;
; Setup to do DDA. ;
;-----------------------------------------------------------------------;
; Register state:
; eax = ptlStart.ptl_y
; ebx = fl
; ecx = ptlStart.ptl_x
; edx = garbage
; esi = garbage
; edi = garbage
mov esi, ppdev
mov edi, eax ; Now edi = ptlStart.ptl_y
imul [esi].pdev_lNextScan
add eax, [esi].pdev_pvBitmapStart
add eax, ecx
mov strip.ST_pjScreen, eax ; pjScreen = pchBits + ptlStart.y *
; cjDelta + ptlStart.x
mov eax, dM
mov ecx, dN
mov esi, eqGamma_lo
mov edi, eqGamma_hi
; Register state:
; eax = dM
; ebx = fl
; ecx = dN
; edx = garbage
; esi = eqGamma_lo
; edi = eqGamma_hi
lea edx, [ecx + ecx] ; if (2 * dN > dM)
cmp edx, eax
mov edx, y0 ; Load y0 again
jbe short after_half_flip
test ebx, FL_DONT_DO_HALF_FLIP
jnz short after_half_flip
or ebx, FL_FLIP_HALF
mov fl, ebx
; Do a half flip!
not esi
not edi
add esi, eax
adc edi, 0 ; eqGamma = -eqGamma - 1 + dM
neg ecx
add ecx, eax ; dN = dM - dN
neg edx
add edx, x0 ; y0 = x0 - y0
after_half_flip:
mov strip.ST_flFlips, ebx
and ebx, FL_STRIP_MASK
.errnz FL_STRIP_SHIFT
mov eax, apfn
lea eax, [eax + ebx * 4]
mov eax, [eax]
mov pfn, eax
mov eax, dM
; Register state:
; eax = dM
; ebx = garbage
; ecx = dN
; edx = y0
; esi = eqGamma_lo
; edi = eqGamma_hi
or ecx, ecx
jz short zero_slope
compute_dda_stuff:
inc edx
mul edx
stc ; set the carry to accomplish -1
sbb eax, esi
sbb edx, edi ; (y0 + 1) * dM - eqGamma - 1
div ecx
mov esi, eax ; esi = i
mov edi, edx ; edi = r
xor edx, edx
mov eax, dM
div ecx ; edx = d_R, eax = d_I
mov d_I, eax
sub esi, x0
inc esi
done_dda_stuff:
lea eax, [strip.ST_alStrips]
mov ebx, cPels
;-----------------------------------------------------------------------;
; Do our main DDA loop. ;
;-----------------------------------------------------------------------;
sub edi, ecx ; offset remainder term from [0..dN)
; to [-dN..0) so test in inner
; loop is quicker
; Register state:
; eax = plStrip ; current pointer into strip array
; ebx = cPels ; total number of pels in line
; ecx = dN ; delta-N = rise in line
; edx = d_R ; d_I + d_R/dN = exact strip length
; esi = i ; length of current strip
; edi = r ; remainder term for current strip
; ; in range [-dN..0)
public dda_loop
dda_loop::
sub ebx, esi ; subtract strip length from line length
jle final_strip ; if negative, done with line
mov [eax], esi ; write strip length to strip array
add eax, 4
cmp plStripEnd, eax ; is the strip array buffer full?
jbe short output_strips ; if so, empty it
; The output_strips routine jumps to here when done:
done_output_strips:
mov esi, d_I ; our normal strip length
add edi, edx ; adjust our remainder term
jl short dda_loop
sub edi, ecx ; our remainder became 1 or more, so
inc esi ; we increment this strip length
; and adjust the remainder term
; We've unrolled our loop a bit, so this should look familiar to the above:
sub ebx, esi ; subtract strip length from line length
jle final_strip ; if negative, done with line
mov [eax], esi ; write strip length to strip array
add eax, 4 ; adjust strip pointer
; Note that banking requires us to check if the strip array is full here
; too (and note that if output_strips is called it will return to
; done_output_strips):
cmp plStripEnd, eax
jbe short output_strips
mov esi, d_I ; our normal strip length
add edi, edx ; adjust our remainder term
jl short dda_loop
sub edi, ecx ; our remainder became 1 or more, so
inc esi ; adjust
jmp short dda_loop
zero_slope:
mov esi, 7fffffffh
jmp short done_dda_stuff
;-----------------------------------------------------------------------;
; Empty strips buffer & possibly do x-major bank switch. ;
;-----------------------------------------------------------------------;
output_strips:
mov d_R, edx
mov cPels, ebx
mov i, esi
mov r, edi
mov dN, ecx
lea edx, [strip]
mov ecx, pls
; Call our strip routine:
ptrCall <dword ptr pfn>, \
<edx, ecx, eax>
; It may be that we ran out of run in our strips buffer, and don't
; actually have to switch banks. See if that's the case:
mov eax, cStripsInNextRun
or eax, eax
jg short done_strip_bank_switch
; We have to switch banks. See if we're going up or down:
mov esi, ppdev
test fl, FL_FLIP_V
jz short bank_x_down
bank_x_up:
mov edi, strip.ST_pjScreen
sub edi, [esi].pdev_pvBitmapStart
mov ebx, [esi].pdev_rcl1WindowClip.yTop
dec ebx ; we want yTop - 1 to be mapped in
; Map in the next higher bank:
ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
<esi, ebx, JustifyBottom>; ebx, esi and edi are preserved
lea eax, [ebx + 1]
sub eax, [esi].pdev_rcl1WindowClip.yTop
; eax = # of scans can do in bank
add edi, [esi].pdev_pvBitmapStart
mov strip.ST_pjScreen, edi
jmp short done_strip_bank_switch
bank_x_down:
mov edi, strip.ST_pjScreen
sub edi, [esi].pdev_pvBitmapStart
mov ebx, [esi].pdev_rcl1WindowClip.yBottom
; Map in the next lower bank:
ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
<esi, ebx, JustifyTop> ; ebx, esi and edi are preserved
mov eax, [esi].pdev_rcl1WindowClip.yBottom
sub eax, ebx ; eax = # scans can do in bank
add edi, [esi].pdev_pvBitmapStart
mov strip.ST_pjScreen,edi
done_strip_bank_switch:
; eax = cStripsInNextRun
lea edx, [strip.ST_alStrips + (STRIP_MAX * 4)]
sub eax, STRIP_MAX
mov cStripsInNextRun, eax
jge short get_ready_for_more_strips
lea edx, [edx + eax * 4]
get_ready_for_more_strips:
mov plStripEnd, edx
mov esi, i
mov edi, r
mov ebx, cPels
mov edx, d_R
mov ecx, dN
lea eax, [strip.ST_alStrips]
jmp done_output_strips
;-----------------------------------------------------------------------;
; Empty strips buffer. Either get new line or do y-major bank switch. ;
;-----------------------------------------------------------------------;
final_strip:
add ebx, esi
mov [eax], ebx
add eax, 4
cmp cPelsAfterThisBank, 0
jg short bank_y_major
very_final_strip:
lea edx, [strip]
mov ecx, pls
ptrCall <dword ptr pfn>, \
<edx, ecx, eax>
; NOTE: next_line is jumped to from various places, and it cannot assume
; any registers are loaded.
next_line:
mov ebx, flStart
testb ebx, FL_COMPLEX_CLIP
jnz short see_if_done_complex_clipping
mov edx, pptfxBuf
cmp edx, pptfxBufEnd
je short all_done
mov esi, [edx].ptl_x
mov ecx, [edx].ptl_y
add edx, size POINTL
mov pptfxBuf, edx
mov eax, [edx].ptl_x
mov edi, [edx].ptl_y
jmp the_main_loop
all_done:
mov eax, 1
cRet bLines
see_if_done_complex_clipping:
mov ebx, fl
dec cptfx
jz short all_done
and ebx, NOT FL_FLIP_HALF ; Make sure the next run doesn't have
mov fl, ebx ; to do a half-flip if it doesn't
; want to
jmp continue_complex_clipping
;-----------------------------------------------------------------------;
; Switch banks for a y-major line. ;
;-----------------------------------------------------------------------;
public bank_y_major
bank_y_major::
mov d_R, edx
mov i, esi
mov r, edi
mov dN, ecx
sub ebx, esi ; Undo our offset
bank_y_output_strips:
lea edx, [strip]
mov ecx, pls
ptrCall <dword ptr pfn>, \
<edx, ecx, eax>
mov esi, ppdev
test fl, FL_FLIP_V
jz short bank_y_down
bank_y_up:
mov edi, strip.ST_pjScreen
sub edi, [esi].pdev_pvBitmapStart
mov ecx, [esi].pdev_rcl1WindowClip.yTop
push ecx
dec ecx ; we want yTop - 1 to be mapped in
; Map in the next higher bank:
ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
<esi, ecx, JustifyBottom>; ebx, esi and edi are preserved
pop ecx
sub ecx, [esi].pdev_rcl1WindowClip.yTop
; ecx = # of scans can do in bank
add edi, [esi].pdev_pvBitmapStart
mov strip.ST_pjScreen, edi
mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
lea eax, [strip.ST_alStrips] ; eax = plStrip
or ebx, ebx ; ebx = cPels
jge bank_y_done_partial_strip
jmp short bank_y_done_switch
bank_y_down:
mov edi, strip.ST_pjScreen
sub edi, [esi].pdev_pvBitmapStart
mov ecx, [esi].pdev_rcl1WindowClip.yBottom
push ecx
; Map in the next lower bank:
ptrCall <dword ptr [esi].pdev_pfnBankControl>, \
<esi, ecx, JustifyTop> ; ebx, esi and edi are preserved
pop eax
mov ecx, [esi].pdev_rcl1WindowClip.yBottom
sub ecx, eax ; ecx = # scans can do in bank
add edi, [esi].pdev_pvBitmapStart
mov strip.ST_pjScreen,edi
mov edx, cPelsAfterThisBank ; edx = cPelsAfterBank
lea eax, [strip.ST_alStrips] ; eax = plStrip
or ebx, ebx ; ebx = cPels
jge short bank_y_done_partial_strip
bank_y_done_switch:
; Handle a single strip stretching over multiple banks:
test fl, FL_FLIP_HALF
jz short bank_y_no_half_flip
; We now have to adjust for the fact that the strip drawers always leave
; the state ready for the next new strip (e.g., if we're doing vertical
; strips, it advances pjScreen one to the right after drawing each strip).
; But the problem is that since we crossed a bank, we have to continue the
; *old* strip, so we have to undo that advance:
bank_y_half_flip:
inc strip.ST_pjScreen
jmp short bank_y_done_bit_adjust
bank_y_no_half_flip:
dec strip.ST_pjScreen
bank_y_done_bit_adjust:
mov esi, ebx
neg esi ; esi = # pels left in strip
; eax = pointer to first strip entry
; ebx = negative esi
; ecx = # of pels we can put down in this window
; edx = # of pels remaining to do in line
; esi = # of pels left in strip
; We have three special cases to check here:
;
; 1) If the strip spans the entire next window
; 2) This is the last strip in the line
; 3) Neither of the above
cmp edx,ecx ;if line shorter than bank,
jle short bank_y_check_if_last_strip; know strip doesn't span bank
cmp esi,ecx ;if line spans bank, don't have
jl short bank_y_continue_strip ; to check if last strip
; If ((# of pels in line > window size) && (# of pels in strip > window size))
; then the strip spans this bank:
mov [eax], ecx
add eax, 4
add ebx, ecx
sub edx, ecx
mov cPelsAfterThisBank, edx
jmp bank_y_output_strips
bank_y_check_if_last_strip:
cmp esi, edx ;if strip is shorter than line,
jl short bank_y_continue_strip ; we know this isn't the last
; strip
; Handle case where this is the last strip in the line and it overlaps a bank:
mov [eax], edx
add eax, 4
jmp very_final_strip
bank_y_continue_strip:
mov [eax], esi
add eax, 4
bank_y_done_partial_strip:
add ebx, edx ; cPels += cPelsAfterThisBank
sub edx, ecx ; cPelsAfterThisBank -= cyWindow
jle short bank_y_get_ready
sub ebx, edx
bank_y_get_ready:
mov cPelsAfterThisBank, edx
mov edi, r
mov edx, d_R
mov ecx, dN
jmp done_output_strips
;---------------------------Private-Routine-----------------------------;
; do_some_styling
;
; Inputs:
; eax = ptlStart.ptl_y
; ebx = fl
; ecx = ptlStart.ptl_x
; Preserves:
; eax, ebx, ecx
; Output:
; Exits to done_styling.
;
;-----------------------------------------------------------------------;
public do_some_styling
do_some_styling::
mov esi, pls
mov ptlStart.ptl_x, ecx
mov edi, [esi].LS_spNext ; spThis
mov edx, edi
add edx, cStylePels ; spNext
do_non_alternate_style:
; For styles, we don't bother to keep the style position normalized.
; (we do ensure that it's positive, though). If a figure is over 2
; billion pels long, we'll be a pel off in our style state (oops!).
and edx, 7fffffffh
mov [esi].LS_spNext, edx
mov ptlStart.ptl_y, eax
testb ebx, FL_FLIP_H
jz short arbitrary_left_to_right
sub edx, x0
add edx, xStart
mov eax, edx
xor edx, edx
div [esi].LS_spTotal
neg edx
jge short continue_right_to_left
add edx, [esi].LS_spTotal
not eax
continue_right_to_left:
mov edi, dword ptr [esi].LS_bStartIsGap
not edi
mov ecx, [esi].LS_aspRtoL
jmp short compute_arbitrary_stuff
arbitrary_left_to_right:
add edi, x0
sub edi, xStart
mov eax, edi
xor edx, edx
div [esi].LS_spTotal
mov edi, dword ptr [esi].LS_bStartIsGap
mov ecx, [esi].LS_aspLtoR
compute_arbitrary_stuff:
; eax = sp / spTotal
; ebx = fl
; ecx = pspStart
; edx = sp % spTotal
; esi = pls
; edi = bIsGap
and eax, [esi].LS_cStyle ; if odd length style and second run
and al, 1 ; through style array, flip the
jz short odd_style_array_done ; meaning of the elements
not edi
odd_style_array_done:
mov eax, [esi].LS_cStyle
mov strip.ST_pspStart, ecx
lea eax, [ecx + eax * 4 - 4]
mov strip.ST_pspEnd, eax
find_psp:
sub edx, [ecx]
jl short found_psp
add ecx, 4
jmp short find_psp
found_psp:
mov strip.ST_psp, ecx
neg edx
mov strip.ST_spRemaining, edx
sub ecx, strip.ST_pspStart
test ecx, 4 ; size STYLEPOS
jz short done_arbitrary
not edi
done_arbitrary:
mov dword ptr strip.ST_bIsGap, edi
mov eax, ptlStart.ptl_y
mov ecx, ptlStart.ptl_x
jmp done_styling
;---------------------------Private-Routine-----------------------------;
; do_some_clipping
;
; Inputs:
; eax = garbage
; ebx = fl
; ecx = x0
; edx = garbage
; esi = x1
; edi = garbage
;
; Decides whether to do simple or complex clipping.
;
;-----------------------------------------------------------------------;
public do_some_clipping
do_some_clipping::
testb ebx, FL_COMPLEX_CLIP
jnz initialize_complex_clipping
;-----------------------------------------------------------------------;
; simple_clipping
;
; Inputs:
; ebx = fl
; ecx = x0
; esi = x1
; Output:
; ebx = fl
; ecx = new x0 (stack variable updated too)
; esi = new x1
; y0 stack variable updated
; Uses:
; All registers
; Exits:
; to done_clipping
;
; This routine handles clipping the line to the clip rectangle (it's
; faster to handle this case in the driver than to call the engine to
; clip for us).
;
; Fractional end-point lines complicate our lives a bit when doing
; clipping:
;
; 1) For styling, we must know the unclipped line's length in pels, so
; that we can correctly update the styling state when the line is
; clipped. For this reason, I do clipping after doing the hard work
; of figuring out which pixels are at the ends of the line (this is
; wasted work if the line is not styled and is completely clipped,
; but I think it's simpler this way). Another reason is that we'll
; have calculated eqGamma already, which we use for the intercept
; calculations.
;
; With the assumption that most lines will not be completely clipped
; away, this strategy isn't too painful.
;
; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of
; the line.
;
; 3) We know x0, y0 and x1, but not y1. We haven't needed to calculate
; y1 until now. We'll need the actual value, and not an upper bound
; like y1 = LFLOOR(dM) + 2 because we have to be careful when
; calculating x(y) that y0 <= y <= y1, otherwise we can cause an
; overflow on the divide (which, needless to say, is bad).
;
;-----------------------------------------------------------------------;
public simple_clipping
simple_clipping::
mov edi, prclClip ; get pointer to normalized clip rect
and ebx, FL_RECTLCLIP_MASK ; (it's lower-right exclusive)
.errnz (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr
.errnz (size RECTL) - 16 ; FL_RECTLCLIP_SHIFT) is our index
lea edi, [edi + ebx*4] ; into the array of rectangles
mov edx, [edi].xRight ; load the rect coordinates
mov eax, [edi].xLeft
mov ebx, [edi].yBottom
mov edi, [edi].yTop
; Translate to our origin and so some quick completely clipped tests:
sub edx, x
cmp ecx, edx
jge totally_clipped ; totally clipped if x0 >= xRight
sub eax, x
cmp esi, eax
jl totally_clipped ; totally clipped if x1 < xLeft
sub ebx, y
cmp y0, ebx
jge totally_clipped ; totally clipped if y0 >= yBottom
sub edi, y
; Save some state:
mov xClipRight, edx
mov xClipLeft, eax
cmp esi, edx ; if (x1 >= xRight) x1 = xRight - 1
jl short calculate_y1
lea esi, [edx - 1]
calculate_y1:
mov eax, esi ; y1 = (x1 * dN + eqGamma) / dM
mul dN
add eax, eqGamma_lo
adc edx, eqGamma_hi
div dM
cmp edi, eax ; if (yTop > y1) clipped
jg short totally_clipped
cmp ebx, eax ; if (yBottom > y1) know x1
jg short x1_computed
mov eax, ebx ; x1 = (yBottom * dM + eqBeta) / dN
mul dM
stc
sbb eax, eqGamma_lo
sbb edx, eqGamma_hi
div dN
mov esi, eax
; At this point, we've taken care of calculating the intercepts with the
; right and bottom edges. Now we work on the left and top edges:
x1_computed:
mov edx, y0
mov eax, xClipLeft ; don't have to compute y intercept
cmp eax, ecx ; at left edge if line starts to
jle short top_intercept ; right of left edge
mov ecx, eax ; x0 = xLeft
mul dN ; y0 = (xLeft * dN + eqGamma) / dM
add eax, eqGamma_lo
adc edx, eqGamma_hi
div dM
cmp ebx, eax ; if (yBottom <= y0) clipped
jle short totally_clipped
mov edx, eax
mov y0, eax
top_intercept:
mov ebx, fl ; get ready to leave
mov x0, ecx
cmp edi, edx ; if (yTop <= y0) done clipping
jle done_clipping
mov eax, edi ; x0 = (yTop * dM + eqBeta) / dN + 1
mul dM
stc
sbb eax, eqGamma_lo
sbb edx, eqGamma_hi
div dN
lea ecx, [eax + 1]
cmp xClipRight, ecx ; if (xRight <= x0) clipped
jle short totally_clipped
mov y0, edi ; y0 = yTop
mov x0, ecx
jmp done_clipping ; all done!
totally_clipped:
; The line is completely clipped. See if we have to update our style state:
mov ebx, fl
testb ebx, FL_STYLED
jz next_line
; Adjust our style state:
mov esi, pls
mov eax, [esi].LS_spNext
add eax, cStylePels
mov [esi].LS_spNext, eax
cmp eax, [esi].LS_spTotal2
jb next_line
; Have to normalize first:
xor edx, edx
div [esi].LS_spTotal2
mov [esi].LS_spNext, edx
jmp next_line
;-----------------------------------------------------------------------;
initialize_complex_clipping:
mov eax, dN ; save a copy of original dN
mov dN_Original, eax
;---------------------------Private-Routine-----------------------------;
; continue_complex_clipping
;
; Inputs:
; ebx = fl
; Output:
; ebx = fl
; ecx = x0
; esi = x1
; Uses:
; All registers.
; Exits:
; to done_clipping
;
; This routine handles the necessary initialization for the next
; run in the CLIPLINE structure.
;
; NOTE: This routine is jumped to from two places!
;-----------------------------------------------------------------------;
public continue_complex_clipping
continue_complex_clipping::
mov edi, prun
mov ecx, xStart
testb ebx, FL_FLIP_H
jz short complex_left_to_right
complex_right_to_left:
; Figure out x0 and x1 for right-to-left lines:
add ecx, cStylePels
dec ecx
mov esi, ecx ; esi = ecx = xStart + cStylePels - 1
sub ecx, [edi].RUN_iStop ; New x0
sub esi, [edi].RUN_iStart ; New x1
jmp short complex_reset_variables
complex_left_to_right:
; Figure out x0 and x1 for left-to-right lines:
mov esi, ecx ; esi = ecx = xStart
add ecx, [edi].RUN_iStart ; New x0
add esi, [edi].RUN_iStop ; New x1
complex_reset_variables:
mov x0, ecx
; The half flip mucks with some of our variables, and we have to reset
; them every pass. We would have to reset eqGamma too, but it never
; got saved to memory in its modified form.
add edi, size RUN
mov prun, edi ; Increment run pointer for next time
mov edi, pls
mov eax, [edi].LS_spComplex
mov [edi].LS_spNext, eax ; pls->spNext = pls->spComplex
mov eax, dN_Original ; dN = dN_Original
mov dN, eax
mul ecx
add eax, eqGamma_lo
adc edx, eqGamma_hi ; [edx:eax] = dN*x0 + eqGamma
div dM
mov y0, eax
jmp done_clipping
endProc bLines
end