windows-server-2003/drivers/video/ms/vga/disp/8bpp/i386/lines.asm


								;---------------------------Module-Header------------------------------;

								; Module Name: lines.asm

								;

								; Draws a set of connected polylines.

								;

								; The actual pixel-lighting code is different depending on if the lines

								; are styled/unstyled and we're doing an arbitrary ROP or set-style ROP.

								;

								; Lines are drawn from left to right.  So if a line moves from right

								; to left, the endpoints are swapped and the line is drawn from left to

								; right.

								;

								; See s3\lines.cxx for a portable version (sans simple clipping).

								;

								; Copyright (c) 1992 Microsoft Corporation

								;-----------------------------------------------------------------------;


								        .386


								        .model  small,c


								        assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT

								        assume fs:nothing,gs:nothing


								        .xlist

								        include stdcall.inc             ;calling convention cmacros

								        include i386\egavga.inc

								        include i386\strucs.inc

								        include i386\driver.inc

								        include i386\lines.inc

								        .list


								        .data


								        public gaflRoundTable

								; gaflRoundTable: rounding flags that bLines ORs into a line's flags

								; once the flips to the first octant are known.  bLines uses

								; (fl AND FL_ROUND_MASK) directly as the byte offset into this table

								; (it asserts FL_ROUND_SHIFT is 2, so the masked flip bits are already

								; scaled by the size of a dword entry).

								;

								; Going by the entry order, the two 0baadf00dh entries are the slots

								; for 'slope one' combined with a D flip.  bLines sets FL_FLIP_SLOPE_ONE

								; and FL_FLIP_D on mutually exclusive paths, so those slots are

								; presumably never read and hold a recognizable filler value.

								gaflRoundTable       label  dword

								        dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; no flips

								        dd      FL_H_ROUND_DOWN + FL_V_ROUND_DOWN       ; D flip

								        dd      FL_H_ROUND_DOWN                         ; V flip

								        dd      FL_V_ROUND_DOWN                         ; D & V flip

								        dd      FL_V_ROUND_DOWN                         ; slope one

								        dd      0baadf00dh                              ; (filler)

								        dd      FL_H_ROUND_DOWN                         ; slope one & V flip

								        dd      0baadf00dh                              ; (filler)


								        .code


								;--------------------------------Macro----------------------------------;

								; testb ebx, <mask>

								;

								; Emits a TEST of the target register against an assembly-time constant

								; mask.  Substitutes a byte test (of BL or BH) if the mask is entirely

								; in the lo-byte or entirely in the hi-byte of the low word (thus saving

								; 3 bytes of code space).  Otherwise the full dword TEST is emitted.

								;

								; The byte substitution is only implemented for a target of EBX; any

								; other target with a byte-sized mask is an assembly-time error.  A

								; mask expression containing commas must be enclosed in angle brackets,

								; which is what the third (dummy) argument checks for.

								;

								;-----------------------------------------------------------------------;


								TESTB   macro   targ,mask,thirdarg

								        local   mask2,delta


								; A non-blank third argument means the mask was split at a comma:

								ifnb <thirdarg>

								        .err    TESTB mask must be enclosed in brackets!

								endif


								; delta selects the byte register: 0 = BL (bits 0-7), 1 = BH (bits 8-15).

								; mask2 is the mask reduced to a byte-sized immediate.

								        delta = 0

								        mask2 = mask


								        if mask2 AND 0ffff0000h

								            test targ,mask                      ; If bit set in hi-word,

								            exitm                               ; test entire dword

								        endif


								        if mask2 AND 0ff00h

								            if mask2 AND 0ffh                   ; If bit set in lo-byte and

								                test targ,mask                  ; hi-byte, test entire dword

								                exitm

								            endif


								            mask2 = mask2 SHR 8                 ; Mask is only in bits 8-15:

								            delta = 1                           ; test BH instead

								        endif


								; Only reached when the mask fits in a single byte:

								ifidni <targ>,<EBX>

								        if delta

								            test bh,mask2

								        else

								            test bl,mask2

								        endif

								        exitm

								endif


								        .err    Too bad TESTB doesn't support targets other than ebx!

								endm


								;---------------------------Public-Routine------------------------------;

								; BOOL bLines(ppdev, pptfxFirst, pptfxBuf, prun, cptfx, pls,

								;        prclClip, apfn[], flStart)

								;

								; Do all the DDA calculations for lines.

								;

								; Doing Lines Right

								; -----------------

								;

								; In NT, all lines are given to the device driver in fractional

								; coordinates, in a 28.4 fixed point format.  The lower 4 bits are

								; fractional for sub-pixel positioning.

								;

								; Note that you CANNOT! just round the coordinates to integers

								; and pass the results to your favorite integer Bresenham routine!!

								; (Unless, of course, you have such a high resolution device that

								; nobody will notice -- not likely for a display device.)  The

								; fractions give a more accurate rendering of the line -- this is

								; important for things like our Bezier curves, which would have 'kinks'

								; if the points in its polyline approximation were rounded to integers.

								;

								; Unfortunately, for fractional lines there is more setup work to do

								; a DDA than for integer lines.  However, the main loop is exactly

								; the same (and can be done entirely with 32 bit math).

								;

								; If You've Got Hardware That Does Bresenham

								; ------------------------------------------

								;

								; A lot of hardware limits DDA error terms to 'n' bits.  With fractional

								; coordinates, 4 bits are given to the fractional part, letting

								; you draw in hardware only those lines that lie entirely in a 2^(n-4)

								; by 2^(n-4) pixel space.

								;

								; And you still have to correctly draw those lines with coordinates

								; outside that space!  Remember that the screen is only a viewport

								; onto a 28.4 by 28.4 space -- if any part of the line is visible

								; you MUST render it precisely, regardless of where the end points lie.

								; So even if you do it in software, somewhere you'll have to have a

								; 32 bit DDA routine.

								;

								; Our Implementation

								; ------------------

								;

								; We employ a run length slice algorithm: our DDA calculates the

								; number of pixels that are in each row (or 'strip') of pixels.

								;

								; We've separated the running of the DDA and the drawing of pixels:

								; we run the DDA for several iterations and store the results in

								; a 'strip' buffer (which are the lengths of consecutive pixel rows of

								; the line), then we crank up a 'strip drawer' that will draw all the

								; strips in the buffer.

								;

								; We also employ a 'half-flip' to reduce the number of strip

								; iterations we need to do in the DDA and strip drawing loops: when a

								; (normalized) line's slope is more than 1/2, we do a final flip

								; about the line y = (1/2)x.  So now, instead of each strip being

								; consecutive horizontal or vertical pixel rows, each strip is composed

								; of those pixels aligned in 45 degree rows.  So a line like (0, 0) to

								; (128, 128) would generate only one strip.

								;

								; We also always draw only left-to-right.

								;

								; Style lines may have arbitrary style patterns.  We specially

								; optimize the default patterns (and call them 'masked' styles).

								;

								; The DDA Derivation

								; ------------------

								;

								; Here is how I like to think of the DDA calculation.

								;

								; We employ Knuth's "diamond rule": rendering a one-pixel-wide line

								; can be thought of as dragging a one-pixel-wide by one-pixel-high

								; diamond along the true line.  Pixel centers lie on the integer

								; coordinates, and so we light any pixel whose center gets covered

								; by the "drag" region (John D. Hobby, Journal of the Association

								; for Computing Machinery, Vol. 36, No. 2, April 1989, pp. 209-229).

								;

								; We must define which pixel gets lit when the true line falls

								; exactly half-way between two pixels.  In this case, we follow

								; the rule: when two pels are equidistant, the upper or left pel

								; is illuminated, unless the slope is exactly one, in which case

								; the upper or right pel is illuminated.  (So we make the edges

								; of the diamond exclusive, except for the top and left vertices,

								; which are inclusive, unless we have slope one.)

								;

								; This metric decides what pixels should be on any line BEFORE it is

								; flipped around for our calculation.  Having a consistent metric

								; this way will let our lines blend nicely with our curves.  The

								; metric also dictates that we will never have one pixel turned on

								; directly above another that's turned on.  We will also never have

								; a gap; i.e., there will be exactly one pixel turned on for each

								; column between the start and end points.  All that remains to be

								; done is to decide how many pixels should be turned on for each row.

								;

								; So lines we draw will consist of varying numbers of pixels on

								; successive rows, for example:

								;

								;       ******

								;             *****

								;                  ******

								;                        *****

								;

								; We'll call each set of pixels on a row a "strip".

								;

								; (Please remember that our coordinate space has the origin as the

								; upper left pixel on the screen; positive y is down and positive x

								; is right.)

								;

								; Device coordinates are specified as fixed point 28.4 numbers,

								; where the first 28 bits are the integer coordinate, and the last

								; 4 bits are the fraction.  So coordinates may be thought of as

								; having the form (x, y) = (M/F, N/F) where F is the constant scaling

								; factor F = 2^4 = 16, and M and N are 32 bit integers.

								;

								; Consider the line from (M0/F, N0/F) to (M1/F, N1/F) which runs

								; left-to-right and whose slope is in the first octant, and let

								; dM = M1 - M0 and dN = N1 - N0.  Then dM >= 0, dN >= 0 and dM >= dN.

								;

								; Since the slope of the line is less than 1, the edges of the

								; drag region are created by the top and bottom vertices of the

								; diamond.  At any given pixel row y of the line, we light those

								; pixels whose centers are between the left and right edges.

								;

								; Let mL(n) denote the line representing the left edge of the drag

								; region.  On pixel row j, the column of the first pixel to be

								; lit is

								;

								;       iL(j) = ceiling( mL(j * F) / F)

								;

								; Since the line's slope is less than one:

								;

								;       iL(j) = ceiling( mL([j + 1/2] F) / F )

								;

								; Recall the formula for our line:

								;

								;       n(m) = (dN / dM) (m - M0) + N0

								;

								;       m(n) = (dM / dN) (n - N0) + M0

								;

								; Since the line's slope is less than one, the line representing

								; the left edge of the drag region is the original line offset

								; by 1/2 pixel in the y direction:

								;

								;       mL(n) = (dM / dN) (n - F/2 - N0) + M0

								;

								; From this we can figure out the column of the first pixel that

								; will be lit on row j, being careful of rounding (if the left

								; edge lands exactly on an integer point, the pixel at that

								; point is not lit because of our rounding convention):

								;

								;       iL(j) = floor( mL(j F) / F ) + 1

								;

								;             = floor( ((dM / dN) (j F - F/2 - N0) + M0) / F ) + 1

								;

								;             = floor( (F dM j - F/2 dM - N0 dM + dN M0) / F dN ) + 1

								;

								;                      F dM j - [ dM (N0 + F/2) - dN M0 ]

								;             = floor( ---------------------------------- ) + 1

								;                                   F dN

								;

								;                      dM j - [ dM (N0 + F/2) - dN M0 ] / F

								;             = floor( ------------------------------------ ) + 1       (1)

								;                                     dN

								;

								;             = floor( (dM j + alpha) / dN ) + 1

								;

								; where

								;

								;       alpha = - [ dM (N0 + F/2) - dN M0 ] / F

								;

								; We use equation (1) to calculate the DDA: there are iL(j+1) - iL(j)

								; pixels in row j.  Because we are always calculating iL(j) for

								; integer quantities of j, we note that the only fractional term

								; is constant, and so we can 'throw away' the fractional bits of

								; alpha:

								;

								;       beta = floor( - [ dM (N0 + F/2) - dN M0 ] / F )                 (2)

								;

								; so

								;

								;       iL(j) = floor( (dM j + beta) / dN ) + 1                         (3)

								;

								; for integers j.

								;

								; Note if iR(j) is the line's rightmost pixel on row j, that

								; iR(j) = iL(j + 1) - 1.

								;

								; Similarly, rewriting equation (1) as a function of column i,

								; we can determine, given column i, on which pixel row j is the line

								; lit:

								;

								;                       dN i + [ dM (N0 + F/2) - dN M0 ] / F

								;       j(i) = ceiling( ------------------------------------ ) - 1

								;                                       dM

								;

								; Floors are easier to compute, so we can rewrite this:

								;

								;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F

								;       j(i) = floor( ----------------------------------------------- ) - 1

								;                                       dM

								;

								;                     dN i + [ dM (N0 + F/2) - dN M0 ] / F + dM - 1/F - dM

								;            = floor( ---------------------------------------------------- )

								;                                       dM

								;

								;                     dN i + [ dM (N0 + F/2) - dN M0 - 1 ] / F

								;            = floor( ---------------------------------------- )

								;                                       dM

								;

								; We can once again wave our hands and throw away the fractional bits

								; of the remainder term:

								;

								;       j(i) = floor( (dN i + gamma) / dM )                             (4)

								;

								; where

								;

								;       gamma = floor( [ dM (N0 + F/2) - dN M0 - 1 ] / F )              (5)

								;

								; We now note that

								;

								;       beta = -gamma - 1 = ~gamma                                      (6)

								;

								; To draw the pixels of the line, we could evaluate (3) on every scan

								; line to determine where the strip starts.  Of course, we don't want

								; to do that because that would involve a multiply and divide for every

								; scan.  So we do everything incrementally.

								;

								; We would like to easily compute c , the number of pixels on scan j:

								;                                  j

								;

								;    c  = iL(j + 1) - iL(j)

								;     j

								;

								;       = floor((dM (j + 1) + beta) / dN) - floor((dM j + beta) / dN)   (7)

								;

								; This may be rewritten as

								;

								;    c  = floor(i    + r    / dN) - floor(i  + r  / dN)                 (8)

								;     j          j+1    j+1                j    j

								;

								; where i , i    are integers and r  < dN, r    < dN.

								;        j   j+1                   j        j+1

								;

								; Rewriting (7) again:

								;

								;    c  = floor(i  + r  / dN + dM / dN) - floor(i  + r  / dN)

								;     j          j    j                          j    j

								;

								;

								;       = floor((r  + dM) / dN) - floor(r  / dN)

								;                 j                      j

								;

								; This may be rewritten as

								;

								;    c  = dI + floor((r  + dR) / dN) - floor(r  / dN)

								;     j                j                      j

								;

								; where dI + dR / dN = dM / dN, dI is an integer and dR < dN.

								;

								; r  is the remainder (or "error") term in the DDA loop: r  / dN

								;  j                                                      j

								; is the exact fraction of a pixel at which the strip ends.  To go

								; on to the next scan and compute c    we need to know r   .

								;                                  j+1                  j+1

								;

								; So in the main loop of the DDA:

								;

								;    c  = dI + floor((r  + dR) / dN) and r    = (r  + dR) % dN

								;     j                j                  j+1     j

								;

								; and we know r  < dN, r    < dN, and dR < dN.

								;              j        j+1

								;

								; We have derived the DDA only for lines in the first octant; to

								; handle other octants we do the common trick of flipping the line

								; to the first octant by first making the line left-to-right by

								; exchanging the end-points, then flipping about the lines y = 0 and

								; y = x, as necessary.  We must record the transformation so we can

								; undo them later.

								;

								; We must also be careful of how the flips affect our rounding.  If

								; to get the line to the first octant we flipped about y = 0, we now

								; have to be careful to round a y value of 1/2 up instead of down as

								; we would for a line originally in the first octant (recall that

								; "In the case where two pels are equidistant, the upper or left

								; pel is illuminated...").

								;

								; To account for this rounding when running the DDA, we shift the line

								; (or not) in the y direction by the smallest amount possible.  That

								; takes care of rounding for the DDA, but we still have to be careful

								; about the rounding when determining the first and last pixels to be

								; lit in the line.

								;

								; Determining The First And Last Pixels In The Line

								; -------------------------------------------------

								;

								; Fractional coordinates also make it harder to determine which pixels

								; will be the first and last ones in the line.  We've already taken

								; the fractional coordinates into account in calculating the DDA, but

								; the DDA cannot tell us which are the end pixels because it is quite

								; happy to calculate pixels on the line from minus infinity to positive

								; infinity.

								;

								; The diamond rule determines the start and end pixels.  (Recall that

								; the sides are exclusive except for the left and top vertices.)

								; This convention can be thought of in another way: there are diamonds

								; around the pixels, and wherever the true line crosses a diamond,

								; that pel is illuminated.

								;

								; Consider a line where we've done the flips to the first octant, and the

								; floor of the start coordinates is the origin:

								;

								;        +-----------------------> +x

								;        |

								;        | 0                     1

								;        |     0123456789abcdef

								;        |

								;        |   0 00000000?1111111

								;        |   1 00000000 1111111

								;        |   2 0000000   111111

								;        |   3 000000     11111

								;        |   4 00000    ** 1111

								;        |   5 0000       ****1

								;        |   6 000           1***

								;        |   7 00             1  ****

								;        |   8 ?                     ***

								;        |   9 22             3         ****

								;        |   a 222           33             ***

								;        |   b 2222         333                ****

								;        |   c 22222       3333                    **

								;        |   d 222222     33333

								;        |   e 2222222   333333

								;        |   f 22222222 3333333

								;        |

								;        | 2                     3

								;        v

								;        +y

								;

								; If the start of the line lands on the diamond around pixel 0 (shown by

								; the '0' region here), pixel 0 is the first pel in the line.  The same

								; is true for the other pels.

								;

								; A little more work has to be done if the line starts in the

								; 'nether-land' between the diamonds (as illustrated by the '*' line):

								; the first pel lit is the first diamond crossed by the line (pixel 1 in

								; our example).  This calculation is determined by the DDA or slope of

								; the line.

								;

								; If the line starts exactly half way between two adjacent pixels

								; (denoted here by the '?' spots), the first pixel is determined by our

								; round-down convention (and is dependent on the flips done to

								; normalize the line).

								;

								; Last Pel Exclusive

								; ------------------

								;

								; To eliminate repeatedly lit pels between continuous connected lines,

								; we employ a last-pel exclusive convention: if the line ends exactly on

								; the diamond around a pel, that pel is not lit.  (This eliminates the

								; checks we had in the old code to see if we were re-lighting pels.)

								;

								; The Half Flip

								; -------------

								;

								; To make our run length algorithm more efficient, we employ a "half

								; flip".  If after normalizing to the first octant, the slope is more

								; than 1/2, we subtract the y coordinate from the x coordinate.  This

								; has the effect of reflecting the coordinates through the line of slope

								; 1/2.  Note that the diagonal gets mapped into the x-axis after a half

								; flip.

								;

								; How Many Bits Do We Need, Anyway?

								; ---------------------------------

								;

								; Note that if the line is visible on your screen, you must light up

								; exactly the correct pixels, no matter where in the 28.4 x 28.4 device

								; space the end points of the line lie (meaning you must handle 32 bit

								; DDAs, although you can certainly have optimized cases for lesser DDAs).

								;

								; We move the origin to (floor(M0 / F), floor(N0 / F)), so when we

								; calculate gamma from (5), we know that 0 <= M0, N0 < F.  And we

								; are in the first octant, so dM >= dN.  Then we know that gamma can

								; be in the range [(-1/2)dM, (3/2)dM].  The DDI guarantees us that

								; valid lines will have dM and dN values at most 31 bits (unsigned)

								; of significance.  So gamma requires 33 bits of significance (we store

								; this as a 64 bit number for convenience).

								;

								; When running through the DDA loop, r  + dR can have a value in the

								;                                     j

								; range 0 <= r  < 2 dN; thus the result must be a 32 bit unsigned value.

								;             j

								;

								; Testing Lines

								; -------------

								;

								; To be NT compliant, a display driver must exactly adhere to GIQ,

								; which means that for any given line, the driver must light exactly

								; the same pels as does GDI.  This can be tested using the Guiman tool

								; provided elsewhere in the DDK, and 'ZTest', which draws random lines

								; on the screen and to a bitmap, and compares the results.

								;

								; If You've Got Line Hardware

								; ---------------------------

								;

								; If your hardware already adheres to GIQ, you're all set.  Otherwise

								; you'll want to look at the S3 sample code and read the following:

								;

								; 1) You'll want to special case integer-only lines, since they require

								;    less processing time and are more common (CAD programs will probably

								;    only ever give integer lines).  GDI does not provide a flag saying

								;    that all lines in a path are integer lines; consequently, you will

								;    have to explicitly check every line.

								;

								; 2) You are required to correctly draw any line in the 28.4 device

								;    space that intersects the viewport.  If you have less than 32 bits

								;    of significance in the hardware for the Bresenham terms, extremely

								;    long lines would overflow the hardware.  For such (rare) cases, you

								;    can fall back to strip-drawing code, of which there is a C version in

								;    the S3's lines.cxx (or if your display is a frame buffer, fall back

								;    to the engine).

								;

								; 3) If you can explicitly set the Bresenham terms in your hardware, you

								;    can draw non-integer lines using the hardware.  If your hardware has

								;    'n' bits of precision, you can draw GIQ lines that are up to 2^(n-5)

								;    pels long (4 bits are required for the fractional part, and one bit is

								;    used as a sign bit).  Note that integer lines don't require the 4

								;    fractional bits, so if you special case them as in 1), you can do

								;    integer lines that are up to 2^(n - 1) pels long.  See the S3's

								;    fastline.asm for an example.

								;

								;-----------------------------------------------------------------------;


								cProc   bLines,36,< \

								    uses esi edi ebx,  \

								    ppdev:     ptr,   \

								    pptfxFirst: ptr,   \

								    pptfxBuf:   ptr,   \

								    prun:       ptr,   \

								    cptfx:      dword, \

								    pls:        ptr,   \

								    prclClip:   ptr,   \

								    apfn:       ptr,   \

								    flStart:    dword  >


								; ppdev:     Surface data

								; pptfxFirst: Start point of first line

								; pptfxBuf:   All subsequent points

								; prun:       Array of runs if doing complex clipping

								; cptfx:      Number of points in pptfxBuf (i.e., # lines)

								; pls:        Line state

								; prclClip:   Clip rectangle if doing simple clipping

								; apfn:       Pointer to table of strip drawers

								; flStart:    Flags for all lines


								        local cPelsAfterThisBank:    dword ; For bank switching

								        local cStripsInNextRun:      dword ; For bank switching

								        local pptfxBufEnd:           ptr   ; Last point in pptfxBuf

								        local M0:                    dword ; Normalized x0 in device coords

								        local dM:                    dword ; Delta-x in device coords

								        local N0:                    dword ; Normalized y0 in device coords

								        local dN:                    dword ; Delta-y in device coords

								        local fl:                    dword ; Flags for current line

								        local x:                     dword ; Normalized start pixel x-coord

								        local y:                     dword ; Normalized start pixel y-coord

								        local eqGamma_lo:            dword ; Lower 32 bits of Gamma

								        local eqGamma_hi:            dword ; Upper 32 bits of Gamma

								        local x0:                    dword ; Start pixel x-offset

								        local y0:                    dword ; Start pixel y-offset

								        local ulSlopeOneAdjustment:  dword ; Special offset if line of slope 1

								        local cStylePels:            dword ; # of pixels in line (before clip)

								        local xStart:                dword ; Start pixel x-offset before clip

								        local pfn:                   ptr   ; Pointer to strip drawing function

								        local cPels:                 dword ; # pixels to be drawn (after clip)

								        local i:                     dword ; # pixels in strip

								        local r:                     dword ; Remainder (or "error") term

								        local d_I:                   dword ; Delta-I

								        local d_R:                   dword ; Delta-R

								        local plStripEnd:            ptr   ; Last strip in buffer

								        local ptlStart[size POINTL]: byte  ; Unnormalized start coord

								        local dN_Original:           dword ; dN before half-flip

								        local xClipLeft:             dword ; Left side of clip rectangle

								        local xClipRight:            dword ; Right side of clip rectangle

								        local strip[size STRIPS]:    byte  ; Our strip buffer


								; Do some initializing:


								        mov     esi, pls

								        mov     ecx, cptfx

								        mov     edx, pptfxBuf

								        lea     eax, [edx + ecx * (size POINTL) - (size POINTL)]

								        mov     pptfxBufEnd, eax        ; pptfxBufEnd is inclusive of end point


								        mov     eax, [esi].LS_chAndXor  ; copy chAndXor from LINESTATE to STRIPS

								        mov     strip.ST_chAndXor, eax  ;   buffer


								        mov     eax, [edx].ptl_x        ; Load up end point (M1, N1)

								        mov     edi, [edx].ptl_y


								        mov     edx, pptfxFirst         ; Load up start point (M0, N0)

								        mov     esi, [edx].ptl_x

								        mov     ecx, [edx].ptl_y


								        mov     ebx, flStart


								;-----------------------------------------------------------------------;

								; Flip to the first octant.                                             ;

								;-----------------------------------------------------------------------;


								; Register state:       esi = M0

								;                       ecx = N0

								;                       eax = dM (M1)

								;                       edi = dN (N1)

								;                       ebx = fl


								; Make sure we go left to right:


								        public  the_main_loop

								the_main_loop::

								        cmp     esi, eax

								        jle     short is_left_to_right  ; skip if M0 <= M1

								        xchg    esi, eax                ; swap M0, M1

								        xchg    ecx, edi                ; swap N0, N1

								        or      ebx, FL_FLIP_H


								is_left_to_right:


								; Compute the deltas, remembering that the DDI says we should get

								; deltas less than 2^31.  If we get more, we ensure we don't crash

								; later on by simply skipping the line:


								        sub     eax, esi                ; eax = dM

								        jo      next_line               ; dM must be less than 2^31

								        sub     edi, ecx                ; edi = dN

								        jo      next_line               ; dN must be less than 2^31


								        jge     short is_top_to_bottom  ; skip if dN >= 0

								        neg     ecx                     ; N0 = -N0

								        neg     edi                     ; N1 = -N1

								        or      ebx, FL_FLIP_V


								is_top_to_bottom:

								        cmp     edi, eax

								        jb      short done_flips        ; skip if dN < dM

								        jne     short slope_more_than_one


								; We must special case slopes of one (because of our rounding convention):


								        or      ebx, FL_FLIP_SLOPE_ONE

								        jmp     short done_flips


								slope_more_than_one:

								        xchg    eax, edi                ; swap dM, dN

								        xchg    esi, ecx                ; swap M0, N0

								        or      ebx, FL_FLIP_D


								done_flips:


								        mov     edx, ebx

								        and     edx, FL_ROUND_MASK

								        .errnz  FL_ROUND_SHIFT - 2

								        or      ebx, [gaflRoundTable + edx]  ; get our rounding flags


								        mov     dM, eax                 ; save some info

								        mov     dN, edi

								        mov     fl, ebx


								; We're going to shift our origin so that it's at the closest integer

								; coordinate to the left/above our fractional start point (it makes

								; the math quicker):


								        mov     edx, esi                ; x = LFLOOR(M0)

								        sar     edx, FLOG2

								        mov     x, edx


								        mov     edx, ecx                ; y = LFLOOR(N0)

								        sar     edx, FLOG2

								        mov     y, edx


								;-----------------------------------------------------------------------;

								; Compute the fractional remainder term                                 ;

								;-----------------------------------------------------------------------;


								; By shifting the origin we've contrived to eliminate the integer

								; portion of our fractional start point, giving us start point

								; fractional coordinates in the range [0, F - 1]:


								        and     esi, F - 1              ; M0 = FXFRAC(M0)

								        and     ecx, F - 1              ; N0 = FXFRAC(N0)


								; We now compute Gamma:

								;

								; On entry to this sequence eax = dM and edi = dN (as stored to the

								; dM/dN locals just after done_flips), and esi/ecx hold the fractional

								; parts M0/N0 of the start point.  The result is a signed 64-bit value

								; stored to eqGamma_hi:eqGamma_lo.


								        mov     M0, esi                 ; save M0, N0 for later

								        mov     N0, ecx


								        lea     edx, [ecx + F/2]

								        mul     edx                     ; [edx:eax] = dM * (N0 + F/2)

								        xchg    eax, edi                ; edi = low dword of product, eax = dN

								        mov     ecx, edx                ; [ecx:edi] = dM * (N0 + F/2)

								                                        ; (we just nuked N0)


								        mul     esi                     ; [edx:eax] = dN * M0


								; Now gamma = dM * (N0 + F/2) - dN * M0 - bRoundDown

								;

								; Rotating the 8-bit register bh by 8 leaves its value unchanged but

								; loads the carry flag with its top bit, which is bit 15 of ebx, i.e.

								; FL_V_ROUND_DOWN (the .errnz below asserts that bit position).  The

								; sbb pair then performs the 64-bit subtract and takes off one more

								; when that flag is set:


								        .errnz  FL_V_ROUND_DOWN - 8000h

								        ror     bh, 8                   ; CF = (fl & FL_V_ROUND_DOWN) != 0

								        sbb     edi, eax                ; low dword:  minus low(dN * M0) minus CF

								        sbb     ecx, edx                ; high dword: minus high(dN * M0) minus borrow


								        shrd    edi, ecx, FLOG2         ; shift the 64-bit value right, pulling

								                                        ;   bits from ecx into the top of edi

								        sar     ecx, FLOG2              ; gamma = [ecx:edi] >>= 4


								        mov     eqGamma_hi, ecx

								        mov     eqGamma_lo, edi


								        mov     eax, N0


								; Register state:

								;                       eax = N0

								;                       ebx = fl

								;                       ecx = eqGamma_hi

								;                       edx = garbage

								;                       esi = M0

								;                       edi = eqGamma_lo


								        testb   ebx, FL_FLIP_H

								        jnz     line_runs_right_to_left


								;-----------------------------------------------------------------------;

								; Figure out which pixels are at the ends of a left-to-right line.      ;

								;                               -------->                               ;

								;-----------------------------------------------------------------------;


								        public line_runs_left_to_right

								line_runs_left_to_right::

								        or      esi, esi

								        jz      short LtoR_check_slope_one

								                                        ; skip ahead if M0 == 0

								                                        ;   (in that case, x0 = 0 which is to be

								                                        ;   kept in esi, and is already

								                                        ;   conveniently zero)


								        or      eax, eax

								        jnz     short LtoR_N0_not_zero


								        .errnz  FL_H_ROUND_DOWN - 80h

								        ror     bl, 8

								        sbb     esi, -F/2

								        shr     esi, FLOG2

								        jmp     short LtoR_check_slope_one

								                                        ; esi = x0 = rounded M0


								LtoR_N0_not_zero:

								        sub     eax, F/2

								        sbb     edx, edx

								        xor     eax, edx

								        sub     eax, edx

								        cmp     esi, eax

								        sbb     esi, esi

								        inc     esi                     ; esi = x0 = (abs(N0 - F/2) <= M0)


								        public  LtoR_check_slope_one

								LtoR_check_slope_one::

								        mov     ulSlopeOneAdjustment, 0

								        mov     eax, ebx

								        and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN

								        cmp     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN

								        jne     short LtoR_compute_y0_from_x0


								; We have to special case lines that are exactly of slope 1 or -1:


								        ;

								        ;       if (M1 > 0) AND (N1 == M1 + 8)

								        ;


								        mov     eax, N0

								        add     eax, dN

								        and     eax, F - 1              ; eax = N1


								        mov     edx, M0

								        add     edx, dM

								        and     edx, F - 1              ; edx = M1


								        jz      short LtoR_slope_one_check_start_point


								        add     edx, F/2                ; M1 + 8

								        cmp     edx, eax                ; cmp N1, M1 + 8

								        jne     short LtoR_slope_one_check_start_point

								        mov     ulSlopeOneAdjustment, -1


								LtoR_slope_one_check_start_point:


								        ;

								        ;       if (M0 > 0) AND (N0 == M0 + 8)

								        ;


								        mov     eax, M0

								        or      eax, eax

								        jz      short LtoR_compute_y0_from_x0


								        add     eax, F/2

								        cmp     eax, N0                 ; cmp M0 + 8, N0

								        jne     short LtoR_compute_y0_from_x0


								        xor     esi, esi                ; x0 = 0


								LtoR_compute_y0_from_x0:


								; ecx = eqGamma_hi

								; esi = x0

								; edi = eqGamma_lo


								        mov     eax, dN

								        mov     edx, dM


								        mov     x0, esi

								        mov     y0, 0

								        cmp     ecx, 0

								        jl      short LtoR_compute_x1


								        neg     esi

								        and     esi, eax

								        sub     edx, esi

								        cmp     edi, edx

								        mov     edx, dM

								        jb      short LtoR_compute_x1   ; Bug fix: Must be unsigned!

								        mov     y0, 1                   ; y0 = floor((dN * x0 + eqGamma) / dM)


								; Compute x1, the last pixel of a left-to-right line, into esi.  Every

								; path out of this section jumps to done_first_pel_last_pel.


								LtoR_compute_x1:


								; Register state:

								;                       eax = dN

								;                       ebx = fl

								;                       ecx = garbage

								;                       edx = dM

								;                       esi = garbage

								;                       edi = garbage


								        mov     esi, M0

								        add     esi, edx

								        mov     ecx, esi                ; ecx = M0 + dM (kept for the M1 mask below)

								        shr     esi, FLOG2

								        dec     esi                     ; x1 = ((M0 + dM) >> 4) - 1

								        add     esi, ulSlopeOneAdjustment

								        and     ecx, F-1                ; M1 = (M0 + dM) & 15

								        jz      done_first_pel_last_pel ; M1 == 0: x1 needs no further adjustment


								        add     eax, N0

								        and     eax, F-1                ; N1 = (N0 + dN) & 15

								        jnz     short LtoR_N1_not_zero


								; N1 == 0: add the rounded value of M1 to x1.  'ror bl, 8' leaves bl

								; unchanged and sets the carry to bit 7 of bl, which is FL_H_ROUND_DOWN

								; (asserted by the .errnz).  The sbb then forms M1 + F/2 - carry, so

								; an exact half rounds down when the flag is set and up otherwise:


								        .errnz  FL_H_ROUND_DOWN - 80h

								        ror     bl, 8                   ; CF = (fl & FL_H_ROUND_DOWN) != 0

								        sbb     ecx, -F/2               ; ecx = M1 + F/2 - CF

								        shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)

								        add     esi, ecx

								        jmp     done_first_pel_last_pel


								LtoR_N1_not_zero:

								; Branch-free absolute value: the sub borrows when N1 < F/2, sbb turns

								; that borrow into an all-ones or all-zeros mask in edx, and the

								; xor/sub pair negates eax only when the mask is all ones:


								        sub     eax, F/2                ; eax = N1 - F/2, CF set if N1 < F/2

								        sbb     edx, edx                ; edx = -1 if borrow, else 0

								        xor     eax, edx

								        sub     eax, edx                ; eax = ABS(N1 - F/2)

								        cmp     eax, ecx

								        jg      done_first_pel_last_pel ; ABS(N1 - F/2) > M1: leave x1 as is

								        inc     esi                     ; otherwise x1 += 1

								        jmp     done_first_pel_last_pel


								;-----------------------------------------------------------------------;

								; Figure out which pixels are at the ends of a right-to-left line.      ;

								;                               <--------                               ;

								;-----------------------------------------------------------------------;


								; Compute x0:


								        public  line_runs_right_to_left

								line_runs_right_to_left::

								        mov     x0, 1                   ; x0 = 1

								        or      eax, eax

								        jnz     short RtoL_N0_not_zero


								        xor     edx, edx                ; ulDelta = 0

								        .errnz  FL_H_ROUND_DOWN - 80h

								        ror     bl, 8

								        sbb     esi, -F/2

								        shr     esi, FLOG2              ; esi = LROUND(M0, fl & FL_H_ROUND_DOWN)

								        jz      short RtoL_check_slope_one


								        mov     x0, 2

								        mov     edx, dN

								        jmp     short RtoL_check_slope_one


								RtoL_N0_not_zero:

								        sub     eax, F/2

								        sbb     edx, edx

								        xor     eax, edx

								        sub     eax, edx

								        add     eax, esi                ; eax = ABS(N0 - F/2) + M0

								        xor     edx, edx                ; ulDelta = 0

								        cmp     eax, F

								        jle     short RtoL_check_slope_one


								        mov     x0, 2                   ; x0 = 2

								        mov     edx, dN                 ; ulDelta = dN


								        public  RtoL_check_slope_one

								RtoL_check_slope_one::

								        mov     ulSlopeOneAdjustment, 0

								        mov     eax, ebx

								        and     eax, FL_FLIP_SLOPE_ONE + FL_H_ROUND_DOWN

								        cmp     eax, FL_FLIP_SLOPE_ONE

								        jne     short RtoL_compute_y0_from_x0


								; We have to special case lines that are exactly of slope 1 or -1:


								        ;

								        ;  if ((N1 > 0) && (M1 == N1 + 8))

								        ;


								        mov     eax, N0

								        add     eax, dN

								        and     eax, F - 1              ; eax = N1

								        jz      short RtoL_slope_one_check_start_point


								        mov     esi, M0

								        add     esi, dM

								        and     esi, F - 1              ; esi = M1


								        add     eax, F/2                ; N1 + 8

								        cmp     esi, eax                ; cmp M1, N1 + 8

								        jne     short RtoL_slope_one_check_start_point

								        mov     ulSlopeOneAdjustment, 1


								RtoL_slope_one_check_start_point:


								        ;

								        ;  if ((N0 > 0) && (M0 == N0 + 8))

								        ;


								        mov     eax,N0                  ; eax = N0

								        or      eax,eax                 ; check for N0 == 0

								        jz      short RtoL_compute_y0_from_x0


								        mov     esi, M0                 ; esi = M0


								        add     eax, F/2                ; N0 + 8

								        cmp     eax, esi                ; cmp M0 , N0 + 8

								        jne     short RtoL_compute_y0_from_x0


								        mov     x0, 2                   ; x0 = 2

								        mov     edx, dN                 ; ulDelta = dN


								RtoL_compute_y0_from_x0:


								; eax = garbage

								; ebx = fl

								; ecx = eqGamma_hi

								; edx = ulDelta

								; esi = garbage

								; edi = eqGamma_lo


								        mov     eax, dN                 ; eax = dN

								        mov     y0, 0                   ; y0 = 0


								        add     edi, edx

								        adc     ecx, 0                  ; eqGamma += ulDelta

								                                        ; NOTE: Setting flags here!

								        mov     edx, dM                 ; edx = dM

								        jl      short RtoL_compute_x1   ; NOTE: Looking at the flags here!

								        jg      short RtoL_y0_is_2


								        lea     ecx, [edx + edx]

								        sub     ecx, eax                ; ecx = 2 * dM - dN

								        cmp     edi, ecx

								        jae     short RtoL_y0_is_2      ; Bug fix: Must be unsigned!


								        sub     ecx, edx                ; ecx = dM - dN

								        cmp     edi, ecx

								        jb      short RtoL_compute_x1   ; Bug fix: Must be unsigned!


								        mov     y0, 1

								        jmp     short RtoL_compute_x1


								RtoL_y0_is_2:

								        mov     y0, 2


								; Compute x1, the last pixel of a right-to-left line, into esi.  The

								; N1 != 0 path falls through into done_first_pel_last_pel; the N1 == 0

								; path jumps there.


								RtoL_compute_x1:


								; Register state:

								;                       eax = dN

								;                       ebx = fl

								;                       ecx = garbage

								;                       edx = dM

								;                       esi = garbage

								;                       edi = garbage


								        mov     esi, M0

								        add     esi, edx

								        mov     ecx, esi                ; ecx = M0 + dM (kept for the M1 mask below)

								        shr     esi, FLOG2              ; x1 = (M0 + dM) >> 4

								        add     esi, ulSlopeOneAdjustment

								        and     ecx, F-1                ; M1 = (M0 + dM) & 15


								        add     eax, N0

								        and     eax, F-1                ; N1 = (N0 + dN) & 15

								        jnz     short RtoL_N1_not_zero


								; N1 == 0: add the rounded value of M1 to x1.  'ror bl, 8' leaves bl

								; unchanged and sets the carry to bit 7 of bl, which is FL_H_ROUND_DOWN

								; (asserted by the .errnz); the sbb then forms M1 + F/2 - carry:


								        .errnz  FL_H_ROUND_DOWN - 80h

								        ror     bl, 8                   ; CF = (fl & FL_H_ROUND_DOWN) != 0

								        sbb     ecx, -F/2               ; ecx = M1 + F/2 - CF

								        shr     ecx, FLOG2              ; ecx = LROUND(M1, fl & FL_H_ROUND_DOWN)

								        add     esi, ecx

								        jmp     done_first_pel_last_pel


								RtoL_N1_not_zero:

								; Branch-free absolute value of (N1 - F/2), as a borrow mask in edx

								; followed by a conditional negate:


								        sub     eax, F/2                ; eax = N1 - F/2, CF set if N1 < F/2

								        sbb     edx, edx                ; edx = -1 if borrow, else 0

								        xor     eax, edx

								        sub     eax, edx                ; eax = ABS(N1 - F/2)

								        add     eax, ecx                ; eax = ABS(N1 - F/2) + M1

								; The cmp sets the carry exactly when eax <= F, and 'sbb esi, -1'

								; computes esi + 1 - carry, so x1 is incremented only when the sum

								; exceeds F:


								        cmp     eax, F+1                ; CF = (eax < F + 1)

								        sbb     esi, -1                 ; x1 += (ABS(N1 - F/2) + M1 > F)


								done_first_pel_last_pel:


								; Register state:

								;                       eax = garbage

								;                       ebx = fl

								;                       ecx = garbage

								;                       edx = garbage

								;                       esi = x1

								;                       edi = garbage


								        mov     ecx, x0

								        lea     edx, [esi + 1]

								        sub     edx, ecx                ; edx = x1 - x0 + 1


								        jle     next_line

								        mov     cStylePels, edx

								        mov     xStart, ecx


								;-----------------------------------------------------------------------;

								; See if clipping or styling needs to be done.                          ;

								;-----------------------------------------------------------------------;


								        testb   ebx, FL_CLIP

								        jnz     do_some_clipping


								; Register state:

								;                       eax = garbage

								;                       ebx = fl

								;                       ecx = x0        (stack variable correct too)

								;                       edx = garbage

								;                       esi = x1

								;                       edi = garbage


								done_clipping:

								        mov     eax, y0


								        sub     esi, ecx

								        inc     esi                     ; esi = cPels = x1 - x0 + 1

								        mov     cPels, esi


								        mov     esi, ppdev

								        add     ecx, x                  ; ecx = ptlStart.ptl_x

								        add     eax, y                  ; eax = ptlStart.ptl_y


								        mov     esi, [esi].pdev_lNextScan ; we'll compute the sign of lNextScan


								        testb   ebx, FL_FLIP_D

								        jz      short do_v_unflip

								        xchg    ecx, eax


								do_v_unflip:

								        testb   ebx, FL_FLIP_V

								        jz      short done_unflips

								        neg     eax

								        neg     esi


								done_unflips:

								        mov     strip.ST_lNextScan, esi ; lNextScan now right for y-direction

								        testb   ebx, FL_STYLED

								        jnz     do_some_styling


								done_styling:

								        lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]

								        mov     plStripEnd, edx


								        mov     cPelsAfterThisBank, 0

								        mov     cStripsInNextRun, 7fffffffh


								;-----------------------------------------------------------------------;

								; Do banking setup.                                                     ;

								;-----------------------------------------------------------------------;


								        public  bank_setup

								bank_setup::


								; Register state:

								;                       eax = ptlStart.ptl_y

								;                       ebx = fl

								;                       ecx = ptlStart.ptl_x

								;                       edx = garbage

								;                       esi = garbage

								;                       edi = garbage


								        mov     esi, ppdev

								        cmp     eax, [esi].pdev_rcl1WindowClip.yTop

								        jl      short bank_get_initial_bank   ; ptlStart.y < rcl1WindowClip.yTop


								        cmp     eax, [esi].pdev_rcl1WindowClip.yBottom

								        jl      short bank_got_initial_bank   ; ptlStart.y < rcl1WindowClip.yBot


								bank_get_initial_bank:

								        mov     ptlStart.ptl_y, eax     ; Save ptlStart.ptl_y

								        mov     edi, ecx                ; Save ptlStart.ptl_x


								        .errnz  JustifyTop

								        .errnz  JustifyBottom - 1

								        .errnz  FL_FLIP_V - 8


								        mov     ecx, ebx                ; JustifyTop if line goes down,

								        shr     ecx, 3                  ; JustifyBottom if line goes up

								        and     ecx, 1


								bank_justified:

								        ptrCall <dword ptr [esi].pdev_pfnBankControl>, \

								                <esi, eax, ecx>


								        mov     eax, ptlStart.ptl_y

								        mov     ecx, edi


								bank_got_initial_bank:

								        testb   ebx, FL_FLIP_D

								        jz      short bank_major_x


								bank_major_y:

								        testb   ebx, FL_FLIP_V

								        jz      short bank_major_y_down

								bank_major_y_up:

								        lea     edi, [eax + 1]

								        sub     edi, [esi].pdev_rcl1WindowClip.yTop

								        jmp     short bank_done_y_major

								bank_major_y_down:

								        mov     edi, [esi].pdev_rcl1WindowClip.yBottom

								        sub     edi, eax

								bank_done_y_major:

								        mov     esi, cPels

								        sub     esi, edi                ; edi = cPelsInBank

								        mov     cPelsAfterThisBank, esi

								        jle     short done_bank_setup

								        mov     cPels, edi

								        jmp     short done_bank_setup


								bank_major_x:

								        mov     edi, dN

								        shr     edi, FLOG2

								        add     edi, y


								; We're guessing at the y-position of the end pixel (it's too much work

								; to compute the actual value) to see if the line spans more than one

								; bank.  We have to add at least a slop value of '3' because the actual

								; start pixel may be up to 2 off from 'y' because of end-pixel exclusiveness,

								; and we have to add 1 more because we're taking the floor of (dN / F), to

								; account for rounding:


								        add     edi, 3                  ; yEnd = edi = y + LFLOOR(dN) + 3

								        testb   ebx, FL_FLIP_V

								        jz      short bank_major_x_down

								bank_major_x_up:

								        mov     edx, 1

								        sub     edx, [esi].pdev_rcl1WindowClip.yTop    ; edx = -yNextBankStart


								        cmp     edi, edx

								        lea     edx, [edx + eax]        ; edx = cStripsInNextRun

								        jl      short bank_major_x_done


								; Line may go over bank boundary, so don't do a half flip:


								        or      ebx, FL_DONT_DO_HALF_FLIP

								        jmp     short bank_major_x_done


								bank_major_x_down:

								        mov     esi, [esi].pdev_rcl1WindowClip.yBottom  ; esi = yNextBankStart


								        mov     edx, esi

								        sub     edx, eax                ; edx = cStripsInNextRun


								        cmp     edi, esi

								        jl      short bank_major_x_done

								        or      ebx, FL_DONT_DO_HALF_FLIP


								bank_major_x_done:

								        sub     edx, STRIP_MAX

								        mov     cStripsInNextRun, edx

								        jge     short done_bank_setup


								        lea     edx, [strip.ST_alStrips + edx * 4 + (STRIP_MAX * 4)]

								        mov     plStripEnd, edx


								done_bank_setup:


								;-----------------------------------------------------------------------;

								; Setup to do DDA.                                                      ;

								;-----------------------------------------------------------------------;


								; Register state:

								;                       eax = ptlStart.ptl_y

								;                       ebx = fl

								;                       ecx = ptlStart.ptl_x

								;                       edx = garbage

								;                       esi = garbage

								;                       edi = garbage


								        mov     esi, ppdev

								        mov     edi, eax                ; Now edi = ptlStart.ptl_y

								        imul    [esi].pdev_lNextScan

								        add     eax, [esi].pdev_pvBitmapStart

								        add     eax, ecx

								        mov     strip.ST_pjScreen, eax  ; pjScreen = pchBits + ptlStart.y *

								                                        ;   cjDelta + ptlStart.x


								        mov     eax, dM

								        mov     ecx, dN

								        mov     esi, eqGamma_lo

								        mov     edi, eqGamma_hi


								; Register state:

								;                       eax = dM

								;                       ebx = fl

								;                       ecx = dN

								;                       edx = garbage

								;                       esi = eqGamma_lo

								;                       edi = eqGamma_hi


								        lea     edx, [ecx + ecx]        ; if (2 * dN > dM)

								        cmp     edx, eax

								        mov     edx, y0                 ; Load y0 again

								        jbe     short after_half_flip


								        test    ebx, FL_DONT_DO_HALF_FLIP

								        jnz     short after_half_flip


								        or      ebx, FL_FLIP_HALF

								        mov     fl, ebx


								; Do a half flip!


								        not     esi

								        not     edi

								        add     esi, eax

								        adc     edi, 0                  ; eqGamma = -eqGamma - 1 + dM


								        neg     ecx

								        add     ecx, eax                ; dN = dM - dN


								        neg     edx

								        add     edx, x0                 ; y0 = x0 - y0


								after_half_flip:

								        mov     strip.ST_flFlips, ebx

								        and     ebx, FL_STRIP_MASK


								        .errnz  FL_STRIP_SHIFT

								        mov     eax, apfn

								        lea     eax, [eax + ebx * 4]

								        mov     eax, [eax]

								        mov     pfn, eax

								        mov     eax, dM


								; Register state:

								;                       eax = dM

								;                       ebx = garbage

								;                       ecx = dN

								;                       edx = y0

								;                       esi = eqGamma_lo

								;                       edi = eqGamma_hi


								        or      ecx, ecx

								        jz      short zero_slope


								compute_dda_stuff:

								        inc     edx

								        mul     edx

								        stc                             ; set the carry to accomplish -1

								        sbb     eax, esi

								        sbb     edx, edi                ; (y0 + 1) * dM - eqGamma - 1

								        div     ecx


								        mov     esi, eax                ; esi = i

								        mov     edi, edx                ; edi = r


								        xor     edx, edx

								        mov     eax, dM

								        div     ecx                     ; edx = d_R, eax = d_I

								        mov     d_I, eax


								        sub     esi, x0

								        inc     esi


								done_dda_stuff:

								        lea     eax, [strip.ST_alStrips]

								        mov     ebx, cPels


								;-----------------------------------------------------------------------;

								; Do our main DDA loop.                                                 ;

								;-----------------------------------------------------------------------;


								        sub     edi, ecx                ; offset remainder term from [0..dN)

								                                        ;   to [-dN..0) so test in inner

								                                        ;   loop is quicker


								; Register state:

								;                       eax = plStrip   ; current pointer into strip array

								;                       ebx = cPels     ; total number of pels in line

								;                       ecx = dN        ; delta-N = rise in line

								;                       edx = d_R       ; d_I + d_R/dN = exact strip length

								;                       esi = i         ; length of current strip

								;                       edi = r         ; remainder term for current strip

								;                                       ;   in range [-dN..0)


								; Main DDA loop.  Each pass takes the current strip length (esi) off

								; the remaining pel count (ebx).  When that count drops to zero or

								; below, control leaves for final_strip.  Otherwise the length is

								; appended at [eax], and if the write pointer has reached plStripEnd

								; the buffer is flushed through output_strips, which comes back in at

								; done_output_strips.  The next strip length is d_I, bumped by one

								; whenever adding d_R (edx) to the remainder term (edi) makes it

								; non-negative, in which case dN (ecx) is subtracted back out.  The

								; body is written out twice (unrolled).


								        public  dda_loop

								dda_loop::

								        sub     ebx, esi                ; subtract strip length from line length

								        jle     final_strip             ; if zero or negative, done with line


								        mov     [eax], esi              ; write strip length to strip array

								        add     eax, 4                  ; adjust strip pointer

								        cmp     plStripEnd, eax         ; is the strip array buffer full?

								        jbe     short output_strips     ; if so, empty it


								; The output_strips routine jumps to here when done:


								done_output_strips:

								        mov     esi, d_I                ; our normal strip length

								        add     edi, edx                ; adjust our remainder term

								        jl      short dda_loop          ; still negative: strip length is d_I


								        sub     edi, ecx                ; our remainder became 1 or more, so

								        inc     esi                     ;   we increment this strip length

								                                        ;   and adjust the remainder term


								; We've unrolled our loop a bit, so this should look familiar to the above:


								        sub     ebx, esi                ; subtract strip length from line length

								        jle     final_strip             ; if zero or negative, done with line


								        mov     [eax], esi              ; write strip length to strip array

								        add     eax, 4                  ; adjust strip pointer


								; Note that banking requires us to check if the strip array is full here

								; too (and note that if output_strips is called it will return to

								; done_output_strips):


								        cmp     plStripEnd, eax         ; is the strip array buffer full?

								        jbe     short output_strips     ; if so, empty it


								        mov     esi, d_I                ; our normal strip length

								        add     edi, edx                ; adjust our remainder term

								        jl      short dda_loop          ; still negative: strip length is d_I


								        sub     edi, ecx                ; our remainder became 1 or more, so

								        inc     esi                     ; adjust

								        jmp     short dda_loop


								zero_slope:

								        mov     esi, 7fffffffh

								        jmp     short done_dda_stuff


								;-----------------------------------------------------------------------;

								; Empty strips buffer & possibly do x-major bank switch.                ;

								;-----------------------------------------------------------------------;


								output_strips:

								        mov     d_R, edx

								        mov     cPels, ebx

								        mov     i, esi

								        mov     r, edi

								        mov     dN, ecx


								        lea     edx, [strip]

								        mov     ecx, pls


								; Call our strip routine:


								        ptrCall <dword ptr pfn>, \

								                <edx, ecx, eax>


								; It may be that we ran out of run in our strips buffer, and don't

								; actually have to switch banks.  See if that's the case:


								        mov     eax, cStripsInNextRun

								        or      eax, eax

								        jg      short done_strip_bank_switch


								; We have to switch banks.  See if we're going up or down:


								        mov     esi, ppdev

								        test    fl, FL_FLIP_V

								        jz      short bank_x_down


								bank_x_up:

								        mov     edi, strip.ST_pjScreen

								        sub     edi, [esi].pdev_pvBitmapStart

								        mov     ebx, [esi].pdev_rcl1WindowClip.yTop

								        dec     ebx                     ; we want yTop - 1 to be mapped in


								; Map in the next higher bank:


								        ptrCall <dword ptr [esi].pdev_pfnBankControl>, \

								                <esi, ebx, JustifyBottom>; ebx, esi and edi are preserved


								        lea     eax, [ebx + 1]

								        sub     eax, [esi].pdev_rcl1WindowClip.yTop

								                                        ; eax = # of scans can do in bank


								        add     edi, [esi].pdev_pvBitmapStart

								        mov     strip.ST_pjScreen, edi


								        jmp     short done_strip_bank_switch


								bank_x_down:

								        mov     edi, strip.ST_pjScreen

								        sub     edi, [esi].pdev_pvBitmapStart

								        mov     ebx, [esi].pdev_rcl1WindowClip.yBottom


								; Map in the next lower bank:


								        ptrCall <dword ptr [esi].pdev_pfnBankControl>, \

								                <esi, ebx, JustifyTop>  ; ebx, esi and edi are preserved


								        mov     eax, [esi].pdev_rcl1WindowClip.yBottom

								        sub     eax, ebx                ; eax = # scans can do in bank


								        add     edi, [esi].pdev_pvBitmapStart

								        mov     strip.ST_pjScreen,edi


								done_strip_bank_switch:


								; eax = cStripsInNextRun


								        lea     edx, [strip.ST_alStrips + (STRIP_MAX * 4)]

								        sub     eax, STRIP_MAX

								        mov     cStripsInNextRun, eax

								        jge     short get_ready_for_more_strips

								        lea     edx, [edx + eax * 4]


								get_ready_for_more_strips:

								        mov     plStripEnd, edx


								        mov     esi, i

								        mov     edi, r

								        mov     ebx, cPels

								        mov     edx, d_R

								        mov     ecx, dN

								        lea     eax, [strip.ST_alStrips]

								        jmp     done_output_strips


								;-----------------------------------------------------------------------;

								; Empty strips buffer.  Either get new line or do y-major bank switch.  ;

								;-----------------------------------------------------------------------;


								final_strip:

								        add     ebx, esi

								        mov     [eax], ebx

								        add     eax, 4


								        cmp     cPelsAfterThisBank, 0

								        jg      short bank_y_major


								very_final_strip:

								        lea     edx, [strip]

								        mov     ecx, pls


								        ptrCall <dword ptr pfn>, \

								                <edx, ecx, eax>


								; NOTE: next_line is jumped to from various places, and it cannot assume

								;       any registers are loaded.


								; Advance to the next line segment, or return when there are none left.

								; For the non-complex-clip case the next pair of points is read from

								; pptfxBuf and control goes back to the_main_loop; the complex-clip

								; case instead counts down cptfx and resumes at

								; continue_complex_clipping.


								next_line:

								        mov     ebx, flStart            ; reload the flags from flStart

								        testb   ebx, FL_COMPLEX_CLIP

								        jnz     short see_if_done_complex_clipping


								        mov     edx, pptfxBuf

								        cmp     edx, pptfxBufEnd        ; reached the end of the point buffer?

								        je      short all_done


								        mov     esi, [edx].ptl_x        ; (esi, ecx) = point at pptfxBuf

								        mov     ecx, [edx].ptl_y

								        add     edx, size POINTL        ; step to the following point

								        mov     pptfxBuf, edx

								        mov     eax, [edx].ptl_x        ; (eax, edi) = the following point

								        mov     edi, [edx].ptl_y

								        jmp     the_main_loop


								all_done:

								        mov     eax, 1                  ; value left in eax when bLines returns


								        cRet    bLines


								see_if_done_complex_clipping:

								        mov     ebx, fl

								        dec     cptfx                   ; one fewer to go; finished when it hits zero

								        jz      short all_done


								        and     ebx, NOT FL_FLIP_HALF   ; Make sure the next run doesn't have

								        mov     fl, ebx                 ;   to do a half-flip if it doesn't

								                                        ;   want to

								        jmp     continue_complex_clipping


								;-----------------------------------------------------------------------;

								; Switch banks for a y-major line.                                      ;

								;-----------------------------------------------------------------------;

								;

								; Entered from final_strip when cPelsAfterThisBank > 0.  The registers

								; edx, esi, edi and ecx are saved in d_R, i, r and dN; the strips built

								; so far are drawn; the neighbouring bank is mapped in through

								; pdev_pfnBankControl; and strip.ST_pjScreen is rebased onto the new

								; pdev_pvBitmapStart.  Exits to done_output_strips (with edi, edx and

								; ecx reloaded from r, d_R and dN) or to very_final_strip.

								;

								; NOTE(review): 'i' is saved here but is not reloaded in this section;

								; presumably done_output_strips reloads it -- confirm.


								        public  bank_y_major

								bank_y_major::

								        mov     d_R, edx

								        mov     i, esi

								        mov     r, edi

								        mov     dN, ecx

								        sub     ebx, esi                ; Undo our offset


								; Draw the strips accumulated so far (eax = just past the last entry):

								bank_y_output_strips:

								        lea     edx, [strip]

								        mov     ecx, pls


								        ptrCall <dword ptr pfn>, \

								                <edx, ecx, eax>


								        mov     esi, ppdev

								        test    fl, FL_FLIP_V

								        jz      short bank_y_down


								bank_y_up:

								        mov     edi, strip.ST_pjScreen

								        sub     edi, [esi].pdev_pvBitmapStart ; edi = offset from bitmap start

								        mov     ecx, [esi].pdev_rcl1WindowClip.yTop

								        push    ecx                     ; remember the old yTop

								        dec     ecx                     ; we want yTop - 1 to be mapped in


								; Map in the next higher bank:


								        ptrCall <dword ptr [esi].pdev_pfnBankControl>, \

								                <esi, ecx, JustifyBottom>; ebx, esi and edi are preserved


								        pop     ecx                     ; ecx = old yTop

								        sub     ecx, [esi].pdev_rcl1WindowClip.yTop

								                                        ; ecx = # of scans can do in bank


								        add     edi, [esi].pdev_pvBitmapStart ; rebase onto the new mapping

								        mov     strip.ST_pjScreen, edi


								        mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank

								        lea     eax, [strip.ST_alStrips]                ; eax = plStrip

								        or      ebx, ebx                                ; ebx = cPels

								        jge     bank_y_done_partial_strip       ; ebx >= 0: no strip was cut

								        jmp     short bank_y_done_switch        ; ebx < 0: continue old strip


								bank_y_down:

								        mov     edi, strip.ST_pjScreen

								        sub     edi, [esi].pdev_pvBitmapStart ; edi = offset from bitmap start

								        mov     ecx, [esi].pdev_rcl1WindowClip.yBottom

								        push    ecx                     ; remember the old yBottom


								; Map in the next lower bank:


								        ptrCall <dword ptr [esi].pdev_pfnBankControl>, \

								                <esi, ecx, JustifyTop>  ; ebx, esi and edi are preserved


								        pop     eax                     ; eax = old yBottom

								        mov     ecx, [esi].pdev_rcl1WindowClip.yBottom

								        sub     ecx, eax                ; ecx = # scans can do in bank


								        add     edi, [esi].pdev_pvBitmapStart ; rebase onto the new mapping

								        mov     strip.ST_pjScreen,edi


								        mov     edx, cPelsAfterThisBank                 ; edx = cPelsAfterBank

								        lea     eax, [strip.ST_alStrips]                ; eax = plStrip

								        or      ebx, ebx                                ; ebx = cPels

								        jge     short bank_y_done_partial_strip ; ebx >= 0: no strip was cut


								bank_y_done_switch:


								; Handle a single strip stretching over multiple banks:


								        test    fl, FL_FLIP_HALF

								        jz      short bank_y_no_half_flip


								; We now have to adjust for the fact that the strip drawers always leave

								; the state ready for the next new strip (e.g., if we're doing vertical

								; strips, it advances pjScreen one to the right after drawing each strip).

								; But the problem is that since we crossed a bank, we have to continue the

								; *old* strip, so we have to undo that advance:


								bank_y_half_flip:

								        inc     strip.ST_pjScreen

								        jmp     short bank_y_done_bit_adjust


								bank_y_no_half_flip:

								        dec     strip.ST_pjScreen


								bank_y_done_bit_adjust:

								        mov     esi, ebx

								        neg     esi                             ; esi = # pels left in strip


								; eax = pointer to first strip entry

								; ebx = negative esi

								; ecx = # of pels we can put down in this window

								; edx = # of pels remaining to do in line

								; esi = # of pels left in strip


								; We have three special cases to check here:

								;

								;       1) If the strip spans the entire next window

								;       2) This is the last strip in the line

								;       3) Neither of the above


								        cmp     edx,ecx                         ;if line shorter than bank,

								        jle     short bank_y_check_if_last_strip;  know strip doesn't span bank


								        cmp     esi,ecx                         ;if line spans bank, don't have

								        jl      short bank_y_continue_strip     ;  to check if last strip


								; If ((# of pels in line > window size) && (# of pels in strip >= window

								; size)) then the strip spans this bank.  The whole window (ecx pels) is

								; emitted as one strip and we loop back to draw it and switch banks again:


								        mov     [eax], ecx

								        add     eax, 4

								        add     ebx, ecx                ; that many fewer pels left in strip

								        sub     edx, ecx                ; and in the line

								        mov     cPelsAfterThisBank, edx

								        jmp     bank_y_output_strips


								bank_y_check_if_last_strip:

								        cmp     esi, edx                        ;if strip is shorter than line,

								        jl      short bank_y_continue_strip     ;  we know this isn't the last

								                                                ;  strip


								; Handle case where this is the last strip in the line and it overlaps a bank:


								        mov     [eax], edx              ; emit the pels remaining in the line

								        add     eax, 4

								        jmp     very_final_strip


								; The rest of the old strip fits in this window; emit it as the first

								; strip entry:

								bank_y_continue_strip:

								        mov     [eax], esi

								        add     eax, 4


								; Net effect of the next four instructions:

								;       ebx += min(cPelsAfterThisBank, cyWindow)

								;       edx  = cPelsAfterThisBank - cyWindow

								bank_y_done_partial_strip:

								        add     ebx, edx                ; cPels += cPelsAfterThisBank

								        sub     edx, ecx                ; cPelsAfterThisBank -= cyWindow


								        jle     short bank_y_get_ready  ; flags are from the sub above

								        sub     ebx, edx


								; Store the new count and reload the registers saved on entry:

								bank_y_get_ready:

								        mov     cPelsAfterThisBank, edx

								        mov     edi, r

								        mov     edx, d_R

								        mov     ecx, dN

								        jmp     done_output_strips


								;---------------------------Private-Routine-----------------------------;

								; do_some_styling

								;

								; Inputs:

								;       eax = ptlStart.ptl_y

								;       ebx = fl

								;       ecx = ptlStart.ptl_x

								; Preserves:

								;       eax, ebx, ecx

								; Output:

								;       Exits to done_styling.

								;

								; Advances pls->LS_spNext by cStylePels, then locates the style array

								; element that the start of this line falls in, and fills in

								; strip.ST_pspStart, ST_pspEnd, ST_psp, ST_spRemaining and ST_bIsGap.

								; eax and ecx are parked in ptlStart and reloaded before exit; ebx is

								; never written.

								;

								;-----------------------------------------------------------------------;


								        public  do_some_styling

								do_some_styling::

								        mov     esi, pls

								        mov     ptlStart.ptl_x, ecx     ; park ecx; reloaded at done_arbitrary


								        mov     edi, [esi].LS_spNext    ; spThis

								        mov     edx, edi

								        add     edx, cStylePels         ; spNext


								do_non_alternate_style:


								; For styles, we don't bother to keep the style position normalized.

								; (we do ensure that it's positive, though).  If a figure is over 2

								; billion pels long, we'll be a pel off in our style state (oops!).


								        and     edx, 7fffffffh

								        mov     [esi].LS_spNext, edx

								        mov     ptlStart.ptl_y, eax     ; park eax; reloaded at done_arbitrary


								        testb   ebx, FL_FLIP_H

								        jz      short arbitrary_left_to_right


								; Right-to-left: style position is spNext - x0 + xStart.

								        sub     edx, x0

								        add     edx, xStart

								        mov     eax, edx

								        xor     edx, edx                ; zero-extend dividend for div

								        div     [esi].LS_spTotal        ; eax = quotient, edx = remainder


								; A non-zero remainder becomes spTotal - remainder, and the quotient is

								; complemented (which inverts its low bit, tested below):

								        neg     edx

								        jge     short continue_right_to_left ; remainder was zero

								        add     edx, [esi].LS_spTotal

								        not     eax


								continue_right_to_left:

								        mov     edi, dword ptr [esi].LS_bStartIsGap

								        not     edi                     ; inverted sense for right-to-left

								        mov     ecx, [esi].LS_aspRtoL

								        jmp     short compute_arbitrary_stuff


								; Left-to-right: style position is spThis + x0 - xStart.

								arbitrary_left_to_right:

								        add     edi, x0

								        sub     edi, xStart

								        mov     eax, edi

								        xor     edx, edx                ; zero-extend dividend for div

								        div     [esi].LS_spTotal        ; eax = quotient, edx = remainder

								        mov     edi, dword ptr [esi].LS_bStartIsGap

								        mov     ecx, [esi].LS_aspLtoR


								compute_arbitrary_stuff:

								;       eax = sp / spTotal

								;       ebx = fl

								;       ecx = pspStart

								;       edx = sp % spTotal

								;       esi = pls

								;       edi = bIsGap


								; Bit 0 of (eax AND cStyle) is set only when both are odd:

								        and     eax, [esi].LS_cStyle        ; if odd length style and second run

								        and     al, 1                       ; through style array, flip the

								        jz      short odd_style_array_done  ; meaning of the elements

								        not     edi


								odd_style_array_done:

								        mov     eax, [esi].LS_cStyle

								        mov     strip.ST_pspStart, ecx

								        lea     eax, [ecx + eax * 4 - 4]    ; address of last array element

								        mov     strip.ST_pspEnd, eax


								; Walk the style array, subtracting each element from edx until edx goes

								; negative; ecx then points at the element containing the position:

								find_psp:

								        sub     edx, [ecx]

								        jl      short found_psp

								        add     ecx, 4

								        jmp     short find_psp


								found_psp:

								        mov     strip.ST_psp, ecx

								        neg     edx                         ; amount left in this element

								        mov     strip.ST_spRemaining, edx


								; An odd element index (bit 2 of the byte offset set) flips bIsGap:

								        sub     ecx, strip.ST_pspStart

								        test    ecx, 4                      ; size STYLEPOS

								        jz      short done_arbitrary

								        not     edi


								done_arbitrary:

								        mov     dword ptr strip.ST_bIsGap, edi

								        mov     eax, ptlStart.ptl_y         ; restore caller's eax

								        mov     ecx, ptlStart.ptl_x         ; restore caller's ecx

								        jmp     done_styling


								;---------------------------Private-Routine-----------------------------;

								; do_some_clipping

								;

								; Inputs:

								;       eax = garbage

								;       ebx = fl

								;       ecx = x0

								;       edx = garbage

								;       esi = x1

								;       edi = garbage

								;

								; Decides whether to do simple or complex clipping.

								;

								; If FL_COMPLEX_CLIP is set in ebx, control goes to

								; initialize_complex_clipping; otherwise execution falls through into

								; simple_clipping, which immediately follows.

								;

								;-----------------------------------------------------------------------;


								        public  do_some_clipping

								do_some_clipping::

								        testb   ebx, FL_COMPLEX_CLIP

								        jnz     initialize_complex_clipping


								;-----------------------------------------------------------------------;

								; simple_clipping

								;

								; Inputs:

								;       ebx = fl

								;       ecx = x0

								;       esi = x1

								; Output:

								;       ebx = fl

								;       ecx = new x0 (stack variable updated too)

								;       esi = new x1

								;       y0 stack variable updated

								; Uses:

								;       All registers

								; Exits:

								;       to done_clipping

								;       (or, via totally_clipped, to next_line when nothing is visible)

								;

								; This routine handles clipping the line to the clip rectangle (it's

								; faster to handle this case in the driver than to call the engine to

								; clip for us).

								;

								; Fractional end-point lines complicate our lives a bit when doing

								; clipping:

								;

								; 1) For styling, we must know the unclipped line's length in pels, so

								;    that we can correctly update the styling state when the line is

								;    clipped.  For this reason, I do clipping after doing the hard work

								;    of figuring out which pixels are at the ends of the line (this is

								;    wasted work if the line is not styled and is completely clipped,

								;    but I think it's simpler this way).  Another reason is that we'll

								;    have calculated eqGamma already, which we use for the intercept

								;    calculations.

								;

								;    With the assumption that most lines will not be completely clipped

								;    away, this strategy isn't too painful.

								;

								; 2) x0, y0 are not necessarily zero, where (x0, y0) is the start pel of

								;    the line.

								;

								; 3) We know x0, y0 and x1, but not y1.  We haven't needed to calculate

								;    y1 until now.  We'll need the actual value, and not an upper bound

								;    like y1 = LFLOOR(dM) + 2 because we have to be careful when

								;    calculating x(y) that y0 <= y <= y1, otherwise we can cause an

								;    overflow on the divide (which, needless to say, is bad).

								;

								; In the intercept formulas below, "eqBeta" is never loaded from memory:

								; the stc/sbb/sbb sequence subtracts (eqGamma + 1) from the 64-bit

								; product, which is the same as adding the one's complement of eqGamma.

								;

								;-----------------------------------------------------------------------;


								        public  simple_clipping

								simple_clipping::

								        mov     edi, prclClip           ; get pointer to normalized clip rect

								        and     ebx, FL_RECTLCLIP_MASK  ;   (it's lower-right exclusive)


								        .errnz  (FL_RECTLCLIP_SHIFT - 2); ((ebx AND FL_RECTLCLIP_MASK) shr

								        .errnz  (size RECTL) - 16       ;   FL_RECTLCLIP_SHIFT) is our index

								        lea     edi, [edi + ebx*4]      ;   into the array of rectangles


								        mov     edx, [edi].xRight       ; load the rect coordinates

								        mov     eax, [edi].xLeft

								        mov     ebx, [edi].yBottom

								        mov     edi, [edi].yTop


								; Translate to our origin and do some quick completely clipped tests:


								        sub     edx, x

								        cmp     ecx, edx

								        jge     totally_clipped         ; totally clipped if x0 >= xRight


								        sub     eax, x

								        cmp     esi, eax

								        jl      totally_clipped         ; totally clipped if x1 < xLeft


								        sub     ebx, y

								        cmp     y0, ebx

								        jge     totally_clipped         ; totally clipped if y0 >= yBottom


								        sub     edi, y                  ; edi = yTop relative to our origin


								; Save some state:


								        mov     xClipRight, edx

								        mov     xClipLeft, eax


								        cmp     esi, edx                ; if (x1 >= xRight) x1 = xRight - 1

								        jl      short calculate_y1

								        lea     esi, [edx - 1]


								calculate_y1:

								        mov     eax, esi                ; y1 = (x1 * dN + eqGamma) / dM

								        mul     dN                      ; edx:eax = x1 * dN

								        add     eax, eqGamma_lo

								        adc     edx, eqGamma_hi

								        div     dM                      ; eax = y1


								        cmp     edi, eax                ; if (yTop > y1) clipped

								        jg      short totally_clipped


								        cmp     ebx, eax                ; if (yBottom > y1) know x1

								        jg      short x1_computed


								        mov     eax, ebx                ; x1 = (yBottom * dM + eqBeta) / dN

								        mul     dM                      ; edx:eax = yBottom * dM

								        stc                             ; carry in: subtract eqGamma + 1

								        sbb     eax, eqGamma_lo

								        sbb     edx, eqGamma_hi

								        div     dN

								        mov     esi, eax                ; esi = new x1


								; At this point, we've taken care of calculating the intercepts with the

								; right and bottom edges.  Now we work on the left and top edges:


								x1_computed:

								        mov     edx, y0


								        mov     eax, xClipLeft          ; don't have to compute y intercept

								        cmp     eax, ecx                ;   at left edge if line starts to

								        jle     short top_intercept     ;   right of left edge


								        mov     ecx, eax                ; x0 = xLeft

								        mul     dN                      ; y0 = (xLeft * dN + eqGamma) / dM

								        add     eax, eqGamma_lo

								        adc     edx, eqGamma_hi

								        div     dM


								        cmp     ebx, eax                ; if (yBottom <= y0) clipped

								        jle     short totally_clipped


								        mov     edx, eax                ; edx = new y0 for the test below

								        mov     y0, eax


								top_intercept:

								        mov     ebx, fl                 ; get ready to leave

								        mov     x0, ecx


								        cmp     edi, edx                ; if (yTop <= y0) done clipping

								        jle     done_clipping


								        mov     eax, edi                ; x0 = (yTop * dM + eqBeta) / dN + 1

								        mul     dM                      ; edx:eax = yTop * dM

								        stc                             ; carry in: subtract eqGamma + 1

								        sbb     eax, eqGamma_lo

								        sbb     edx, eqGamma_hi

								        div     dN

								        lea     ecx, [eax + 1]


								        cmp     xClipRight, ecx         ; if (xRight <= x0) clipped

								        jle     short totally_clipped


								        mov     y0, edi                 ; y0 = yTop

								        mov     x0, ecx

								        jmp     done_clipping           ; all done!


								totally_clipped:


								; The line is completely clipped.  See if we have to update our style state:


								        mov     ebx, fl

								        testb   ebx, FL_STYLED

								        jz      next_line


								; Adjust our style state:


								        mov     esi, pls

								        mov     eax, [esi].LS_spNext

								        add     eax, cStylePels

								        mov     [esi].LS_spNext, eax


								        cmp     eax, [esi].LS_spTotal2

								        jb      next_line               ; still below spTotal2: no wrap


								; Have to normalize first:


								        xor     edx, edx                ; zero-extend dividend for div

								        div     [esi].LS_spTotal2

								        mov     [esi].LS_spNext, edx    ; spNext = spNext mod spTotal2


								        jmp     next_line


								;-----------------------------------------------------------------------;


								; Reached from do_some_clipping when FL_COMPLEX_CLIP is set.  Saves dN in

								; dN_Original (continue_complex_clipping reloads dN from it on every

								; run) and then falls through into continue_complex_clipping.

								initialize_complex_clipping:

								        mov     eax, dN                 ; save a copy of original dN

								        mov     dN_Original, eax


								;---------------------------Private-Routine-----------------------------;

								; continue_complex_clipping

								;

								; Inputs:

								;       ebx = fl

								; Output:

								;       ebx = fl

								;       ecx = x0

								;       esi = x1

								; Uses:

								;       All registers.

								; Exits:

								;       to done_clipping

								;

								; This routine handles the necessary initialization for the next

								; run in the CLIPLINE structure.

								;

								; It computes x0/x1 from the run at prun, advances prun by one RUN,

								; resets pls->LS_spNext from LS_spComplex and dN from dN_Original, and

								; recomputes y0 = (dN * x0 + eqGamma) / dM.

								;

								; NOTE: This routine is jumped to from two places!

								;-----------------------------------------------------------------------;


								        public  continue_complex_clipping

								continue_complex_clipping::

								        mov     edi, prun

								        mov     ecx, xStart

								        testb   ebx, FL_FLIP_H

								        jz      short complex_left_to_right


								complex_right_to_left:


								; Figure out x0 and x1 for right-to-left lines:


								        add     ecx, cStylePels

								        dec     ecx

								        mov     esi, ecx                ; esi = ecx = xStart + cStylePels - 1

								        sub     ecx, [edi].RUN_iStop    ; New x0

								        sub     esi, [edi].RUN_iStart   ; New x1

								        jmp     short complex_reset_variables


								complex_left_to_right:


								; Figure out x0 and x1 for left-to-right lines:


								        mov     esi, ecx                ; esi = ecx = xStart

								        add     ecx, [edi].RUN_iStart   ; New x0

								        add     esi, [edi].RUN_iStop    ; New x1


								complex_reset_variables:

								        mov     x0, ecx


								; The half flip mucks with some of our variables, and we have to reset

								; them every pass.  We would have to reset eqGamma too, but it never

								; got saved to memory in its modified form.


								        add     edi, size RUN

								        mov     prun, edi               ; Increment run pointer for next time


								        mov     edi, pls

								        mov     eax, [edi].LS_spComplex

								        mov     [edi].LS_spNext, eax    ; pls->spNext = pls->spComplex


								        mov     eax, dN_Original        ; dN = dN_Original

								        mov     dN, eax


								        mul     ecx                     ; edx:eax = dN * x0

								        add     eax, eqGamma_lo

								        adc     edx, eqGamma_hi         ; [edx:eax] = dN*x0 + eqGamma


								        div     dM                      ; eax = y0 for the new x0

								        mov     y0, eax

								        jmp     done_clipping


								endProc bLines


								        end