Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1348 lines
51 KiB

page ,132
title BitBLT
;---------------------------Module-Header------------------------------;
; Module Name: cblt.asm
;
; Copyright (c) 1992 Microsoft Corporation
;-----------------------------------------------------------------------;
; The code in this module uses 32-bit registers (EAX, EDI, ...), so the
; 386 instruction set must be enabled before anything else.
.386
;!!! All the code to convert from color to mono in this file needs to
;!!! be deleted. We don't need to do it anymore.
; Select the memory model and language type. The C convention is used
; unless this is a DOS_PLATFORM build without STD_CALL, in which case
; the Pascal convention is used.
ifndef DOS_PLATFORM
.model small,c
else
ifdef STD_CALL
.model small,c
else
.model small,pascal
endif; STD_CALL
endif; DOS_PLATFORM
; DS, ES and SS are assumed to address FLAT; no assumption is made
; about FS or GS.
assume ds:FLAT,es:FLAT,ss:FLAT
assume fs:nothing,gs:nothing
.code
; The code below is placed in the _TEXT$01 segment. The ASSUME is
; restated for this segment; note that SS is NOTHING here, unlike the
; ASSUME above.
_TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
; Keep the contents of the include files out of the listing.
.xlist
include stdcall.inc ; calling convention cmacros
include i386\cmacFLAT.inc ; FLATland cmacros
include i386\display.inc ; Display specific structures
include i386\ppc.inc ; Pack pel conversion structure
include i386\bitblt.inc ; General definitions
include i386\ropdefs.inc ; Rop definitions
include i386\egavga.inc ; EGA register definitions
include i386\devdata.inc
.list
; roptable is defined elsewhere. cblt copies ROP templates out of it and
; reads each template's length from the bytes starting at roptable+256.
extrn roptable:byte
;-----------------------------Public-Routine----------------------------;
; CBLT
;
; Compile a BLT onto the stack.
;
; Entry:
; EDI --> memory on stack to receive BLT program
; EBP --> fr structure
; Returns:
; Nothing
;-----------------------------------------------------------------------;
; EBP points at the fr structure on entry to cblt, so every "fr.xxx"
; reference below is an EBP-relative access into that structure.
fr equ [ebp] ;For consistency with other sources
cProc cblt
subttl Compile - Outer Loop
page
; If converting a packed pel format to planar format, add the code
; to convert one source scan into planar format
test fr.ppcBlt.fb,PPC_NEEDED
jz no_pack_pel_conversion
mov al,I_MOV_EBP_DWORD_I ;Give conversion routine access
stosb ; to conversion data
lea eax,fr.ppcBlt
stosd
mov al,I_CALL_DISP32 ;Call the static conversion code
stosb
mov eax,fr.ppcBlt.pfnConvert
sub eax,edi
sub eax,4 ;4 for length of displacement
stosd
no_pack_pel_conversion:
; Initialize plane indicator.
mov ax,(PLANE_1*256)+I_MOV_BL_BYTE_I
stosw
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Create the outerloop code. The first part of this code will save
; the scan line count register, destination pointer, and the source
; pointer (if there is a source).
;
; The generated code should look like:
;
; push ecx ;Save scan line count
; push edi ;Save destination pointer
; < push esi > ;Save source pointer
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
mov fr.pNextPlane,edi ;Save address of next plane code
mov bl,fr.the_flags
mov ax,I_PUSH_ECX_PUSH_EDI ;Save scan line count, destination ptr
stosw
test bl,F0_SRC_PRESENT ;Is a source needed?
jz cblt_2020 ; No
mov al,I_PUSH_ESI ; Yes, save source pointer
stosb
cblt_2020:
subttl Compile - Plane Selection
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; If the destination device is color and the display is involved in
; the blt, then the color plane selection logic must be added in.
; If the destination is monochrome, then no plane logic is needed.
; Two color memory bitmaps will not cause the plane selection logic
; to be copied.
;
; The generated code should look like:
;
; < push ebx > ;Save plane index
; < plane selection > ;Select plane
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
test bl,F0_DEST_IS_COLOR ;Is the destination color?
jz cblt_pattern_fetch ; No
mov al,I_PUSH_EBX ;Save plane index
stosb
test bl,F0_DEST_IS_DEV+F0_SRC_IS_DEV ;Is the device involved?
jz cblt_pattern_fetch ; No
; The device is involved for a color blt. Copy the logic for selecting
; the read/write plane
mov esi,offset FLAT:cps ;--> plane select logic
mov ecx,LENGTH_CPS
rep movsb
subttl Compile - Pattern Fetch
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Set up any pattern fetch code that might be needed.
; The pattern code has many fixups, so it isn't taken from a
; template. It is just stuffed as it is created.
;
; Entry: None
;
; Exit: DH = pattern
;
; Uses: AX,BX,CX,DH,flags
;
; For solid color brushes:
;
; mov dh,color
;
; For monochrome brushes:
;
; mov ebx,12345678h ;Load address of the brush
; mov dh,7[ebx] ;Get next brush byte
; mov al,[12345678h] ;Get brush index
; add al,direction ;Add displacement to next byte (+1/-1)
; and al,00000111b ;Keep it in range
; mov [12345678h],al ;Store displacement to next plane's bits
;
; For color brushes:
;
; mov ebx,12345678h ;Load address of the brush
; mov dh,7[bx] ;Get next brush byte
; mov al,[12345678h] ;Get brush index
; add al,SIZE Pattern ;Add displacement to next plane's bits
; and al,00011111b ;Keep it within the brush
; mov [12345678h],al ;Store displacement to next plane's bits
;
; The address of the increment for the brush is saved for
; the plane looping logic if the destination is a three plane
; color device. For a four plane color device, the AND
; automatically handles the wrap and no fixup is needed at
; the end of the plane loop.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_pattern_fetch:
test bl,F0_PAT_PRESENT ;Is a pattern needed?
jz cblt_initial_byte_fetch ; No, skip pattern code
mov al,fr.brush_accel ;Solid color needs no fetch logic
test al,SOLID_BRUSH
jz cblt_nonsolid_brush
and al,MM_ALL
shl eax,16
mov ax,I_TEST_BL_BYTE_I
stosd
dec edi ;Was only a three byte instruction
mov eax,I_SETNZ_DH
stosd
dec edi ;Was only a three byte instruction
mov ax,I_NEG_DH
stosw
jmp short cblt_initial_byte_fetch
cblt_nonsolid_brush:
mov al,I_MOV_EBX_DWORD_I ;mov ebx,lpPBrush
stosb
mov eax,fr.lpPBrush
stosd
mov ax,I_MOV_DH_EBX_DISP8 ;mov dh,pat_row[ebx]
stosw
mov edx,edi ;Save address of the brush index
mov al,fr.pat_row ;Set initial pattern row
mov bh,00000111b ;Set brush index mask
and al,bh ;Make sure it's legal at start
stosb
mov al,I_MOV_AL_MEM
stosb ;mov al,[xxxxxxxx]
mov eax,edx
stosd
mov al,I_ADD_AL_BYTE_I
mov ah,direction ;Set brush index
errnz INCREASE-1 ;Must be a 1
errnz DECREASE+1 ;Must be a -1
test bl,F0_COLOR_PAT ;Color pattern required?
jz cblt_2060 ; No
mov fr.addr_brush_index,edx ;Save address of brush index
mov ah,SIZE_PATTERN ;Set increment to next plane
mov bh,00011111b ;Set brush index mask
cblt_2060:
stosw
mov ah,bh ;and al,BrushIndexMask
mov al,I_AND_AL_BYTE_I
stosw
mov al,I_MOV_MEM_AL
stosb ;mov [xxxxxxxx],al
mov eax,edx
stosd
subttl Compile - Initial Byte Fetch
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Create the initial byte code. This may consist of one or two
; initial fetches (if there is a source), followed by the required
; logic action. The code should look something like:
;
; BLTouterloop:
; < mov bp,mask_p > ;Load phase mask for entire loop
; < xor bh,bh > ;Clear previous unused bits
;
; ; Perform first byte fetch
;
; < lodsb > ;Get source byte
; < color<==>mono munge > ;Color <==> mono conversion
; < phase alignment > ;Align bits as needed
;
; ; If an optional second fetch is needed, perform one
;
; < lodsb > ;Get source byte
; < color to mono munge > ;Color to mono munging
; < phase alignment > ;Align bits as needed
;
; logical action ;Perform logical action required
;
; mov ah,[edi] ;Get destination
; and ax,cx ;Saved unaltered bits
; or al,ah ; and mask in altered bits
; stosb ;Save the result
;
; The starting address of the first fetch/logical combination will be
; saved so that the code can be copied later instead of recreating it
; (if there are two fetches, the first fetch will not be copied)
;
; The length of the code up to the masking for altered/unaltered bits
; will be saved so the code can be copied into the inner loop.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_initial_byte_fetch:
xor dx,dx
or dh,fr.phase_h ;Is the phase 0? (also get the phase)
jz cblt_3020 ; Yes, so no phase alignment needed
mov al,I_SIZE_OVERRIDE
stosb
mov al,I_MOV_BP_WORD_I ;Set up the phase mask
stosb
mov ax,fr.mask_p ;Place the mask into the instruction
stosw
mov ax,I_XOR_BH_BH ;Clear previous unused bits
stosw
cblt_3020:
mov fr.start_fl,edi ;Save starting address of action
test fr.the_flags,F0_SRC_PRESENT ;Is there a source?
jz cblt_4000 ; No, don't generate fetch code
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Generate the required sequence of instructions for a fetch
; sequence. Only the minimum code required is generated.
;
; The code generated will look something like the following:
;
; BLTfetch:
; < lodsb > ;Get the next byte
; < color munging > ;Mono <==> color munging
;
; ; If the phase alignment isn't zero, then generate the minimum
; ; phase alignment needed. RORs or ROLs will be generated,
; ; depending on the fastest sequence. If the phase alignment
; ; is zero, then no phase alignment code will be generated.
;
; < ror al,n > ;Rotate as needed
; < mov ah,al > ;Mask used, unused bits
; < and ax,bp > ;(BP) = phase mask
; < or al,bh > ;Mask in old unused bits
; < mov bh,ah > ;Save new unused bits
;
;
; The nice thing about the above is it is possible for the fetch to
; degenerate into a simple LODSB instruction.
;
; Currently: BL = the_flags
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_3040:
mov fr.moore_flags,0 ;Assume REP cannot be used
shl bl,1 ;Color conversion?
jnc cblt_3180 ; No, we were lucky this time
errnz F0_GAG_CHOKE-10000000b
js cblt_3100 ;Mono ==> color
errnz F0_COLOR_PAT-01000000b
subttl Compile - Initial Byte Fetch, Color ==> Mono
page
; !!! Color to mono should not be needed anymore since the Engine will
; !!! not be calling me to do it! Let's remove this code!
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Generate the code to go from color to mono. Color to mono
; should map all colors that are background to 1's (white), and
; all colors which aren't background to 0's (black). If the source
; is the display, then the color compare register will be used.
; If the source is a memory bitmap, each byte of the plane will be
; XORed with the color from that plane, with the results all ORed
; together. The final result will then be complemented, giving
; the desired result.
;
; The generated code for bitmaps should look something like:
;
; mov al,next_plane[esi] ;Get C1 byte of source
; mov ah,2*next_plane[esi] ;Get C2 byte of source
; xor ax,C1BkColor+(C2BkColor*256) ;XOR with plane's color
; or ah,al ;OR the result
; mov al,3*next_plane[esi] ;Get C3 byte of source
; xor al,C3BkColor
; or ah,al
; lodsb ;Get C0 source
; xor al,C0BkColor ;XOR with C0BkColor
; or al,ah ;OR with previous result
; not al ;NOT to give 1's where background
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_3070:
test bl,F0_SRC_IS_DEV SHL 1 ;If device, use color compare register
jz cblt_3080 ;It's a memory bitmap
; We're in luck, the color compare register can be used. Set up
; for a color read, and use the normal mono fetch code. Show the
; innerloop code that the REP instruction can be used if this is
; a source copy.
mov fr.moore_flags,F1_REP_OK
mov ecx,edx ;Save dx
mov ah,fr.bkColor.SPECIAL ;Get SPECIAL byte of color
and ah,MM_ALL
mov al,GRAF_COL_COMP ;Stuff color into compare register
mov dx,EGA_BASE+GRAF_ADDR
out dx,ax
mov ax,GRAF_CDC ;Set Color Don't Care register
out dx,ax
mov ax,M_COLOR_READ SHL 8 + GRAF_MODE
out dx,ax
mov edx,ecx
jmp cblt_3180 ;Go generate mono fetch code
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; The source is a memory bitmap. Generate the code to compute
; the result of the four planes:
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_3080:
mov ax,I_MOV_AL_ESI_DISP32
stosw
mov eax,fr.src.next_plane
stosd
mov ebx,eax ;Save plane width
mov ax,I_MOV_AH_ESI_DISP32
stosw
lea eax,[ebx*2]
stosd
mov al,I_SIZE_OVERRIDE
stosb
mov al,I_XOR_AX_WORD_I
stosb
mov al,fr.bkColor.SPECIAL ;get the color index byte
mov ah,al ;have the same in AH
and ax,(C2_BIT shl 8) or C1_BIT
neg al
sbb al,al ;al will be 0ffh if plane bit is 1
neg ah
sbb ah,ah ;ah wil be 0ffh if plane bit is 1
stosw
mov ax,I_OR_AH_AL
stosw
mov ax,I_MOV_AL_ESI_DISP32
stosw
lea eax,[ebx*2][ebx]
stosd
mov al,I_XOR_AL_BYTE_I
mov ah,fr.bkColor.SPECIAL
and ah,C3_BIT
neg ah
sbb ah,ah
stosw
mov ax,I_OR_AH_AL
stosw
mov ax,I_LODSB+(I_XOR_AL_BYTE_I*256)
stosw
mov al,fr.bkColor.SPECIAL
shr al,1 ;get C0_BIT into carry
sbb al,al ;make it 0ffh if bit was set
.errnz C0_BIT - 00000001b
stosb ;save the modified value
errnz pcol_C0
mov ax,I_OR_AL_AH
stosw
mov ax,I_NOT_AL
stosw
jmp cblt_3240 ;Go create logic code
subttl Compile - Initial Byte Fetch, Mono ==> Color
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; The conversion is mono to color. Generate the code to
; do the conversion, and generate the table which will
; have the conversion values in it.
;
; When going from mono to color, 1 bits are considered to be
; the background color, and 0 bits are considered to be the
; foreground color.
;
; For each plane:
;
; If the foreground=background=1, then 1 can be used in
; place of the source.
;
; If the foreground=background=0, then 0 can be used in
; place of the source.
;
; If the foreground=0 and background=1, then the source
; can be used as is.
;
; If the foreground=1 and background=0, then the source
; must be complemented before using.
;
; Looks like a boolean function to me.
;
; An AND mask and an XOR mask will be computed for each plane,
; based on the above. The source will then be processed against
; the table. The generated code should look like
;
; lodsb
; and al,[xxxx]
; xor al,[xxxx+1]
;
; The table for munging the colors as stated above should look like:
;
; BackGnd ForeGnd Result AND XOR
; 1 1 1 00 FF
; 0 0 0 00 00
; 1 0 S FF 00
; 0 1 not S FF FF
;
; From this, it can be seen that the XOR mask is the same as the
; foreground color. The AND mask is the XOR of the foreground
; and the background color. Not too hard to compute
;
; It can also be seen that if the background color is white and the
; foreground (text) color is black, then the conversion needn't be
; generated (it just gives the source). This is advantageous since
; it will allow phased aligned source copies to use REP MOVSW.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Check to see if the background color is white, and the
; foreground (text) color is black. This can be determined by
; looking at the accelerator flags in the physical color.
cblt_3100:
mov ah,fr.TextColor.SPECIAL
xor ah,MONO_BIT ;Map black to white
and ah,fr.bkColor.SPECIAL ;AND in background color
cmp ah,MONO_BIT+ONES_OR_ZEROS
jne cblt_3110 ;Not black
mov fr.moore_flags,F1_REP_OK+F1_NO_MUNGE ;Show reps as ok, no color munge table
jmp short cblt_3180 ;Normal fetch required
; No way around it. The color conversion table and code
; must be generated.
cblt_3110:
mov cl,fr.bkColor.SPECIAL ;Get BackGround Colors
mov ch,fr.TextColor.SPECIAL ;Get ForeGround Colors
xor cl,ch
shr cl,1
sbb al,al
shr ch,1
sbb ah,ah
mov word ptr fr.ajM2C.(pcol_C0 * 2),ax
shr cl,1
sbb al,al
shr ch,1
sbb ah,ah
mov word ptr fr.ajM2C.(pcol_C1 * 2),ax
shr cl,1
sbb al,al
shr ch,1
sbb ah,ah
mov word ptr fr.ajM2C.(pcol_C2 * 2),ax
shr cl,1
sbb al,al
shr ch,1
sbb ah,ah
mov word ptr fr.ajM2C.(pcol_C3 * 2),ax
errnz <TextColor - bkColor - 4>
; Generate the code for munging the color as stated above.
mov ax,I_LODSB
stosb ;lodsb
mov ax,I_AND_AL_MEM ;and al,[xxxx]
stosw
lea eax,fr.ajM2C ; Set address of color munge
stosd
mov ebx,eax ; Save address
mov ax,I_XOR_AL_MEM ;xor al,[xxxx]
stosw
lea eax,1[ebx] ; Set address of XOR mask
stosd
jmp short cblt_3240
; Just need to generate the normal fetch sequence (lodsb)
cblt_3180:
mov al,I_LODSB ;Generate source fetch
stosb
subttl Compile - Phase Alignment
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Generate the phase alignment if any.
;
; It is assumed that AL contains the source byte
;
; Currently:
; DH = phase alignment
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_3240:
mov ecx,edi ;end of fetch code
sub ecx,fr.start_fl ;start of fetch code
mov fr.cFetchCode,ecx ;save size of fetch code
xor ecx,ecx ;Might have garbage in it
or dh,dh ;Any phase alignment?
jz cblt_3280 ; No, so skip alignment
mov cl,dh ;Get horizontal phase for rotating
mov ax,I_ROL_AL_N ;Assume rotate left n times
cmp cl,5 ;4 or less rotates?
jc cblt_3260 ; Yes
neg cl ; No, compute ROR count
add cl,8
mov ah,HIGH I_ROR_AL_N
errnz <(LOW I_ROL_AL_N)-(LOW I_ROR_AL_N)>
cblt_3260:
stosw ;Stuff the phase alignment rotates
mov al,cl ; then the phase alignment code
stosb
; Do not generate phase masking if there is only 1 src And only 1 dest byte.
; This is not just an optimization, see comments where these flags are set.
xor ch,ch
mov al,fr.first_fetch
and al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
xor al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
jz cblt_3280
mov esi,offset FLAT:phase_align
mov ecx,PHASE_ALIGN_LEN
rep movsb
cblt_3280:
test fr.first_fetch,FF_TWO_INIT_FETCHES ;Generate another fetch?
jz cblt_4000 ; No
; A second fetch needs to be stuffed. Copy the one just created.
mov esi,fr.start_fl ;Set new start, get old
mov fr.start_fl,edi
mov ecx,edi ;Compute how long fetch is
sub ecx,esi ; and move the bytes
rep movsb
subttl Compile - ROP Generation
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Create the logic action code
;
; The given ROP will be converted into the actual code that
; performs the ROP.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Copy the ROP template into the BLT
cblt_4000:
mov ax,fr.operands ;Get back rop data
mov bl,ah ;Get count of number of bits to move
and ebx,HIGH ROPLength
shr ebx,2
movzx ecx,roptable+256[ebx] ;Get length into cx
errnz ROPLength-0001110000000000b
mov ebx,eax ;Get offset of the template
and ebx,ROPOffset
lea esi,roptable[ebx] ;--> the template
rep movsb ;Move the template
cblt_4020:
mov bx,ax ;Keep rop around
or ah,ah ;Generate a negate?
jns cblt_4040 ; No
mov ax,I_NOT_AL
stosw
public cblt_4040
cblt_4040::
mov fr.end_fl,edi ;Save end of fetch/logic operation
subttl Compile - Mask And Save
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Generate code to mask and save the result. If the destination
; isn't in a register, it will be loaded from ES:[DI] first. The
; mask operation will then be performed, and the result stored.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
mov ax,I_MOV_AH_DEST ; ah,[edi]
stosw
mov esi,offset FLAT:masked_store;Move rest of masked store template
movsb ;Move size override
movsd
movsw
errnz MASKED_STORE_LEN-7 ;Must be seven bytes long
mov ax,fr.start_mask ;Stuff start mask into
xchg ah,al ; the template
mov [edi][MASKED_STORE_MASK],ax
mov fr.end_fls,edi ;Save end of fetch/logic/store operation
subttl Compile - Inner Loop Generation
page
;-----------------------------------------------------------------------;
; Now for the hard stuff; The inner loop (said with a "gasp!").
;
; If there is no innerloop, then no code will be generated
; (now that's fast!).
;-----------------------------------------------------------------------;
cblt_5000:
mov edx,fr.inner_loop_count ;Get the loop count
or dx,dx ;If the count is null
jz cblt_6000 ; don't generate any code
;!!! Since we no longer pass in the old style rops, we can't enable this code
;!!! and should remove/alter it someday. Besides, most of it is in special.asm
if 0 ;!!!
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; We have something for a loop count. If this just happens to be
; a source copy (S) with a phase of zero, then the innerloop degenerates
; to a repeated MOVSB instruction. This little special case is
; worth checking for and handling!
;
; Also, if this is one of the special cases {P, Pn, DDx, DDxn}, then it
; will also be special cased since these are all pattern fills (pattern,
; not pattern, 0, 1).
;
; The same code can be shared for these routines, with the exception
; that patterns use a STOSx instruction instead of a MOVSx instruction
; and need a value loaded in AX
;
; So we lied a little. If a color conversion is going on, then the
; REP MOVSB might not be usable. If the F1_REP_OK flag has been set, then
; we can use it. The F1_REP_OK flag will be set for a mono ==> color
; conversion where the background color is white and the foreground
; color is black, or for a color ==> mono conversion with the screen
; as the source (the color compare register will be used).
;
; For the special cases {P, Pn, DDx, DDxn}, color conversion is
; not possible, so ignore it for them.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
mov bl,byte ptr fr.Rop ;Get the raster op
test bl,EPS_INDEX ;Can this be special cased?
jnz cblt_5500 ; No
errnz <HIGH EPS_INDEX>
errnz SPEC_PARSE_STR_INDEX ;The special case index must be 0
test bl,EPS_OFF ;Is this a source copy
jz cblt_5040 ; Yes
errnz <SOURCE_COPY AND 11b> ;Offset for source copy must be 0
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; We should have one of the following fill operations:
;
; P - Pattern
; Pn - NOT pattern
; DDx - 0 fill
; DDxn - 1 fill
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
mov ax,I_MOV_AL_0FFH ;Assume this is a 0 or 1 fill
test bl,01h ;Is it 0 or 1 fill?
jz cblt_5020 ; Yes, initialize AX with 0FFh
mov ax,I_MOV_AL_DH ; No, initialize AX with pattern
errnz PAT_COPY-0000000000100001b
errnz NOTPAT_COPY-0000000000000001b
errnz FILL_BLACK-0000000001000010b
errnz FILL_WHITE-0000000001100010b
cblt_5020:
stosw
mov ax,I_MOV_AH_AL
stosw
mov si,I_STOSB ;Set up for repeated code processor
test bl,LogPar ;If Pn or 0, then complement pattern
jnz cblt_5060 ; Is just P or 1
errnz <HIGH LogPar>
mov al,I_SIZE_OVERRIDE
stosb
mov ax,I_NOT_AX ; Is Pn or 0, complement AX
stosw
jmp short cblt_5060
errnz PAT_COPY-00100001b
errnz NOTPAT_COPY-00000001b
errnz FILL_BLACK-01000010b
errnz FILL_WHITE-01100010b
; This is a source copy. The phase must be zero for a source copy
; to be condensed into a REP MOVSx.
cblt_5040:
test fr.phase_h,0FFh ;Is horizontal phase zero?
jnz cblt_5500 ; No, can't condense source copy
mov si,I_MOVSB ;Set register for moving bytes
; For a color conversion, F1_REP_OK must be set.
test fr.the_flags,F0_GAG_CHOKE ;Color conversion?
jz cblt_5060 ; No, rep is OK to use
test fr.moore_flags,F1_REP_OK ; Yes, can we rep it?
jz cblt_5500 ; No, do it the hard way
;-----------------------------------------------------------------------;
; This is a source copy or pattern fill. Process an odd byte with
; a MOVSB or STOSB, then process the rest of the bytes with a REP
; MOVSW or a REP STOSW. If the REP isn't needed, leave it out.
;
; Don't get caught on this like I did! If the direction of the
; BLT is from right to left (decrementing addresses), then both
; the source and destination pointers must be decremented by one
; so that the next two bytes are processed, not the next byte and
; the byte just processed. Also, after all words have been processed,
; the source and destination pointers must be incremented by one to
; point to the last byte (since the last MOVSW or STOSW would have
; decremented both pointers by 2).
;
; If the target machine is an 8086, then it would be well worth the
; extra logic to align the fields on word boundaries before the MOVSxs
; if at all possible.
;
; The generated code should look something like:
;
; WARP8: ;This code for moving left to right
; movsb ;Process an odd byte
; mov ecx,gl_inner_loop_count/2 ;Set word count
; rep ;If a count, then repeat is needed
; movsw ;Move words until done
;
;
; WARP8: ;This code for moving right to left
; movsb ;Process an odd byte
; dec si ;adjust pointer for moving words
; dec di
; mov ecx,gl_inner_loop_count/2 ;Set word count
; rep ;If a count, then repeat is needed
; movsw ;Move words until done
; inc si ;adjust since words were moved
; inc di
;
;
; Of course, if any part of the above routine isn't needed, it isn't
; generated (i.e. the generated code might just be a single MOVSB)
;-----------------------------------------------------------------------;
cblt_5060:
shr edx,1 ;Byte count / 2 for words
jnc cblt_5080 ; No odd byte to move
mov ax,si ; Odd byte, move it
stosb
cblt_5080:
jz cblt_5140 ;No more bytes to move
xor bx,bx ;Flag as stepping from left to right
cmp bl,fr.step_direction ;Moving from the right to the left?
errnz STEPLEFT ; (left direction must be zero)
jnz cblt_5100 ; No
mov ax,I_DEC_ESI_DEC_EDI ; Yes, decrement both pointers
stosw
mov bx,I_INC_ESI_INC_EDI ;Set up to increment the pointers later
; Target of the "jnz cblt_5100" above: emit the word move, preceded by
; a count load and a REP prefix when more than one word is to be moved.
; (The label previously carried a stray leading "0", which is not a
; valid identifier and did not match the jump's target name.)
cblt_5100:
cmp edx,1 ;Move one word or many words?
jz cblt_5120 ; Only one word
mov al,I_MOV_ECX_DWORD_I ; Many words, load count
stosb
mov eax,edx
stosd
mov al,I_REP ;a repeat instruction
stosb
cblt_5120:
mov al,I_SIZE_OVERRIDE
stosb
mov ax,si ;Set the word instruction
inc ax
stosb
errnz I_MOVSW-I_MOVSB-1 ;The word form of the instruction
errnz I_STOSW-I_STOSB-1 ; must be the byte form + 1
or bx,bx ;Need to increment the pointers?
jz cblt_5140 ; No
mov ax,bx ; Yes, increment both pointers
stosw
cblt_5140:
jmp cblt_6000 ;Done setting up the innerloop
page
endif
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; There is some count for the innerloop of the BLT. Generate the
; required BLT. Two or four copies of the BLT will be placed on the
; stack. This allows the LOOP instruction at the end to be distributed
; over two or four bytes instead of 1, saving 11 or 12 clocks for each
; byte (for 4). Multiply 12 clocks by ~ 16K and you save a lot of
; clocks!
;
; If there are less than four (two) bytes to be BLTed, then no looping
; instructions will be generated. If there are more than four (two)
; bytes, then there is the possibility of an initial jump instruction
; to enter the loop to handle the modulo n result of the loop count.
;
; The innerloop code will look something like:
;
; < mov cx,loopcount/n> ;load count if >n innerloop bytes
; < jmp short ??? > ;If a first jump is needed, do one
;
; BLTloop:
; replicate initial byte BLT code up to n times
;
; < loop BLTloop > ;Loop until all bytes processed
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_5500:
mov ebx,fr.end_fl ;Compute size of the fetch code
sub ebx,fr.start_fl
inc ebx ;A stosb will be appended
mov esi,4 ;Assume replication 4 times
mov cl,2 ; (shift count two bits left)
cmp ebx,32 ;Small enough for 4 times?
jc cblt_5520 ; Yes, replicate 4 times
shr esi,1 ; No, replicate 2 times
dec ecx
cblt_5520:
cmp edx,esi ;Generate a loop? (edx = loopcount)
jle cblt_5540 ; No, just copy code
mov al,I_MOV_ECX_DWORD_I
stosb ;mov cx,loopcount/n
mov eax,edx ;Compute loop count
shr eax,cl
stosd
shl eax,cl ;See if loopcount MOD n is 0
sub eax,edx
jz cblt_5540 ;Zero, no odd count to handle
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; There is an odd portion of bytes to be processed. Increment
; the loop counter for the odd pass through the loop and then
; compute the displacement for entering the loop.
;
; To compute the displacement, subtract the number of odd bytes
; from the modulus being used (i.e. 4-3=1). This gives the
; number of bytes to skip over the first time through the loop.
;
; Multiply this by the number of bytes for a logic sequence,
; and the result will be the displacement for the jump.
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
inc dword ptr [edi][-4] ;Not zero, adjust for partial loop
add eax,esi ;Compute where to enter the loop at
push edx
mul ebx
pop edx
mov ecx,eax
mov al,I_JMP_DISP32 ;Stuff jump instruction
stosb
mov eax,ecx ;Stuff displacement for jump
stosd
;-----------------------------------------------------------------------;
; Currently: EDX = loop count
; ESI = loop modulus
; EBX = size of one logic operation
; EDI --> next location in the loop
;-----------------------------------------------------------------------;
cblt_5540:
mov ecx,ebx ;Set move count
mov ebx,edx ;Set maximum for move
cmp ebx,esi ;Is the max > what's left?
jle cblt_5560 ; No, just use what's left
mov ebx,esi ; Yes, copy the max
cblt_5560:
sub edx,esi ;If dx > 0, then loop logic needed
mov esi,fr.start_fl ;--> fetch code to copy
mov eax,ecx ;Save a copy of fetch length
rep movsb ;Move fetch code and stuff stosb
mov esi,edi ;--> new source (and top of loop)
sub esi,eax
mov byte ptr [edi][-1],I_STOSB
dec ebx ;One copy has been made
push edx
mul ebx ;Compute # bytes left to move
pop edx
mov ecx,eax ;Set move count
rep movsb ;Move the fetches
sub esi,eax ;Restore pointer to start of loop
page
; The innermost BLT code has been created and needs the looping
; logic added to it. If there is any looping to be done, then
; generate the loop code. The code within the innerloop may be
; greater than 126 bytes, so a LOOP instruction may not be used
; in this case.
cblt_5580:
or edx,edx ;Need a loop?
jle cblt_6000 ; No, don't generate one
mov al,I_DEC_ECX
stosb
mov ax,I_JNZ_DISP32
stosw
mov eax,esi ;Compute offset of loop
sub eax,edi
sub eax,4 ;Bias by DISP32
stosd
subttl Compile - Last Byte Processing
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; All the innerloop stuff has been processed. Now generate the code for
; the final byte if there is one. This code is almost identical to the
; code for the first byte except there will only be one fetch (if a
; fetch is needed at all).
;
; The code generated will look something like:
;
; < fetch > ;Get source byte
; < align > ;Align source if needed
; action ;Perform desired action
; mask and store
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_6000:
mov dx,fr.last_mask ;Get last byte mask
or dh,dh ;Is there a last byte to be processed?
jz cblt_6100 ; No.
mov ecx,fr.end_fls ;Get end of fetch/logic/store operation
mov esi,fr.start_fl ;Get start of fetch/logic sequence
sub ecx,esi ;Compute length of the code
test fr.first_fetch,FF_NO_LAST_FETCH
jz cblt_include_fetch
test fr.the_flags,F0_SRC_PRESENT ; was there a fetch?
jz cblt_was_no_fetch
cmp fr.phase_h,0 ; Phase zero case is not combined
; into innerloop as it should be.
; If the final byte is full then we
; better not remove the lodsb ( i.e.
je cblt_include_fetch ; 0 - 0 = 0 would make us think we could)
mov eax,fr.cFetchCode ; don't copy the fetch (lodsb)
add esi,eax
sub ecx,eax
cblt_was_no_fetch:
cblt_include_fetch:
rep movsb ;Copy the fetch/action/store code
xchg dh,dl
mov [edi][MASKED_STORE_MASK],dx ;Stuff last byte mask into the code
skip_save:
subttl Compile - Looping Logic
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Looping logic.
;
; The looping logic must handle monochrome bitmaps, color bitmaps,
; huge bitmaps, the device, the presence or absence of a source
; or pattern, and mono <==> color interactions.
;
; The type of looping logic is always based on the destination.
;
; Plane Update Facts:
;
; 1) If the destination device is color, then there will be
; logic for plane selection. Plane selection is performed
; at the start of the loop for the display. Plane selection
; for bitmaps is performed at the end of the loop in anticipation
; of the next plane.
;
; The following applies when the destination is color:
;
; a) The destination update consists of:
;
; 1) If the destination is the display, the next plane will
; be selected by the plane selection code at the start
; of the scan line loop.
;
; 2) If not the display, then the PDevice must be a bitmap.
; The next plane will be selected by updating the
; destination offset by the next_plane value.
;
;
; b) If F0_GAG_CHOKE isn't specified, then there may be a source.
; If there is a source, it must be color, and the update
; consists of:
;
; 1) If the source is the display, the next plane will be
; selected by the plane selection code at the start of
; the loop.
;
; 2) If not the display, then the PDevice must be a bitmap.
; The next plane will be selected by updating the
; source offset by the next_plane value.
;
;
; c) If F0_GAG_CHOKE is specified, then the source must be a
; monochrome bitmap which is undergoing mono to color
; conversion. The AND & XOR mask table which is used
; for the conversion will have to be updated, unless
; the F1_NO_MUNGE flag is set indicating that the color
; conversion really wasn't needed.
;
; The source's pointer will not be updated. It will
; remain pointing to the same scan of the source until
; all planes of the destination have been processed.
;
;
; d) In all cases, the plane mask rotation code will be
; generated. If the plane indicator doesn't overflow,
; then start at the top of the scan line loop for the
; next plane.
;
; If the plane indicator overflows, then:
;
; 1) If there is a pattern present, it's a color
; pattern fetch. The index of which scan of
; the brush to use will have to be updated.
;
; 2) Enter the scan line update routine
;
;
; 2) If the destination is monochrome, then there will be no
; plane selection logic.
;
; If F0_GAG_CHOKE is specified, then color ==> mono conversion
; is taking place. Any plane selection logic is internal
; to the ROP byte fetch code. Any color brush was pre-
; processed into a monochrome brush, so no brush updating
; need be done
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
subttl Looping Logic - Plane Selection
page
; Get saved parameters off of the stack.
;
; < pop ebx > ;Get plane indicator
; < pop esi > ;Get source pointer
; pop edi ;Get destination pointer
; pop ecx ;Get loop count
cblt_6100:
mov bh,fr.the_flags ;These flags will be used a lot
test bh,F0_DEST_IS_COLOR ;Is the destination color?
jz cblt_6120 ; No
mov al,I_POP_EBX ;Restore plane index
stosb
cblt_6120:
test bh,F0_SRC_PRESENT ;Is a source needed?
jz cblt_6140 ; No
mov al,I_POP_ESI ; Yes, get source pointer
stosb
cblt_6140:
mov ax,I_POP_EDI_POP_ECX ;Get destination pointer
stosw ;Get loop count
test bh,F0_DEST_IS_COLOR ;Color scanline update?
jz cblt_6300 ; No, just do the mono scanline update
; The scanline update is for color. Generate the logic to update
; a brush, perform plane selection, process mono ==> color conversion,
; and test for plane overflow.
cblt_6160:
or bh,bh ;Color conversion?
jns cblt_6180 ; No
errnz F0_GAG_CHOKE-10000000b
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; The source is monochrome. Handle mono ==> color conversion.
; The AND & XOR mask table will need to be rotated for the next
; pass over the source.
;
; The source scanline pointer will not be updated until all planes
; have been processed for the current scan.
;
; If F1_NO_MUNGE has been specified, then the color conversion table
; and the color conversion code was not generated, and no update
; code will be needed.
;
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
test fr.moore_flags,F1_NO_MUNGE ;Is there really a conversion table?
jnz short cblt_6200 ; No, so skip the code
mov al,I_MOV_EBP_DWORD_I ;lea ebp,fr.ajM2C
stosb
lea eax,fr.ajM2c ;Get address of table
stosd
mov esi,offset FLAT:rot_and_xor ;--> rotate code
mov cx,LEN_ROT_AND_XOR
rep movsb
jmp short cblt_6200
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; If there is a source, it must be color. If it is a memory
; bitmap, then the next plane must be selected, else it is
; the display and the next plane will be selected through
; the hardware registers.
;
; < add si,next_plane>
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_6180:
test bh,F0_SRC_PRESENT ;Is there really a source?
jz cblt_6200 ;No source.
test bh,F0_SRC_IS_DEV ;Is the source the display?
jnz cblt_6200 ; Yes, use hardware plane selection
mov ax,I_ADD_ESI_DWORD_I ; No, generate plane update
stosw ;Add si,next_plane
mov eax,fr.src.next_plane
stosd
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; If the destination isn't the device, then it must be a color
; memory bitmap, and its pointer will have to be updated by
; bmWidthPlanes. If it is the display, then the next plane
; will be selected through the hardware registers.
;
; < add di,next_plane>
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_6200:
test bh,F0_DEST_IS_DEV ;Is the destination the display
jnz cblt_6220 ; Yes, don't generate update code
mov ax,I_ADD_EDI_DWORD_I ; No, update bitmap to the next plane
stosw
mov eax,fr.dest.next_plane
stosd
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; The source and destination pointers have been updated.
; Now generate the plane looping logic.
;
; < shl bl,1 > ;Select next plane
; < jnc StartOfLoop > ; Yes, go process next
; < mov bl,PLANE_1 > ;Reset plane indicator
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
cblt_6220:
mov ax,I_SHL_BL_1 ;Stuff plane looping logic
stosw
mov edx,fr.pNextPlane ;Compute relative offset of
sub edx,edi ; start of loop
sub edx,6 ;Bias offset by length of jnc inst.
mov ax,I_JNC_DISP32
stosw ;jnc StartOfLoop
mov eax,edx
stosd
subttl Looping Logic - Color Brush Update
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; The plane update logic has been copied. If a pattern was
; involved for a color BLT, then the pattern index will need
; to be updated to the next scanline for three plane mode.
;
; This will involve subtracting off 3*SIZE_PATTERN (MonoPlane),
; and adding in the increment. The result must be masked with
; 00000111b to select the correct source. Note that the update
; can be done with an add instruction and a mask operation.
;
; inc index+MonoPlane inc-MonoPlane result AND 07h
;
; 1 0+32 = 32 1-32 = -31 1 1
; 1 7+32 = 39 1-32 = -31 8 0
; -1 0+32 = 32 -1-32 = -33 FF 7
; -1 7+32 = 39 -1-32 = -33 6 6
;
; < mov al,[12345678] > ;Get brush index
; < add al,n > ;Add displacement to next byte
; < and al,00000111b > ;Keep it in range
; < mov [12345678],al > ;Store displacement to next byte
;
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
test bh,F0_PAT_PRESENT ;Is a pattern involved?
jz cblt_6300 ; No
test fr.brush_accel,SOLID_BRUSH
jnz cblt_6300 ;Solid color fetch needs no updating
mov al,I_MOV_AL_MEM
stosb ;mov al,[xxxxxxxx]
mov edx,fr.addr_brush_index
mov eax,edx
stosd
mov al,I_ADD_AL_BYTE_I
mov ah,fr.direction ;add al,bais
sub ah,oem_brush_mono ;Anybody ever fly one of these things?
errnz INCREASE-1 ;Must be a 1
errnz DECREASE+1 ;Must be a -1
stosw
mov ax,0700h+I_AND_AL_BYTE_I ;and al,00000111b
stosw
mov al,I_MOV_MEM_AL
stosb ;mov [xxxxxxxx],al
mov eax,edx
stosd
subttl Looping Logic - Scan Line Update
page
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
; Generate the next scanline code. The next scan line code must
; handle monochrome bitmaps, the device, the presence or absence
; of a source.
;
; Also color bitmaps, and mono <==> color interactions.
;
; < add si,gl_src.next_scan> ;Normal source scan line update
; add di,gl_dest.next_scan ;Normal destination scan line update
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
;!!! We have the problem in that this code assumes that cPlanes*cjBytesScan
;!!! is the same as next_scan. This might not always be the case, and we
;!!! should do something about fixing this. This would require pushing an
;!!! extra copy of pScan_n_Plane0 and then adding next-scan to this when we
;!!! have exhausted the planes for scan n
cblt_6300:
test bh,F0_SRC_PRESENT ;Is there a source?
jz cblt_6340 ; No, skip source processing
mov ax,I_ADD_ESI_DWORD_I ;add esi,increment
stosw
mov eax,fr.src.next_scan
stosd
cblt_6340:
mov ax,I_ADD_EDI_DWORD_I ;add edi,increment
stosw
mov eax,fr.dest.next_scan
stosd
; Compile the scan line loop. The code simply jumps to the start
; of the outer loop if more scans exist to be processed.
cblt_6380:
mov al,I_DEC_ECX
stosb
mov ax,I_JNZ_DISP32
stosw
mov eax,fr.blt_addr ;Compute relative offset of
sub eax,edi ; start of loop
sub eax,4 ;Adjust jump bias for DISP32
stosd ; and store it into jump
cblt_6420:
mov al,I_RET ;Stuff the far return instruction
stosb
cRet cblt
endProc cblt
_TEXT$01 ends
end