page ,132
;/*
; *                      Microsoft Confidential
; *                      Copyright (C) Microsoft Corporation 1991
; *                      All Rights Reserved.
; */
;       SCCSID = @(#)parse.asm  1.1 85/05/14
;       SCCSID = @(#)parse.asm  1.1 85/05/14

.sall
.xlist
.xcref
        INCLUDE DOSSYM.INC
        INCLUDE DEVSYM.INC
        include comsw.asm
        include comseg.asm
        include comequ.asm
.list
.cref

        break
;----------------------------------------------------------------------------
;   PARSE.ASM contains the routines that perform command line parsing.
;   Parse and Path share a buffer and the argv[] definitions.
;   Invoking PARSELINE maps the unparsed command line in COMBUF into an
;   array of pointers to the parsed tokens.  The resulting array, argv[],
;   also contains the extra information provided by cparse about each token.
;   NOTE(review): the original comment continues "... should be executed
;   prior to ...", but both routine names are missing from the text;
;   restore them from an intact copy of the source.
;
; Alan L, OS/MSDOS                                  August 15, 1983
;
;
; ENTRY:
;   PARSELINE:      command line in COMBUF.  (The original comment says
;                   COMTAB; the code reads combuf.)
; EXIT:
;   PARSELINE:      success flag, argcnt (number of args), argv[].
; NOTE(S):
;   *   ARGV_CALC handily turns an array index into an absolute pointer.
;       The computation depends on the size of an argv[] element
;       (SIZE argv_ele).
;   *   PARSELINE calls cparse for chunks of the command line.  Cparse
;       does not function as specified; see the PARSELINE notes for more
;       details.
;   *   PARSELINE now knows about the flags the internals of COMMAND.COM
;       need to know about.  This extra information is stored in a switch
;       word (argsw_word) with each command-line argument.
;       NOTE(review): the original text adds that the switches themselves
;       will not appear in the resulting arg structure, yet newarg's
;       early-out after arg_switch is commented out ("LEAVE SWITCH ON
;       COMMAND LINE!!"), so switch tokens are stored in argv[] as well.
;   *   With the exception of CARRY, flags are generally preserved across
;       calls.  argv_calc is an exception: it has no pushf/popf and
;       returns with the flags left by its arithmetic.
;---------------
; CONSTANTS:
;---------------
DEBUGx          equ     FALSE           ; prints out debug info
;---------------
; DATA:
;---------------
DATARES         SEGMENT PUBLIC BYTE
        EXTRN   FORFLAG:BYTE
DATARES         ENDS

TRANSPACE       SEGMENT PUBLIC BYTE     ;AC000;
        EXTRN   combuf:byte, cpyflag:byte, expand_star:byte
        EXTRN   RESSEG:word, STARTEL:word
TRANSPACE       ENDS

TRANCODE        SEGMENT PUBLIC BYTE     ;AC000;

        PUBLIC  argv_calc               ; convert array index into address
        PUBLIC  parseline

assume  cs:trangroup, ds:trangroup, es:trangroup, ss:nothing

        break
;----------------------------------------------------------------------------
;   PARSELINE takes an MSDOS command line and maps it into a UNIX-style
;   argv[argvcnt] array.  The most important difference between this array
;   and the traditional UNIX format is the extra cparse information included
;   with each argument element.
;---------------
; ENTRY:
;   (BL     special delimiter for cparse -- not implemented)
;---------------
; EXIT:
;   CF      set if error
;   AL      error code (carry set).  Note AH clobbered in any event.
;           NOTE(review): the routine pushes AX on entry and pops it on
;           exit, so as coded the caller gets its own AX back and the
;           error code set by newarg is not passed on.  Confirm which of
;           the two (comment or code) callers rely on.
;   argv[]  array of cparse flags and pointers to arguments
;   argvcnt argument count
;---------------
; NOTE(S):
;   *   BL (special delimiter) is ignored, for now (set to space).
;   *   Parseflags record contains cparse flags, as follows:
;           sw_flag         --      was this arg a switch?
;           wildcard        --      whether or not it contained a * or ?
;           path_sep        --      maybe it was a pathname
;           unused          --      for future expansion
;           special_delim   --      was there an initial special delimiter?
;   *   argv[] and argvcnt are undefined if CF/AL indicates an error.
;   *   Relationship between input, cparse output, and comtail can be
;       found in the following chart.  Despite the claim of the cparse
;       documentation that, "Token buffer always starts d: for non switch
;       tokens", such is not the case (see column two, row two).
;       Similarly, [STARTEL] is not null when the command line is one of
;       the forms, "d:", "d:\", or "d:/".  In fact, *STARTEL (i.e., what
;       STARTEL addresses) will be null.
;       This is clearly just a documentation error.
;   *   cparse also returns a switch code in BP for each switch it
;       recognizes on the command line.
;   *   arglen for each token does NOT include the terminating null.
;   *   Finally, note that interesting constructions like 'foodir/*.exe'
;       parse as three separate tokens, and the asterisk is NOT a wildcard.
;       For example, 'for %i in (foodir/*.exe) do echo %i' will first
;       echo 'foodir', then '*', then '.exe'.  Using cparse for command-
;       line parsing may result in slightly different behavior than
;       previously observed with the old COMMAND.COM command-line parser.
;
;           Input                   Cparse              Command Line (80H)
;       \alan\foo.bat           c:\alan\foo.bat         \alan\foo.bat
;       alan\foo.bat            alan\foo.bat            alan\foo.bat
;       foo.bat                 foo.bat                 foo.bat
;       c:\alan\foo.bat         c:\alan\foo.bat         c:\alan\foo.bat
;       c:alan\foo.bat          c:alan\foo.bat          c:alan\foo.bat
;       c:foo.bat               c:foo.bat               c:foo.bat
;---------------
; CONSTANTS:
;---------------
;---------------
; DATA:
;---------------

TRANSPACE       SEGMENT PUBLIC BYTE     ;AC000;
        EXTRN   arg:byte
        EXTRN   argbufptr:word
        EXTRN   comptr:word
        EXTRN   last_arg:word
        EXTRN   tpbuf:byte
TRANSPACE       ENDS

;---------------
parseline:
;---------------
; Register use in the parse loop:
;   SI      read position in the command line being parsed
;   DI      tpbuf, the temporary token buffer handed to cparse
;   BL      delimiter argument for cparse (always a space)
;   BP      cleared before each cparse call; cparse returns switch info in it
; All general registers pushed below, AX included, are restored on exit,
; so only CF (and the arg structure in memory) carries the result.

        push    AX                      ; most of these are clobbered
        push    BX                      ; by cparse...
        push    CX
        push    DX
        push    DI
        push    SI
        pushf
        cld                             ; the REP STOSB / REP MOVSB below must
                                        ; run forward; do not depend on the
                                        ; caller's DF (popf restores it on exit)

        mov     cpyflag,0               ; Turn "CPARSE called from COPY flag" off
        mov     [LAST_ARG], -1          ; last argument at which to accumulate

        xor     ax,ax                   ; AL = 0 is the fill byte
        mov     cx,SIZE arg_unit
        mov     di,offset trangroup:arg
        rep     stosb                   ; zero the whole arg structure
                                        ; (ES taken to be TRANGROUP, per the
                                        ; ASSUME in effect)

        mov     argbufptr,offset trangroup:arg.argbuf
        mov     arg.argswinfo, 0        ; switch information, and info to date
        mov     arg.argvcnt, 0          ; initialize argvcnt/argv[]
        mov     SI, OFFSET TRANGROUP:combuf+2   ; prescan leaves cooked input in combuf

; This next section of code (up to pcont:) makes sure that si is set up for
; parsing.  It should point at COMBUF if FORFLAG is set and arg.argforcombuf
; otherwise.  This is done so that commands can get arg pointers into their
; original command line (or an exact copy of it) in arg_ocomptr.
; Arg.argforcombuf is used so that the for loop processor will always be able
; to get a hold of its original command line; even after COMBUF is blasted by
; the command to be repeated or the transient part of command has been
; reloaded.

        push    ds
        mov     ds,[RESSEG]
        assume  ds:resgroup
        cmp     FORFLAG,0               ; FORFLAG lives in the resident segment
        pop     ds                      ; POP leaves the CMP result intact
        assume  ds:trangroup
        jnz     pcont                   ; FORFLAG set: parse COMBUF in place

        mov     di,OFFSET TRANGROUP:arg.argforcombuf
        xor     ch,ch
        mov     cl,[COMBUF+1]           ; count byte stored at COMBUF+1
        inc     cl                      ; ...plus one more byte (presumably
                                        ; the terminating CR)
        rep     movsb                   ; copy COMBUF+2.. into argforcombuf
        mov     si,OFFSET TRANGROUP:arg.argforcombuf    ; and parse the copy

pcont:
        mov     DI, OFFSET TRANGROUP:tpbuf      ; destination is temporary token buffer
        mov     BL, ' '                 ; no special delimiter, for now

parseloop:
        mov     comptr,si               ; save ptr into original command buffer
        xor     BP, BP                  ; switch information put here by cparse
        mov     byte ptr [expand_star],0        ; don't expand *'s to ?'s
        invoke  scanoff                 ; skip leading blanks...
        invoke  cparse                  ; bite off a token (args in SI, DI, BL)
        jnc     More_prse
        or      BP,BP                   ; Check for trailing switch character
        jz      parsedone
        call    newarg                  ; We hit CR but BP is non-zero.  The
                                        ; typical cause of this is that a
                                        ; switch char IMMEDIATELY precedes
                                        ; the CR.  We have an argument, but it
                                        ; is sort of an error.
                                        ; (newarg's CF is not examined here.)
        jmp     short parsedone         ; We're done (found the CR).

More_prse:
        mov     cpyflag,2               ; tell CPARSE that 1st token is done
        call    newarg                  ; add to argv array (CX has char count)
        jnc     parseloop               ; was everything OK?
        jmp     short parse_error       ; NO, it wasn't -- bug out (CF set)

parsedone:                              ; successful completion of parseline
        popf                            ; caller's flags (DF included) back
        clc
        jmp     short parse_exit

parse_error:                            ; error entry (er, exit) point
        popf
        stc

parse_exit:                             ; depend on not changing CF
        pop     SI
        pop     DI
        pop     DX
        pop     CX
        pop     BX
        pop     AX                      ; caller's AX: any error code newarg
                                        ; left in AX is discarded here
        ret

;---------------
; parseline ends
;----------------------------------------------------------------------------

        break
;----------------------------------------------------------------------------
;   NEWARG adds the supplied argstring and cparse data to arg.argv[].
; ENTRY:
;   BH                  argflags
;   BP                  switch info from cparse (consumed by arg_switch)
;   CX                  character count in argstring
;   DI                  pointer to argstring
;   comptr              ptr to starting loc of current token in original command
;   [STARTEL]           cparse's answer to where the last element starts
; EXIT:
;   argbufptr           points to next free section of argbuffer
;   arg.argbuf          contains null-terminated argument strings
;   arg.argvcnt         argument count
;   arg.argv[]          array of flags and pointers
;   arg.arg_ocomptr     ptr to starting loc of current token in original command
;   CF                  set if error
;   AL                  carry set:  error code (AX = arg_cnt_error or
;                       arg_buf_ovflow); otherwise ANULL, left over from the
;                       terminator store (documented as zero -- assumes
;                       ANULL = 0, confirm in COMEQU.ASM)
;   AH                  not preserved
;   BX, CX, DX, DI, SI and all flags except CF are preserved.
; NOTE(S):
;   *   arg.argvcnt is incremented before the buffer-space check, so after a
;       buffer-overflow error the count already includes the failed element.
;---------------
newarg:
;---------------

        push    BX
        push    CX
        push    DX                      ; one never knows, do one?
        push    DI
        push    SI
        pushf
        call    arg_switch              ; if it's a switch, record switch info
                                        ; LEAVE SWITCH ON COMMAND LINE!!
;;;     jc      newarg_done             ; previous arg's switches -- and leave

        cmp     arg.argvcnt, ARGMAX     ; check to ensure we've not
        jae     too_many_args           ; exceeded array limits (a count:
                                        ; compare unsigned)
        mov     DH, BH                  ; save argflags (BX is reused below)
        mov     BX, arg.argvcnt         ; argv[argvcnt++] = arg data
        inc     arg.argvcnt
        mov     AX, OFFSET TRANGROUP:arg.argv
        call    argv_calc               ; convert index to element pointer
        mov     [BX].argsw_word, 0      ; no switch information, yet...
        mov     [BX].arglen, CX         ; argv[argvcnt].arglen = arg length
        mov     [BX].argflags, DH       ; argv[argvcnt].argflags = cparse flags
        mov     SI, argbufptr
        mov     [BX].argpointer, SI     ; argv[argvcnt].argpointer = [argbufptr]
        add     SI, [STARTEL]           ; save startel from new location:
        sub     SI, DI                  ; argbufptr + ([STARTEL] - token start)
        mov     [BX].argstartel, SI     ; argv[argvcnt].argstartel = new [STARTEL]
        mov     si,[comptr]
        mov     [BX].arg_ocomptr,si     ; arg_ocomptr=ptr into original com line

        mov     SI, DI                  ; now save argstring in argbuffer
        mov     DI, argbufptr           ; load the argbuf pointer and make
        add     DI, CX                  ; sure we're not about to run off
        cmp     DI, OFFSET TRANGROUP:arg.argbuf+ARGBLEN-1
        jae     buf_ovflow              ; the end of the buffer (plus null byte)
                                        ; DI is an address: compare unsigned so
                                        ; the test holds wherever the buffer
                                        ; sits in the segment
        sub     DI, CX                  ; adjust the pointer
        cld
        rep     movsb                   ; and save the string in argbuffer
        mov     AL, ANULL               ; tack a null byte on the end
        stosb
        mov     argbufptr, DI           ; update argbufptr after copy

newarg_done:
        popf
        clc
        jmp     short newarg_exit

too_many_args:
        mov     AX, arg_cnt_error
        jmp     short newarg_error

buf_ovflow:
        mov     AX, arg_buf_ovflow

newarg_error:
        popf
        stc

newarg_exit:
        pop     SI
        pop     DI
        pop     DX
        pop     CX
        pop     BX
        ret

;---------------
; NewArg ends
;----------------------------------------------------------------------------

        break
;----------------------------------------------------------------------------
;   ARG_SWITCH decides if an argument might really be a switch.  In the
;   event that it is, and a real (non-switch) argument has already been
;   seen, the switch bits in BP are OR-ed into that argument's argsw_word
;   and into arg.argswinfo.
; ENTRY:
;   As in NEWARG.
; EXIT:
;   CF      --      clear (wasn't a switch, or it was a leading switch with
;                   no real argument before it); set (switch was recorded)
; NOTE(S):
;   *   The mechanism mapping a switch into a bit-value depends entirely
;       on the order of definition in the switch-list variable (its name is
;       missing from the original comment; presumably switch_list, declared
;       below) and the values chosen to define the bits in CMDT:COMEQU.ASM.
;       Change either the list or the definitions in CMDT:COMEQU.ASM -- and
;       rewrite this mechanism.  This code taken from CMDT:TCODE.ASM.
;   *   The declaration below (presumably of switch_list) is redundant to
;       one declared in TDATA.ASM, and used in TCODE.ASM.
;   *   An ugly routine.
;---------------
; CONSTANTS:
;---------------
;   Constants come from the definitions in CMDT:COMEQU.ASM.
;---------------
; DATA:
;---------------

TRANSPACE       SEGMENT PUBLIC BYTE     ;AC000;
        extrn   switch_list:byte
switch_count    EQU     $-switch_list
transpace       ends

;---------------
Arg_Switch:
;---------------
; In:   BH = cparse argflags for the current token, BP = its switch bits.
; Out:  CF set   -> token is a switch and its bits were OR-ed into the last
;                   real argument's argsw_word and into arg.argswinfo.
;       CF clear -> token is not a switch (LAST_ARG now = arg.argvcnt), or
;                   it is a switch but no real argument exists yet.
;       All registers, and every flag other than CF, come back unchanged.

        pushf                           ; saved first; restored just before
        push    AX                      ; CF is forced on the way out
        push    BX
        push    CX
        push    DI

        test    BH, MASK sw_flag        ; is it a switch?
        jnz     as_switch

        mov     AX, arg.argvcnt         ; plain argument: future switches
        mov     [LAST_ARG], AX          ; should then affect this argument
        jmp     short as_not_recorded

as_switch:
        mov     BX, [LAST_ARG]          ; index of the last REAL arg, or -1
        cmp     BX, -1                  ; have we seen any REAL args yet?
        je      as_not_recorded         ; no, so leading switches don't matter
        mov     AX, OFFSET TRANGROUP:arg.argv
        call    argv_calc               ; BX -> argv[LAST_ARG]
        or      [BX].argsw_word, BP     ; add switch info to that argument
        or      arg.argswinfo, BP       ; and to the command-wide summary

        pop     DI                      ; exit: switch recorded
        pop     CX
        pop     BX
        pop     AX
        popf
        stc
        ret

as_not_recorded:                        ; exit: wasn't a switch, or pretending
        pop     DI
        pop     CX
        pop     BX
        pop     AX
        popf
        clc
        ret

;---------------
; Arg_Switch ends
;----------------------------------------------------------------------------

        break
;----------------------------------------------------------------------------
;   ARGV_CALC maps an array index into the address of that element.
;   Method used for computing the address is:
;       Array Index * Array Elt Size + Base Addr = Elt Addr
; ENTRY:
;   AX      --      base of array
;   BX      --      array index (only BL takes part in the multiply)
; EXIT:
;   AX      --      unchanged (base of array)
;   BX      --      address of the element (base + index * SIZE argv_ele)
;   Flags are those left by the final ADD; they are not preserved.
;---------------
argv_calc:
        push    ax                      ; park the base
        mov     al,SIZE argv_ele        ; al = size of an argv element
        mul     bl                      ; ax = element size * index
        mov     bx,ax                   ; bx = byte offset into the array
        pop     ax                      ; ax = base again
        add     bx,ax                   ; bx = base + byte offset
        ret
;---------------
; argv_calc ends
;----------------------------------------------------------------------------

trancode        ends
        end