Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

719 lines
28 KiB

/**********************************************************************
// NEWFIND.C
//
// Copyright (c) 1992 - Microsoft Corp.
// All rights reserved.
// Microsoft Confidential
//
// TABS = 3
**********************************************************************/
#include "shellprv.h"
#pragma hdrstop
#pragma warning( disable:4001 ) // Disable new type remark warning
//*********************************************************************
//
// Miscellaneous constants and macros
//
//*********************************************************************
//*********************************************************************
// Global data
//*********************************************************************
// BUGBUG (DavePl) This code throws exceptions, and I don't see anyone
// trying to catch them (for allocation failures)
//*********************************************************************
// Initializes all global buffers and variables. aGlobalBufs[] is an
// array of pointers to the global pointers which are allocated to
// the sizes in the array aBufLens[]. DummyFirst and nDummyFirst
// are the first words of the block of variables associated with
// normal and /NOT searches. Each block of variables is initialized
// by copying the static block starting with InitialSearchData
// to the respective data areas.
//
// NOTE: the paragraph above appears to predate the GREPINFO structure;
// the function below allocates one GREPINFO and initializes its normal
// (ge) and /NOT (geNot) search blocks directly.
//
// LPGREPINFO InitGrepBufs( void )
//
// ARGUMENTS:
// NONE
// RETURNS:
// LPGREPINFO - Ptr to the new GREPINFO, or NULL if the allocation fails.
//
//*********************************************************************
LPGREPINFO InitGrepBufs( void )
{
//
// We first want to allocate a global structure for this Grep instance
//
LPGREPINFO lpgi;
lpgi = (LPGREPINFO)Alloc(SIZEOF(GREPINFO));
if (lpgi == NULL)
return(NULL);
// Our Alloc function initializes the data to zero.
//lpgi->Flags = 0;
lpgi->CaseSen = 1; // Assume case-sensitivity
// Initialize Search data
lpgi->ge.TblEntriesUsed = 1;
//lpgi->ge.ExprEntriesUsed = 0;
//lpgi->ge.StrCount = 0;
//lpgi->ge.TargetLen = 0;
//lpgi->ge.MaxChar = 0;
lpgi->ge.MinChar = 0xffff;
lpgi->ge.ShortStrLen = 0xffff;
// Initialize Not Search data
lpgi->geNot.TblEntriesUsed = 1;
//lpgi->geNotnExprEntriesUsed = 0;
//lpgi->geNotnStrCount = 0;
//lpgi->geNotnTargetLen = 0;
//lpgi->geNotnMaxChar = 0;
lpgi->geNot.MinChar = 0xffff;
lpgi->geNot.ShortStrLen = 0xffff;
//lpgi->addstr = NULL; // Initialize function pointers
//lpgi->find = NULL;
_fmemset( lpgi->ge.td1, 1, TRTABLEN ); // Set up TD1 for startup
_fmemset( lpgi->geNot.td1, 1, TRTABLEN ); // Set up /NOT TD1 for startup
return( lpgi );
}
//*********************************************************************
// Frees all previously allocated global buffers. aPtrLst is an array
// of pointers to the all of the global pointers. Only those pointers
// which are not NULL are freed and then set to NULL.
//
// Must first free all of the memory blocks allocated with alloc()
// for the linked lists in both the normal and /NOT search trees.
//
// int FreeGrepBufs( LPGREPINFO lpgi )
//
// ARGUMENTS:
// lpgi - Grep instance to free
// RETURNS:
// int - OK (0) if successful else ERR_MEM_CORRUPT
//
//*********************************************************************
int FreeGrepBufs(LPGREPINFO lpgi )
{
    int iPass;                      // 0 = first block, 1 = swapped-in block
    int iEntry;                     // Index into the current block's tables

    // Everything below works on lpgi->ge only.  Swapping at the end of each
    // pass brings the other block (normal <-> /NOT) into ge, so two passes
    // release both, and the second swap puts the blocks back in place.
    for ( iPass = 0; iPass < 2; iPass++ )
    {
        // Release each string tree
        for ( iEntry = 0; iEntry < lpgi->ge.TblEntriesUsed; iEntry++ )
            freenode( lpgi, lpgi->ge.StringList[ iEntry ] );

        // Release each expression string; give up on the first failure
        for ( iEntry = 0; iEntry < lpgi->ge.ExprEntriesUsed; iEntry++ )
        {
            if ( !Free( lpgi->ge.ExprStrList[ iEntry ] ) )
                return( ERR_MEM_CORRUPT );
        }

        SwapSrchTables(lpgi);
    }

    // Finally release the header structure itself
    return( Free(lpgi) ? OK : ERR_MEM_CORRUPT );
}
//*********************************************************************
// Swaps all variables directly associated with the normal search
// with those used in the /NOT search.
//
// void SwapSrchTables( LPGREPINFO lpgi )
//
// ARGUMENTS:
// lpgi - Grep instance whose ge and geNot blocks are exchanged
// RETURNS:
// void
//*********************************************************************
void SwapSrchTables( LPGREPINFO lpgi )
{
    // Whole-structure copies: hold the /NOT block aside, move the normal
    // block into its place, then drop the saved block into ge.
    GREPELEMENTS geSaved = lpgi->geNot;

    lpgi->geNot = lpgi->ge;
    lpgi->ge    = geSaved;
}
//*********************************************************************
// Frees the memory allocated to previously allocated node and frees
// all nodes attached to the specified node.
//
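// void freenode( LPGREPINFO lpgi, register STRINGNODE *pNode )
//
// ARGUMENTS:
// lpgi - Grep instance; ge.StrCount is decremented for each node freed
//        that has no suffix list
// pNode - Pointer to node to free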
// RETURNS:
// void
//
//*********************************************************************
void freenode(LPGREPINFO lpgi, register STRINGNODE *pNode )
{
    register STRINGNODE *pDoomed;   // Node being released on this iteration

    // Walk the alternate (s_alt) chain iteratively; recurse only to release
    // each node's suffix (s_suf) list.
    for ( pDoomed = pNode; pDoomed != NULL; pDoomed = pNode )
    {
        if ( pDoomed->s_suf == NULL )
            lpgi->ge.StrCount--;            // No suffixes: one string fewer
        else
            freenode( lpgi, pDoomed->s_suf );

        pNode = pDoomed->s_alt;             // Fetch the link before freeing

        if ( !Free( pDoomed ) )
            RaiseException(ERROR_INVALID_BLOCK, 0, 0, 0);
    }
}
//*********************************************************************
// Allocates memory; if the allocation fails we raise an exception...
// This is basically lazy programming and should be changed later...
//
// LPVOID AllocThrow ( LPGREPINFO lpgi, long cb )
//
// ARGUMENTS:
// lpgi - Grep instance (not used by this function)
// cb - Number of bytes to allocate
//
// RETURNS:
// LPVOID - Ptr to new memory
//*********************************************************************
LPVOID AllocThrow (LPGREPINFO lpgi, long cb)
{
    // lpgi is accepted for symmetry with the other helpers; it is not used.
    LPVOID pvBlock = Alloc(cb);

    // Out of memory is reported by raising ERROR_NOT_ENOUGH_MEMORY instead
    // of through the return value.
    if (!pvBlock)
        RaiseException(ERROR_NOT_ENOUGH_MEMORY, 0, 0, 0);

    return pvBlock;
}
//*********************************************************************
// Allocates memory for a new node large enough to hold a copy of
// a caller-specified string and then initializes the new node with
// the caller-supplied string.
//
// STRINGNODE FAR *newnode( LPGREPINFO lpgi, LPSTR String, int StrLen )
//
// ARGUMENTS:
// lpgi - Grep instance (passed through to AllocThrow)
// *String - String to include in new node
// StrLen - Length of string argument
// RETURNS:
// STRINGNODE* - Ptr to a new node
//*********************************************************************
STRINGNODE FAR* newnode( LPGREPINFO lpgi, LPSTR String, int StrLen )
{
    register STRINGNODE *pNode;     // Node under construction

    // Room for the node header, StrLen bytes of text, one pad byte when
    // StrLen is odd, and one more byte for the terminator.  AllocThrow
    // raises an exception instead of returning NULL on failure.
    pNode = (STRINGNODE *)AllocThrow( lpgi,
                SIZEOF( STRINGNODE ) + (unsigned)StrLen +
                (unsigned)(StrLen & 1) + 1 );

    pNode->s_must = StrLen;         // Length of the text held by this node
    pNode->s_suf  = NULL;           // No suffix list yet
    pNode->s_alt  = NULL;           // No alternates yet

    // The count passed to lstrcpynA includes the terminator, so at most
    // StrLen characters of String are copied.
    lstrcpynA( s_text( pNode ), String, (unsigned)StrLen + 1);

    return( pNode );
}
//*********************************************************************
// Updates an existing node with a new string passed by the caller.
//
// WARNING:
// The new string must be shorter than the original string for
// the specified node or the buffer will overflow.
//
// ARGUMENTS:
// pNode - Pointer to node
// pStr - String
// StrLen - Length of string
// RETURNS:
// STRINGNODE* - The same pNode that was passed in
//*********************************************************************
STRINGNODE FAR* reallocnode( register STRINGNODE *pNode, LPSTR pStr, int StrLen )
{
    // Overwrites the text of an existing node in place and records the new
    // length.  No memory is reallocated, so the new text must fit in the
    // space the node already has.
    //
    //   pNode  - node to update
    //   pStr   - new text (newstring() passes the node's own text here)
    //   StrLen - number of bytes of pStr to keep
    //
    // Returns pNode.
#ifndef NDEBUG
    // Equal length is legal and cannot overflow: newstring() calls this with
    // StrLen == pNode->s_must when the string being added duplicates the
    // node's text.  Only a longer string is an error.
    Assert( StrLen <= pNode->s_must );
#endif
    pNode->s_must = StrLen;         // Set new length

    // Copy new text.  Source and destination may be the same buffer (see
    // above); NOTE(review): this relies on hmemcpy tolerating overlap -
    // confirm against its definition for this build.
    hmemcpy( s_text( pNode ), pStr, (unsigned)StrLen );

    return( pNode );                // Return pointer to original node
}
//*********************************************************************
// maketd1 - add entry for TD1 shift table
//
// This function fills in the TD1 table for the given
// search string. The idea is adapted from Daniel M.
// Sunday's QuickSearch algorithm as described in an
// article in the August 1990 issue of "Communications
// of the ACM". As described, the algorithm is suitable
// for single-string searches. The idea to extend it for
// multiple search strings is mine and is described below.
//
// Think of searching for a match as shifting the search
// pattern p of length n over the source text s until the
// search pattern is aligned with matching text or until
// the end of the source text is reached.
//
// At any point when we find a mismatch, we know
// we will shift our pattern to the right in the
// source text at least one position. Thus,
// whenever we find a mismatch, we know the character
// s[n] will figure in our next attempt to match.
//
// For some character c, TD1[c] is the 1-based index
// from right to left of the first occurrence of c
// in p. Put another way, it is the count of places
// to shift p to the right on s so that the rightmost
// c in p is aligned with s[n]. If p does not contain
// c, then TD1[c] = n + 1, meaning we shift p to align
// p[0] with s[n + 1] and try our next match there.
//
// Computing TD1 for a single string is easy:
//
// _fmemset(TD1,n + 1,SIZEOF TD1);
// for (i = 0; i < n; ++i) {
// TD1[p[i]] = n - i;
// }
//
// Generalizing this computation to a case where there
// are multiple strings of differing lengths is trickier.
// The key is to generate a TD1 that is as conservative
// as necessary, meaning that no shift value can be larger
// than one plus the length of the shortest string for
// which you are looking. The other key is to realize
// that you must treat each string as though it were only
// as long as the shortest string. This is best illustrated
// with an example. Consider the following two strings:
//
// DYNAMIC PROCEDURE
// 7654321 927614321
//
// The numbers under each letter indicate the values of the
// TD1 entries if we computed the array for each string
// separately. Taking the union of these two sets, and taking
// the smallest value where there are conflicts would yield
// the following TD1:
//
// DYNAMICPODURE
// 7654321974321
//
// Note that TD1['P'] equals 9; since n, the length of our
// shortest string is 7, we know we should not have any
// shift value larger than 8. If we clamp our shift values
// to this value, then we get
//
// DYNAMICPODURE
// 7654321874321
//
// Already, this looks fishy, but let's try it out on
// s = "DYNAMPROCEDURE". We know we should match on
// the trailing procedure, but watch:
//
// DYNAMPROCEDURE
// ^^^^^^^|
//
// Since DYNAMPR doesn't match one of our search strings,
// we look at TD1[s[n]] == TD1['O'] == 7. Applying this
// shift, we get
//
// DYNAMPROCEDURE
// ^^^^^^^
//
// As you can see, by shifting 7, we have gone too far, and
// we miss our match. When computing TD1 for "PROCEDURE",
// we must take only the first 7 characters, "PROCEDU".
// Any trailing characters can be ignored (!) since they
// have no effect on matching the first 7 characters of
// the string. Our modified TD1 then becomes
//
// DYNAMICPODURE
// 7654321752163
//
// When applied to s, we get TD1[s[n]] == TD1['O'] == 5,
// leaving us with
//
// DYNAMPROCEDURE
// ^^^^^^^
// which is just where we need to be to match on "PROCEDURE".
//
// Going to this algorithm has sped qgrep up on multi-string
// searches by 20-30%. The all-C version with this algorithm
// became as fast or faster than the C+ASM version of the old
// algorithm. Thank you, Daniel Sunday, for your inspiration!
//
// Note: if we are case-insensitive, then we expect the input
// string to be upper-cased on entry to this routine.
//
// Pete Stewart, August 14, 1990.
//*********************************************************************
void maketd1( LPGREPINFO lpgi, LPBYTE pch, int cch, int cchstart )
{
    int chCur;                      // Character whose entries are updated
    int iPos;                       // Position of that character in the string
    int fAgain;                     // Non-zero while another case remains

    // pch points at the characters occupying positions cchstart onward of a
    // search string.  Turn the count into an end position, and never look
    // beyond the length of the shortest search string.
    cch += cchstart;
    if ( cch > lpgi->ge.ShortStrLen )
        cch = lpgi->ge.ShortStrLen;

    for ( iPos = cchstart; iPos < cch; iPos++ )
    {
        chCur = (int)((unsigned char)*pch++);

        // In case-insensitive mode an upper-case character is processed
        // twice: once as given and once folded to lower case.
        fAgain = 1;
        while ( fAgain )
        {
            // Track the range of characters seen
            if ( chCur < lpgi->ge.MinChar )
                lpgi->ge.MinChar = chCur;
            if ( chCur > lpgi->ge.MaxChar )
                lpgi->ge.MaxChar = chCur;

            // Keep the smallest shift recorded for this character
            if ( lpgi->ge.ShortStrLen - iPos < (int)lpgi->ge.td1[ chCur ] )
                lpgi->ge.td1[ chCur ] = (unsigned char)(lpgi->ge.ShortStrLen - iPos);

            // NOTE(review): IsCharUpper is the generic-text name while the
            // fold below uses CharLowerA explicitly - confirm the intended
            // variant if this file is built UNICODE.
            if ( lpgi->CaseSen || !IsCharUpper( (char)chCur ) )
                fAgain = 0;
            else
                chCur = LOWORD((DWORD)CharLowerA( (LPSTR)(DWORD)chCur ));
        }
    }
}
//*********************************************************************
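// int newstring( LPGREPINFO lpgi, LPBYTE pStr, int StrLen )
//
// ARGUMENTS:
// lpgi - Grep instance whose ge search block receives the string
// pStr - String to add
// StrLen - Length of string
// RETURNS:
// int - 1 if the string was added, 0 if an existing entry already
//       covers it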
//*********************************************************************
int newstring( LPGREPINFO lpgi, LPBYTE pStr, int StrLen )
{
    // Inserts a search string into the match tree of lpgi->ge.
    //
    // The first character selects a list through TransTable (0 means "no
    // list yet"); the remaining characters are merged into that list's
    // tree, where s_alt links alternatives and s_suf links continuations.
    //
    //   lpgi   - grep instance to update
    //   pStr   - string to add
    //   StrLen - length of pStr
    //
    // Returns 1 if the string was added, 0 if nothing was added because an
    // existing entry already covers it (or the string is empty or the table
    // is full).  Raises ERROR_INVALID_PARAMETER when the table is full; node
    // allocation raises through newnode()/AllocThrow().
    register STRINGNODE *pCurNode;      // Current string
    register STRINGNODE * *ppPrevNode;  // Pointer to previous link
    STRINGNODE *pNewNode;               // New string
    int i;                              // Index
    int iNumMatched;                    // Count of matched chars in 2 strings
    int iRelation;                      // Ordering of the two strings compared

    // An empty string has no first character to select a list with, and
    // would carry a negative length into the tree walk below.
    if ( StrLen <= 0 )
        return( 0 );

    if ( lpgi->ge.ShortStrLen == -1 || StrLen < lpgi->ge.ShortStrLen )
        lpgi->ge.ShortStrLen = StrLen;  // Remember length of shortest string

    if ( (i = lpgi->ge.TransTable[ *pStr ]) == 0 )  // If no existing list
    {
        // We have to start a new list.
        // Die if too many string lists.  The limit is tested BEFORE the
        // counter is bumped so that TblEntriesUsed never exceeds ASCII_LEN;
        // FreeGrepBufs() walks StringList up to TblEntriesUsed.
        if ( lpgi->ge.TblEntriesUsed >= ASCII_LEN )
        {
            RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0);
            // The exception is raised as continuable; if a handler resumes
            // execution, do not fall through and write past the tables.
            return( 0 );
        }
        i = lpgi->ge.TblEntriesUsed++;
#ifndef NDEBUG
        Assert( lpgi->ge.StringList[ i ] == NULL ); // Assert already initialized
#endif
        lpgi->ge.TransTable[ *pStr ] = (unsigned char)i;    // Set pointer to new list
        // Set pointer for other case ('\040' is the ASCII case bit)
        if ( !lpgi->CaseSen && IsCharAlpha( *pStr ) )
            lpgi->ge.TransTable[ *pStr ^ '\040' ] = (unsigned char)i;
    }
    else if ( lpgi->ge.StringList[ i ] == NULL )
        return( 0 );    // A 1-byte string already owns this list; it covers us

    if ( --StrLen == 0)                 // If 1-byte string
    {
        freenode( lpgi, lpgi->ge.StringList[ i ] ); // Free any existing stuff
        lpgi->ge.StringList[ i ] = NULL;            // No record here
        lpgi->ge.StrCount++;                        // We have a new string
        return( 1 );                                // String added
    }

    pStr++;                                 // Skip first char
    ppPrevNode = lpgi->ge.StringList + i;   // Get pointer to link
    pCurNode = *ppPrevNode;                 // Get pointer to node

    while ( pCurNode != NULL )              // Loop to traverse match tree
    {
        // Find minimum of string lengths
        i = (StrLen > pCurNode->s_must) ? pCurNode->s_must : StrLen;

        // Compare the strings
        matchstrings( lpgi, (LPSTR )pStr, s_text( pCurNode ),
                      i, &iNumMatched, &iRelation );

        if ( iNumMatched == 0 )             // If complete mismatch
        {
            if ( iRelation < 0 )            // Was pStr < s_text( pCurNode )
                break;                      // Break if insertion point found
            ppPrevNode = &(pCurNode->s_alt);    // Get pointer to alternate link
            pCurNode = *ppPrevNode;             // Follow the link
        }
        else if ( i == iNumMatched )        // Else if strings matched
        {
            if ( i == StrLen )              // If new is prefix of current
            {
                // Shorten text of node
                pCurNode = reallocnode(pCurNode, s_text( pCurNode ), StrLen);
                *ppPrevNode = pCurNode;
                // If there are suffixes
                if ( pCurNode->s_suf != NULL )
                {
                    freenode( lpgi, pCurNode->s_suf );
                    // Suffixes no longer needed
                    pCurNode->s_suf = NULL;
                    lpgi->ge.StrCount++;    // Account for this string
                }
                return( 1 );                // String added
            }
            ppPrevNode = &(pCurNode->s_suf);    // Get pointer to suffix link
            if ( (pCurNode = *ppPrevNode) == NULL )
                return (0);                 // Done if current is prefix of new
            pStr += i;                      // Skip matched portion
            StrLen -= i;
        }
        else                                // Else partial match
        {
            // We must split an existing node.
            // This is the trickiest case.  The tail of the current node is
            // copied into a new node BEFORE the current node is shortened.
            pNewNode = newnode( lpgi, s_text( pCurNode ) + iNumMatched,
                                pCurNode->s_must - iNumMatched );
            // Unmatched part of current string now lives in pNewNode;
            // set length of current node to matched portion
            pCurNode = reallocnode( pCurNode, s_text( pCurNode ), iNumMatched );
            *ppPrevNode = pCurNode;
            pNewNode->s_suf = pCurNode->s_suf;  // Current string's suffixes

            if ( iRelation < 0 )            // If new preceded current
            {                               // First suffix is new string
                pCurNode->s_suf = newnode( lpgi, (LPSTR )pStr + iNumMatched, StrLen - iNumMatched );
                // Alternate is part of current
                pCurNode->s_suf->s_alt = pNewNode;
            }
            else                            // Else new followed current
            {                               // Unmatched new string is alternate
                pNewNode->s_alt = newnode( lpgi, (LPSTR )(pStr + iNumMatched), StrLen - iNumMatched );
                pCurNode->s_suf = pNewNode; // New suffix list
            }
            lpgi->ge.StrCount++;            // One more string
            return( 1 );                    // String added
        }
    }

    // Set pointer to new node
    *ppPrevNode = newnode( lpgi, (LPSTR )pStr, StrLen );
    (*ppPrevNode)->s_alt = pCurNode;        // Attach alternates
    lpgi->ge.StrCount++;                    // One more string
    return( 1 );                            // String added
}
//*********************************************************************
// void addstring( LPGREPINFO lpgi, register LPSTR pStr, int StrLen )
//
// ARGUMENTS:
// pStr - String to add
// StrLen - Length of string
// RETURNS:
// void
//*********************************************************************
void addstring( LPGREPINFO lpgi, register LPSTR pStr, int StrLen )
{
    // Unescapes a literal search string into lpgi->Target and adds it to
    // the match tree, plus an OEM-translated copy when that differs.
    //
    //   lpgi   - grep instance to update
    //   pStr   - raw search string (may contain '\' escapes, trailing '$')
    //   StrLen - length of pStr
    //
    // NOTE(review): nothing here bounds the writes into lpgi->Target; the
    // caller is presumably expected to limit StrLen - confirm against the
    // declaration of Target.
    register LPSTR pchChar;         // Char pointer
    int EndLine;                    // Match-at-end-of-line flag
    char szTemp[MAXSTRLEN + 1];     // OEM scratchpad (+1 for a terminator)
    BOOL fOemDiff;                  // Does the OEM version differ?

    EndLine = lpgi->Flags & ENDLINE;    // Initialize flag
    pchChar = lpgi->Target;             // Initialize pointer

    while ( StrLen-- > 0 )              // While not at end of string
    {
        switch ( *pchChar = *(pStr++) ) // Switch on character
        {
            case '\\':                  // Escape
                // If next character "special", it replaces the backslash
                if ( StrLen > 0 && !IsCharAlphaNumeric( *pStr ) )
                {
                    StrLen--;               // Decrement counter
                    *pchChar = *(pStr++);   // Copy next character
                }
                pchChar++;              // Increment pointer
                break;

            case '$':                   // Special end character
                if ( StrLen == 0 )      // If end of string
                {
                    EndLine = ENDLINE;  // Set flag; the '$' is not kept
                    break;              // Exit switch
                }
                // Drop through

            default:                    // All others
                pchChar++;              // Increment pointer
                break;
        }
    }

    if ( EndLine )
        *pchChar++ = EOS;               // Add end character if needed

    lpgi->ge.TargetLen = pchChar - lpgi->Target;    // Compute Target string length

    // Build the OEM version only when it fits in szTemp; a longer target
    // would overrun the stack buffer, so it simply gets no OEM variant.
    fOemDiff = ( lpgi->ge.TargetLen <= MAXSTRLEN );
    if ( fOemDiff )
    {
        CharToOemBuffA(lpgi->Target, szTemp, lpgi->ge.TargetLen);
        // CharToOemBuffA translates exactly TargetLen characters and adds
        // no terminator; terminate so the %s trace below is well defined.
        szTemp[ lpgi->ge.TargetLen ] = '\0';
        fOemDiff = memcmp(lpgi->Target, szTemp, lpgi->ge.TargetLen) != 0;
    }

    if ( !lpgi->CaseSen )
        CharUpperBuffA( lpgi->Target, lpgi->ge.TargetLen ); // Force to upper case if necessary

    // Add the string
    newstring( lpgi, (LPBYTE )lpgi->Target, lpgi->ge.TargetLen );

    // So far we have worked with Ansi characters. Now lets try converting
    // this to OEM. If the strings don't match we also add the OEM version
    // as to allow files typically made by Dos Apps to find their strings...
    // NOTE(review): szTemp is not upper-cased in case-insensitive mode,
    // unlike Target - confirm this is intended.
    if (fOemDiff)
    {
        // NOTE(review): szTemp is a char buffer while the format goes
        // through TEXT(); confirm %s is right if built UNICODE.
        DebugMsg(DM_TRACE, TEXT("Grep add OEM version of string %s"), szTemp);
        newstring( lpgi, (LPBYTE )szTemp, lpgi->ge.TargetLen );
    }
}
//*********************************************************************
//
// int addstrings( register LPSTR pBuffer, register LPSTR pBufEnd,
// LPSTR pSepList, int *IsFirst )
//
// ARGUMENTS:
// pBuffer - String list buffer containing search strings
// pBufEnd - End of search string buffer
// pSepList - List of search string separators
// IsFirst - Unused
// RETURNS:
// int - Always returns 0
//
//*********************************************************************
int addstrings( LPGREPINFO lpgi, register LPSTR pBuffer, register LPSTR pBufEnd,
                LPSTR pSepList, int *IsFirst )
{
    // Splits [pBuffer, pBufEnd) on the characters in pSepList and adds each
    // piece through lpgi->addstr.  IsFirst is not used.  Always returns 0.
    int cchSpan;                    // Length of the current run

    while ( pBuffer < pBufEnd )
    {
        // Step over any run of separators; stop if nothing follows it
        cchSpan = strnspn( pBuffer, pSepList, pBufEnd - pBuffer );
        pBuffer += cchSpan;
        if ( pBuffer >= pBufEnd )
            break;

        // Measure the search string that starts here
        cchSpan = strncspn( pBuffer, pSepList, pBufEnd - pBuffer );

        // The first string seen decides, once, whether strings are added
        // as expressions or as literal strings
        if ( lpgi->addstr == NULL )
        {
            if ( isexpr( lpgi, pBuffer, cchSpan ) )
                lpgi->addstr = addexpr;
            else
                lpgi->addstr = addstring;
        }

        // A literal string is skipped only when no BEGLINE/COLNOS flag is
        // set and findlist() reports a match within it; everything else is
        // added.
        if ( lpgi->addstr != addexpr &&
             !(lpgi->Flags & (BEGLINE | COLNOS)) &&
             findlist( lpgi, pBuffer, pBuffer + cchSpan ) != NULL )
        {
            // Already covered - nothing to add
        }
        else
        {
            (*lpgi->addstr)( lpgi, pBuffer, cchSpan );
        }

        pBuffer += cchSpan;         // Move past the string
    }

    return( 0 );
}
//*********************************************************************
// int enumlist( LPGREPINFO lpgi, register STRINGNODE FAR *pNode, int cchprev )
//
// ARGUMENTS:
// pNode - Pointer to list to dump
// cchprev - Count of preceding characters
// RETURNS:
// int - Number of strings in pNode
//
//*********************************************************************
int enumlist( LPGREPINFO lpgi, register STRINGNODE *pNode, int cchprev )
{
    // Visits every node reachable from pNode, feeding each node's text to
    // maketd1() with cchprev as the number of characters preceding it, and
    // counts the strings found.  Never returns less than 1.
    int cStrings = 0;               // Strings counted so far

    for ( ; pNode != NULL; pNode = pNode->s_alt )   // Each alternate in turn
    {
        // Make TD1 entries for this node's text
        maketd1( lpgi, (LPBYTE )s_text( pNode ), (int)pNode->s_must, cchprev );

        if ( pNode->s_suf == NULL )
            cStrings++;             // No suffixes: this node ends one string
        else                        // Otherwise count whatever hangs below
            cStrings += enumlist( lpgi, pNode->s_suf, cchprev + pNode->s_must );
    }

    if ( cStrings == 0 )            // An empty list still counts as one
        return( 1 );
    return( cStrings );
}
//*********************************************************************
// int enumstrings( LPGREPINFO lpgi )
//
// ARGUMENTS:
// NONE
// RETURNS:
// int - Total number of strings in all StringList nodes
//*********************************************************************
int enumstrings( LPGREPINFO lpgi )
{
    // Walks the translation table of lpgi->ge, builds the TD1 entries for
    // every list in use and returns the total number of strings found.
    unsigned char uchFirst;         // First character of the current list
    int iChar;                      // Index into the translation table
    int cStrings = 0;               // Running string count

    for ( iChar = 0; iChar < TRTABLEN; iChar++ )
    {
        // In case-insensitive mode the lower-case slots are skipped;
        // newstring() points both cases of a letter at the same list.
        if ( !lpgi->CaseSen && IsCharLower( (unsigned char)iChar ) )
            continue;

        // No list starts with this character
        if ( lpgi->ge.TransTable[ iChar ] == 0 )
            continue;

        // The first character is not stored in the tree, so give it its
        // TD1 entry here (position 0) ...
        uchFirst = (unsigned char)iChar;
        maketd1( lpgi, &uchFirst, 1, 0);

        // ... and enumerate the rest of the list with one preceding char
        cStrings += enumlist( lpgi,
                              lpgi->ge.StringList[ lpgi->ge.TransTable[ iChar ] ], 1 );
    }

    return( cStrings );
}