Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1515 lines
43 KiB

/*
* slmdiff.c: diff utility for SLM
*
* Copyright (C) 1992 Microsoft Corporation. All Rights Reserved.
* Microsoft Confidential
*
*
* The purpose of this utility is to provide a diff equil to the
* AT&T derived diff in speed and compactness of output while
* eliminating any possibility of infringement on the AT&T code.
* This was done by having Greg Cox, the original author of this
* code, not view the AT&T sources before completion of this utility
* and also having this diff based on an entirely different
* algorithm. Another purpose of this utility is to work in a 32
* bit flat memory model which allows us to diff arbitrarily
* large text files.
*
* This file impliments a diff algorithm that compares two files on a
* line-by-line basis using the algorithm described by Paul Heckel
* in "A Technique for Isolating Differences Between Files",
* Communications of the ACM, April 1978, Volume 21, Number 4
*
* As described in Heckel's document, the basic algorithm derives
* from two observations:
* 1) "A line that occurs once and only once in each file
* must be the same line (unchanged but possibly moved).
* This "finds" most lines and thus excludes them from
* further consideration.
* 2) "If in each file immediately adjacent to a "found"
* line pair there are lines identical to each other,
* these lines must be the same line. Repeated
* application will "find" sequences of unchanged lines."
*
*
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>
#include "slmdiff_.h"
#include "version.h"
#include "unicode.h"
EnableAssert
FDD fdd1;
FDD fdd2;
HE *rghe;
long che;
long cheMac;
CBLK *rgCBlkCand;
long cCBlkCand;
long cCBlkCandMac;
CBLK *rgCBlkFinal;
long cCBlkFinal;
long cCBlkFinalMac;
BOOL fFilesSame = fTrue; /* files are same unless proved otherwise! */
BOOL fSLM = fFalse; /* called from SLM? */
BOOL fTruncSpace = fFalse; /* truncate spaces and tabs */
BOOL fCkSum = fFalse; /* output checksum at end */
BOOL fCkSumErr = fFalse; /* error occurred generating checksum */
BOOL fCtrlZAsText = fFalse; /* consider ^Z as text instead of end of file */
char szUsage1[] = "\nSLMDIFF %u.%u.%02u Beta\n\n";
char szUsage2[] = "ERROR - Illegal switches or incorrect number of arguments.\n";
char szUsage3[] = "(This utility is for SLM use only. DO NOT REMOVE!)\n";
unsigned long ulCkSum;
BOOL bUnicode;
/*-------------------------------------------------------------------
* Name: main
* Purpose: entry point for this utility
* Assumes:
* Returns: retFatal if error, otherwise exits thru ExitDiff()
*/
int __cdecl
main(
int argc,
BYTE *argv[])
{
BYTE *pchFlags;
BOOL bUnicode2;
for (argc--, argv++; argc > 0; argc--, argv++) {
pchFlags = argv[0];
if ('-' == *pchFlags++) {
while (*pchFlags != 0) {
switch (*pchFlags++) {
case 'b':
fTruncSpace = fTrue;
break;
case 'c':
fCkSum = fTrue;
ulCkSum = 0;
break;
case 's':
/* SLM flag */
fSLM = fTrue;
break;
case 'z':
fCtrlZAsText = fTrue;
break;
default:
printf(szUsage1, rmj, rmm, rup);
printf(szUsage2);
printf(szUsage3);
return (retFatal);
}
}
}
else
break;
}
if (!fSLM || (argc != 2)) {
printf(szUsage1, rmj, rmm, rup);
printf(szUsage2);
printf(szUsage3);
return (retFatal);
}
bUnicode = IsFileUnicode (argv[0]);
bUnicode2 = IsFileUnicode (argv[1]);
if (bUnicode != bUnicode2) {
printf(szUsage1, rmj, rmm, rup);
printf("ERROR - Cannot run diff between an ANSI file and an Unicode file.\n");
return(retFatal);
}
/* initialize and open the files */
InitDiff(argv[0], argv[1]);
/* compare the two files */
ExitDiff(FFileCompare() ? retSuccEQ : (fCkSum ? retSuccNECS : retSuccNE));
}
/*-------------------------------------------------------------------
* Name: InitDiff
* Purpose: general utility initialization
* Assumes:
* Returns:
*/
void
InitDiff(
unsigned char *szFileName1,
unsigned char *szFileName2)
{
long cchTextBufTry;
_setmode(HSTDOUT, O_BINARY);
InitFDD(&fdd1, szFileName1);
InitFDD(&fdd2, szFileName2);
/* initialize the allocated memory pointers */
rghe = NULL;
rgCBlkCand = NULL;
rgCBlkFinal = NULL;
/* estimate the number of hash entries to be the same as the number of
* lines in the first file
*/
cheMac = fdd1.cfhMac;
if (NULL == (rghe = pvAllocMem(cheMac * sizeof (HE))))
ExitDiff(retFatal);
/* initialize 1st entry */
rghe[0].Hash = ulongMax;
rghe[0].cLine1 = 0;
rghe[0].cLine2 = 0;
rghe[0].iLine1 = 0;
che = 1;
/* initialize the common block arrays */
cCBlkCandMac = cCBlkInc;
if (NULL == (rgCBlkCand = pvAllocMem(cCBlkCandMac * sizeof (CBLK))))
ExitDiff(retFatal);
cCBlkFinalMac = cCBlkInc;
if (NULL == (rgCBlkFinal = pvAllocMem(cCBlkFinalMac * sizeof (CBLK))))
ExitDiff(retFatal);
cCBlkFinal = 2;
rgCBlkFinal[0].iLine1 = rgCBlkFinal[0].iLine2 = ulongMax;
rgCBlkFinal[1].iLine1 = rgCBlkFinal[1].iLine2 = longMax;
rgCBlkFinal[0].cLine = rgCBlkFinal[1].cLine = 1;
/* initialize the text buffers */
fdd1.fFreepchTextBuf = fdd2.fFreepchTextBuf = fTrue;
/* try to allocate enough memory for the entire file */
fdd1.cchTextBufMac = fdd1.cbFile + 10;
if (bUnicode)
fdd1.cchTextBufMac = (fdd1.cbFile + 10) * sizeof(WCHAR);
if (NULL == (fdd1.pchTextBuf = pvAllocMem(fdd1.cchTextBufMac))) {
for (cchTextBufTry = fdd1.cchTextBufMac - cchTextBufDec;
cchTextBufTry > cchTextBuf1Min;
cchTextBufTry -= cchTextBufDec) {
if ((fdd1.pchTextBuf = pvAllocMem(cchTextBufTry)) != NULL) {
fdd1.cchTextBufMac = cchTextBufTry / 2;
break;
}
}
if (cchTextBufTry <= cchTextBuf1Min)
ExitDiff(retFatal);
fdd2.pchTextBuf = fdd1.pchTextBuf + (cchTextBufTry / 2);
fdd2.cchTextBufMac = cchTextBufTry / 2;
fdd2.fFreepchTextBuf = fFalse;
}
else {
/* try to allocate enough memory for the entire file */
fdd2.cchTextBufMac = fdd2.cbFile + 10;
if (bUnicode)
fdd2.cchTextBufMac = (fdd2.cbFile + 10) * sizeof(WCHAR);
if (NULL == (fdd2.pchTextBuf = pvAllocMem(fdd2.cchTextBufMac))) {
for (cchTextBufTry = fdd2.cchTextBufMac - cchTextBufDec;
cchTextBufTry > cchTextBuf2Min;
cchTextBufTry -= cchTextBufDec) {
if ((fdd2.pchTextBuf = pvAllocMem(cchTextBufTry)) != NULL) {
fdd2.cchTextBufMac = cchTextBufTry;
break;
}
}
if (cchTextBufTry <= cchTextBuf2Min) {
fdd2.pchTextBuf = fdd1.pchTextBuf + (fdd1.cchTextBufMac / 2);
fdd1.cchTextBufMac = fdd2.cchTextBufMac = fdd1.cchTextBufMac / 2;
fdd2.fFreepchTextBuf = fFalse;
}
}
}
}
/*-------------------------------------------------------------------
* Name: InitFDD
* Purpose: initialize a File Diff Data structure
* Assumes:
* Returns:
*/
void
InitFDD(
FDD *pfdd,
unsigned char *szFileName)
{
memset(pfdd, 0, sizeof (FDD));
/* initialize the allocated memory pointers */
pfdd->pchTextBuf = NULL;
pfdd->rgfh = NULL;
pfdd->hFile = NULL;
if (NULL == (pfdd->hFile = fopen(szFileName, "rb")))
ExitDiff(retFatal);
/* find out the size of this file */
if (fseek(pfdd->hFile, 0L, SEEK_END) != 0)
ExitDiff(retFatal);
if (-1L == (pfdd->cbFile = ftell(pfdd->hFile)))
ExitDiff(retFatal);
if (fseek(pfdd->hFile, 0L, SEEK_SET) != 0)
ExitDiff(retFatal);
if (bUnicode)
pfdd->cfhMac = pfdd->cLinepchMac = (pfdd->cbFile / sizeof(WCHAR)) / cchPerLine + 1;
else
pfdd->cfhMac = pfdd->cLinepchMac = pfdd->cbFile / cchPerLine + 1;
if (NULL == (pfdd->rgfh = pvAllocMem(pfdd->cfhMac * sizeof (FH))))
ExitDiff(retFatal);
if (NULL == (pfdd->mpLinepch = pvAllocMem(pfdd->cLinepchMac * sizeof (BYTE *))))
ExitDiff(retFatal);
pfdd->iLineEOF = iLineMax;
/* pfdd->fCtrlZ = fFalse; */
}
/*-------------------------------------------------------------------
* Name: FFileCompare
* Purpose: main file comparison routine
* Assumes:
* Returns: fTrue if the files are the same, fFalse otherwise
*/
BOOL
FFileCompare(
void)
{
long iLine1, iLine2;
long iBlk;
CBLK *pBlk;
long cBlocks;
/* convert all lines in both files to hash entries */
FileToHash(&fdd2);
FileToHash(&fdd1);
/* set each file phe to the appropriate hash entry */
SetAllBuffHE(&fdd1);
SetAllBuffHE(&fdd2);
/* find all common blocks of lines */
for (iLine1 = iLine2 = 0, iBlk = 1, pBlk = &rgCBlkFinal[iBlk];
iBlk < cCBlkFinal;
iBlk++, pBlk++) {
/* recurse on each block until we can't find any more common lines,
* then loop to look at the next block
*/
for (cBlocks = 1; (cBlocks > 0) && ((pBlk->iLine1 - iLine1) > 1) &&
((pBlk->iLine2 - iLine2) > 1);) {
cBlocks = cCBLKFindCommonLines(iLine1, iLine2, pBlk->iLine1, pBlk->iLine2);
pBlk = &rgCBlkFinal[iBlk]; /* in case rgCBlkFinal moved */
}
iLine1 = pBlk->iLine1 + pBlk->cLine;
iLine2 = pBlk->iLine2 + pBlk->cLine;
}
#ifdef DEBUGPRINT
/* print out a list of the final group of common blocks */
printf("\n");
for (iBlk = 1, pBlk = &rgCBlkFinal[1]; iBlk < (cCBlkFinal - 1); iBlk++, pBlk++) {
printf("final common block: iLine1 = %ld iLine2 = %ld cLine = %ld\r\n",
pBlk->iLine1, pBlk->iLine2, pBlk->cLine);
}
printf("\n");
#endif
/* if the beginning of each text buffer is not set to the beginning
* of its file we must reinitialize that text buffer to the beginning
* of the file for the check pass
*/
if (fdd1.iLineFirst != 0) {
fdd1.iLineFirst = 0;
fdd1.cLineBuff = 0;
fdd1.oFileNextLine = 0;
fdd1.fCtrlZ = fFalse;
}
if (fdd2.iLineFirst != 0) {
fdd2.iLineFirst = 0;
fdd2.cLineBuff = 0;
fdd2.oFileNextLine = 0;
fdd2.fCtrlZ = fFalse;
}
/* now walk through each block of common lines - output the
* lines between each block since they are the ones that
* changed and verify that what we thought were identical
* lines are really identical (hey, even with a 32 bit hash
* code, we sometimes get a duplicate hash for 2 different
* lines)
*/
for (iBlk = 1, pBlk = &rgCBlkFinal[1]; iBlk < cCBlkFinal; iBlk++, pBlk++) {
iLine1 = ((pBlk - 1)->iLine1 + (pBlk - 1)->cLine);
iLine2 = ((pBlk - 1)->iLine2 + (pBlk - 1)->cLine);
PrintDiffs(pBlk->iLine1 - iLine1, pBlk->iLine2 - iLine2, iLine1, iLine2);
if (iBlk < (cCBlkFinal - 1))
CheckDiffBlk(pBlk);
}
if (!fFilesSame && fCkSum) {
if (fCkSumErr)
printf("\r\n#D CKSUM ERROR\r\n\r\n\r\n");
else
printf("\r\n#D %11lu\r\n\r\n\r\n", ulCkSum);
}
return (fFilesSame);
}
/*-------------------------------------------------------------------
* Name: ExitDiff
* Purpose: clean up allocated memory blocks, close input
* files, and exit back to the operating system
* Assumes:
* Returns: returns exit code back to the operating system
*/
void ExitDiff(RET retExitCode)
{
if (fdd1.pchTextBuf != NULL)
FreeMem(fdd1.pchTextBuf);
if (fdd2.fFreepchTextBuf)
FreeMem(fdd2.pchTextBuf);
if (fdd1.rgfh != NULL)
FreeMem(fdd1.rgfh);
if (fdd2.rgfh != NULL)
FreeMem(fdd2.rgfh);
if (rghe != NULL)
FreeMem(rghe);
if (rgCBlkCand != NULL)
FreeMem(rgCBlkCand);
if (rgCBlkFinal != NULL)
FreeMem(rgCBlkFinal);
if (fdd1.hFile != NULL)
fclose(fdd1.hFile);
if (fdd2.hFile != NULL)
fclose(fdd2.hFile);
exit(retExitCode);
}
#if DBG
/*-------------------------------------------------------------------
* Name: Fail
* Purpose: print out an asserting failure string to the user,
clean up, and exit
* Assumes:
* Returns: nothing - exits thru ExitDiff()
*/
void
Fail(
WCHAR *sz,
int ln)
{
printf("assertion failed in %ws, line %d\n", sz, ln);
ExitDiff(retFatal);
}
#endif
/*-------------------------------------------------------------------
* Name: SetAllBuffHE
* Purpose: set each phe in rgfh of a file to the appropriate hash entry
* Assumes:
* Returns:
*/
void
SetAllBuffHE(
FDD *pfdd)
{
long iLine;
FH *pfh;
long ihe;
for (iLine = 0, pfh = pfdd->rgfh;
iLine < pfdd->iLineEOF;
iLine++, pfh++) {
if (!FFindHE(pfh->u.Hash, &ihe))
Assert(fFalse);
pfh->phe = &rghe[ihe];
}
}
/*-------------------------------------------------------------------
* Name: cCBLKFindCommonLines
* Purpose: look at a block of lines from each file and match up the
* appropriate lines
* Assumes:
* Returns: number of blocks of common lines found
*/
long
cCBLKFindCommonLines(
long iLine1Start,
long iLine2Start,
long iLine1Mac,
long iLine2Mac)
{
HE *phe;
FH *pfh1;
FH *pfh2;
long iLine1, iLine2;
long cLineBlock;
long iBlk;
CBLK *pBlk;
long cCBlkFinalprev = cCBlkFinal;
BOOL fFoundUnique;
/* limit ourselves to the end of the file */
if (iLine1Mac > fdd1.iLineEOF)
iLine1Mac = fdd1.iLineEOF;
if (iLine2Mac > fdd2.iLineEOF)
iLine2Mac = fdd2.iLineEOF;
/* initialize each HE corresponding to a line's hash value */
InitHE(&fdd2, iLine2Start, iLine2Mac);
InitHE(&fdd1, iLine1Start, iLine1Mac);
/* mark each instance of a line in rghe - legal values are: 0, 1, many (2) */
for (iLine2 = iLine2Start, pfh2 = &fdd2.rgfh[iLine2Start];
iLine2 < iLine2Mac;
iLine2++, pfh2++) {
phe = pfh2->phe;
if (phe->cLine2 < 2)
phe->cLine2++;
}
/* mark each instance of a line in rghe - legal values are: 0, 1, many (2)
* also save the file 1 line number
*/
for (iLine1 = iLine1Start, pfh1 = &fdd1.rgfh[iLine1Start];
iLine1 < iLine1Mac;
iLine1++, pfh1++) {
phe = pfh1->phe;
if (phe->cLine1 < 2)
phe->cLine1++;
phe->iLine1 = iLine1;
}
/* now walk through rgfh looking for lines that occur only
* once in this block - this provides an "anchor point" where we
* decide these two lines MUST be matching lines
* we then go into each line's rgfh entry and point it
* at the other file's matching line
*/
for (fFoundUnique = fFalse, iLine2 = iLine2Start,
pfh2 = &fdd2.rgfh[iLine2Start];
iLine2 < iLine2Mac; iLine2++, pfh2++) {
phe = pfh2->phe;
if ((1 == phe->cLine1) && (1 == phe->cLine2)) {
iLine1 = phe->iLine1;
pfh2->u.iLineMatch = iLine1;
pfh2->fLineMatch = fTrue;
fdd1.rgfh[iLine1].u.iLineMatch = iLine2;
fdd1.rgfh[iLine1].fLineMatch = fTrue;
fFoundUnique = fTrue;
}
}
if (!fFoundUnique) {
/* panic mode: no unique lines were found!
* since iLine1 in each HE element will be set to the
* LAST time it occurred in the 1st file, we walk back from
* the end of the 2nd file's block and arbitrarily pick
* matching lines in the 2nd file as "anchor points" just like
* we would have if they were lines that occurred only once
* in each block
*/
for (iLine2 = iLine2Mac - 1, pfh2 = &fdd2.rgfh[iLine2Mac - 1];
iLine2 >= iLine2Start;
iLine2--, pfh2--) {
phe = pfh2->phe;
if ((phe->cLine1 > 0) && (phe->cLine2 > 0)) {
iLine1 = phe->iLine1;
if (!fdd1.rgfh[iLine1].fLineMatch) {
pfh2->u.iLineMatch = iLine1;
pfh2->fLineMatch = fTrue;
fdd1.rgfh[iLine1].u.iLineMatch = iLine2;
fdd1.rgfh[iLine1].fLineMatch = fTrue;
}
}
}
}
/* now walk forward from each "anchor point" and assume that if
* the next lines from each file have the same hash value, then
* they must be the same line and treat them as if we had another
* "anchor point". continue walking forward matching lines until
* we hit another "anchor point" or the lines don't share the same
* hash value. then start over looking for the next "anchor point",
* etc. until we run into the end of the block
*/
for (iLine2 = iLine2Start, pfh2 = &fdd2.rgfh[iLine2Start];
iLine2 < iLine2Mac;) {
if (pfh2->fLineMatch) {
if ((iLine1 = pfh2->u.iLineMatch + 1) < iLine1Mac) {
pfh1 = &fdd1.rgfh[iLine1];
iLine2++;
pfh2++;
for (; (iLine1 < iLine1Mac) && (iLine2 < iLine2Mac);
iLine1++, iLine2++, pfh1++, pfh2++) {
if (pfh1->fLineMatch || pfh2->fLineMatch ||
(pfh1->phe != pfh2->phe))
break;
pfh2->u.iLineMatch = iLine1;
pfh2->fLineMatch = fTrue;
pfh1->u.iLineMatch = iLine2;
pfh1->fLineMatch = fTrue;
}
continue;
}
}
iLine2++;
pfh2++;
}
/* now we do exactly what we just did above only this time we
* walk backwards from the end of each block
*/
iLine2 = iLine2Mac - 1;
for (pfh2 = &fdd2.rgfh[iLine2]; iLine2 > iLine2Start;) {
if (pfh2->fLineMatch) {
if ((iLine1 = pfh2->u.iLineMatch - 1) >= iLine1Start) {
pfh1 = &fdd1.rgfh[iLine1];
iLine2--;
pfh2--;
for (; (iLine1 >= iLine1Start) && (iLine2 >= iLine2Start);
iLine1--, iLine2--, pfh1--, pfh2--) {
if (pfh1->fLineMatch || pfh2->fLineMatch ||
(pfh1->phe != pfh2->phe))
break;
pfh2->u.iLineMatch = iLine1;
pfh2->fLineMatch = fTrue;
pfh1->u.iLineMatch = iLine2;
pfh1->fLineMatch = fTrue;
}
continue;
}
}
iLine2--;
pfh2--;
}
/* find the start point of each common block and its size and register
* each candidate in rgCBlkCand
*/
for (iLine1 = iLine1Start, iLine2 = iLine2Start, cCBlkCand = 0,
pfh2 = &fdd2.rgfh[iLine2Start];
iLine2 < iLine2Mac;) {
cLineBlock = 0;
if (pfh2->fLineMatch) {
iLine1 = pfh2->u.iLineMatch;
while (pfh2->fLineMatch) {
iLine1++;
iLine2++;
if (iLine2 < fdd2.iLineEOF) {
pfh2++;
if (pfh2->fLineMatch)
pfh1 = &fdd1.rgfh[pfh2->u.iLineMatch];
}
cLineBlock++;
if (!pfh2->fLineMatch || (pfh1->u.iLineMatch != iLine2) ||
(pfh2->u.iLineMatch != iLine1) || (iLine2 >= iLine2Mac)) {
LogCBlkCand(iLine1, iLine2, cLineBlock);
break;
}
}
}
else {
iLine2++;
pfh2++;
}
}
#ifdef DEBUGPRINT
/* print out a list of the candidiate group of common blocks */
printf("\n");
for (iBlk = 0, pBlk = &rgCBlkCand[0]; iBlk < cCBlkCand; iBlk++, pBlk++) {
printf("Cand common block: iLine1 = %ld iLine2 = %ld cLine = %ld\r\n",
pBlk->iLine1, pBlk->iLine2, pBlk->cLine);
}
printf("\n");
#endif
/* sort the common blocks keyed on size of block */
qsort(rgCBlkCand, (size_t)cCBlkCand, (size_t)(sizeof (CBLK)),
(int (*)(const void *, const void *))CBlkCmp);
/* from largest to smallest block add the blocks to the rgCBlkFinal array */
for (iBlk = 0, pBlk = rgCBlkCand; iBlk < cCBlkCand; iBlk++, pBlk++)
AddBlk(pBlk);
/* return number of new blocks discovered */
return (cCBlkFinal - cCBlkFinalprev);
}
/*-------------------------------------------------------------------
* Name: CBlkCmp
* Purpose: routine called from qsort() to compare two common blocks
* Assumes:
* Returns: -1 if number of lines in block 1 > number of lines in block 2,
* 0 if number of lines in block 1 == number of lines in block 2,
* 1 if number of lines in block 1 < number of lines in block 2
*/
int
CBlkCmp(
CBLK *pBlk1,
CBLK *pBlk2)
{
if (pBlk1->cLine > pBlk2->cLine)
return (-1);
else if (pBlk1->cLine < pBlk2->cLine)
return (1);
else
return (0);
}
/*-------------------------------------------------------------------
* Name: AddBlk
* Purpose: add a common block to rgCBlkFinal if not a "move" outside
* the current sequence, if a "move" then ignore block
* Assumes:
* Returns:
*/
void
AddBlk(
CBLK *pBlk)
{
long iLow, iHigh, iTry, iBlk;
CBLK *pBlkT;
/* find place in rgCBlkFinal to insert new block */
for (iLow = 0, iHigh = cCBlkFinal - 1, iTry = 0; (iHigh - iLow) >= 2;) {
iTry = ((iHigh - iLow) / 2) + iLow;
pBlkT = &rgCBlkFinal[iTry];
if (pBlk->iLine1 == pBlkT->iLine1)
Assert(fFalse);
else if (pBlkT->iLine1 < pBlk->iLine1)
iLow = iTry;
else
iHigh = iTry;
}
iBlk = ((pBlk->iLine1 < rgCBlkFinal[iLow].iLine1) ? iLow : iHigh);
Assert(iBlk != 0);
/* if the range of lines for both files in this new block
* don't both fit between the same blocks already in the
* rgCBlkFinal array (i.e. - we have a move of a block
* of lines) simply throw away this block
*/
pBlkT = &rgCBlkFinal[iBlk - 1];
if (pBlk->iLine2 < (pBlkT->iLine2 + pBlkT->cLine))
return;
pBlkT++;
if ((pBlk->iLine2 + pBlk->cLine) > pBlkT->iLine2)
return;
/* do the actual insertion of the block into the sorted array */
memmove(pBlkT + 1, pBlkT, (size_t)((cCBlkFinal - iBlk) * sizeof (CBLK)));
*pBlkT = *pBlk;
if (++cCBlkFinal >= cCBlkFinalMac) {
cCBlkFinalMac += cCBlkInc;
if (NULL == (rgCBlkFinal = pvReAllocMem(rgCBlkFinal, cCBlkFinalMac * sizeof (CBLK))))
ExitDiff(retFatal);
}
}
/*-------------------------------------------------------------------
* Name: FileToHash
* Purpose: get each line of a file in sequence and save its hash value
* Assumes:
* Returns:
*/
void
FileToHash(
FDD *pfdd)
{
long iLine;
WCHAR *pLine;
WCHAR *pLineT;
if (fTruncSpace)
if (NULL == (pLineT = pvAllocMem(1)))
ExitDiff(retFatal);
for (iLine = 0; ; iLine++) {
pLine = pchGetLine(pfdd, iLine);
if (NULL == pLine)
break;
if (iLine >= pfdd->cfhMac) {
pfdd->cfhMac += cfhInc;
if (NULL == (pfdd->rgfh = pvReAllocMem(pfdd->rgfh, pfdd->cfhMac * sizeof (FH))))
ExitDiff(retFatal);
}
AddHE(pfdd->rgfh[iLine].u.Hash =
HASHGetHash(fTruncSpace ? szCpLineStripSp(pLine, &pLineT) : pLine));
}
if (fTruncSpace)
FreeMem(pLineT);
}
/* master character to hash code scrambler
*
* This table was copied from "Fast Hashing of Variable-Length
* Text Strings" by Peter K. Pearson, Communucations of the ACM,
* June 1990, Volume 33, Number 6 which Peter Pearson derived
* and recommended as providing good hashing behavior.
*/
BYTE mpChHash[256] =
{
1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
};
/*-------------------------------------------------------------------
* Name: HASHGetHash
* Purpose: return a 32 bit hash for a given line
* Assumes:
* Returns: hash code
*/
HASH
HASHGetHash(
WCHAR *szLine)
{
HASH Hash;
BYTE bhashT;
WCHAR *pch;
/* REVIEW - what do we want to recognize as a line ending? - 9/27/92 gregc */
for (pch = szLine, Hash = bhashT = 0;
(*pch != L'\r') && (*pch != L'\n') && (*pch != L'\0'); pch++) {
bhashT = mpChHash[bhashT ^ LOBYTE(*pch)];
Hash = (Hash << 8) + bhashT;
}
return Hash;
}
/*-------------------------------------------------------------------
* Name: FFindHE
* Purpose: try to find a hash entry in rghe[]
* Assumes:
* Returns: if hash entry found return fTrue and its index in *pihe else
* return fFalse and the proper place to insert it in *pihe
*/
BOOL
FFindHE(
HASH Hash,
long *pihe)
{
HE *phe;
long iLow, iHigh, iTry;
for (iLow = 0, iHigh = che - 1, iTry = 0; (iHigh - iLow) >= 2;) {
iTry = ((iHigh - iLow) / 2) + iLow;
phe = &rghe[iTry];
if (Hash == phe->Hash) {
*pihe = iTry;
return (fTrue);
}
else if (phe->Hash < Hash)
iLow = iTry;
else
iHigh = iTry;
}
for (iTry = iLow; iTry <= iHigh; iTry++) {
phe = &rghe[iTry];
if (Hash == phe->Hash) {
*pihe = iTry;
return (fTrue);
}
}
*pihe = (Hash < rghe[iLow].Hash) ? iLow : iHigh;
return (fFalse);
}
/*-------------------------------------------------------------------
* Name: AddHE
* Purpose: add a hash entry to rghe if it doesn't currently exist
* Assumes:
* Returns:
*/
void
AddHE(
HASH Hash)
{
HE *phe;
long ihe;
if (!FFindHE(Hash, &ihe)) {
if ((che + 1) >= cheMac) {
cheMac += cheInc;
if (NULL == (rghe = pvReAllocMem(rghe, cheMac * sizeof (HE))))
ExitDiff(retFatal);
}
phe = &rghe[ihe];
memmove(phe + 1, phe, (size_t)((che - ihe) * sizeof (HE)));
phe->Hash = Hash;
che++;
}
}
/*-------------------------------------------------------------------
* Name: LoadTextBuff
* Purpose: fill the text buffer from the input file, then walk through
* the just read in characters, decide where lines end, and log
* them in the pfdd->mpLinepch array
* Assumes:
* Returns:
*/
void
LoadTextBuff(
FDD *pfdd)
{
long iLine;
long cbRead, ich, ichLineEnd;
WCHAR *pch;
#ifdef DEBUGPRINT
printf("Resync\r\n");
#endif
if (pfdd->fCtrlZ) {
/* found end of file */
pfdd->iLineEOF = pfdd->iLineFirst + pfdd->cLineBuff;
return;
}
while (fTrue) {
if (fseek(pfdd->hFile, pfdd->oFileNextLine, SEEK_SET) != 0)
ExitDiff(retFatal);
cbRead = freadW((WCHAR *)pfdd->pchTextBuf, pfdd->cchTextBufMac / sizeof(WCHAR) - 4,
pfdd->hFile, bUnicode);
if (0L == cbRead) {
if (ferror(pfdd->hFile))
ExitDiff(retFatal);
else {
/* found end of file */
pfdd->iLineEOF = pfdd->iLineFirst + pfdd->cLineBuff;
return;
}
}
if (!bUnicode)
cbRead *= sizeof(WCHAR);
if ((bUnicode && ((long)(pfdd->oFileNextLine + cbRead) == pfdd->cbFile)) ||
((long)(pfdd->oFileNextLine + cbRead / sizeof(WCHAR)) == pfdd->cbFile)) {
/* make sure last line in file is terminated by CR-LF */
cbRead = (cbRead + 1) & ~1; // Make sure it's a multiple of 2
pch = (WCHAR *)&pfdd->pchTextBuf[cbRead];
if (*(pch - 1) != chLF) {
if (*(pch - 1) == chCR)
pch--;
*pch++ = chCR;
*pch++ = chLF;
cbRead = ((BYTE *)pch) - pfdd->pchTextBuf;
}
}
*(WCHAR *)(&pfdd->pchTextBuf[cbRead]) = L'\0';
pfdd->iLineFirst += pfdd->cLineBuff;
/* find all the lines in the text just read in by saving
* the beginning address in mpLinepch[] and changing
* all line feed characters into null characters
*/
for (iLine = pfdd->cLineBuff = 0, pch = (WCHAR *)pfdd->pchTextBuf, ich = ichLineEnd = 0;
(ich < cbRead) && !pfdd->fCtrlZ;
iLine++) {
if (iLine >= pfdd->cLinepchMac) {
pfdd->cLinepchMac += cLinepchInc;
if (NULL == (pfdd->mpLinepch = pvReAllocMem(pfdd->mpLinepch,
pfdd->cLinepchMac * sizeof(BYTE *))))
ExitDiff(retFatal);
}
/* save address of beginning of this line */
pfdd->mpLinepch[iLine] = (BYTE *)pch;
/* scan line and mark end by replacing the linefeed
* character with a NULL
*/
for (; (*pch != L'\n') && ich < cbRead; pch++, ich += sizeof(WCHAR)) {
if (!fCtrlZAsText && (chCtrlZ == *pch)) {
/* found end of file */
pfdd->fCtrlZ = fTrue;
iLine++;
*pch = L'\0';
// if (ichLineEnd == ich)
// break;
// *pch++ = chCR;
// *pch = chLF;
// ich += sizeof(WCHAR);
break;
}
}
if (L'\n' == *pch) {
*pch = L'\0';
ich += sizeof(WCHAR);
pch++;
ichLineEnd = ich;
}
else
break;
}
if (0L == iLine) {
/* we need to make the buffer bigger to fit the line in */
if (NULL == (pfdd->pchTextBuf = pvReAllocMem(pfdd->pchTextBuf,
pfdd->cchTextBufMac += cchTextBufInc)))
ExitDiff(retFatal);
}
else {
pfdd->cLineBuff = iLine;
if (bUnicode)
pfdd->oFileNextLine += ichLineEnd;
else
pfdd->oFileNextLine += ichLineEnd / sizeof(WCHAR);
break;
}
}
}
/*-------------------------------------------------------------------
* Name: pchGetLine
* Purpose: get the address in memory of the specified line
* Assumes: iLine >= pfdd->iLineFirst
* Returns: line address
*/
WCHAR *
pchGetLine(
FDD *pfdd,
long iLine)
{
if (iLine >= (pfdd->iLineFirst + pfdd->cLineBuff))
LoadTextBuff(pfdd);
if (iLine < (pfdd->iLineFirst + pfdd->cLineBuff))
return ((WCHAR *)(pfdd->mpLinepch[iLine - pfdd->iLineFirst]));
if (iLine == pfdd->iLineEOF)
return (NULL);
else
Assert(fFalse);
}
/*-------------------------------------------------------------------
* Name: PrintDiffs
* Purpose: calculate what type of difference occurred and print out accordingly
* Assumes:
* Returns:
*/
void
PrintDiffs(
long cLine1,
long cLine2,
long iLine1,
long iLine2)
{
enum {modeAppend, modeChange, modeDelete}; /* type of difference between files */
int modeDiff;
/* clip at EOF */
if ((iLine1 + cLine1) > fdd1.iLineEOF)
cLine1 = fdd1.iLineEOF - iLine1;
if ((iLine2 + cLine2) > fdd2.iLineEOF)
cLine2 = fdd2.iLineEOF - iLine2;
if ((cLine1 > 0) || (cLine2 > 0)) {
/* make sure we know the files aren't really the same... */
fFilesSame = fFalse;
if (cLine1 > 0) {
if (SLMprintf(L"%ld", iLine1 + 1) < 0)
ExitDiff(retFatalWE);
if (cLine1 > 1)
if (SLMprintf(L",%ld", cLine1 + iLine1) < 0)
ExitDiff(retFatalWE);
if (cLine2 > 0) {
modeDiff = modeChange;
if (SLMprintf(L"c") < 0)
ExitDiff(retFatalWE);
}
else {
modeDiff = modeDelete;
if (SLMprintf(L"d") < 0)
ExitDiff(retFatalWE);
}
}
else {
modeDiff = modeAppend;
if (SLMprintf(L"%lda", iLine1) < 0)
ExitDiff(retFatalWE);
}
if (cLine2 > 1) {
if (SLMprintf(L"%ld,%ld", iLine2 + 1, cLine2 + iLine2 -
((modeDelete == modeDiff) ? 1 : 0)) < 0)
ExitDiff(retFatalWE);
}
else
if (SLMprintf(L"%ld", iLine2 + ((modeDelete == modeDiff) ? 0 : 1)) < 0)
ExitDiff(retFatalWE);
if (SLMprintf(L"\r\n") < 0)
ExitDiff(retFatalWE);
switch (modeDiff) {
case modeAppend:
PrintLines(&fdd2, iLine2, cLine2, chNew);
break;
case modeChange:
PrintLines(&fdd1, iLine1, cLine1, chOld);
if (SLMprintf(L"---\r\n") < 0)
ExitDiff(retFatalWE);
PrintLines(&fdd2, iLine2, cLine2, chNew);
break;
case modeDelete:
PrintLines(&fdd1, iLine1, cLine1, chOld);
break;
}
}
}
/*-------------------------------------------------------------------
* Name: PrintLines
* Purpose: print out a block of lines from a file
* Assumes:
* Returns:
*/
void
PrintLines(
FDD *pfdd,
long iLineStart,
long cLine,
WCHAR chLead)
{
long iLine;
WCHAR *pLine;
for (iLine = iLineStart; cLine > 0; cLine--, iLine++) {
pLine = pchGetLine(pfdd, iLine);
Assert(pLine != NULL);
if (SLMprintf(L"%c ", chLead) < 0)
ExitDiff(retFatalWE);
if (SLMprintfLn(pLine) < 0)
ExitDiff(retFatalWE);
if (SLMprintf(L"\n") < 0)
ExitDiff(retFatalWE);
}
}
/*-------------------------------------------------------------------
* Name: CheckDiffBlk
* Purpose: verify that a block of lines from each file are really
* identical to each other
* Assumes:
* Returns:
*/
void
CheckDiffBlk(
CBLK *pBlk)
{
long iLine1 = pBlk->iLine1;
long iLine2 = pBlk->iLine2;
long cLine = pBlk->cLine;
WCHAR *pLine1;
WCHAR *pLine1T;
WCHAR *pLine2;
WCHAR *pLine2T;
int val;
if (fTruncSpace) {
if (NULL == (pLine1T = pvAllocMem(1)))
ExitDiff(retFatal);
if (NULL == (pLine2T = pvAllocMem(1)))
ExitDiff(retFatal);
}
if ((iLine1 + cLine) > fdd1.iLineEOF)
cLine = fdd1.iLineEOF - iLine1;
for (; cLine > 0; iLine1++, iLine2++, cLine--) {
pLine1 = pchGetLine(&fdd1, iLine1);
pLine2 = pchGetLine(&fdd2, iLine2);
Assert(pLine1 != NULL);
Assert(pLine2 != NULL);
if (fTruncSpace)
val = wcscmp(szCpLineStripSp(pLine1, &pLine1T),
szCpLineStripSp(pLine2, &pLine2T));
else
val = wcscmp(pLine1, pLine2);
if (val != 0) {
/* lines were actually different so print out a "change" record */
if (SLMprintf(L"%ldc%ld\r\n", iLine1 + 1, iLine2 + 1) < 0)
ExitDiff(retFatalWE);
if ((SLMprintf(L"< %s\n---\r\n", pLine1) < 0) ||
(SLMprintf(L"> %s\n", pLine2) < 0))
ExitDiff(retFatalWE);
/* make sure we know the files aren't really the same... */
fFilesSame = fFalse;
}
}
if (fTruncSpace) {
FreeMem(pLine1T);
FreeMem(pLine2T);
}
}
/*-------------------------------------------------------------------
* Name: pvAllocMem
* Purpose: allocate a block of memory from the heap
* Assumes:
* Returns: address of allocated block
*/
void *
pvAllocMem(
long cbAlloc)
{
#ifdef BIT16
if (cbAlloc > ushortMax)
return (NULL);
#endif
return (malloc((size_t)cbAlloc));
}
/*-------------------------------------------------------------------
* Name: pvReAllocMem
* Purpose: resize a block of memory from the heap
* Assumes:
* Returns: address of reallocated block
*/
void *
pvReAllocMem(
void *pv,
long cbAlloc)
{
#ifdef BIT16
if (cbAlloc > ushortMax)
return (NULL);
#endif
return (realloc(pv, (size_t)cbAlloc));
}
/*-------------------------------------------------------------------
* Name: FreeMem
* Purpose: free a block of memory from the heap
* Assumes:
* Returns:
*/
void
FreeMem(
void *pv)
{
free(pv);
}
/*-------------------------------------------------------------------
* Name: InitHE
* Purpose: initialize each HE corresponding to a line's hash value
* Assumes:
* Returns:
*/
void
InitHE(
FDD *pfdd,
long iLineStart,
long iLineMac)
{
long iLine;
FH *pfh;
HE *phe;
for (iLine = iLineStart, pfh = &pfdd->rgfh[iLineStart];
iLine < iLineMac;
iLine++, pfh++) {
phe = pfh->phe;
phe->cLine1 = 0;
phe->cLine2 = 0;
pfh->fLineMatch = fFalse;
}
}
/*-------------------------------------------------------------------
* Name: LogCBlkCand
* Purpose: add a common block description entry to rgCBlkCand
* Assumes:
* Returns:
*/
void
LogCBlkCand(
long iLine1,
long iLine2,
long cLineBlock)
{
#ifdef DEBUGPRINT
printf("pre sort common block: iLine1 = %ld iLine2 = %ld cLine = %ld\r\n",
iLine1 - cLineBlock, iLine2 - cLineBlock, cLineBlock);
#endif
rgCBlkCand[cCBlkCand].iLine1 = iLine1 - cLineBlock;
rgCBlkCand[cCBlkCand].iLine2 = iLine2 - cLineBlock;
rgCBlkCand[cCBlkCand].cLine = cLineBlock;
if (++cCBlkCand >= cCBlkCandMac) {
cCBlkCandMac += cCBlkInc;
if (NULL == (rgCBlkCand = pvReAllocMem(rgCBlkCand, cCBlkCandMac * sizeof (CBLK))))
ExitDiff(retFatal);
}
}
/*-------------------------------------------------------------------
* Name: szCpLineStripSp
* Purpose: Copies a line. Each group of spaces or tabs that occur
* before the last printable character of a line is converted
* to a single space character. All spaces or tabs that occur
* after the last printable character of a line are truncated.
* Assumes: *pszDst is a heap handle that can be reallocated
* Returns: pointer to destination string
*/
WCHAR *
szCpLineStripSp(
WCHAR *pchSrc,
WCHAR **ppchDst)
{
WCHAR *pchDst;
BOOL fSpace = fFalse;
if (NULL == (*ppchDst = pchDst = pvReAllocMem(*ppchDst,
(wcslen(pchSrc) + 1) * sizeof(WCHAR))))
ExitDiff(retFatal);
while (*pchSrc != L'\0') {
switch (*pchSrc) {
case L' ':
case chTab:
pchSrc++;
fSpace = fTrue;
break;
default:
if (fSpace) {
*pchDst++ = L' ';
fSpace = fFalse;
}
/* fall through */
case chCR:
case chLF:
*pchDst++ = *pchSrc++;
break;
}
}
*pchDst = L'\0';
return (*ppchDst);
}
/*-------------------------------------------------------------------
* Name: SLMprintf
* Purpose: prints text to stdout. If fCkSum is fTrue then also
* add the text to the checksum
* Returns: return value from printf
*/
static WCHAR szText[2048];
static BYTE chBuf[2048];
int
SLMprintf(
WCHAR *szformat, ...)
{
va_list marker;
int iret = 0;
int count;
long cbWritten;
DWORD dwMode;
va_start(marker, szformat);
count = vswprintf(szText, szformat, marker);
if (!bUnicode) {
memset (chBuf, 0, sizeof(chBuf));
#ifdef _WIN32
WideCharToMultiByte (CP_ACP, 0, szText, count, chBuf, sizeof(chBuf), NULL, NULL);
#else
wcstombs (chBuf, szText, sizeof(chBuf));
#endif
iret = printf("%s", chBuf);
if (fCkSum)
CheckSum(chBuf, strlen(chBuf));
}
else {
#ifdef _WIN32
if (GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdout)), &dwMode)) {
WriteConsoleW ((HANDLE)_get_osfhandle(_fileno(stdout)),
szText,
count,
&cbWritten,
NULL);
iret = cbWritten;
}
else
iret = wprintf(szText);
#else
iret = wprintf(szText);
#endif
if (fCkSum)
CheckSum((char *)szText, wcslen(szText) * sizeof(WCHAR));
}
va_end(marker);
return (iret);
}
/*-------------------------------------------------------------------
* Name: SLMprintfLn
* Purpose: prints a line to stdout. If fCkSum is fTrue then also
* add the text to the checksum
* Returns: return value from printf
*/
int
SLMprintfLn(
WCHAR *szLine)
{
long cbWritten;
WORD count;
BYTE *lpBuf;
DWORD dwMode;
count = wcslen(szLine);
if (!bUnicode) {
lpBuf = pvAllocMem (count + 50);
memset (lpBuf, 0, count + 50);
#ifdef _WIN32
WideCharToMultiByte (CP_ACP, 0, szLine, count, lpBuf, count + 50, NULL, NULL);
#else
wcstombs (lpBuf, szLine, count + 50);
#endif
count = printf("%s", lpBuf);
if (fCkSum)
CheckSum(lpBuf, strlen(lpBuf));
FreeMem (lpBuf);
return (count);
}
else {
if (fCkSum)
CheckSum((char *)szLine, count * sizeof(WCHAR));
#ifdef _WIN32
if (GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdout)), &dwMode))
return (WriteConsoleW ((HANDLE)_get_osfhandle(_fileno(stdout)),
szLine,
wcslen(szLine),
&cbWritten,
NULL));
else
return (wprintf(L"%s", szLine));
#else
return (wprintf(L"%s", szLine));
#endif
}
}
/*-------------------------------------------------------------------
* Name: CheckSum
* Purpose:
* Returns:
*/
void
CheckSum(
unsigned char *lpb,
unsigned int cb)
{
while (cb--)
ulCkSum += *lpb++;
}