Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1274 lines
30 KiB

  1. /* SORT
  2. * %Z% %M% %I% %D% %Q%
  3. *
  4. * Copyright (C) Microsoft Corporation, 1983
  5. *
  6. * This Module contains Proprietary Information of Microsoft
  7. * Corporation and AT&T, and should be treated as Confidential.
  8. */
  9. /*** diff - differential file comparison
  10. *
  11. * MODIFICATION HISTORY
  12. * M000 18 Apr 83 andyp
  13. * - 3.0 upgrade. No changes.
  14. * M001 22 Mar 84 vich
  15. * - Don't try to unlink NULL. Trying to do so doesn't break anything,
  16. * but it makes kernel debugging a pain due to faults in user mode.
  17. * M002 ??
  18. * - added the MSDOS flag.
  19. * M006 31 Mar 86 craigwi
  20. * - for the MSDOS version, fixed -b feature so that it ignores all \r
  21. * M010 15 Dec 86 craigwi
  22. * - after printing the result, diff aborts with status = 2 if any error
  23. * occurred on stdout.
  24. * M013 21 Mar 88 jangr
  25. * - added -s flag to return SLM specific error statuses:
  26. * 10 files identical
  27. * 11 files different
  28. * 12 other errors
  29. * 13 write error
  30. * M017 27 Oct 88 alanba
  31. * - changed messages to not specify using the -h option and giving
  32. * a clear error message if being executed from within SLM.
  33. */
  34. /*
  35. * Uses an algorithm due to Harold Stone, which finds
  36. * a pair of longest identical subsequences in the two
  37. * files.
  38. *
  39. * The major goal is to generate the match vector J.
  40. * J[i] is the index of the line in file1 corresponding
  41. * to line i of file0. J[i] = 0 if there is no
  42. * such line in file1.
  43. *
  44. * Lines are hashed so as to work in core. All potential
  45. * matches are located by sorting the lines of each file
  46. * on the hash (called value). In particular, this
  47. * collects the equivalence classes in file1 together.
  48. * Subroutine equiv replaces the value of each line in
  49. * file0 by the index of the first element of its
  50. * matching equivalence in (the reordered) file1.
  51. * To save space equiv squeezes file1 into a single
  52. * array member in which the equivalence classes
  53. * are simply concatenated, except that their first
  54. * members are flagged by changing sign.
  55. *
  56. * Next the indices that point into member are unsorted into
  57. * array class according to the original order of file0.
  58. *
  59. * The cleverness lies in routine stone. This marches
  60. * through the lines of file0, developing a vector klist
  61. * of "k-candidates". At step i a k-candidate is a matched
  62. * pair of lines x,y (x in file0 y in file1) such that
  63. * there is a common subsequence of length k
  64. * between the first i lines of file0 and the first y
  65. * lines of file1, but there is no such subsequence for
  66. * any smaller y. x is the earliest possible mate to y
  67. * that occurs in such a subsequence.
  68. *
  69. * Whenever any of the members of the equivalence class of
  70. * lines in file1 matable to a line in file0 has serial number
  71. * less than the y of some k-candidate, that k-candidate
  72. * with the smallest such y is replaced. The new
  73. * k-candidate is chained (via pred) to the current
  74. * k-1 candidate so that the actual subsequence can
  75. * be recovered. When a member has serial number greater
  76. * than the y of all k-candidates, the klist is extended.
  77. * At the end, the longest subsequence is pulled out
  78. * and placed in the array J by unravel.
  79. *
  80. * With J in hand, the matches there recorded are
  81. * checked against reality to assure that no spurious
  82. * matches have crept in due to hashing. If they have,
  83. * they are broken, and "jackpot " is recorded--a harmless
  84. * matter except that a true match for a spuriously
  85. * mated line may now be unnecessarily reported as a change.
  86. *
  87. * Much of the complexity of the program comes simply
  88. * from trying to minimize core utilization and
  89. * maximize the range of doable problems by dynamically
  90. * allocating what is needed and reusing what is not.
  91. * The core requirements for problems larger than somewhat
  92. * are (in words) 2*length(file0) + length(file1) +
  93. * 3*(number of k-candidates installed), typically about
  94. * 6n words for files of length n.
  95. */
#include <stdio.h>
#include <io.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <excpt.h>
#include <process.h>
#include <fcntl.h>
#ifdef _OS2_SUBSYS_
#define INCL_DOSSIGNALS
#include <os2.h>
#else
#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
/*
 * Signal subtypes for XCPT_SIGNAL
 */
#define XCPT_SIGNAL 0xC0010003
#define XCPT_SIGNAL_INTR 1
#define XCPT_SIGNAL_KILLPROC 3
#define XCPT_SIGNAL_BREAK 4
#endif
  121. #define isslash(c) (c=='/'||c=='\\')
  122. #define DIFFH "diffh.exe"
  123. #ifndef _MAX_PATH
  124. #if defined(LFNMAX) && defined(LPNMAX)
  125. #define _MAX_PATH (LFNMAX + LPNMAX + 1)
  126. #else
  127. #define _MAX_PATH (80)
  128. #endif
  129. #endif
  130. #ifndef _HEAP_MAXREQ
  131. #define _HEAP_MAXREQ ((~(unsigned int) 0) - (unsigned) 32)
  132. #endif
  133. #define HALFLONG 16
  134. #define low(x) (x&((1L<<HALFLONG)-1))
  135. #define high(x) (x>>HALFLONG)
  136. struct cand **clist; /* merely a free storage pot for candidates */
  137. int clistcnt = 0; /* number of arrays of struct cand in clist */
  138. unsigned clen = 0; /* total number of struct cand in all clist arrays */
  139. /*
  140. Number of struct cand in one clist array
  141. (the largest power of 2 smaller than (64k / sizeof(struct cand))
  142. is 2^13. Thus, these gross hacks to make the array references
  143. more efficient, and still permit huge files.
  144. */
  145. #define CLISTSEG (0x2000)
  146. #define CLISTDIV(x) ((x) >> 13)
  147. #define CLISTMOD(x) ((x) & (CLISTSEG - 1))
  148. #define CLIST(x) (clist[CLISTDIV(x)][CLISTMOD(x)])
  149. PVOID input[2];
  150. char *inputfile[2];
  151. int inputfilesize[2];
  152. char *inputfilep[2];
  153. int inputfileleft[2];
  154. #define EndOfFile(x) (inputfileleft[x] <= 0)
  155. #define GetChar(x) ((char)((inputfileleft[x]--) ? \
  156. (*(inputfilep[x])++) : \
  157. EOF))
  158. #define SEARCH(c1,k1,y1) (CLIST(c1[k1]).y < y1) ? (k1+1) : search(c1,k1,y1)
  159. #if 0
  160. char
  161. GetChar( int x );
  162. char
  163. GetChar( int x ) {
  164. if ( inputfileleft[x]-- ) {
  165. return *(inputfilep[x])++;
  166. } else {
  167. return EOF;
  168. }
  169. }
  170. #endif
  171. struct cand {
  172. int x;
  173. int y;
  174. unsigned pred;
  175. } cand;
  176. struct line {
  177. int serial;
  178. int value;
  179. } *file[2], line;
  180. typedef struct _FILEMAP *PFILEMAP;
  181. typedef struct _FILEMAP {
  182. HANDLE FileHandle;
  183. HANDLE MapHandle;
  184. DWORD Access;
  185. DWORD Create;
  186. DWORD Share;
  187. PVOID Base;
  188. DWORD Offset;
  189. DWORD Size;
  190. DWORD Allocated;
  191. } FILEMAP;
  192. PVOID
  193. Open(
  194. const char *FileName,
  195. const char *Mode,
  196. DWORD Size
  197. );
  198. int
  199. Close (
  200. PVOID Map
  201. );
  202. /* fn prototypes gen'd from cl -Zg */
  203. void done(void);
  204. char *talloc(unsigned n);
  205. char *ralloc(char *p,unsigned n);
  206. void myfree( char *p );
  207. void noroom(void);
  208. int __cdecl sortcmp(void const *first, void const *second);
  209. void unsort(struct line *f,unsigned l,int *b);
  210. void filename(char * *pa1,char * *pa2);
  211. void prepare(int i,char *arg);
  212. void prune(void);
  213. void equiv(struct line *a,int n,struct line *b,int m,int *c);
  214. int stone(int *a,unsigned n,int *b,unsigned *c);
  215. unsigned newcand(int x,int y,unsigned pred);
  216. int search(unsigned *c,int k,int y);
  217. void unravel(unsigned p);
  218. void check(char * *argv);
  219. char * skipline(int f);
  220. void output(char * *argv);
  221. void change(int a,int b,int c,int d);
  222. void range(int a,int b,char *separator);
  223. void fetch(char * *f,int a,int b, int lb,char *s);
  224. int readhash( int f);
  225. void mesg(char *s,char *t);
  226. void SetOutputFile (char *FileName);
  227. unsigned len[2];
  228. struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
  229. unsigned slen[2];
  230. unsigned int pref, suff; /*length of prefix and suffix*/
  231. int *class; /*will be overlaid on file[0]*/
  232. int *member; /*will be overlaid on file[1]*/
  233. unsigned *klist; /*will be overlaid on file[0] after class*/
  234. int *J; /*will be overlaid on class*/
  235. char * *ixold; /*will be overlaid on klist*/
  236. char * *ixnew; /*will be overlaid on file[1]*/
  237. int opt; /* -1,0,1 = -e,normal,-f */
  238. int status = 2; /*abnormal status; set to 0/1 just before successful exit */
  239. int anychange = 0;
  240. char *empty = "";
  241. int bflag;
  242. int slmFlag;
  243. FILE* OutputFile;
  244. char *tempfile; /*used when comparing against std input*/
  245. #ifndef MSDOS
  246. char *dummy; /*used in resetting storage search ptr*/
  247. #endif
  248. void
  249. done()
  250. {
  251. if (tempfile != NULL)
  252. _unlink(tempfile);
  253. if (OutputFile && OutputFile != stdout) {
  254. fclose(OutputFile);
  255. }
  256. exit(10*slmFlag + status);
  257. }
  258. #define MALLOC(n) talloc(n)
  259. #define REALLOC(p,n) ralloc(p,n)
  260. #define FREE(p) myfree(p)
  261. // #define DEBUG_MALLOC
  262. #ifdef DEBUG_MALLOC
  263. #define MALLOC_SIG 0xABCDEF00
  264. #define FREE_SIG 0x00FEDCBA
  265. typedef struct _MEMBLOCK {
  266. DWORD Sig;
  267. } MEMBLOCK, *PMEMBLOCK;
  268. #endif
  269. char *
  270. talloc(
  271. unsigned n
  272. )
  273. {
  274. #ifdef DEBUG_MALLOC
  275. PMEMBLOCK mem;
  276. char DbgB[128];
  277. //sprintf(DbgB, "MALLOC size %d -> ", n );
  278. //OutputDebugString( DbgB );
  279. mem = malloc( n + sizeof(MEMBLOCK)+1 );
  280. if ( !mem ) {
  281. noroom();
  282. }
  283. mem->Sig = MALLOC_SIG;
  284. //sprintf(DbgB, "%lX\n", mem );
  285. //OutputDebugString( DbgB );
  286. return (char *)((PBYTE)mem + sizeof(MEMBLOCK));
  287. #else
  288. register char *p;
  289. p = malloc(++n);
  290. if (p == NULL) {
  291. noroom();
  292. }
  293. return p;
  294. #endif
  295. }
  296. char *
  297. ralloc(
  298. char *p,
  299. unsigned n
  300. )
  301. {
  302. #ifdef DEBUG_MALLOC
  303. PMEMBLOCK mem;
  304. char DbgB[128];
  305. mem = (PMEMBLOCK)((PBYTE)p - sizeof(MEMBLOCK));
  306. //sprintf(DbgB, "REALLOC: %lX, %d -> ", mem, n );
  307. //OutputDebugString( DbgB );
  308. if ( mem->Sig != MALLOC_SIG ) {
  309. sprintf(DbgB, "REALLOC ERROR: Reallocating %lX\n", mem );
  310. OutputDebugString( DbgB );
  311. }
  312. mem->Sig = FREE_SIG;
  313. mem = (PMEMBLOCK)realloc(mem, n + sizeof(MEMBLOCK)+1);
  314. if (!mem) {
  315. noroom();
  316. }
  317. mem->Sig = MALLOC_SIG;
  318. //sprintf(DbgB, "%lX\n", mem );
  319. //OutputDebugString( DbgB );
  320. return (char *)((PBYTE)mem + sizeof(MEMBLOCK));
  321. #else
  322. p = realloc(p, ++n);
  323. if (p==NULL) {
  324. noroom();
  325. }
  326. return(p);
  327. #endif
  328. }
  329. void
  330. myfree(
  331. char *p
  332. )
  333. {
  334. #ifdef DEBUG_MALLOC
  335. PMEMBLOCK mem;
  336. char DbgB[128];
  337. mem = (PMEMBLOCK)((PBYTE)p - sizeof(MEMBLOCK));
  338. //sprintf(DbgB, "FREE: %lX -> ", mem );
  339. //OutputDebugString( DbgB);
  340. if ( mem->Sig != MALLOC_SIG ) {
  341. sprintf(DbgB, "\n\tFREE ERROR: FREEING %lX\n", mem );
  342. OutputDebugString( DbgB );
  343. }
  344. mem->Sig = FREE_SIG;
  345. free(mem);
  346. //sprintf(DbgB, "Ok\n", mem );
  347. //OutputDebugString( DbgB);
  348. #else
  349. if (p) {
  350. free(p);
  351. }
  352. #endif
  353. }
  354. void
  355. noroom()
  356. {
  357. if (slmFlag == 1) {
  358. mesg("file too big; do delfile filename/addfile filename, or",empty);
  359. mesg("reduce the size of the file.",empty);
  360. done();
  361. }
  362. mesg("files too big",empty); /* end M017 */
  363. done();
  364. }
  365. int
  366. __cdecl
  367. sortcmp(
  368. const void *first,
  369. const void *second
  370. )
  371. {
  372. struct line *one = (struct line *)first;
  373. struct line *two = (struct line *)second;
  374. if (one->value < two->value)
  375. return -1;
  376. else if (one->value > two->value)
  377. return 1;
  378. else if (one->serial < two->serial)
  379. return -1;
  380. else if (one->serial > two->serial)
  381. return 1;
  382. else
  383. return 0;
  384. }
  385. void
  386. unsort(
  387. struct line *f,
  388. unsigned l,
  389. int *b
  390. )
  391. {
  392. register int *a;
  393. register unsigned int i;
  394. a = (int *)MALLOC((l+1)*sizeof(int));
  395. if (a) {
  396. memset(a, 0, (l+1)*sizeof(int));
  397. for (i=1;i<=l;i++)
  398. a[f[i].serial] = f[i].value;
  399. for (i=1;i<=l;i++)
  400. b[i] = a[i];
  401. FREE((char *)a);
  402. }
  403. }
  404. void
  405. filename(
  406. char **pa1,
  407. char **pa2
  408. )
  409. {
  410. register char *a1, *b1, *a2;
  411. char buf[BUFSIZ];
  412. struct _stat stbuf;
  413. int i, f;
  414. a1 = *pa1;
  415. a2 = *pa2;
  416. if (_stat(a1,&stbuf)!=-1 && ((stbuf.st_mode&S_IFMT)==S_IFDIR)) {
  417. b1 = *pa1 = MALLOC((unsigned) _MAX_PATH);
  418. while (*b1++ = *a1++) ;
  419. if (isslash(b1[-2]))
  420. b1--;
  421. else
  422. b1[-1] = '/';
  423. a1 = b1;
  424. if ( a2[1] == ':' ) {
  425. a2 += 2;
  426. }
  427. while (*a1++ = *a2++)
  428. if (*a2 && !isslash(*a2) && isslash(a2[-1])) /*M002*/
  429. a1 = b1;
  430. } else if (a1[0]=='-'&&a1[1]==0&&tempfile==NULL) {
  431. /* the signal handling in original source
  432. **
  433. ** signal(SIGINT,done);
  434. ** #ifndef MSDOS
  435. ** signal(SIGHUP,done);
  436. ** signal(SIGPIPE,done);
  437. ** signal(SIGTERM,done);
  438. ** #endif
  439. */
  440. if ((*pa1 = tempfile = _tempnam(getenv("TEMP"), "d")) == NULL) {
  441. mesg("cannot create temporary file", "");
  442. done();
  443. }
  444. if ((f = _open(tempfile,O_WRONLY|O_CREAT|O_TRUNC, 0600)) < 0) {
  445. mesg("cannot create ",tempfile);
  446. done();
  447. }
  448. while ((i=_read(0,buf,BUFSIZ))>0)
  449. _write(f,buf,i);
  450. _close(f);
  451. }
  452. }
  453. void
  454. prepare(
  455. int i,
  456. char *arg
  457. )
  458. {
  459. #define CHUNKSIZE 100
  460. register struct line *p;
  461. register unsigned j;
  462. register int h;
  463. char *c;
  464. PVOID f;
  465. unsigned int MaxSize;
  466. if ((f = input[i] = Open(arg,"r", 0)) == NULL) {
  467. mesg("cannot open ", arg);
  468. done();
  469. }
  470. inputfile[i] = ((PFILEMAP)f)->Base;
  471. inputfilesize[i] = ((PFILEMAP)f)->Size;
  472. inputfilep[i] = inputfile[i];
  473. inputfileleft[i] = inputfilesize[i];
  474. //
  475. // Lets assume that lines are 30 characters on average
  476. //
  477. MaxSize = inputfilesize[i] / 30;
  478. p = (struct line *)MALLOC((3+MaxSize)*sizeof(line));
  479. for (j=0; h=readhash(i);) {
  480. j++;
  481. if ( j >= MaxSize ) {
  482. MaxSize += CHUNKSIZE;
  483. p = (struct line *)REALLOC((char *)p,(MaxSize+3)*sizeof(line));
  484. }
  485. p[j].value = h;
  486. }
  487. p = (struct line *)REALLOC((char *)p,(j+3+1)*sizeof(line));
  488. len[i] = j;
  489. file[i] = p;
  490. //Close(input[i]);
  491. }
  492. void
  493. prune()
  494. {
  495. register unsigned int i,j;
  496. for (pref=0;pref<len[0]&&pref<len[1]&&
  497. file[0][pref+1].value==file[1][pref+1].value;
  498. pref++ ) ;
  499. for (suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
  500. file[0][len[0]-suff].value==file[1][len[1]-suff].value;
  501. suff++) ;
  502. for (j=0;j<2;j++) {
  503. sfile[j] = file[j]+pref;
  504. slen[j] = len[j]-pref-suff;
  505. for (i=0;i<=slen[j];i++)
  506. sfile[j][i].serial = i;
  507. }
  508. }
  509. void
  510. equiv(
  511. struct line *a,
  512. int n,
  513. struct line *b,
  514. int m,
  515. int *c
  516. )
  517. {
  518. register int i, j;
  519. i = j = 1;
  520. while (i<=n && j<=m) {
  521. if (a[i].value <b[j].value)
  522. a[i++].value = 0;
  523. else if (a[i].value == b[j].value)
  524. a[i++].value = j;
  525. else
  526. j++;
  527. }
  528. while (i <= n)
  529. a[i++].value = 0;
  530. b[m+1].value = 0;
  531. j = 0;
  532. while (++j <= m) {
  533. c[j] = -b[j].serial;
  534. while (b[j+1].value == b[j].value) {
  535. j++;
  536. c[j] = b[j].serial;
  537. }
  538. }
  539. c[j] = -1;
  540. }
/* Original argument vector, kept so -h can hand it unchanged to DIFFH. */
char **args;
/*
 * main - parse options, then run the comparison.
 *
 * Options (may be combined in one word, e.g. -be):
 *   -e  opt = -1          -f  opt = 1          -b  sets bflag
 *   -h  run DIFFH with the original arguments
 *   -s  sets slmFlag (done() then adds 10 to the exit status)
 *   -o  the next argument names the output file
 * Other option letters are ignored.  Exactly two file arguments must remain.
 *
 * The statement order below matters: member, class and the line tables
 * share storage (see the comments on the globals), so each array is only
 * valid between the point where it is built and the point where the
 * storage underneath it is reallocated or freed.
 *
 * The function does not return; every path ends in done().
 */
void
__cdecl
main(
    int argc,
    char **argv
    )
{
    register int k;
    args = argv;
    OutputFile = stdout; // Init to default
    argc--;
    argv++;
    while (argc > 0 && argv[0][0]=='-') {
        BOOL Skip = FALSE;
        for (k=1; (!Skip) && argv[0][k]; k++) {
            switch (argv[0][k]) {
            case 'e':
                opt = -1;
                break;
            case 'f':
                opt = 1;
                break;
            case 'b':
                bflag = 1;
                break;
            case 'h':
                _execvp(DIFFH, args);
                mesg("cannot run diffh",empty);
                done();
                /* done() exits, so control never falls into case 's' */
            case 's':
                slmFlag = 1;
                break;
            case 'o':
                //
                // Dirty hack: Redirection is not working, so if
                // this flag is present, output goes to
                // file.
                //
                argc--;
                argv++;
                /* need the output name plus the two files to compare */
                if (argc < 3) {
                    mesg("arg count",empty);
                    done();
                }
                SetOutputFile(argv[0]);
                /* stop scanning: argv[0] is now the file name, not options */
                Skip = TRUE;
                break;
            }
        }
        argc--;
        argv++;
    }
    if (argc!=2) {
        mesg("arg count",empty);
        done();
    }
#ifndef MSDOS
    dummy = malloc(1);
#endif
    _setmode(_fileno(OutputFile), O_BINARY);
    _setmode(_fileno(stdin),O_TEXT);
    /* resolve "directory" and "-" arguments, each against the other */
    filename(&argv[0], &argv[1]);
    filename(&argv[1], &argv[0]);
    /* hash every line of both files */
    prepare(0, argv[0]);
    prepare(1, argv[1]);
    prune();
    /* sort the pruned middle parts by hash value (entry 0 is not sorted) */
    qsort((char *) (sfile[0] + 1), slen[0], sizeof(struct line), sortcmp);
    qsort((char *) (sfile[1] + 1), slen[1], sizeof(struct line), sortcmp);
    /* member is built in place over file[1], then shrunk to size */
    member = (int *)file[1];
    equiv(sfile[0], slen[0], sfile[1], slen[1], member);
    member = (int *)REALLOC((char *)member,(slen[1]+2)*sizeof(int));
    /* class is built in place over file[0], then shrunk to size */
    class = (int *)file[0];
    unsort(sfile[0], slen[0], class);
    class = (int *)REALLOC((char *)class,(slen[0]+2)*sizeof(int));
    klist = (unsigned *)MALLOC((slen[0]+2)*sizeof(int));
    /* start the candidate pool with a single one-element array */
    clist = (struct cand **)MALLOC(sizeof(struct cand *));
    clist[0] = (struct cand *) MALLOC(sizeof(struct cand));
    clistcnt = 1;
    k = stone(class, slen[0], member, klist);
    FREE((char *)member);
    FREE((char *)class);
    J = (int *)MALLOC((len[0]+2)*sizeof(int));
    /* klist[k] heads the chain of the longest subsequence found */
    unravel(klist[k]);
    for (k = 0; k < clistcnt; ++k)
        FREE((char *)(clist[k]));
    FREE((char *)clist);
    FREE((char *)klist);
    ixold = (char **)MALLOC((len[0]+2)*sizeof(char *));
    ixnew = (char **)MALLOC((len[1]+2)*sizeof(char *));
    check(argv);
    output(argv);
    /* normal completion: exit status is 0 (same) or 1 (different) */
    status = anychange;
    Close(input[0]);
    Close(input[1]);
    done();
}
  638. stone(
  639. int *a,
  640. unsigned n,
  641. int *b,
  642. unsigned *c
  643. )
  644. {
  645. register int i, k,y;
  646. int j, l;
  647. unsigned oldc, tc;
  648. int oldl;
  649. k = 0;
  650. c[0] = newcand(0,0,0);
  651. for (i=1; i<=(int)n; i++) {
  652. j = a[i];
  653. if (j==0)
  654. continue;
  655. y = -b[j];
  656. oldl = 0;
  657. oldc = c[0];
  658. do {
  659. if (y <= CLIST(oldc).y)
  660. continue;
  661. l = SEARCH(c, k, y);
  662. if (l!=oldl+1)
  663. oldc = c[l-1];
  664. if (l<=k) {
  665. if (CLIST(c[l]).y <= y)
  666. continue;
  667. tc = c[l];
  668. c[l] = newcand(i,y,oldc);
  669. oldc = tc;
  670. oldl = l;
  671. } else {
  672. c[l] = newcand(i,y,oldc);
  673. k++;
  674. break;
  675. }
  676. } while ((y=b[++j]) > 0);
  677. }
  678. return(k);
  679. }
  680. unsigned
  681. newcand(
  682. int x,
  683. int y,
  684. unsigned pred
  685. )
  686. {
  687. register struct cand *q;
  688. ++clen;
  689. if ((int)CLISTDIV(clen) > (clistcnt - 1)) {
  690. // printf("diff: surpassing segment boundry..\n");
  691. clist = (struct cand **) REALLOC((char *) clist,
  692. ++clistcnt * sizeof(struct cand *));
  693. clist[clistcnt-1] = (struct cand *) MALLOC(sizeof(struct cand));
  694. }
  695. clist[clistcnt-1] = (struct cand *)
  696. REALLOC((char *)(clist[clistcnt-1]),
  697. (1 + CLISTMOD(clen)) * sizeof(struct cand));
  698. q = &CLIST(clen - 1);
  699. q->x = x;
  700. q->y = y;
  701. q->pred = pred;
  702. return(clen-1);
  703. }
  704. search(
  705. unsigned *c,
  706. int k,
  707. int y
  708. )
  709. {
  710. register int i, j;
  711. int l;
  712. int t;
  713. //if(CLIST(c[k]).y<y) /*quick look for typical case*/
  714. // return(k+1);
  715. i = 0;
  716. j = k+1;
  717. while ((l=(i+j)/2) > i) {
  718. t = CLIST(c[l]).y;
  719. if (t > y)
  720. j = l;
  721. else if (t < y)
  722. i = l;
  723. else
  724. return(l);
  725. }
  726. return(l+1);
  727. }
  728. void
  729. unravel(
  730. unsigned p
  731. )
  732. {
  733. register unsigned int i;
  734. register struct cand *q;
  735. for (i=0; i<=len[0]; i++)
  736. J[i] = i<=pref ? i:
  737. i>len[0]-suff ? i+len[1]-len[0]:
  738. 0;
  739. for (q=&CLIST(p);q->y!=0;q=&CLIST(q->pred)) {
  740. J[q->x+pref] = q->y+pref;
  741. }
  742. }
  743. /* check does double duty:
  744. 1. ferret out any fortuitous correspondences due
  745. to confounding by hashing (which result in "jackpot")
  746. 2. collect random access indexes to the two files */
  747. void
  748. check(
  749. char **argv
  750. )
  751. {
  752. register unsigned int i, j;
  753. int jackpot;
  754. char c,d;
  755. //input[0] = fopen(argv[0],"r");
  756. //input[1] = fopen(argv[1],"r");
  757. inputfilep[0] = inputfile[0];
  758. inputfilep[1] = inputfile[1];
  759. inputfileleft[0] = inputfilesize[0];
  760. inputfileleft[1] = inputfilesize[1];
  761. j = 1;
  762. ixold[0] = ixnew[0] = 0L;
  763. ixold[0] = inputfilep[0];
  764. ixnew[0] = inputfilep[1];
  765. //ixold[1] = inputfilep[0];
  766. //ixnew[1] = inputfilep[1];
  767. jackpot = 0;
  768. for (i=1;i<=len[0];i++) {
  769. if (J[i]==0) {
  770. ixold[i] = skipline(0);
  771. continue;
  772. }
  773. while (j<(unsigned)J[i]) {
  774. ixnew[j] = skipline(1);
  775. j++;
  776. }
  777. for (;;) {
  778. c = GetChar(0);
  779. d = GetChar(1);
  780. if (bflag && isspace(c) && isspace(d)) {
  781. do {
  782. if (c=='\n') break;
  783. } while (isspace(c=GetChar(0)));
  784. do {
  785. if (d=='\n') break;
  786. } while (isspace(d=GetChar(1)));
  787. }
  788. if (c!=d) {
  789. jackpot++;
  790. J[i] = 0;
  791. if (c!='\n')
  792. skipline(0);
  793. if (d!='\n')
  794. skipline(1);
  795. break;
  796. }
  797. if (c=='\n')
  798. break;
  799. }
  800. ixold[i] = inputfilep[0];
  801. ixnew[j] = inputfilep[1];
  802. j++;
  803. }
  804. for (;j<=len[1];j++) {
  805. ixnew[j] = skipline(1);
  806. }
  807. //fclose(input[0]);
  808. //fclose(input[1]);
  809. /*
  810. if(jackpot)
  811. mesg("jackpot",empty);
  812. */
  813. }
  814. char *
  815. skipline(
  816. int f
  817. )
  818. {
  819. while (GetChar(f) != '\n' )
  820. ;
  821. return inputfilep[f];
  822. }
  823. void
  824. output(
  825. char **argv
  826. )
  827. {
  828. int m;
  829. register int i0, i1, j1;
  830. int j0;
  831. input[0] = Open(argv[0],"r", 0);
  832. input[1] = Open(argv[1],"r", 0);
  833. m = len[0];
  834. J[0] = 0;
  835. J[m+1] = len[1]+1;
  836. if (opt!=-1) for (i0=1;i0<=m;i0=i1+1) {
  837. while (i0<=m&&J[i0]==J[i0-1]+1) i0++;
  838. j0 = J[i0-1]+1;
  839. i1 = i0-1;
  840. while (i1<m&&J[i1+1]==0) i1++;
  841. j1 = J[i1+1]-1;
  842. J[i1] = j1;
  843. change(i0,i1,j0,j1);
  844. } else for (i0=m;i0>=1;i0=i1-1) {
  845. while (i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
  846. j0 = J[i0+1]-1;
  847. i1 = i0+1;
  848. while (i1>1&&J[i1-1]==0) i1--;
  849. j1 = J[i1-1]+1;
  850. J[i1] = j1;
  851. change(i1,i0,j1,j0);
  852. }
  853. if (m==0)
  854. change(1,0,1,len[1]);
  855. }
  856. void
  857. change(
  858. int a,
  859. int b,
  860. int c,
  861. int d
  862. )
  863. {
  864. if (a>b&&c>d)
  865. return;
  866. anychange = 1;
  867. if (opt!=1) {
  868. range(a,b,",");
  869. putc(a>b?'a':c>d?'d':'c', OutputFile);
  870. if (opt!=-1)
  871. range(c,d,",");
  872. } else {
  873. putc(a>b?'a':c>d?'d':'c', OutputFile);
  874. range(a,b," ");
  875. }
  876. putc('\r',OutputFile);
  877. putc('\n',OutputFile);
  878. if (opt==0) {
  879. fetch(ixold,a,b,0,"< ");
  880. if (a<=b&&c<=d)
  881. fputs("---\r\n", OutputFile);
  882. }
  883. fetch(ixnew,c,d,1,opt==0?"> ":empty);
  884. if (opt!=0&&c<=d)
  885. fputs(".",OutputFile);
  886. }
  887. void
  888. range(
  889. int a,
  890. int b,
  891. char *separator
  892. )
  893. {
  894. fprintf(OutputFile,"%d", a>b?b:a);
  895. if (a<b)
  896. fprintf(OutputFile,"%s%d", separator, b);
  897. }
  898. void
  899. fetch(
  900. char **f,
  901. int a,
  902. int b,
  903. int lb,
  904. char *s
  905. )
  906. {
  907. register int i, j;
  908. register int nc;
  909. register char c;
  910. char *p;
  911. UNREFERENCED_PARAMETER( lb );
  912. for (i=a;i<=b;i++) {
  913. p = f[i-1];
  914. nc = (int)(f[i]-f[i-1]);
  915. fputs(s, OutputFile);
  916. for (j=0;j<nc;j++) {
  917. c = *p++;
  918. if (c == '\n' ) {
  919. //putc( '\r', OutputFile );
  920. putc( '\n', OutputFile );
  921. if ( p >= f[i] ) break;
  922. } else {
  923. putc(c, OutputFile);
  924. }
  925. }
  926. }
  927. }
  928. /* hashing has the effect of
  929. * arranging line in 7-bit bytes and then
  930. * summing 1-s complement in 16-bit hunks
  931. */
  932. readhash(
  933. int f
  934. )
  935. {
  936. register unsigned shift;
  937. register char t;
  938. register int space;
  939. long sum = 1L;
  940. space = 0;
  941. if (!bflag) for (shift=0;(t=GetChar(f))!='\n';shift+=7) {
  942. if (t==(char)EOF && EndOfFile(f) )
  943. return(0);
  944. sum += (long)t << (shift%=HALFLONG);
  945. } else for (shift=0;;) {
  946. switch (t=GetChar(f)) {
  947. case '\t':
  948. case ' ':
  949. case '\r':
  950. space++;
  951. continue;
  952. default:
  953. if ( t==(char)EOF && EndOfFile(f) ) {
  954. return(0);
  955. }
  956. if (space) {
  957. shift += 7;
  958. space = 0;
  959. }
  960. sum += (long)t << (shift%=HALFLONG);
  961. shift += 7;
  962. continue;
  963. case '\n':
  964. break;
  965. }
  966. break;
  967. }
  968. sum = low(sum) + high(sum);
  969. return((short)low(sum) + (short)high(sum));
  970. }
  971. void
  972. mesg(
  973. char *s,
  974. char *t
  975. )
  976. {
  977. fprintf(stderr,"diff: %s%s\n",s,t);
  978. }
  979. void
  980. SetOutputFile (
  981. char *FileName
  982. )
  983. {
  984. OutputFile = fopen(FileName, "ab");
  985. if (!OutputFile) {
  986. mesg("Unable to open: ", FileName);
  987. done();
  988. }
  989. }
  990. PVOID
  991. Open(
  992. const char *FileName,
  993. const char *Mode,
  994. DWORD Size
  995. )
  996. {
  997. PFILEMAP FileMap = NULL;
  998. FileMap = (PFILEMAP)malloc(sizeof(FILEMAP));
  999. if ( FileMap ) {
  1000. FileMap->Access = 0;
  1001. FileMap->Share = FILE_SHARE_READ | FILE_SHARE_WRITE;
  1002. while ( *Mode ) {
  1003. switch ( *Mode ) {
  1004. case 'r':
  1005. FileMap->Access |= GENERIC_READ;
  1006. FileMap->Create = OPEN_EXISTING;
  1007. break;
  1008. case 'w':
  1009. FileMap->Access |= GENERIC_WRITE;
  1010. FileMap->Create = CREATE_ALWAYS;
  1011. break;
  1012. case 'a':
  1013. FileMap->Access += GENERIC_WRITE;
  1014. FileMap->Create = OPEN_ALWAYS;
  1015. break;
  1016. case '+':
  1017. FileMap->Access |= (GENERIC_READ | GENERIC_WRITE);
  1018. break;
  1019. default:
  1020. break;
  1021. }
  1022. Mode++;
  1023. }
  1024. FileMap->FileHandle = CreateFile(
  1025. FileName,
  1026. FileMap->Access,
  1027. FileMap->Share,
  1028. NULL,
  1029. FileMap->Create,
  1030. FILE_ATTRIBUTE_NORMAL,
  1031. NULL
  1032. );
  1033. if ( FileMap->FileHandle != INVALID_HANDLE_VALUE ) {
  1034. FileMap->Size = GetFileSize( FileMap->FileHandle, NULL );
  1035. FileMap->Allocated = (FileMap->Access == GENERIC_READ) ? FileMap->Size : Size;
  1036. FileMap->MapHandle = CreateFileMapping(
  1037. FileMap->FileHandle,
  1038. NULL,
  1039. (FileMap->Access & GENERIC_WRITE) ? PAGE_READWRITE : PAGE_READONLY,
  1040. 0,
  1041. (FileMap->Access == GENERIC_READ) ? 0 : (DWORD)Size,
  1042. NULL
  1043. );
  1044. if ( FileMap->MapHandle ) {
  1045. FileMap->Base = MapViewOfFile(
  1046. FileMap->MapHandle,
  1047. (FileMap->Access & GENERIC_WRITE) ? FILE_MAP_ALL_ACCESS : FILE_MAP_READ,
  1048. 0,
  1049. 0,
  1050. (FileMap->Access == GENERIC_READ) ? 0 : Size
  1051. );
  1052. if ( FileMap->Base ) {
  1053. if ( FileMap->Create == OPEN_ALWAYS ) {
  1054. FileMap->Offset = FileMap->Size;
  1055. }
  1056. goto Done;
  1057. }
  1058. CloseHandle( FileMap->MapHandle );
  1059. }
  1060. CloseHandle( FileMap->FileHandle );
  1061. }
  1062. free( FileMap );
  1063. FileMap = NULL;
  1064. }
  1065. Done:
  1066. return (PVOID)FileMap;
  1067. }
  1068. int
  1069. Close (
  1070. PVOID Map
  1071. )
  1072. {
  1073. PFILEMAP FileMap = (PFILEMAP)Map;
  1074. UnmapViewOfFile( FileMap->Base );
  1075. CloseHandle( FileMap->MapHandle );
  1076. if ( FileMap->Access & GENERIC_WRITE ) {
  1077. SetFilePointer( FileMap->FileHandle,
  1078. FileMap->Size,
  1079. 0,
  1080. FILE_BEGIN );
  1081. SetEndOfFile( FileMap->FileHandle );
  1082. }
  1083. CloseHandle( FileMap->FileHandle );
  1084. free( FileMap );
  1085. return 0;
  1086. }