Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1278 lines
31 KiB

  1. /* SORT
  2. * %Z% %M% %I% %D% %Q%
  3. *
  4. * Copyright (C) Microsoft Corporation, 1983
  5. *
  6. * This Module contains Proprietary Information of Microsoft
  7. * Corporation and AT&T, and should be treated as Confidential.
  8. */
  9. /*** diff - differential file comparison
  10. *
  11. * MODIFICATION HISTORY
  12. * M000 18 Apr 83 andyp
  13. * - 3.0 upgrade. No changes.
  14. * M001 22 Mar 84 vich
  15. * - Don't try to unlink NULL. Trying to do so doesn't break anything,
  16. * but it makes kernel debugging a pain due to faults in user mode.
  17. * M002 ??
  18. * - added the MSDOS flag.
  19. * M006 31 Mar 86 craigwi
  20. * - for the MSDOS version, fixed -b feature so that it ignores all \r
  21. * M010 15 Dec 86 craigwi
  22. * - after printing the result, diff aborts with status = 2 if any error
  23. * occurred on stdout.
  24. * M013 21 Mar 88 jangr
  25. * - added -s flag to return SLM specific error statuses:
  26. * 10 files identical
  27. * 11 files different
  28. * 12 other errors
  29. * 13 write error
  30. * M017 27 Oct 88 alanba
  31. * - changed messages to not specify using the -h option and giving
  32. * a clear error message if being executed from within SLM.
  33. */
  34. /*
  35. * Uses an algorithm due to Harold Stone, which finds
  36. * a pair of longest identical subsequences in the two
  37. * files.
  38. *
  39. * The major goal is to generate the match vector J.
  40. * J[i] is the index of the line in file1 corresponding
  41. * to line i file0. J[i] = 0 if there is no
  42. * such line in file1.
  43. *
  44. * Lines are hashed so as to work in core. All potential
  45. * matches are located by sorting the lines of each file
  46. * on the hash (called value). In particular, this
  47. * collects the equivalence classes in file1 together.
  48. * Subroutine equiv replaces the value of each line in
  49. * file0 by the index of the first element of its
  50. * matching equivalence in (the reordered) file1.
  51. * To save space equiv squeezes file1 into a single
  52. * array member in which the equivalence classes
  53. * are simply concatenated, except that their first
  54. * members are flagged by changing sign.
  55. *
  56. * Next the indices that point into member are unsorted into
  57. * array class according to the original order of file0.
  58. *
  59. * The cleverness lies in routine stone. This marches
  60. * through the lines of file0, developing a vector klist
  61. * of "k-candidates". At step i a k-candidate is a matched
  62. * pair of lines x,y (x in file0 y in file1) such that
  63. * there is a common subsequence of lenght k
  64. * between the first i lines of file0 and the first y
  65. * lines of file1, but there is no such subsequence for
  66. * any smaller y. x is the earliest possible mate to y
  67. * that occurs in such a subsequence.
  68. *
  69. * Whenever any of the members of the equivalence class of
  70. * lines in file1 matable to a line in file0 has serial number
  71. * less than the y of some k-candidate, that k-candidate
  72. * with the smallest such y is replaced. The new
  73. * k-candidate is chained (via pred) to the current
  74. * k-1 candidate so that the actual subsequence can
  75. * be recovered. When a member has serial number greater
  76. * that the y of all k-candidates, the klist is extended.
  77. * At the end, the longest subsequence is pulled out
  78. * and placed in the array J by unravel.
  79. *
  80. * With J in hand, the matches there recorded are
  81. * checked against reality to assure that no spurious
  82. * matches have crept in due to hashing. If they have,
  83. * they are broken, and "jackpot " is recorded--a harmless
  84. * matter except that a true match for a spuriously
  85. * mated line may now be unnecessarily reported as a change.
  86. *
  87. * Much of the complexity of the program comes simply
  88. * from trying to minimize core utilization and
  89. * maximize the range of doable problems by dynamically
  90. * allocating what is needed and reusing what is not.
  91. * The core requirements for problems larger than somewhat
  92. * are (in words) 2*length(file0) + length(file1) +
  93. * 3*(number of k-candidates installed), typically about
  94. * 6n words for files of length n.
  95. */
  96. #include <stdio.h>
  97. #include <io.h>
  98. #include <stdlib.h>
  99. #include <ctype.h>
  100. #include <sys/types.h>
  101. #include <sys/stat.h>
  102. #include <excpt.h>
  103. #include <process.h>
  104. #include <fcntl.h>
  105. #ifdef _OS2_SUBSYS_
  106. #define INCL_DOSSIGNALS
  107. #include <os2.h>
  108. #else
  109. #include <nt.h>
  110. #include <ntrtl.h>
  111. #include <nturtl.h>
  112. #include <windows.h>
  113. /*
  114. * Signal subtypes for XCPT_SIGNAL
  115. */
  116. #define XCPT_SIGNAL 0xC0010003
  117. #define XCPT_SIGNAL_INTR 1
  118. #define XCPT_SIGNAL_KILLPROC 3
  119. #define XCPT_SIGNAL_BREAK 4
  120. #endif
  121. #define isslash(c) (c=='/'||c=='\\')
  122. #define DIFFH "diffh.exe"
  123. #ifndef _MAX_PATH
  124. #if defined(LFNMAX) && defined(LPNMAX)
  125. #define _MAX_PATH (LFNMAX + LPNMAX + 1)
  126. #else
  127. #define _MAX_PATH (80)
  128. #endif
  129. #endif
  130. #ifndef _HEAP_MAXREQ
  131. #define _HEAP_MAXREQ ((~(unsigned int) 0) - (unsigned) 32)
  132. #endif
  133. #define HALFLONG 16
  134. #define low(x) (x&((1L<<HALFLONG)-1))
  135. #define high(x) (x>>HALFLONG)
  136. struct cand **clist; /* merely a free storage pot for candidates */
  137. int clistcnt = 0; /* number of arrays of struct cand in clist */
  138. unsigned clen = 0; /* total number of struct cand in all clist arrays */
  139. /*
  140. Number of struct cand in one clist array
  141. (the largest power of 2 smaller than (64k / sizeof(struct cand))
  142. is 2^13. Thus, these gross hacks to make the array references
  143. more efficient, and still permit huge files.
  144. */
  145. #define CLISTSEG (0x2000)
  146. #define CLISTDIV(x) ((x) >> 13)
  147. #define CLISTMOD(x) ((x) & (CLISTSEG - 1))
  148. #define CLIST(x) (clist[CLISTDIV(x)][CLISTMOD(x)])
  149. PVOID input[2];
  150. char *inputfile[2];
  151. int inputfilesize[2];
  152. char *inputfilep[2];
  153. int inputfileleft[2];
  154. #define EndOfFile(x) (inputfileleft[x] <= 0)
  155. #define GetChar(x) ((char)((inputfileleft[x]--) ? \
  156. (*(inputfilep[x])++) : \
  157. EOF))
  158. #define SEARCH(c1,k1,y1) (CLIST(c1[k1]).y < y1) ? (k1+1) : search(c1,k1,y1)
  159. #if 0
  160. char
  161. GetChar( int x );
  162. char
  163. GetChar( int x ) {
  164. if ( inputfileleft[x]-- ) {
  165. return *(inputfilep[x])++;
  166. } else {
  167. return EOF;
  168. }
  169. }
  170. #endif
  171. struct cand {
  172. int x;
  173. int y;
  174. unsigned pred;
  175. } cand;
  176. struct line {
  177. int serial;
  178. int value;
  179. } *file[2], line;
  180. typedef struct _FILEMAP *PFILEMAP;
  181. typedef struct _FILEMAP {
  182. HANDLE FileHandle;
  183. HANDLE MapHandle;
  184. DWORD Access;
  185. DWORD Create;
  186. DWORD Share;
  187. PVOID Base;
  188. DWORD Offset;
  189. DWORD Size;
  190. DWORD Allocated;
  191. } FILEMAP;
  192. PVOID
  193. Open(
  194. const char *FileName,
  195. const char *Mode,
  196. DWORD Size
  197. );
  198. int
  199. Close (
  200. PVOID Map
  201. );
  202. /* fn prototypes gen'd from cl -Zg */
  203. DECLSPEC_NORETURN void done(void);
  204. char *talloc(unsigned n);
  205. char *ralloc(char *p,unsigned n);
  206. void myfree( char *p );
  207. void noroom(void);
  208. int __cdecl sortcmp(void const *first, void const *second);
  209. void unsort(struct line *f,unsigned l,int *b);
  210. void filename(char * *pa1,char * *pa2);
  211. void prepare(int i,char *arg);
  212. void prune(void);
  213. void equiv(struct line *a,int n,struct line *b,int m,int *c);
  214. int stone(int *a,unsigned n,int *b,unsigned *c);
  215. unsigned newcand(int x,int y,unsigned pred);
  216. int search(unsigned *c,int k,int y);
  217. void unravel(unsigned p);
  218. void check(char * *argv);
  219. char * skipline(int f);
  220. void output(char * *argv);
  221. void change(int a,int b,int c,int d);
  222. void range(int a,int b,char *separator);
  223. void fetch(char * *f,int a,int b, int lb,char *s);
  224. int readhash( int f);
  225. void mesg(char *s,char *t);
  226. void SetOutputFile (char *FileName);
  227. unsigned len[2];
  228. struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
  229. unsigned slen[2];
  230. unsigned int pref, suff; /*length of prefix and suffix*/
  231. int *class; /*will be overlaid on file[0]*/
  232. int *member; /*will be overlaid on file[1]*/
  233. unsigned *klist; /*will be overlaid on file[0] after class*/
  234. int *J; /*will be overlaid on class*/
  235. char * *ixold; /*will be overlaid on klist*/
  236. char * *ixnew; /*will be overlaid on file[1]*/
  237. int opt; /* -1,0,1 = -e,normal,-f */
  238. int status = 2; /*abnormal status; set to 0/1 just before successful exit */
  239. int anychange = 0;
  240. char *empty = "";
  241. int bflag;
  242. int slmFlag;
  243. FILE* OutputFile;
  244. char *tempfile; /*used when comparing against std input*/
  245. #ifndef MSDOS
  246. char *dummy; /*used in resetting storage search ptr*/
  247. #endif
  248. void
  249. done()
  250. {
  251. if (tempfile != NULL)
  252. _unlink(tempfile);
  253. if (OutputFile && OutputFile != stdout) {
  254. fclose(OutputFile);
  255. }
  256. exit(10*slmFlag + status);
  257. }
  258. #define MALLOC(n) talloc(n)
  259. #define REALLOC(p,n) ralloc(p,n)
  260. #define FREE(p) myfree(p)
  261. // #define DEBUG_MALLOC
  262. #ifdef DEBUG_MALLOC
  263. #define MALLOC_SIG 0xABCDEF00
  264. #define FREE_SIG 0x00FEDCBA
  265. typedef struct _MEMBLOCK {
  266. DWORD Sig;
  267. } MEMBLOCK, *PMEMBLOCK;
  268. #endif
  269. char *
  270. talloc(
  271. unsigned n
  272. )
  273. {
  274. #ifdef DEBUG_MALLOC
  275. PMEMBLOCK mem;
  276. char DbgB[128];
  277. //sprintf(DbgB, "MALLOC size %d -> ", n );
  278. //OutputDebugString( DbgB );
  279. mem = malloc( n + sizeof(MEMBLOCK)+1 );
  280. if ( !mem ) {
  281. noroom();
  282. }
  283. mem->Sig = MALLOC_SIG;
  284. //sprintf(DbgB, "%lX\n", mem );
  285. //OutputDebugString( DbgB );
  286. return (char *)((PBYTE)mem + sizeof(MEMBLOCK));
  287. #else
  288. register char *p;
  289. p = malloc(++n);
  290. if (p == NULL) {
  291. noroom();
  292. }
  293. return p;
  294. #endif
  295. }
  296. char *
  297. ralloc(
  298. char *p,
  299. unsigned n
  300. )
  301. {
  302. #ifdef DEBUG_MALLOC
  303. PMEMBLOCK mem;
  304. char DbgB[128];
  305. mem = (PMEMBLOCK)((PBYTE)p - sizeof(MEMBLOCK));
  306. //sprintf(DbgB, "REALLOC: %lX, %d -> ", mem, n );
  307. //OutputDebugString( DbgB );
  308. if ( mem->Sig != MALLOC_SIG ) {
  309. sprintf(DbgB, "REALLOC ERROR: Reallocating %lX\n", mem );
  310. OutputDebugString( DbgB );
  311. }
  312. mem->Sig = FREE_SIG;
  313. mem = (PMEMBLOCK)realloc(mem, n + sizeof(MEMBLOCK)+1);
  314. if (!mem) {
  315. noroom();
  316. }
  317. mem->Sig = MALLOC_SIG;
  318. //sprintf(DbgB, "%lX\n", mem );
  319. //OutputDebugString( DbgB );
  320. return (char *)((PBYTE)mem + sizeof(MEMBLOCK));
  321. #else
  322. void *pv = realloc(p, ++n);
  323. if (!pv) {
  324. noroom();
  325. }
  326. return(pv);
  327. #endif
  328. }
  329. void
  330. myfree(
  331. char *p
  332. )
  333. {
  334. #ifdef DEBUG_MALLOC
  335. PMEMBLOCK mem;
  336. char DbgB[128];
  337. mem = (PMEMBLOCK)((PBYTE)p - sizeof(MEMBLOCK));
  338. //sprintf(DbgB, "FREE: %lX -> ", mem );
  339. //OutputDebugString( DbgB);
  340. if ( mem->Sig != MALLOC_SIG ) {
  341. sprintf(DbgB, "\n\tFREE ERROR: FREEING %lX\n", mem );
  342. OutputDebugString( DbgB );
  343. }
  344. mem->Sig = FREE_SIG;
  345. free(mem);
  346. //sprintf(DbgB, "Ok\n", mem );
  347. //OutputDebugString( DbgB);
  348. #else
  349. if (p) {
  350. free(p);
  351. }
  352. #endif
  353. }
  354. void
  355. noroom()
  356. {
  357. if (slmFlag == 1) {
  358. mesg("file too big; do delfile filename/addfile filename, or",empty);
  359. mesg("reduce the size of the file.",empty);
  360. done();
  361. }
  362. mesg("files too big",empty); /* end M017 */
  363. done();
  364. }
  365. int
  366. __cdecl
  367. sortcmp(
  368. const void *first,
  369. const void *second
  370. )
  371. {
  372. struct line *one = (struct line *)first;
  373. struct line *two = (struct line *)second;
  374. if (one->value < two->value)
  375. return -1;
  376. else if (one->value > two->value)
  377. return 1;
  378. else if (one->serial < two->serial)
  379. return -1;
  380. else if (one->serial > two->serial)
  381. return 1;
  382. else
  383. return 0;
  384. }
  385. void
  386. unsort(
  387. struct line *f,
  388. unsigned l,
  389. int *b
  390. )
  391. {
  392. register int *a;
  393. register unsigned int i;
  394. a = (int *)MALLOC((l+1)*sizeof(int));
  395. if (a) {
  396. memset(a, 0, (l+1)*sizeof(int));
  397. for (i=1;i<=l;i++)
  398. a[f[i].serial] = f[i].value;
  399. for (i=1;i<=l;i++)
  400. b[i] = a[i];
  401. FREE((char *)a);
  402. }
  403. }
  404. void
  405. filename(
  406. char **pa1,
  407. char **pa2
  408. )
  409. {
  410. register char *a1, *b1, *a2;
  411. char buf[BUFSIZ];
  412. struct _stat stbuf;
  413. int i, f;
  414. a1 = *pa1;
  415. a2 = *pa2;
  416. if (_stat(a1,&stbuf)!=-1 && ((stbuf.st_mode&S_IFMT)==S_IFDIR)) {
  417. b1 = *pa1 = MALLOC((unsigned) _MAX_PATH);
  418. while (*b1++ = *a1++) ;
  419. if (isslash(b1[-2]))
  420. b1--;
  421. else
  422. b1[-1] = '/';
  423. a1 = b1;
  424. if ( a2[1] == ':' ) {
  425. a2 += 2;
  426. }
  427. while (*a1++ = *a2++)
  428. if (*a2 && !isslash(*a2) && isslash(a2[-1])) /*M002*/
  429. a1 = b1;
  430. } else if (a1[0]=='-'&&a1[1]==0&&tempfile==NULL) {
  431. /* the signal handling in original source
  432. **
  433. ** signal(SIGINT,done);
  434. ** #ifndef MSDOS
  435. ** signal(SIGHUP,done);
  436. ** signal(SIGPIPE,done);
  437. ** signal(SIGTERM,done);
  438. ** #endif
  439. */
  440. if ((*pa1 = tempfile = _tempnam(getenv("TEMP"), "d")) == NULL) {
  441. mesg("cannot create temporary file", "");
  442. done();
  443. }
  444. if ((f = _open(tempfile,O_WRONLY|O_CREAT|O_TRUNC, 0600)) < 0) {
  445. mesg("cannot create ",tempfile);
  446. done();
  447. }
  448. while ((i=_read(0,buf,BUFSIZ))>0)
  449. _write(f,buf,i);
  450. _close(f);
  451. }
  452. }
  453. void
  454. prepare(
  455. int i,
  456. char *arg
  457. )
  458. {
  459. #define CHUNKSIZE 100
  460. register struct line *p;
  461. register unsigned j;
  462. register int h;
  463. char *c;
  464. PVOID f;
  465. unsigned int MaxSize;
  466. f = Open(arg,"r", 0);
  467. if (!f) {
  468. mesg("cannot open ", arg);
  469. done();
  470. }
  471. input[i] = f;
  472. inputfile[i] = ((PFILEMAP)f)->Base;
  473. inputfilesize[i] = ((PFILEMAP)f)->Size;
  474. inputfilep[i] = inputfile[i];
  475. inputfileleft[i] = inputfilesize[i];
  476. //
  477. // Lets assume that lines are 30 characters on average
  478. //
  479. MaxSize = inputfilesize[i] / 30;
  480. p = (struct line *)MALLOC((3+MaxSize)*sizeof(line));
  481. for (j=0; h=readhash(i);) {
  482. j++;
  483. if ( j >= MaxSize ) {
  484. MaxSize += CHUNKSIZE;
  485. p = (struct line *)REALLOC((char *)p,(MaxSize+3)*sizeof(line));
  486. }
  487. p[j].value = h;
  488. }
  489. p = (struct line *)REALLOC((char *)p,(j+3+1)*sizeof(line));
  490. len[i] = j;
  491. file[i] = p;
  492. //Close(input[i]);
  493. }
  494. void
  495. prune()
  496. {
  497. register unsigned int i,j;
  498. for (pref=0;pref<len[0]&&pref<len[1]&&
  499. file[0][pref+1].value==file[1][pref+1].value;
  500. pref++ ) ;
  501. for (suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
  502. file[0][len[0]-suff].value==file[1][len[1]-suff].value;
  503. suff++) ;
  504. for (j=0;j<2;j++) {
  505. sfile[j] = file[j]+pref;
  506. slen[j] = len[j]-pref-suff;
  507. for (i=0;i<=slen[j];i++)
  508. sfile[j][i].serial = i;
  509. }
  510. }
  511. void
  512. equiv(
  513. struct line *a,
  514. int n,
  515. struct line *b,
  516. int m,
  517. int *c
  518. )
  519. {
  520. register int i, j;
  521. i = j = 1;
  522. while (i<=n && j<=m) {
  523. if (a[i].value <b[j].value)
  524. a[i++].value = 0;
  525. else if (a[i].value == b[j].value)
  526. a[i++].value = j;
  527. else
  528. j++;
  529. }
  530. while (i <= n)
  531. a[i++].value = 0;
  532. b[m+1].value = 0;
  533. j = 0;
  534. while (++j <= m) {
  535. c[j] = -b[j].serial;
  536. while (b[j+1].value == b[j].value) {
  537. j++;
  538. c[j] = b[j].serial;
  539. }
  540. }
  541. c[j] = -1;
  542. }
  543. char **args;
  544. void
  545. __cdecl
  546. main(
  547. int argc,
  548. char **argv
  549. )
  550. {
  551. register int k;
  552. args = argv;
  553. OutputFile = stdout; // Init to default
  554. argc--;
  555. argv++;
  556. while (argc > 0 && argv[0][0]=='-') {
  557. BOOL Skip = FALSE;
  558. for (k=1; (!Skip) && argv[0][k]; k++) {
  559. switch (argv[0][k]) {
  560. case 'e':
  561. opt = -1;
  562. break;
  563. case 'f':
  564. opt = 1;
  565. break;
  566. case 'b':
  567. bflag = 1;
  568. break;
  569. case 'h':
  570. _execvp(DIFFH, args);
  571. mesg("cannot run diffh",empty);
  572. done();
  573. case 's':
  574. slmFlag = 1;
  575. break;
  576. case 'o':
  577. //
  578. // Dirty hack: Redirection is not working, so if
  579. // this flag is present, output goes to
  580. // file.
  581. //
  582. argc--;
  583. argv++;
  584. if (argc < 3) {
  585. mesg("arg count",empty);
  586. done();
  587. }
  588. SetOutputFile(argv[0]);
  589. Skip = TRUE;
  590. break;
  591. }
  592. }
  593. argc--;
  594. argv++;
  595. }
  596. if (argc!=2) {
  597. mesg("arg count",empty);
  598. done();
  599. }
  600. #ifndef MSDOS
  601. dummy = malloc(1);
  602. #endif
  603. _setmode(_fileno(OutputFile), O_BINARY);
  604. _setmode(_fileno(stdin),O_TEXT);
  605. filename(&argv[0], &argv[1]);
  606. filename(&argv[1], &argv[0]);
  607. prepare(0, argv[0]);
  608. prepare(1, argv[1]);
  609. prune();
  610. qsort((char *) (sfile[0] + 1), slen[0], sizeof(struct line), sortcmp);
  611. qsort((char *) (sfile[1] + 1), slen[1], sizeof(struct line), sortcmp);
  612. member = (int *)file[1];
  613. equiv(sfile[0], slen[0], sfile[1], slen[1], member);
  614. member = (int *)REALLOC((char *)member,(slen[1]+2)*sizeof(int));
  615. class = (int *)file[0];
  616. unsort(sfile[0], slen[0], class);
  617. class = (int *)REALLOC((char *)class,(slen[0]+2)*sizeof(int));
  618. klist = (unsigned *)MALLOC((slen[0]+2)*sizeof(int));
  619. clist = (struct cand **)MALLOC(sizeof(struct cand *));
  620. clist[0] = (struct cand *) MALLOC(sizeof(struct cand));
  621. clistcnt = 1;
  622. k = stone(class, slen[0], member, klist);
  623. FREE((char *)member);
  624. FREE((char *)class);
  625. J = (int *)MALLOC((len[0]+2)*sizeof(int));
  626. unravel(klist[k]);
  627. for (k = 0; k < clistcnt; ++k)
  628. FREE((char *)(clist[k]));
  629. FREE((char *)clist);
  630. FREE((char *)klist);
  631. ixold = (char **)MALLOC((len[0]+2)*sizeof(char *));
  632. ixnew = (char **)MALLOC((len[1]+2)*sizeof(char *));
  633. check(argv);
  634. output(argv);
  635. status = anychange;
  636. Close(input[0]);
  637. Close(input[1]);
  638. done();
  639. }
  640. stone(
  641. int *a,
  642. unsigned n,
  643. int *b,
  644. unsigned *c
  645. )
  646. {
  647. register int i, k,y;
  648. int j, l;
  649. unsigned oldc, tc;
  650. int oldl;
  651. k = 0;
  652. c[0] = newcand(0,0,0);
  653. for (i=1; i<=(int)n; i++) {
  654. j = a[i];
  655. if (j==0)
  656. continue;
  657. y = -b[j];
  658. oldl = 0;
  659. oldc = c[0];
  660. do {
  661. if (y <= CLIST(oldc).y)
  662. continue;
  663. l = SEARCH(c, k, y);
  664. if (l!=oldl+1)
  665. oldc = c[l-1];
  666. if (l<=k) {
  667. if (CLIST(c[l]).y <= y)
  668. continue;
  669. tc = c[l];
  670. c[l] = newcand(i,y,oldc);
  671. oldc = tc;
  672. oldl = l;
  673. } else {
  674. c[l] = newcand(i,y,oldc);
  675. k++;
  676. break;
  677. }
  678. } while ((y=b[++j]) > 0);
  679. }
  680. return(k);
  681. }
  682. unsigned
  683. newcand(
  684. int x,
  685. int y,
  686. unsigned pred
  687. )
  688. {
  689. register struct cand *q;
  690. ++clen;
  691. if ((int)CLISTDIV(clen) > (clistcnt - 1)) {
  692. // printf("diff: surpassing segment boundry..\n");
  693. clist = (struct cand **) REALLOC((char *) clist,
  694. ++clistcnt * sizeof(struct cand *));
  695. clist[clistcnt-1] = (struct cand *) MALLOC(sizeof(struct cand));
  696. }
  697. clist[clistcnt-1] = (struct cand *)
  698. REALLOC((char *)(clist[clistcnt-1]),
  699. (1 + CLISTMOD(clen)) * sizeof(struct cand));
  700. q = &CLIST(clen - 1);
  701. q->x = x;
  702. q->y = y;
  703. q->pred = pred;
  704. return(clen-1);
  705. }
  706. search(
  707. unsigned *c,
  708. int k,
  709. int y
  710. )
  711. {
  712. register int i, j;
  713. int l;
  714. int t;
  715. //if(CLIST(c[k]).y<y) /*quick look for typical case*/
  716. // return(k+1);
  717. i = 0;
  718. j = k+1;
  719. while ((l=(i+j)/2) > i) {
  720. t = CLIST(c[l]).y;
  721. if (t > y)
  722. j = l;
  723. else if (t < y)
  724. i = l;
  725. else
  726. return(l);
  727. }
  728. return(l+1);
  729. }
  730. void
  731. unravel(
  732. unsigned p
  733. )
  734. {
  735. register unsigned int i;
  736. register struct cand *q;
  737. for (i=0; i<=len[0]; i++)
  738. J[i] = i<=pref ? i:
  739. i>len[0]-suff ? i+len[1]-len[0]:
  740. 0;
  741. for (q=&CLIST(p);q->y!=0;q=&CLIST(q->pred)) {
  742. J[q->x+pref] = q->y+pref;
  743. }
  744. }
  745. /* check does double duty:
  746. 1. ferret out any fortuitous correspondences due
  747. to confounding by hashing (which result in "jackpot")
  748. 2. collect random access indexes to the two files */
  749. void
  750. check(
  751. char **argv
  752. )
  753. {
  754. register unsigned int i, j;
  755. int jackpot;
  756. char c,d;
  757. //input[0] = fopen(argv[0],"r");
  758. //input[1] = fopen(argv[1],"r");
  759. inputfilep[0] = inputfile[0];
  760. inputfilep[1] = inputfile[1];
  761. inputfileleft[0] = inputfilesize[0];
  762. inputfileleft[1] = inputfilesize[1];
  763. j = 1;
  764. ixold[0] = ixnew[0] = 0L;
  765. ixold[0] = inputfilep[0];
  766. ixnew[0] = inputfilep[1];
  767. //ixold[1] = inputfilep[0];
  768. //ixnew[1] = inputfilep[1];
  769. jackpot = 0;
  770. for (i=1;i<=len[0];i++) {
  771. if (J[i]==0) {
  772. ixold[i] = skipline(0);
  773. continue;
  774. }
  775. while (j<(unsigned)J[i]) {
  776. ixnew[j] = skipline(1);
  777. j++;
  778. }
  779. for (;;) {
  780. c = GetChar(0);
  781. d = GetChar(1);
  782. if (bflag && isspace(c) && isspace(d)) {
  783. do {
  784. if (c=='\n') break;
  785. } while (isspace(c=GetChar(0)));
  786. do {
  787. if (d=='\n') break;
  788. } while (isspace(d=GetChar(1)));
  789. }
  790. if (c!=d) {
  791. jackpot++;
  792. J[i] = 0;
  793. if (c!='\n')
  794. skipline(0);
  795. if (d!='\n')
  796. skipline(1);
  797. break;
  798. }
  799. if (c=='\n')
  800. break;
  801. }
  802. ixold[i] = inputfilep[0];
  803. ixnew[j] = inputfilep[1];
  804. j++;
  805. }
  806. for (;j<=len[1];j++) {
  807. ixnew[j] = skipline(1);
  808. }
  809. //fclose(input[0]);
  810. //fclose(input[1]);
  811. /*
  812. if(jackpot)
  813. mesg("jackpot",empty);
  814. */
  815. }
  816. char *
  817. skipline(
  818. int f
  819. )
  820. {
  821. while (GetChar(f) != '\n' )
  822. ;
  823. return inputfilep[f];
  824. }
  825. void
  826. output(
  827. char **argv
  828. )
  829. {
  830. int m;
  831. register int i0, i1, j1;
  832. int j0;
  833. input[0] = Open(argv[0],"r", 0);
  834. input[1] = Open(argv[1],"r", 0);
  835. m = len[0];
  836. J[0] = 0;
  837. J[m+1] = len[1]+1;
  838. if (opt!=-1) for (i0=1;i0<=m;i0=i1+1) {
  839. while (i0<=m&&J[i0]==J[i0-1]+1) i0++;
  840. j0 = J[i0-1]+1;
  841. i1 = i0-1;
  842. while (i1<m&&J[i1+1]==0) i1++;
  843. j1 = J[i1+1]-1;
  844. J[i1] = j1;
  845. change(i0,i1,j0,j1);
  846. } else for (i0=m;i0>=1;i0=i1-1) {
  847. while (i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
  848. j0 = J[i0+1]-1;
  849. i1 = i0+1;
  850. while (i1>1&&J[i1-1]==0) i1--;
  851. j1 = J[i1-1]+1;
  852. J[i1] = j1;
  853. change(i1,i0,j1,j0);
  854. }
  855. if (m==0)
  856. change(1,0,1,len[1]);
  857. }
  858. void
  859. change(
  860. int a,
  861. int b,
  862. int c,
  863. int d
  864. )
  865. {
  866. if (a>b&&c>d)
  867. return;
  868. anychange = 1;
  869. if (opt!=1) {
  870. range(a,b,",");
  871. putc(a>b?'a':c>d?'d':'c', OutputFile);
  872. if (opt!=-1)
  873. range(c,d,",");
  874. } else {
  875. putc(a>b?'a':c>d?'d':'c', OutputFile);
  876. range(a,b," ");
  877. }
  878. putc('\r',OutputFile);
  879. putc('\n',OutputFile);
  880. if (opt==0) {
  881. fetch(ixold,a,b,0,"< ");
  882. if (a<=b&&c<=d)
  883. fputs("---\r\n", OutputFile);
  884. }
  885. fetch(ixnew,c,d,1,opt==0?"> ":empty);
  886. if (opt!=0&&c<=d)
  887. fputs(".",OutputFile);
  888. }
  889. void
  890. range(
  891. int a,
  892. int b,
  893. char *separator
  894. )
  895. {
  896. fprintf(OutputFile,"%d", a>b?b:a);
  897. if (a<b)
  898. fprintf(OutputFile,"%s%d", separator, b);
  899. }
  900. void
  901. fetch(
  902. char **f,
  903. int a,
  904. int b,
  905. int lb,
  906. char *s
  907. )
  908. {
  909. register int i, j;
  910. register int nc;
  911. register char c;
  912. char *p;
  913. UNREFERENCED_PARAMETER( lb );
  914. for (i=a;i<=b;i++) {
  915. p = f[i-1];
  916. nc = (int)(f[i]-f[i-1]);
  917. fputs(s, OutputFile);
  918. for (j=0;j<nc;j++) {
  919. c = *p++;
  920. if (c == '\n' ) {
  921. //putc( '\r', OutputFile );
  922. putc( '\n', OutputFile );
  923. if ( p >= f[i] ) break;
  924. } else {
  925. putc(c, OutputFile);
  926. }
  927. }
  928. }
  929. }
  930. /* hashing has the effect of
  931. * arranging line in 7-bit bytes and then
  932. * summing 1-s complement in 16-bit hunks
  933. */
  934. readhash(
  935. int f
  936. )
  937. {
  938. register unsigned shift;
  939. register char t;
  940. register int space;
  941. long sum = 1L;
  942. space = 0;
  943. if (!bflag) for (shift=0;(t=GetChar(f))!='\n';shift+=7) {
  944. if (t==(char)EOF && EndOfFile(f) )
  945. return(0);
  946. sum += (long)t << (shift%=HALFLONG);
  947. } else for (shift=0;;) {
  948. switch (t=GetChar(f)) {
  949. case '\t':
  950. case ' ':
  951. case '\r':
  952. space++;
  953. continue;
  954. default:
  955. if ( t==(char)EOF && EndOfFile(f) ) {
  956. return(0);
  957. }
  958. if (space) {
  959. shift += 7;
  960. space = 0;
  961. }
  962. sum += (long)t << (shift%=HALFLONG);
  963. shift += 7;
  964. continue;
  965. case '\n':
  966. break;
  967. }
  968. break;
  969. }
  970. sum = low(sum) + high(sum);
  971. return((short)low(sum) + (short)high(sum));
  972. }
  973. void
  974. mesg(
  975. char *s,
  976. char *t
  977. )
  978. {
  979. fprintf(stderr,"diff: %s%s\n",s,t);
  980. }
  981. void
  982. SetOutputFile (
  983. char *FileName
  984. )
  985. {
  986. OutputFile = fopen(FileName, "ab");
  987. if (!OutputFile) {
  988. mesg("Unable to open: ", FileName);
  989. done();
  990. }
  991. }
  992. PVOID
  993. Open(
  994. const char *FileName,
  995. const char *Mode,
  996. DWORD Size
  997. )
  998. {
  999. PFILEMAP FileMap = NULL;
  1000. FileMap = (PFILEMAP)malloc(sizeof(FILEMAP));
  1001. if ( FileMap ) {
  1002. FileMap->Access = 0;
  1003. FileMap->Share = FILE_SHARE_READ | FILE_SHARE_WRITE;
  1004. while ( *Mode ) {
  1005. switch ( *Mode ) {
  1006. case 'r':
  1007. FileMap->Access |= GENERIC_READ;
  1008. FileMap->Create = OPEN_EXISTING;
  1009. break;
  1010. case 'w':
  1011. FileMap->Access |= GENERIC_WRITE;
  1012. FileMap->Create = CREATE_ALWAYS;
  1013. break;
  1014. case 'a':
  1015. FileMap->Access += GENERIC_WRITE;
  1016. FileMap->Create = OPEN_ALWAYS;
  1017. break;
  1018. case '+':
  1019. FileMap->Access |= (GENERIC_READ | GENERIC_WRITE);
  1020. break;
  1021. default:
  1022. break;
  1023. }
  1024. Mode++;
  1025. }
  1026. FileMap->FileHandle = CreateFile(
  1027. FileName,
  1028. FileMap->Access,
  1029. FileMap->Share,
  1030. NULL,
  1031. FileMap->Create,
  1032. FILE_ATTRIBUTE_NORMAL,
  1033. NULL
  1034. );
  1035. if ( FileMap->FileHandle != INVALID_HANDLE_VALUE ) {
  1036. FileMap->Size = GetFileSize( FileMap->FileHandle, NULL );
  1037. FileMap->Allocated = (FileMap->Access == GENERIC_READ) ? FileMap->Size : Size;
  1038. FileMap->MapHandle = CreateFileMapping(
  1039. FileMap->FileHandle,
  1040. NULL,
  1041. (FileMap->Access & GENERIC_WRITE) ? PAGE_READWRITE : PAGE_READONLY,
  1042. 0,
  1043. (FileMap->Access == GENERIC_READ) ? 0 : (DWORD)Size,
  1044. NULL
  1045. );
  1046. if ( FileMap->MapHandle ) {
  1047. FileMap->Base = MapViewOfFile(
  1048. FileMap->MapHandle,
  1049. (FileMap->Access & GENERIC_WRITE) ? FILE_MAP_ALL_ACCESS : FILE_MAP_READ,
  1050. 0,
  1051. 0,
  1052. (FileMap->Access == GENERIC_READ) ? 0 : Size
  1053. );
  1054. if ( FileMap->Base ) {
  1055. if ( FileMap->Create == OPEN_ALWAYS ) {
  1056. FileMap->Offset = FileMap->Size;
  1057. }
  1058. goto Done;
  1059. }
  1060. CloseHandle( FileMap->MapHandle );
  1061. }
  1062. CloseHandle( FileMap->FileHandle );
  1063. }
  1064. free( FileMap );
  1065. FileMap = NULL;
  1066. }
  1067. Done:
  1068. return (PVOID)FileMap;
  1069. }
  1070. int
  1071. Close (
  1072. PVOID Map
  1073. )
  1074. {
  1075. PFILEMAP FileMap = (PFILEMAP)Map;
  1076. UnmapViewOfFile( FileMap->Base );
  1077. CloseHandle( FileMap->MapHandle );
  1078. if ( FileMap->Access & GENERIC_WRITE ) {
  1079. SetFilePointer( FileMap->FileHandle,
  1080. FileMap->Size,
  1081. 0,
  1082. FILE_BEGIN );
  1083. SetEndOfFile( FileMap->FileHandle );
  1084. }
  1085. CloseHandle( FileMap->FileHandle );
  1086. free( FileMap );
  1087. return 0;
  1088. }