Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1887 lines
56 KiB

  1. @rem = '--*-Perl-*--
  2. @echo off
  3. if "%OS%" == "Windows_NT" goto WinNT
  4. perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
  5. goto endofperl
  6. :WinNT
  7. perl -x -S %0 %*
  8. if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
  9. if %errorlevel% == 9009 echo You do not have Perl in your PATH.
  10. if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
  11. goto endofperl
  12. @rem ';
  13. #!/usr/local/bin/perl -w
  14. #line 15
  15. 'di';
  16. 'ig00';
  17. ##############################################################################
  18. ##
  19. ## search
  20. ##
  21. ## Jeffrey Friedl ([email protected]), Dec 1994.
  22. ## Copyright 19.... ah hell, just take it.
  23. ##
  24. ## BLURB:
  25. ## A combo of find and grep -- more or less do a 'grep' on a whole
  26. ## directory tree. Fast, with lots of options. Much more powerful than
  27. ## the simple "find ... | xargs grep ....". Has a full man page.
  28. ## Powerfully customizable.
  29. ##
  30. ## This file is big, but mostly comments and man page.
  31. ##
  32. ## See man page for usage info.
  33. ## Return value: 2=error, 1=nothing found, 0=something found.
  34. ##
  35. $version = "950918.5";
  36. ##
  37. ## "950918.5";
  38. ## Changed all 'sysread' to 'read' because Linux perl's don't seem
  39. ## to like sysread()
  40. ##
  41. ## "941227.4";
  42. ## Added -n, -u
  43. ##
  44. ## "941222.3"
  45. ## Added -nice (due to Lionel Cons <[email protected]>)
  46. ## Removed any leading "./" from name.
  47. ## Added default flags for ~/.search, including TTY, -nice, -list, etc.
  48. ## Program name now has path removed when printed in diagnostics.
  49. ## Added simple tilde-expansion to -dir arg.
  50. ## Added -dskip, etc. Fixed -iregex bug.
  51. ## Changed -dir to be additive, adding -ddir.
  52. ## Now screen out devices, pipes, and sockets.
  53. ## More tidying and lots of expanding of the man page
  54. ##
  55. ##
  56. ## "941217.2";
  57. ## initial release.
  ## Flag noting whether this copy was produced by -strip.  &strip looks for
  ## the statement below by its exact text, so keep it spelled this way.
  $stripped=0;

  &init;

  ## The per-user startup file lives in $HOME; an empty name means "none".
  $rc_file = exists $ENV{'HOME'} ? join('/', $ENV{'HOME'}, ".search") : "";

  &check_args;

  ## Make sure we've got a regex.
  ## Don't need one if -find or -showrc was specified.
  if ($FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0) {
      $! = 2;
      die "expecting regex arguments.\n";
  }

  &prepare_to_search($rc_file);

  ## Build &dodir from the DATA section unless it already exists
  ## (BIG key to speed).
  &import_program unless defined &dodir;

  ## Keep going for as long as there are directories queued up.
  while (@todo) {
      &dodir(shift(@todo));
  }

  &clear_message if $VERBOSE && $STDERR_IS_TTY;
  exit($retval);
  77. ###############################################################################
  sub init
  {
      ## Give every option variable its default value.  Takes no arguments
      ## and returns nothing useful; all results are left in package globals.
      ## Must run before &check_args, which overrides these from the command
      ## line (and, later, from the rc file).

      ## initialize variables that might be reset by command-line args
      $DOREP=0;            ## set true by -dorep (redo multi-hardlink files)
      $DOREP=1 if $^O eq 'MSWin32';
      $DO_SORT=0;          ## set by -sort (sort files in a dir before checking)
      $FIND_ONLY=0;        ## set by -find (don't search files)
      $LIST_ONLY=0;        ## set true by -l (list filenames only)
      $NEWER=0;            ## set by -newer, "-mtime -###"
      $NICE=0;             ## set by -nice (print human-readable output)
      $NOLINKS=0;          ## set true by -nolinks (don't follow symlinks)
      $OLDER=0;            ## set by -older, "-mtime ###"
      $PREPEND_FILENAME=1; ## set false by -h (don't prefix lines with filename)
      $REPORT_LINENUM=0;   ## set true by -n (show line numbers)
      $VERBOSE=0;          ## set to a value by -v, -vv, etc. (verbose messages)
      $WHY=0;              ## set true by -why, -vvv+ (report why skipped)
      $XDEV=0;             ## set true by -xdev (stay on one filesystem)
      $all=0;              ## set true by -all (don't skip many kinds of files)
      $iflag = '';         ## set to 'i' by -i (ignore case);
      $norc=0;             ## set by -norc (don't load rc file)
      $showrc=0;           ## set by -showrc (show what happens with rc file)
      $underlineOK=0;      ## set true by -u (watch for underline stuff)
      $words=0;            ## set true by -w (match whole-words only)
      $DELAY=0;            ## inter-file delay (seconds)
      $retval=1;           ## will set to 0 if we find anything.

      ## various elements of stat() that we might access
      ## NOTE(review): perl's stat() list is 0-based with dev at index 0 and
      ## ino at index 1, so the first two values below actually select ino
      ## and mode.  Left as-is because &prepare_to_search and &dodir consume
      ## them (including the -xdev test); audit all three together.
      $STAT_DEV = 1;
      $STAT_INODE = 2;
      $STAT_MTIME = 9;

      $VV_PRINT_COUNT = 50;     ## with -vv, print every VV_PRINT_COUNT files, or...
      $VV_SIZE = 1024*1024;     ## ...every VV_SIZE bytes searched
      $vv_print = $vv_size = 0; ## running totals.

      ## set default options, in case the rc file wants them
      $opt{'TTY'}= 1 if -t STDOUT;

      ## want to know this for debugging message stuff
      $STDERR_IS_TTY = -t STDERR ? 1 : 0;
      $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

      ## clean up $0 for any diagnostics we'll be printing.
      ## On Windows the directory part is delimited by backslashes (or a
      ## bare drive prefix), so strip those as well as forward slashes.
      if ($^O eq 'MSWin32') {
          $0 =~ s,.*[/\\:],,;
      } else {
          $0 =~ s,.*/,,;
      }
  }
  117. ##
  118. ## Check arguments.
  119. ##
  sub check_args
  {
      ## Consume leading "-option" words (plus any values they take) from the
      ## global @ARGV and record them in package globals (%opt, %skip, %name,
      ## @todo, $NEWER, $OLDER, ...).  Parsing stops at the first word that
      ## does not begin with "-", or right after "-e"; whatever remains in
      ## @ARGV is left for the caller.  &read_rc also calls this with a
      ## localized @ARGV to process "option:" lines.
      ## Dies with exit status 2 on an unknown or malformed option.
      ## -version and -help exit(0) directly; -strip hands over to &strip.
      while (@ARGV && $ARGV[0] =~ m/^-/)
      {
          $arg = shift(@ARGV);

          if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
              print qq/Jeffrey's file search, version "$version".\n/;
              exit(0) unless $arg eq '-help';
          }
          if ($arg eq '-help') {
              print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
-dir DIR start search at the named directory (default is current dir).
-xdev stay on starting file system.
-sort sort the files in each directory before processing.
-nolinks don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
-mtime # consider files modified > # days ago (-# for < # days old)
-newer FILE consider files modified more recently than FILE (also -older)
-name GLOB consider files whose name matches pattern (also -regex).
-skip GLOB opposite of -name: identifies files to not consider.
-path GLOB like -name, but for files whose whole path is described.
-dpath/-dregex/-dskip versions for selecting or pruning directories.
-all don't skip any files marked to be skipped by the startup file.
-x<SPECIAL> (see manual, and/or try -showrc).
-why report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
-f | -find just list files (PerlRegex ignored). Default is to grep them.
-ff | -ffind Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
-l | -list only list files with matches, not the lines themselves.
-nice | -nnice print more "human readable" output.
-n prefix each output line with its line number in the file.
-h don't prefix output lines with file name.
-u also look "inside" manpage-style underlined text
-i do case-insensitive searching.
-w match words only (as defined by perl's \\b).
OTHER OPTIONS:
-v, -vv, -vvv various levels of message verbosity.
-e end of options (in case a regex looks like an option).
-showrc show what the rc file sets, then exit.
-norc don't load the rc file.
-dorep check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
              print "Use -v -help for more verbose help.\n" unless $VERBOSE;
              print "This script file is also a man page.\n" unless $stripped;
              print <<INLINE_LITERAL_TEXT if $VERBOSE;
If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.
Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.
-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.
If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.
You can always find the latest version on the World Wide Web in
http://www.wg.omron.co.jp/~jfriedl/perl/
INLINE_LITERAL_TEXT
              exit(0);
          }

          ## Simple switches: set the flag and move on to the next word.
          $DOREP=1, next if $arg eq '-dorep';             ## do repeats
          $DO_SORT=1, next if $arg eq '-sort';            ## sort files
          $NOLINKS=1, next if $arg eq '-nolinks';         ## no sym. links
          $PREPEND_FILENAME=0, next if $arg eq '-h';      ## no filename prefix
          $REPORT_LINENUM=1, next if $arg eq '-n';        ## show line numbers
          $WHY=1, next if $arg eq '-why';                 ## tell why skipped
          $XDEV=1, next if $arg eq '-xdev';               ## don't leave F.S.
          $all=1,$opt{'-all'}=1,next if $arg eq '-all';   ## don't skip *.Z, etc
          $iflag='i', next if $arg eq '-i';               ## ignore case
          $norc=1, next if $arg eq '-norc';               ## don't load rc file
          $showrc=1, next if $arg eq '-showrc';           ## show rc file
          $underlineOK=1, next if $arg eq '-u';           ## look through underline
          $words=1, next if $arg eq '-w';                 ## match "words" only
          &strip if $arg eq '-strip';                     ## dump this program
          last if $arg eq '-e';

          ## Anchored so that only "-delayNNN" itself is taken as a delay,
          ## not any option that merely contains that text.
          $DELAY=$1, next if $arg =~ m/^-delay(\d+)$/;

          $FIND_ONLY=1, next if $arg =~/^-f(ind)?$/;      ## do "find" only
          $FIND_ONLY=1, $DOREP=1, $all=1,
              next if $arg =~/^-ff(ind)?$/;               ## fast -find
          $LIST_ONLY=1,$opt{'-list'}=1,
              next if $arg =~/^-l(ist)?$/;                ## only list files

          if ($arg =~ m/^-(v+)$/) {                       ## verbosity
              $VERBOSE =length($1);
              ## -vvv also turns on the -v and -vv tags for the rc file.
              foreach $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
              next;
          }
          if ($arg =~ m/^-(n+)ice$/) {                    ## "nice" output
              $NICE =length($1);
              foreach $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
              next;
          }

          ## -skip / -iskip / -dskip / -idskip: whitespace-separated globs.
          if ($arg =~ m/^-(i?)(d?)skip$/) {
              local($i) = $1 eq 'i';
              local($d) = $2 eq 'd';
              ## $arg already carries its leading dash.
              $! = 2, die qq/$0: expecting glob arg to $arg\n/ unless @ARGV;
              foreach (split(/\s+/, shift @ARGV)) {
                  if ($d) {
                      $idskip{$_}=1 if $i;
                      $dskip{$_}=1;
                  } else {
                      $iskip{$_}=1 if $i;
                      $skip{$_}=1;
                  }
              }
              next;
          }

          ## -name/-path/-regex and their i/d variants.  Each pattern is
          ## stored under the key "OPTION,PATTERN" for &prepare_to_search.
          if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
              local($i) = $1 eq 'i';
              $! = 2, die qq/$0: expecting arg to $arg\n/ unless @ARGV;
              foreach (split(/\s+/, shift @ARGV)) {
                  $iname{join(',', $arg, $_)}=1 if $i;
                  $name{join(',', $arg, $_)}=1;
              }
              next;
          }

          ## -dir adds a starting directory; -ddir replaces the list so far.
          if ($arg =~ m/^-d?dir$/) {
              $opt{'-dir'}=1;
              $! = 2, die qq/$0: expecting filename arg to $arg\n/ unless @ARGV;
              $start = shift(@ARGV);
              ## simple tilde expansion: a leading "~" or "~/" becomes $HOME.
              $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
              $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
              $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
              undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
              push(@todo, $start);
              next;
          }

          ## -newer/-older FILE: compare against FILE's modification time.
          ## Any upper-case letter in the option name makes it "or equal".
          if ($arg =~ m/^-(new|old)er$/i) {
              $! = 2, die "$0: expecting filename arg to $arg\n" unless @ARGV;
              local($file, $time) = shift(@ARGV);
              $! = 2, die qq/$0: can't stat ${arg}'s "$file".\n/
                  unless $time = (stat($file))[$STAT_MTIME];
              local($upper) = $arg =~ tr/A-Z//;
              if ($arg =~ m/new/i) {
                  $time++ unless $upper;
                  $NEWER = $time if $NEWER < $time;
              } else {
                  $time-- unless $upper;
                  $OLDER = $time if $OLDER == 0 || $OLDER > $time;
              }
              next;
          }

          ## -mtime DAYS: negative means "newer than", positive "older than".
          if ($arg =~ m/^-mtime$/) {
              $! = 2, die "$0: expecting numerical arg to $arg\n" unless @ARGV;
              local($days) = shift(@ARGV);
              $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
              $days *= 3600 * 24;
              if ($days < 0) {
                  local($time) = $^T + $days;
                  $NEWER = $time if $NEWER < $time;
              } else {
                  local($time) = $^T - $days;
                  $OLDER = $time if $OLDER == 0 || $OLDER > $time;
              }
              next;
          }

          ## special user options
          if ($arg =~ m/^-x(.+)/) {
              foreach (split(/[\s,]+/, $1)) { $user_opt{$_} = $opt{$_}= 1; }
              next;
          }

          $! = 2, die "$0: unknown arg [$arg]\n";
      }
  }
  284. ##
  285. ## Given a filename glob, return a regex.
  286. ## If the glob has no globbing chars (no * ? or [..]), then
  287. ## prepend an effective '*' to it.
  288. ##
  sub glob_to_regex
  {
      ## Convert a filename glob into the text of an anchored Perl regex.
      ##   arg:     the glob string.
      ##   returns: regex source of the form "^...$" (no delimiters).
      ## "*" becomes ".*", "?" becomes "." (exactly one character), a
      ## [..] class is passed through untouched, and everything else is
      ## matched literally.  A glob with no wildcard at all gets a leading
      ## ".*", so that '.o' behaves like '*.o'.
      local($glob) = @_;

      ## Tokenize into: a backslash-escaped char, a lone * or ?, a [..]
      ## class (whose first member may be ']'), or a run of plain text.
      local(@parts) = $glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g;
      local($trueglob)=0;

      foreach (@parts) {
          if ($_ eq '*') {
              $_ = '.*';      ## any run of characters
              $trueglob=1;    ## * is a real glob
          } elsif ($_ eq '?') {
              $_ = '.';       ## exactly one character (was '.?': zero or one)
              $trueglob=1;    ## ? is a real glob
          } elsif (substr($_, 0, 1) eq '[') {
              $trueglob=1;    ## [..] is a real glob
          } else {
              s/^\\//;        ## remove any leading backslash;
              s/(\W)/\\$1/g;  ## now quote anything dangerous;
          }
      }

      unshift(@parts, '.*') unless $trueglob;
      join('', '^', @parts, '$');
  }
  sub prepare_to_search
  {
      ## Turn everything gathered from the command line and the rc file into
      ## the strings and flags that &dodir uses (the upper-case globals:
      ## $SKIP_TEST, $GLOB_TESTS, $MAGIC_TESTS, $REGEX_TEST, ...).
      ##   arg: name of the rc file (passed on to &read_rc unless -norc).
      ## Exits 0 after -showrc.  Dies with exit status 2 on contradictory
      ## time limits or when a starting directory cannot be stat'ed.
      ## Consumes the remaining @ARGV as the list of regexes.
      local($rc_file) = @_;

      $HEADER_BYTES=0;             ## Might be set nonzero in &read_rc;
      $last_message_length = 0;    ## For &message and &clear_message.

      &read_rc($rc_file, $showrc) unless $norc;
      exit(0) if $showrc;

      $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
      $WHY = 1 if $VERBOSE > 3;    ## Arg -vvvv or above implies -why.
      @todo = ('.') if @todo == 0; ## Where we'll start looking

      ## see if any user options were specified that weren't accounted for
      foreach $opt (keys %user_opt) {
          next if defined $seen_opt{$opt};
          warn "warning: -x$opt never considered.\n";
      }

      ## Like every other fatal error, leave with status 2.
      $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
          if ($NEWER && $OLDER) && ($NEWER > $OLDER);

      ##
      ## Process any -skip/-iskip args that had been given
      ##
      local(@skip_test);
      foreach $glob (keys %skip) {
          $i = defined($iskip{$glob}) ? 'i': '';
          push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
      }
      if (@skip_test) {
          $SKIP_TEST = join('||',@skip_test);
          $DO_SKIP_TEST = 1;
      } else {
          $DO_SKIP_TEST = $SKIP_TEST = 0;
      }

      ##
      ## Process any -dskip/-idskip args that had been given
      ##
      local(@dskip_test);
      foreach $glob (keys %dskip) {
          $i = defined($idskip{$glob}) ? 'i': '';
          push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
      }
      if (@dskip_test) {
          $DSKIP_TEST = join('||',@dskip_test);
          $DO_DSKIP_TEST = 1;
      } else {
          $DO_DSKIP_TEST = $DSKIP_TEST = 0;
      }

      ##
      ## Process any -name, -path, -regex, etc. args that had been given.
      ##
      undef @name_test;
      undef @dname_test;
      foreach $key (keys %name) {
          ## Keys were stored by &check_args as "OPTION,PATTERN", where
          ## OPTION still has its leading dash (e.g. "-iname").
          local($type, $pat) = split(/,/, $key, 2);
          local($i) = defined($iname{$key}) ? 'i' : '';
          if ($type =~ /regex/) {
              $pat =~ s/!/\\!/g;
              $test = "\$name =~ m!^$pat\$!$i";
          } else {
              ## -name style options look at the bare name, -path style at
              ## the whole path.  ($type is never just 'name', so test the
              ## suffix rather than comparing for equality.)
              local($var) = $type =~ m/name$/ ? '$name' : '$file';
              $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
          }
          if ($type =~ m/^-i?d/) {
              push(@dname_test, $test);
          } else {
              push(@name_test, $test);
          }
      }
      if (@name_test) {
          $GLOB_TESTS = join('||', @name_test);
          $DO_GLOB_TESTS = 1;
      } else {
          $GLOB_TESTS = $DO_GLOB_TESTS = 0;
      }
      if (@dname_test) {
          $DGLOB_TESTS = join('||', @dname_test);
          $DO_DGLOB_TESTS = 1;
      } else {
          $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
      }

      ##
      ## Process any 'magic' things from the startup file.
      ##
      if (@magic_tests && $HEADER_BYTES) {
          ## the $magic' one is for when &dodir is not inlined
          $tests = join('||',@magic_tests);
          $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
          $DO_MAGIC_TESTS = 1;
      } else {
          $MAGIC_TESTS = 1;
          $DO_MAGIC_TESTS = 0;
      }

      ##
      ## Prepare regular expressions.
      ##
      {
          local(@regex_tests);

          ## Always defined, so interpolating it below never warns under -w.
          $mflag = '';
          if ($LIST_ONLY) {
              ## need to have $* set, but perl5 just won''t shut up about it.
              if ($] >= 5) {
                  $mflag = 'm';
              } else {
                  eval ' $* = 1 ';
              }
          }

          ##
          ## Until I figure out a better way to deal with it,
          ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
          ## Such a regex *will* match \n, and if I'm pulling in multiple
          ## lines, it can allow lines to match that would otherwise not match.
          ##
          ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
          ## and use the fast listonly.
          ##
          $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

          local(@extra);
          ## Optional "_ backspace" between letters; non-capturing on perl5.
          local($underline_glue) = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';

          while (@ARGV) {
              $regex = shift(@ARGV);

              ##
              ## If watching for underlined things too, add another regex.
              ##
              if ($underlineOK) {
                  if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
                      warn "$0: warning, can't underline-safe ``$regex''.\n";
                  } else {
                      $regex = join($underline_glue, split(//, $regex));
                  }
              }

              ## If nothing special in the regex, just use index...
              ## is quite a bit faster.
              if (($iflag eq '') && ($words == 0) &&
                  $regex !~ m/[?*+{}()\\.|^\$[]/)
              {
                  push(@regex_tests, "(index(\$_, q+$regex+)>=0)");
              } else {
                  ## NOTE(review): only the first "$x", "@x" or "/x" is
                  ## escaped here (no /g) -- confirm whether that is intended.
                  $regex =~ s#[\$\@\/]\w#\\$&#;
                  if ($words) {
                      if ($regex =~ m/\|/) {
                          ## could be dangerous -- see if we can wrap in parens.
                          if ($regex =~ m/\\\d/) {
                              warn "warning: -w and a | in a regex is dangerous.\n"
                          } else {
                              ## join() with $regex as the separator: "($regex)"
                              $regex = join($regex, '(', ')');
                          }
                      }
                      $regex = join($regex, '\b', '\b');
                  }
                  ## index(), not substr(): look for a literal "[^".
                  $CAN_USE_FAST_LISTONLY = 0 if index($regex, "[^") >= 0;
                  push(@regex_tests, "m/$regex/$iflag$mflag");
              }

              ## If we're done, but still have @extra to do, get set for that.
              if (@ARGV == 0 && @extra) {
                  @ARGV = @extra;      ## now deal with the extra stuff.
                  $underlineOK = 0;    ## but no more of this.
                  undef @extra;        ## or this.
              }
          }

          if (@regex_tests) {
              $REGEX_TEST = join('||', @regex_tests);
              ## print STDERR $REGEX_TEST, "\n"; exit;
          } else {
              ## must be doing -find -- just give something syntactically correct.
              $REGEX_TEST = 1;
          }
      }

      ##
      ## Make sure we can read the first item(s).
      ##
      foreach $start (@todo) {
          $! = 2, die qq/$0: can't stat "$start"\n/
              unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

          if (defined $dir_done{"$dev,$inode"}) {
              ## ignore the repeat.
              warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
                  if $VERBOSE;
              next;
          }

          ## if -xdev was given, remember the device.
          $xdev{$dev} = 1 if $XDEV;

          ## Note that we won't want to do it again
          $dir_done{"$dev,$inode"} = $start;
      }
  }
  491. ##
  492. ## See the comment above the __END__ above the 'sub dodir' below.
  493. ##
  sub import_program
  {
      ## Define &dodir at run time.  The text of the routine is read from
      ## the DATA handle, every upper-case $VARIABLE in it is replaced by
      ## that variable's current value, and the result is eval'ed.
      ## No arguments.  Dies with status 2 if the generated code fails to
      ## compile; exits 2 via &bad if a variable to be inlined is undefined.

      ## Read from data, up to next __END__. This will be &dodir.
      local($/) = "\n__END__";
      $prog = <DATA>;
      close(DATA);

      ## remove any 'eval'
      $prog =~ s/\beval\b//g;

      ## Inline uppercase $-variables by their current values.
      if ($] >= 5) {
          $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
              defined(${$main::{$1}}) ? ${$main::{$1}} : &bad($1)/eg;
      } else {
          $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
              defined($VAR) ? $VAR : &bad($1)/eg;
      }

      ## now do it. This will define &dodir;
      eval $prog;
      if ($@) {
          $! = 2;
          die "$0 internal error: $@\n";
      }
  }

  ## Report an internal error (arguments are included in the message)
  ## and leave with exit status 2.
  sub bad {
      print STDERR "$0: internal error (@_)\n";
      exit 2;
  }
  516. ###########################################################################
  517. ##
  518. ## Read the .search file:
  519. ## Blank lines and lines that are only #-comments ignored.
  520. ## Newlines may be escaped to create long lines
  521. ## Other lines are directives.
  522. ##
  523. ## A directive may begin with an optional tag in the form <...>
  524. ## Things inside the <...> are evaluated as with:
  525. ## <(this || that) && must>
  526. ## will be true if
  527. ## -xmust -xthis or -xmust -xthat
  528. ## were specified on the command line (order doesn't matter, though)
  529. ## A directive is not done if there is a tag and it's false.
  530. ## Any characters but whitespace and &|()>,! may appear after an -x
  531. ## (although "-xdev" is special). -xmust,this is the same as -xmust -xthis.
  532. ## Something like -x~ would make <~> true, and <!~> false.
  533. ##
  534. ## Directives are in the form:
  535. ## option: STRING
  536. ## magic : NUMBYTES : EXPR
  537. ##
  538. ## With option:
  539. ## The STRING is parsed like a Bourne shell command line, and the
  540. ## options are used as if given on the command line.
  541. ## No comments are allowed on 'option' lines.
  542. ## Examples:
  543. ## # skip objects and libraries
  544. ## option: -skip '.o .a'
  545. ## # skip emacs *~ and *# files, unless -x~ given:
  546. ## <!~> option: -skip '~ #'
  547. ##
  548. ## With magic:
  549. ## EXPR can be pretty much any perl (comments allowed!).
  550. ## If it evaluates to true for any particular file, it is skipped.
  551. ## The only info you'll have about a file is the variable $H, which
  552. ## will have at least the first NUMBYTES of the file (less if the file
  553. ## is shorter than that, of course, and maybe more). You'll also have
  554. ## any variables you set in previous 'magic' lines.
  555. ## Examples:
  556. ## magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
  557. ## magic: 6 : $x6 eq 'GIF89a'
  558. ##
  559. ## magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \
  560. ## || $x6 eq 'GIF89a' ## new gif
  561. ## (the above two sets are the same)
  562. ## ## Check the first 32 bytes for "binarish" looking bytes.
  563. ## ## Don't blindly dump on any high-bit set, as non-ASCII text
  564. ## ## often has them set. \x80 and \xff seem to be special, though.
  565. ## ## Require two in a row to not get things like perl's $^T.
  566. ## ## This is known to get *.Z, *.gz, pkzip, *.elc and about any
  567. ## ## executable you'll find.
  568. ## magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
  569. ##
  sub read_rc
  {
      ## Read and act on the startup file.
      ##   $file: name of the rc file; if it cannot be opened, a built-in
      ##          default set of directives is used instead.
      ##   $show: true for -showrc -- describe each directive on STDOUT.
      ## "option:" lines are parsed like a Bourne shell line and fed to
      ## &check_args; "magic:" lines are compile-checked and appended to
      ## @magic_tests, raising $HEADER_BYTES as needed.
      ## Dies with exit status 2 on any malformed directive.
      local($file, $show) = @_;
      local($line_num, $ln, $tag) = 0;
      local($use_default, @default) = 0;

      { package magic; $^W= 0; } ## turn off warnings for when we run EXPR's

      unless (open(RC, "$file")) {
          $use_default=1;
          $file = "<internal default startup file>";
          ## no RC file -- use this default.
          @default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
<!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
      }

      ##
      ## Make an eval error pretty.
      ##
      sub clean_eval_error {
          local($_) = @_;
          s/ in file \(eval\) at line \d+,//g;  ## perl4-style error
          s/ at \(eval \d+\) line \d+,//g;      ## perl5-style error
          $_ = $` if m/\n/;                     ## remove all but first line
          "$_\n";
      }

      print "reading RC file: $file\n" if $show;

      while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
          $ln = ++$line_num;                    ## note starting line num.
          $_ .= <RC>, $line_num++ while s/\\\n?$/\n/; ## allow continuations
          next if /^\s*(#.*)?$/;                ## skip blank or comment-only lines.
          $do = '';

          ## look for an initial <...> tag.
          if (s/^\s*<([^>]*)>//) {
              ## This simple s// will make the tag ready to eval.
              ($tag = $msg = $1) =~
                  s/[^\s&|(!)]+/
                      $seen_opt{$&}=1;          ## note seen option
                      "defined(\$opt{q>$&>})"   ## (q>> is safe quoting here)
                  /eg;

              ## see if the tag is true or not, abort this line if not.
              $dothis = (eval $tag);
              $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

              if ($show) {
                  ## Rewrite the tag into words for display.  /g so that
                  ## every option name in the tag gets its "-x" prefix.
                  $msg =~ s/[^\s&|(!)]+/-x$&/g;
                  $msg =~ s/\s*!\s*/ no /g;
                  $msg =~ s/\s*&&\s*/ and /g;
                  $msg =~ s/\s*\|\|\s*/ or /g;
                  $msg =~ s/^\s+//; $msg =~ s/\s+$//;
                  $do = $dothis ? "(doing because $msg)" :
                                  "(do if $msg)";
              } elsif (!$dothis) {
                  next;
              }
          }

          if (m/^\s*option\s*:\s*/) {
              next if $all && !$show;  ## -all turns off these checks;
              local($_) = $';
              s/\n$//;
              local($orig) = $_;
              print " $do option: $_\n" if $show;
              local($0) = "$0 ($file)"; ## for any error message.
              local(@ARGV);
              local($this);

              ##
              ## Parse $_ as a Bourne shell line -- fill @ARGV
              ##
              while (length) {
                  ## whitespace ends the word being built, if any.
                  if (s/^\s+//) {
                      push(@ARGV, $this) if defined $this;
                      undef $this;
                      next;
                  }
                  $this = '' if !defined $this;
                  ## Take '...', "...", bare text, or a backslash pair.
                  ## NOTE(review): the backslash itself is kept in the word
                  ## -- confirm that is what downstream globbing expects.
                  $this .= $1 while s/^'([^']*)'// ||
                                    s/^"([^"]*)"// ||
                                    s/^([^'"\s\\]+)//||
                                    s/^(\\[\D\d])//;
                  ## Nothing matched but text remains: e.g. an unclosed quote.
                  $! = 2, die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
              }
              push(@ARGV, $this) if defined $this;

              &check_args;
              $! = 2, die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
              next;
          }

          if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
              next if $all && !$show;  ## -all turns off these checks;
              local($bytes, $check) = ($1, $');

              if ($show) {
                  $check =~ s/\n?$/\n/;
                  print " $do contents: $check";
              }

              ## Check to make sure the thing at least compiles.
              ## ($bytes is reached from package magic as $main'bytes.)
              eval "package magic; (\$H = '1'x \$main'bytes) && (\n$check\n)\n";
              $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

              $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
              push(@magic_tests, "(\n$check\n)");
              next;
          }

          $! = 2, die "$file $ln: unknown command\n";
      }
      close(RC);
  }
  sub message
  {
      ## Show a progress message on STDERR.
      ## When STDERR is not a terminal the text is simply printed on a line
      ## of its own.  On a terminal it is written followed by a carriage
      ## return, blank-padded so that it fully overwrites the previous
      ## message; the length shown is remembered in $last_message_length.
      unless ($STDERR_IS_TTY) {
          return print STDERR $_[0], "\n";
      }

      local($text) = @_;
      $thislength = length($text);

      ## Pad only when the new text is shorter than what is on screen.
      print STDERR $text,
                   ($thislength < $last_message_length
                        ? ' ' x ($last_message_length - $thislength)
                        : ''),
                   "\r";

      $last_message_length = $thislength;
  }
  sub clear_message
  {
      ## Blank out whatever &message last left on the terminal line (if
      ## anything), then reset the running -vv counters and the remembered
      ## message length to zero.
      if ($last_message_length) {
          print STDERR ' ' x $last_message_length, "\r";
      }
      $last_message_length = 0;
      $vv_size = 0;
      $vv_print = 0;
  }
  694. ##
  695. ## Output a copy of this program with comments, extra whitespace, and
  696. ## the trailing man page removed. On an ultra slow machine, such a copy
  697. ## might load faster (but I can't tell any difference on my machine).
  698. ##
  sub strip {
      ## Print a slimmed-down copy of this program to STDOUT and exit(0).
      ## The script is re-read from the start through the DATA handle;
      ## here-document text is copied verbatim, every other line loses its
      ## ## comment plus leading and trailing whitespace, and output stops
      ## at the '.00;' line that begins the trailing man page.
      seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
      while(<DATA>) {
          print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
          ## must mention INLINE_LITERAL_TEXT on this line!
          ## (the statement above opens a verbatim range on itself when this
          ## program is stripped; the comment line just above closes it.)

          ## Three separate passes: a single non-global s/A|B|C// would only
          ## remove the leftmost one, leaving the comment and the newline
          ## behind on indented lines.
          s/\#\#.*//; s/^\s+//; s/\s+$//; ## remove cruft

          last if $_ eq '.00;';
          next if ($_ eq '') || ($_ eq "'di'") || ($_ eq "'ig00'");
          s/\$stripped=0;/\$stripped=1;/;
          ## NOTE(review): only the first run of whitespace on the line is
          ## squished (no /g) -- confirm before widening, since a /g would
          ## also reach into string literals.
          s/\s\s+/ /; ## squish multiple whitespaces down to one.
          print $_, "\n";
      }
      exit(0);
  }
  713. ##
  714. ## Just to shut up -w. Never executed.
  715. ##
  sub dummy {
      ## Exists only to mention, a second time, names that would otherwise
      ## draw "used only once" complaints from -w.  The leading 1 short-
      ## circuits the whole chain, so nothing after it is ever evaluated.
      return 1
          || &dummy || &dir_done || &bad || &message
          || $NEXT_DIR_ENTRY || $DELAY || $VV_SIZE || $VV_PRINT_COUNT
          || $STDERR_SCREWS_STDOUT
          || @files || @files
          || $magic::H || $magic::H
          || $xdev{''}
          || &clear_message;
  }
  721. ##
  722. ## If the following __END__ is in place, what follows will be
  723. ## inlined when the program first starts up. Any $ variable name
  724. ## all in upper case, specifically, any string matching
  725. ## \$([A-Z][A-Z0-9_]{2,}\b)
  726. ## will have the true value for that variable inlined. Also, any 'eval' is
  727. ## removed
  728. ##
  729. ## The idea is that when the whole thing is then eval'ed to define &dodir,
  730. ## the perl optimizer will make all the decisions that are based upon
  731. ## command-line options (such as $VERBOSE), since they'll be inlined as
  732. ## constants
  733. ##
  734. ## Also, and here's the big win, the tests for matching the regex, and a
  735. ## few others, are all inlined. Should be blinding speed here.
  736. ##
  737. ## See the read from <DATA> above for where all this takes place.
  738. ## But all-in-all, you *want* the __END__ here. Comment it out only for
  739. ## debugging....
  740. ##
  741. __END__
  742. ##
  743. ## Given a directory, check all "appropriate" files in it.
  744. ## Shove any subdirectories into the global @todo, so they'll be done
  745. ## later.
  746. ##
  747. ## Be careful about adding any upper-case variables, as they are subject
  748. ## to being inlined. See comments above the __END__ above.
  749. ##
  750. sub dodir
  751. {
  752. local($dir) = @_;
  753. $dir =~ s,/+$,,; ## remove any trailing slash.
  754. unless (opendir(DIR, "$dir/.")) {
  755. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  756. warn qq($0: can't opendir "$dir/".\n);
  757. return;
  758. }
  759. if ($VERBOSE) {
  760. &message($dir);
  761. $vv_print = $vv_size = 0;
  762. }
  763. @files = sort readdir(DIR) if $DO_SORT;
  764. while (defined($name = eval $NEXT_DIR_ENTRY))
  765. {
  766. next if $name eq '.' || $name eq '..'; ## never follow these.
  767. ## create full relative pathname.
  768. $file = $dir eq '.' ? $name : "$dir/$name";
  769. ## if link and skipping them, do so.
  770. if ($NOLINKS && -l $file) {
  771. warn qq/skip (symlink): $file\n/ if $WHY;
  772. next;
  773. }
  774. ## skip things unless files or directories
  775. unless (-f $file || -d _) {
  776. if ($WHY) {
  777. $why = (-S _ && "socket") ||
  778. (-p _ && "pipe") ||
  779. (-b _ && "block special")||
  780. (-c _ && "char special") || "somekinda special";
  781. warn qq/skip ($why): $file\n/;
  782. }
  783. next;
  784. }
  785. ## skip things we can't read
  786. unless (-r _) {
  787. if ($WHY) {
  788. $why = (-l $file) ? "follow" : "read";
  789. warn qq/skip (can't $why): $file\n/;
  790. }
  791. next;
  792. }
  793. ## skip things that are empty
  794. unless (-s _ || -d _) {
  795. warn qq/skip (empty): $file\n/ if $WHY;
  796. next;
  797. }
  798. ## Note file device & inode. If -xdev, skip if appropriate.
  799. ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE];
  800. if ($XDEV && defined $xdev{$dev}) {
  801. warn qq/skip (other device): $file\n/ if $WHY;
  802. next;
  803. }
  804. $id = "$dev,$inode";
  805. ## special work for a directory
  806. if (-d _) {
  807. ## Do checks for directory file endings.
  808. if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) {
  809. warn qq/skip (-dskip): $file\n/ if $WHY;
  810. next;
  811. }
  812. ## do checks for -name/-regex/-path tests
  813. if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) {
  814. warn qq/skip (dirname): $file\n/ if $WHY;
  815. next;
  816. }
  817. ## _never_ redo a directory
  818. if (defined $dir_done{$id} and $^O ne 'MSWin32') {
  819. warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY;
  820. next;
  821. }
  822. $dir_done{$id} = $file; ## mark it done.
  823. unshift(@todo, $file); ## add to the list to do.
  824. next;
  825. }
  826. if ($WHY == 0 && $VERBOSE > 1) {
  827. if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
  828. &message($file);
  829. $vv_print = $vv_size = 0;
  830. }
  831. }
  832. ## do time-related tests
  833. if ($NEWER || $OLDER) {
  834. $_ = (stat(_))[$STAT_MTIME];
  835. if ($NEWER && $_ < $NEWER) {
  836. warn qq/skip (too old): $file\n/ if $WHY;
  837. next;
  838. }
  839. if ($OLDER && $_ > $OLDER) {
  840. warn qq/skip (too new): $file\n/ if $WHY;
  841. next;
  842. }
  843. }
  844. ## do checks for file endings
  845. if ($DO_SKIP_TEST && (eval $SKIP_TEST)) {
  846. warn qq/skip (-skip): $file\n/ if $WHY;
  847. next;
  848. }
  849. ## do checks for -name/-regex/-path tests
  850. if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) {
  851. warn qq/skip (filename): $file\n/ if $WHY;
  852. next;
  853. }
  854. ## If we're not repeating files,
  855. ## skip this one if we've done it, or note we're doing it.
  856. unless ($DOREP) {
  857. if (defined $file_done{$id}) {
  858. warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY;
  859. next;
  860. }
  861. $file_done{$id} = $file;
  862. }
  863. if ($DO_MAGIC_TESTS) {
  864. if (!open(FILE_IN, $file)) {
  865. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  866. warn qq/$0: can't open: $file\n/;
  867. next;
  868. }
  869. unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) {
  870. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  871. warn qq/$0: can't read from "$file"\n"/;
  872. close(FILE_IN);
  873. next;
  874. }
  875. eval $MAGIC_TESTS;
  876. if ($magic'val) {
  877. close(FILE_IN);
  878. warn qq/skip (magic): $file\n/ if $WHY;
  879. next;
  880. }
  881. seek(FILE_IN, 0, 0); ## reset for later <FILE_IN>
  882. }
  883. if ($WHY != 0 && $VERBOSE > 1) {
  884. if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
  885. &message($file);
  886. $vv_print = $vv_size = 0;
  887. }
  888. }
  889. if ($DELAY) {
  890. sleep($DELAY);
  891. }
  892. if ($FIND_ONLY) {
  893. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  894. print $file, "\n";
  895. $retval=0; ## we've found something
  896. close(FILE_IN) if $DO_MAGIC_TESTS;
  897. next;
  898. } else {
  899. ## if we weren't doing magic tests, file won't be open yet...
  900. if (!$DO_MAGIC_TESTS && !open(FILE_IN, $file)) {
  901. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  902. warn qq/$0: can't open: $file\n/;
  903. next;
  904. }
  905. if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) {
  906. ##
  907. ## This is rather complex, but buys us a LOT when we're just
  908. ## listing files and not the individual internal lines.
  909. ##
  910. local($size) = 4096; ## block-size in which to do reads
  911. local($nl); ## will point to $_'s ending newline.
  912. local($read); ## will be how many bytes read.
  913. local($_) = ''; ## Starts out empty
  914. local($hold); ## (see below)
  915. while (($read = read(FILE_IN,$_,$size,length($_)))||length($_))
  916. {
  917. undef @parts;
  918. ## if read a full block, but no newline, need to read more.
  919. while ($read == $size && ($nl = rindex($_, "\n")) < 0) {
  920. push(@parts, $_); ## save that part
  921. $read = read(FILE_IN, $_, $size); ## keep trying
  922. }
  923. ##
  924. ## If we had to save parts, must now combine them together.
  925. ## adjusting $nl to reflect the now-larger $_. This should
  926. ## be a lot more efficient than using any kind of .= in the
  927. ## loop above.
  928. ##
  929. if (@parts) {
  930. local($lastlen) = length($_); #only need if $nl >= 0
  931. $_ = join('', @parts, $_);
  932. $nl = length($_) - ($lastlen - $nl) if $nl >= 0;
  933. }
  934. ##
  935. ## If we're at the end of the file, then we can use $_ as
  936. ## is. Otherwise, we need to remove the final partial-line
  937. ## and save it so that it'll be at the beginning of the
  938. ## next read (where the rest of the line will be layed in
  939. ## right after it). $hold will be what we should save
  940. ## until next time.
  941. ##
  942. if ($read != $size || $nl < 0) {
  943. $hold = '';
  944. } else {
  945. $hold = substr($_, $nl + 1);
  946. substr($_, $nl + 1) = '';
  947. }
  948. ##
  949. ## Now have a bunch of full lines in $_. Use it.
  950. ##
  951. if (eval $REGEX_TEST) {
  952. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  953. print $file, "\n";
  954. $retval=0; ## we've found something
  955. last;
  956. }
  957. ## Prepare for next read....
  958. $_ = $hold;
  959. }
  960. } else { ## else not using faster block scanning.....
  961. $lines_printed = 0 if $NICE;
  962. while (<FILE_IN>) {
  963. study;
  964. next unless (eval $REGEX_TEST);
  965. ##
  966. ## We found a matching line.
  967. ##
  968. $retval=0;
  969. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  970. if ($LIST_ONLY) {
  971. print $file, "\n";
  972. last;
  973. } else {
  974. ## prepare to print line.
  975. if ($NICE && $lines_printed++ == 0) {
  976. print '-' x 70, "\n" if $NICE > 1;
  977. print $file, ":\n";
  978. }
  979. ##
  980. ## Print all the prelim stuff. This looks less efficient
  981. ## than it needs to be, but that's so that when the eval
  982. ## is compiled (and the tests are optimized away), the
  983. ## result will be less actual PRINTs than the more natural
  984. ## way of doing these tests....
  985. ##
  986. if ($NICE) {
  987. if ($REPORT_LINENUM) {
  988. print " line $.: ";
  989. } else {
  990. print " ";
  991. }
  992. } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) {
  993. print "$file,:$.: ";
  994. } elsif ($PREPEND_FILENAME) {
  995. print "$file: ";
  996. } elsif ($REPORT_LINENUM) {
  997. print "$.: ";
  998. }
  999. print $_;
  1000. print "\n" unless m/\n$/;
  1001. }
  1002. }
  1003. print "\n" if ($NICE > 1) && $lines_printed;
  1004. }
  1005. close(FILE_IN);
  1006. }
  1007. }
  1008. closedir(DIR);
  1009. }
  1010. __END__
  1011. .00; ## finish .ig
  1012. 'di \" finish diversion--previous line must be blank
  1013. .nr nl 0-1 \" fake up transition to first page again
  1014. .nr % 0 \" start at page 1
  1015. .\"__________________NORMAL_MAN_PAGE_BELOW_________________
  1016. .ll+10n
  1017. .TH search 1 "Dec 17, 1994"
  1018. .SH SEARCH
  1019. search \- search files (a'la grep) in a whole directory tree.
  1020. .SH SYNOPSIS
  1021. search [ grep-like and find-like options] [regex ....]
  1022. .SH DESCRIPTION
  1023. .I Search
  1024. is more or less a combo of 'find' and 'grep' (although the regular
  1025. expression flavor is that of the perl being used, which is closer to
  1026. egrep's than grep's).
  1027. .I Search
  1028. does generally the same kind of thing that
  1029. .nf
  1030. find <blah blah> | xargs egrep <blah blah>
  1031. .fi
  1032. does, but is
  1033. .I much
  1034. more powerful and efficient (and intuitive, I think).
  1035. This manual describes
  1036. .I search
  1037. as of version "941227.4". You can always find the latest version at
  1038. .nf
  1039. http://www.wg.omron.co.jp/~jfriedl/perl/index.html
  1040. .fi
  1041. .SH "QUICK EXAMPLE"
  1042. Basic use is simple:
  1043. .nf
  1044. % search jeff
  1045. .fi
  1046. will search files in the current directory, and all sub directories, for
  1047. files that have "jeff" in them. The lines will be listed with the
  1048. containing file's name prepended.
  1049. .PP
  1050. If you list more than one regex, such as with
  1051. .nf
  1052. % search jeff Larry Randal+ 'Stoc?k' 'C.*son'
  1053. .fi
  1054. then a line containing any of the regexes will be listed.
  1055. This makes it effectively the same as
  1056. .nf
  1057. % search 'jeff|Larry|Randal+|Stoc?k|C.*son'
  1058. .fi
  1059. However, listing them separately is much more efficient (and is easier
  1060. to type).
  1061. .PP
  1062. Note that in the case of these examples, the
  1063. .B \-w
  1064. (list whole-words only) option would be useful.
  1065. .PP
  1066. Normally, various kinds of files are automatically removed from consideration.
  1067. If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if
  1068. the beginning of the file looks like a binary, it'll be excluded.
  1069. You can control exactly how this works -- see below. One quick way to
  1070. override this is to use the
  1071. .B \-all
  1072. option, which means to consider all the files that would normally be
  1073. automatically excluded.
  1074. Or, if you're curious, you can use
  1075. .B \-why
  1076. to have notes about what files are skipped (and why) printed to stderr.
  1077. .SH "BASIC OVERVIEW"
  1078. Normally, the search starts in the current directory, considering files in
  1079. all subdirectories.
  1080. You can use the
  1081. .I ~/.search
  1082. file to control ways to automatically exclude files.
  1083. If you don't have this file, a default one will kick in, which automatically
  1084. adds
  1085. .nf
  1086. -skip .o .Z .gif
  1087. .fi
  1088. (among others) to exclude those kinds of files (which you probably want to
  1089. skip when searching for text, as is normal).
  1090. Files that look to be binary will also be excluded.
  1091. Files ending with "#" and "~" will also be excluded unless the
  1092. .B -x~
  1093. option is given.
  1094. You can use
  1095. .B -showrc
  1096. to show what kinds of files will normally be skipped.
  1097. See the section on the startup file
  1098. for more info.
  1099. You can use the
  1100. .B -all
  1101. option to indicate you want to consider all files that would otherwise be
  1102. skipped by the startup file.
  1103. Based upon various other flags (see "WHICH FILES TO CONSIDER" below),
  1104. more files might be removed from consideration. For example
  1105. .nf
  1106. -mtime 3
  1107. .fi
  1108. will exclude files that aren't at least three days old (change the 3 to -3
  1109. to exclude files that are more than three days old), while
  1110. .nf
  1111. -skip .*
  1112. .fi
  1113. would exclude any file beginning with a dot (of course, '.' and '..' are
  1114. special and always excluded).
  1115. If you'd like to see what files are being excluded, and why, you can get the
  1116. list via the
  1117. .B \-why
  1118. option.
  1119. If a file makes it past all the checks, it is then "considered".
  1120. This usually means it is grepped for the regular expressions you gave
  1121. on the command line.
  1122. If any of the regexes match a line, the line is printed.
  1123. However, if
  1124. .B -list
  1125. is given, just the filename is printed. Or, if
  1126. .B -nice
  1127. is given, a somewhat more (human-)readable output is generated.
  1128. If you're searching a huge tree and want to keep informed about how
  1129. the search is progressing,
  1130. .B -v
  1131. will print (to stderr) the current directory being searched.
  1132. Using
  1133. .B -vv
  1134. will also print the current file "every so often", which could be useful
  1135. if a directory is huge. Using
  1136. .B -vvv
  1137. will print the update with every file.
  1138. Below is the full listing of options.
  1139. .SH "OPTIONS TELLING *WHERE* TO SEARCH"
  1140. .TP
  1141. .BI -dir " DIR"
  1142. Start searching at the named directory instead of the current directory.
  1143. If multiple
  1144. .B -dir
  1145. arguments are given, multiple trees will be searched.
  1146. .TP
  1147. .BI -ddir " DIR"
  1148. Like
  1149. .B -dir
  1150. except it flushes any previous
  1151. .B -dir
  1152. directories (i.e. "-dir A -dir B -dir C" will search A, B, and C, while
  1153. "-dir A -ddir B -dir C" will search only B and C). This might be of use
  1154. in the startup file (see that section below).
  1155. .TP
  1156. .B -xdev
  1157. Stay on the same filesystem as the starting directory/directories.
  1158. .TP
  1159. .B -sort
  1160. Sort the items in a directory before processing them.
  1161. Normally they are processed in whatever order they happen to be read from
  1162. the directory.
  1163. .TP
  1164. .B -nolinks
  1165. Don't follow symbolic links. Normally they're followed.
  1166. .SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE"
  1167. .TP
  1168. .BI -mtime " NUM"
  1169. Only consider files that were last changed more than
  1170. .I NUM
  1171. days ago
  1172. (less than
  1173. .I NUM
  1174. days if
  1175. .I NUM
  1176. has '-' prepended, i.e. "-mtime -2.5" means to consider files that
  1177. have been changed in the last two and a half days).
  1178. .TP
  1179. .BI -older " FILE"
  1180. Only consider files that have not changed since
  1181. .I FILE
  1182. was last changed.
  1183. If there is any upper case in the "-older", "or equal" is added to the sense
  1184. of the test. Therefore, "search -older ./file regex" will never consider
  1185. "./file", while "search -Older ./file regex" will.
  1186. If a file is a symbolic link, the time used is that of the file and not the
  1187. link.
  1188. .TP
  1189. .BI -newer " FILE"
  1190. Opposite of
  1191. .BR -older .
  1192. .TP
  1193. .BI -name " GLOB"
  1194. Only consider files that match the shell filename pattern
  1195. .IR GLOB .
  1196. The check is only done on a file's name (use
  1197. .B -path
  1198. to check the whole path, and use
  1199. .B -dname
  1200. to check directory names).
  1201. Multiple specifications can be given by separating them with spaces, a'la
  1202. .nf
  1203. -name '*.c *.h'
  1204. .fi
  1205. to consider C source and header files.
  1206. If
  1207. .I GLOB
  1208. doesn't contain any special pattern characters, a '*' is prepended.
  1209. This last example could have been given as
  1210. .nf
  1211. -name '.c .h'
  1212. .fi
  1213. It could also be given as
  1214. .nf
  1215. -name .c -name .h
  1216. .fi
  1217. or
  1218. .nf
  1219. -name '*.c' -name '*.h'
  1220. .fi
  1221. or
  1222. .nf
  1223. -name '*.[ch]'
  1224. .fi
  1225. (among others)
  1226. but in this last case, you have to be sure to supply the leading '*'.
  1227. .TP
  1228. .BI -path " GLOB"
  1229. Like
  1230. .B -name
  1231. except the entire path is checked against the pattern.
  1232. .TP
  1233. .BI -regex " REGEX"
  1234. Considers files whose names (not paths) match the given perl regex
  1235. exactly.
  1236. .TP
  1237. .BI -iname " GLOB"
  1238. Case-insensitive version of
  1239. .BR -name .
  1240. .TP
  1241. .BI -ipath " GLOB"
  1242. Case-insensitive version of
  1243. .BR -path .
  1244. .TP
  1245. .BI -iregex " REGEX"
  1246. Case-insensitive version of
  1247. .BR -regex .
  1248. .TP
  1249. .BI -dpath " GLOB"
  1250. Only search down directories whose path matches the given pattern (this
  1251. doesn't apply to the initial directory given by
  1252. .BR -dir ,
  1253. of course).
  1254. Something like
  1255. .nf
  1256. -dir /usr/man -dpath /usr/man/man*
  1257. .fi
  1258. would completely skip
  1259. "/usr/man/cat1", "/usr/man/cat2", etc.
  1260. .TP
  1261. .BI -dskip " GLOB"
  1262. Skips directories whose name (not path) matches the given pattern.
  1263. Something like
  1264. .nf
  1265. -dir /usr/man -dskip cat*
  1266. .fi
  1267. would completely skip any directory in the tree whose name begins with "cat"
  1268. (including "/usr/man/cat1", "/usr/man/cat2", etc.).
  1269. .TP
  1270. .BI -dregex " REGEX"
  1271. Like
  1272. .BR -dpath ,
  1273. but the pattern is a full perl regex. Note that this is quite different
  1274. from
  1275. .B -regex
  1276. which considers only file names (not paths). This option considers
  1277. full directory paths (not just names). It's much more useful this way.
  1278. Sorry if it's confusing.
  1279. .TP
  1280. .BI -dname " GLOB"
  1281. This option exists, but is probably not very useful. It probably wants to
  1282. be like the '-below' or something I mention in the "TODO" section.
  1283. .TP
  1284. .BI -idpath " GLOB"
  1285. Case-insensitive version of
  1286. .BR -dpath .
  1287. .TP
  1288. .BI -idskip " GLOB"
  1289. Case-insensitive version of
  1290. .BR -dskip .
  1291. .TP
  1292. .BI -idregex " REGEX"
  1293. Case-insensitive version of
  1294. .BR -dregex .
  1295. .TP
  1296. .B -all
  1297. Ignore any 'magic' or 'option' lines in the startup file.
  1298. The effect is that all files that would otherwise be automatically
  1299. excluded are considered.
  1300. .TP
  1301. .BI -x SPECIAL
  1302. Arguments starting with
  1303. .B -x
  1304. (except
  1305. .BR -xdev ,
  1306. explained elsewhere) do special interaction with the
  1307. .I ~/.search
  1308. startup file. Something like
  1309. .nf
  1310. -xflag1 -xflag2
  1311. .fi
  1312. will turn on "flag1" and "flag2" in the startup file (and is
  1313. the same as "-xflag1,flag2"). You can use this to write your own
  1314. rules for what kinds of files are to be considered.
  1315. For example, the internal-default startup file contains the line
  1316. .nf
  1317. <!~> option: -skip '~ #'
  1318. .fi
  1319. This means that if the
  1320. .B -x~
  1321. flag is
  1322. .I not
  1323. seen, the option
  1324. .nf
  1325. -skip '~ #'
  1326. .fi
  1327. should be done.
  1328. The effect is that emacs temp and backup files are not normally
  1329. considered, but you can include them with the -x~ flag.
  1330. You can write your own rules to customize
  1331. .I search
  1332. in powerful ways. See the STARTUP FILE section below.
  1333. .TP
  1334. .B -why
  1335. Print a message (to stderr) when and why a file is not considered.
  1336. .SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED"
  1337. .TP
  1338. .B -find
  1339. (you can use
  1340. .B -f
  1341. as well).
  1342. This option changes the basic action of
  1343. .IR search .
  1344. Normally, if a file is considered, it is searched
  1345. for the regular expressions as described earlier. However, if this option
  1346. is given, the filename is printed and no searching takes place. This turns
  1347. .I search
  1348. into a 'find' of some sorts.
  1349. In this case, no regular expressions are needed on the command line
  1350. (any that are there are silently ignored).
  1351. This is not intended to be a replacement for the 'find' program,
  1352. but to aid
  1353. you in understanding just what files are getting past the exclusion checks.
  1354. If you really want to use it as a sort of replacement for the 'find' program,
  1355. you might want to use
  1356. .B -all
  1357. so that it doesn't waste time checking to see if the file is binary, etc
  1358. (unless you really want that, of course).
  1359. If you use
  1360. .BR -find ,
  1361. none of the "GREP-LIKE OPTIONS" (below) matter.
  1362. As a replacement for 'find',
  1363. .I search
  1364. is probably a bit slower (or in the case of GNU find, a lot slower --
  1365. GNU find is
  1366. .I unbelievably
  1367. fast).
  1368. However, "search -ffind"
  1369. might be more useful than 'find' when options such as
  1370. .B -skip
  1371. are used (at least until 'find' gets such functionality).
  1372. .TP
  1373. .B -ffind
  1374. (or
  1375. .BR -ff )
  1376. A faster more 'find'-like find. Does
  1377. .nf
  1378. -find -all -dorep
  1379. .fi
  1380. .SH "GREP-LIKE OPTIONS"
  1381. These options control how a searched file is accessed,
  1382. and how things are printed.
  1383. .TP
  1384. .B -i
  1385. Ignore letter case when matching.
  1386. .TP
  1387. .B -w
  1388. Consider only whole-word matches ("whole word" as defined by perl's "\\b"
  1389. regex).
  1390. .TP
  1391. .B -u
  1392. If the regex(es) is/are simple, try to modify them so that they'll work
  1393. in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs).
  1394. This is very rudimentary at the moment.
  1395. .TP
  1396. .B -list
  1397. (you can use
  1398. .B -l
  1399. too).
  1400. Don't print matching lines, but the names of files that contain matching
  1401. lines. This will likely be *much* faster, as special optimizations are
  1402. made -- particularly with large files.
  1403. .TP
  1404. .B -n
  1405. Prefix each line with its line number.
  1406. .TP
  1407. .B -nice
  1408. Not a grep-like option, but similar to
  1409. .BR -list ,
  1410. so included here.
  1411. .B -nice
  1412. will have the output be a bit more human-readable, with matching lines printed
  1413. slightly indented after the filename, a'la
  1414. .nf
  1415. % search foo
  1416. somedir/somefile: line with foo in it
  1417. somedir/somefile: some food for thought
  1418. anotherdir/x: don't be a buffoon!
  1419. %
  1420. .fi
  1421. will become
  1422. .nf
  1423. % search -nice foo
  1424. somedir/somefile:
  1425. line with foo in it
  1426. some food for thought
  1427. anotherdir/x:
  1428. don't be a buffoon!
  1429. %
  1430. .fi
  1431. This option due to Lionel Cons.
  1432. .TP
  1433. .B -nnice
  1434. Be a bit nicer than
  1435. .BR -nice .
  1436. Prefix each file's output by a rule line, and follow with an extra blank line.
  1437. .TP
  1438. .B -h
  1439. Don't prepend each output line with the name of the file
  1440. (meaningless when
  1441. .B -find
  1442. or
  1443. .B -l
  1444. are given).
  1445. .SH "OTHER OPTIONS"
  1446. .TP
  1447. .B -help
  1448. Print the usage information.
  1449. .TP
  1450. .B -version
  1451. Print the version information and quit.
  1452. .TP
  1453. .B -v
  1454. Set the level of message verbosity.
  1455. .B -v
  1456. will print a note whenever a new directory is entered.
  1457. .B -vv
  1458. will also print a note "every so often". This can be useful to see
  1459. what's happening when searching huge directories.
  1460. .B -vvv
  1461. will print a new note with every file.
  1462. .B -vvvv
  1463. is
  1464. -vvv
  1465. plus
  1466. .BR -why .
  1467. .TP
  1468. .B -e
  1469. This ends the options, and can be useful if the regex begins with '-'.
  1470. .TP
  1471. .B -showrc
  1472. Shows what is being considered in the startup file, then exits.
  1473. .TP
  1474. .B -dorep
  1475. Normally, an identical file won't be checked twice (even with multiple
  1476. hard or symbolic links). If you're just trying to do a fast
  1477. .BR -find ,
  1478. the bookkeeping to remember which files have been seen is not desirable,
  1479. so you can eliminate the bookkeeping with this flag.
  1480. .SH "STARTUP FILE"
  1481. When
  1482. .I search
  1483. starts up, it processes the directives in
  1484. .IR ~/.search .
  1485. If no such file exists, a default
  1486. internal version is used.
  1487. The internal version looks like:
  1488. .nf
  1489. magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
  1490. option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
  1491. option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
  1492. <!~> option: -skip '~ #'
  1493. .fi
  1494. If you wish to create your own "~/.search",
  1495. you might consider copying the above, and then working from there.
  1496. There are two kinds of directives in a startup file: "magic" and "option".
  1497. .RS 0n
  1498. .TP
  1499. OPTION
  1500. Option lines will automatically do the command-line options given.
  1501. For example, the line
  1502. .nf
  1503. option: -v
  1504. .fi
  1505. in your startup file will turn on -v every time, without needing to type it
  1506. on the command line.
  1507. The text on the line after the "option:" directive is processed
  1508. like the Bourne shell, so make sure to pay attention to quoting.
  1509. .nf
  1510. option: -skip .exe .com
  1511. .fi
  1512. will give an error (".com" by itself isn't a valid option), while
  1513. .nf
  1514. option: -skip ".exe .com"
  1515. .fi
  1516. will properly include it as part of -skip's argument.
  1517. .TP
  1518. MAGIC
  1519. Magic lines are used to determine if a file should be considered a binary
  1520. or not (the term "magic" refers to checking a file's magic number). These
  1521. are described in more detail below.
  1522. .RE
  1523. Blank lines and comments (lines beginning with '#') are allowed.
  1524. If a line begins with <...>, then it's a check to see if the
  1525. directive on the line should be done or not. The stuff inside the <...>
  1526. can contain perl's && (and), || (or), ! (not), and parens for grouping,
  1527. along with "flags" that might be indicated by the user with
  1528. .BI -x flag
  1529. options.
  1530. For example, using "-xfoo" will cause "foo" to be true inside the <...>
  1531. blocks. Therefore, a line beginning with "<foo>" would be done only when
  1532. "-xfoo" had been specified, while a line beginning with "<!foo>" would be
  1533. done only when "-xfoo" is not specified (of course, a line without any <...>
  1534. is done in either case).
  1535. A realistic example might be
  1536. .nf
  1537. <!v> option: -vv
  1538. .fi
  1539. This will cause -vv messages to be the default, but allow "-xv" to override.
  1540. There are a few flags that are set automatically:
  1541. .RS
  1542. .TP
  1543. .B TTY
  1544. true if the output is to the screen (as opposed to being redirected to a file).
  1545. You can force this (as with all the other automatic flags) with -xTTY.
  1546. .TP
  1547. .B -v
  1548. True if -v was specified. If -vv was specified, both
  1549. .B -v
  1550. and
  1551. .B -vv
  1552. flags are true (and so on).
  1553. .TP
  1554. .B -nice
  1555. True if -nice was specified. Same thing about -nnice as for -vv.
  1556. .PP
  1557. .TP
  1558. .B -list
  1559. true if -list (or -l) was given.
  1560. .TP
  1561. .B -dir
  1562. true if -dir was given.
  1563. .RE
  1564. Using this info, you might change the last example to
  1565. .nf
  1566. <!v && !-v> option: -vv
  1567. .fi
  1568. The added "&& !-v" means "and if the '-v' option not given".
  1569. This will allow you to use "-v" alone on the command line, and not
  1570. have this directive add the more verbose "-vv" automatically.
  1571. .RS 0
  1572. Some other examples:
  1573. .TP
  1574. <!-dir && !here> option: -dir ~/
  1575. Effectively make the default directory your home directory (instead of the
  1576. current directory). Using -dir or -xhere will undo this.
  1577. .TP
  1578. <tex> option: -name .tex -dir ~/pub
  1579. Create '-xtex' to search only "*.tex" files in your ~/pub directory tree.
  1580. Actually, this could be made a bit better. If you combine '-xtex' and '-dir'
  1581. on the command line, this directive will add ~/pub to the list, when you
  1582. probably want to use the -dir directory only. You could do
  1583. .nf
  1584. <tex> option: -name .tex
  1585. <tex && !-dir> option: -dir ~/pub
  1586. .fi
  1587. to allow '-xtex' to work as before, but allow a command-line "-dir"
  1588. to take precedence with respect to ~/pub.
  1589. .TP
  1590. <fluff> option: -nnice -sort -i -vvv
  1591. Combine a few user-friendly options into one '-xfluff' option.
  1592. .TP
  1593. <man> option: -ddir /usr/man -v -w
  1594. When the '-xman' option is given, search "/usr/man" for whole-words
  1595. (of whatever regex or regexes are given on the command line), with -v.
  1596. .RE
  1597. The lines in the startup file are executed from top to bottom, so something
  1598. like
  1599. .nf
  1600. <both> option: -xflag1 -xflag2
  1601. <flag1> option: ...whatever...
  1602. <flag2> option: ...whatever...
  1603. .fi
  1604. will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2'
  1605. for that matter). However, if you put the "<both>" line below the others,
  1606. they will not be true when encountered, so the result would be different
  1607. (and probably undesired).
  1608. The "magic" directives are used to determine if a file looks to be binary
  1609. or not. The form of a magic line is
  1610. .nf
  1611. magic: \fISIZE\fP : \fIPERLCODE\fP
  1612. .fi
  1613. where
  1614. .I SIZE
  1615. is the number of bytes of the file you need to check, and
  1616. .I PERLCODE
  1617. is the code to do the check. Within
  1618. .IR PERLCODE ,
  1619. the variable $H will hold at least the first
  1620. .I SIZE
  1621. bytes of the file (unless the file is shorter than that, of course).
  1622. It might hold more bytes. The perl should evaluate to true if the file
  1623. should be considered a binary.
  1624. An example might be
  1625. .nf
  1626. magic: 6 : substr($H, 0, 6) eq 'GIF87a'
  1627. .fi
  1628. to test for a GIF ("-iskip .gif" is better, but this might be useful
  1629. if you have images in files without the ".gif" extension).
  1630. Since the startup file is checked from top to bottom, you can be a bit
  1631. efficient:
  1632. .nf
  1633. magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
  1634. magic: 6 : $x6 eq 'GIF89a'
  1635. .fi
  1636. You could also write the same thing as
  1637. .nf
  1638. magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e
  1639. $x6 eq 'GIF89a' ## .. a new one.
  1640. .fi
  1641. since newlines may be escaped.
  1642. The default internal startup file includes
  1643. .nf
  1644. magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
  1645. .fi
  1646. which checks for certain non-printable characters, and catches a large
  1647. number of binary files, including most systems' executables, linkable
  1648. objects, compressed, tarred, and otherwise folded, spindled, and mutilated
  1649. files.
  1650. Another example might be
  1651. .nf
  1652. ## an archive library
  1653. magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF"
  1654. .fi
  1655. .SH "RETURN VALUE"
  1656. .I Search
  1657. returns zero if lines (or files, if appropriate) were found,
  1658. or if no work was requested (such as with
  1659. .BR -help ).
  1660. Returns 1 if no lines (or files) were found.
  1661. Returns 2 on error.
  1662. .SH TODO
  1663. Things I'd like to add some day:
  1664. .nf
  1665. + show surrounding lines (context).
  1666. + highlight matched portions of lines.
  1667. + add '-and', which can go between regexes to override
  1668. the default logical or of the regexes.
  1669. + add something like
  1670. -below GLOB
  1671. which will examine a tree and only consider files that
  1672. lie in a directory deeper than one named by the pattern.
  1673. + add 'warning' and 'error' directives.
  1674. + add 'help' directive.
  1675. .fi
  1676. .SH BUGS
  1677. If -xdev and multiple -dir arguments are given, any file in any of the
  1678. target filesystems is allowed. It would be better to allow each filesystem
  1679. for each separate tree.
  1680. Multiple -dir args might also cause some confusing effects. Doing
  1681. .nf
  1682. -dir some/dir -dir other
  1683. .fi
  1684. will search "some/dir" completely, then search "other" completely. This
  1685. is good. However, something like
  1686. .nf
  1687. -dir some/dir -dir some/dir/more/specific
  1688. .fi
  1689. will search "some/dir" completely *except for* "some/dir/more/specific",
  1690. after which it will return and be searched. Not really a bug, but just sort
  1691. of odd.
  1692. File times (for -newer, etc.) of symbolic links are for the file, not the
  1693. link. This could cause some misunderstandings.
  1694. Probably more. Please let me know.
  1695. .SH AUTHOR
  1696. Jeffrey Friedl, Omron Corp ([email protected])
  1697. .br
  1698. http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html
  1699. .SH "LATEST SOURCE"
  1700. See http://www.wg.omron.co.jp/~jfriedl/perl/index.html
  1701. __END__
  1702. :endofperl