Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1880 lines
54 KiB

  1. @rem = '--*-Perl-*--
  2. @echo off
  3. if "%OS%" == "Windows_NT" goto WinNT
  4. perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
  5. goto endofperl
  6. :WinNT
  7. perl -x -S "%0" %*
  8. if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
  9. if %errorlevel% == 9009 echo You do not have Perl in your PATH.
  10. goto endofperl
  11. @rem ';
  12. #!/usr/local/bin/perl -w
  13. #line 14
  14. 'di';
  15. 'ig00';
  16. ##############################################################################
  17. ##
  18. ## search
  19. ##
  20. ## Jeffrey Friedl ([email protected]), Dec 1994.
  21. ## Copyright 19.... ah hell, just take it.
  22. ##
  23. ## BLURB:
  24. ## A combo of find and grep -- more or less do a 'grep' on a whole
  25. ## directory tree. Fast, with lots of options. Much more powerful than
  26. ## the simple "find ... | xargs grep ....". Has a full man page.
  27. ## Powerfully customizable.
  28. ##
  29. ## This file is big, but mostly comments and man page.
  30. ##
  31. ## See man page for usage info.
  32. ## Return value: 2=error, 1=nothing found, 0=something found.
  33. ##
  34. $version = "950918.5";
  35. ##
  36. ## "950918.5";
  37. ## Changed all 'sysread' to 'read' because Linux perl's don't seem
  38. ## to like sysread()
  39. ##
  40. ## "941227.4";
  41. ## Added -n, -u
  42. ##
  43. ## "941222.3"
  44. ## Added -nice (due to Lionel Cons <[email protected]>)
  45. ## Removed any leading "./" from name.
  46. ## Added default flags for ~/.search, including TTY, -nice, -list, etc.
  47. ## Program name now has path removed when printed in diagnostics.
  48. ## Added simple tilde-expansion to -dir arg.
  49. ## Added -dskip, etc. Fixed -iregex bug.
  50. ## Changed -dir to be additive, adding -ddir.
  51. ## Now screen out devices, pipes, and sockets.
  52. ## More tidying and lots of expanding of the man page
  53. ##
  54. ##
  55. ## "941217.2";
  56. ## initial release.
  ## ---- Main program ----
  ## The statement below is rewritten textually by &strip when it emits a
  ## stripped copy of this file, so its exact spelling matters.
  57. $stripped=0;
  ## Give every option global its default value.
  58. &init;
  ## Startup file lives in the user's home directory.
  59. $rc_file = join('/', $ENV{'HOME'}, ".search");
  ## Consume the leading -options from @ARGV; whatever is left are regexes.
  60. &check_args;
  61. ## Make sure we've got a regex.
  62. ## Don't need one if -find or -showrc was specified.
  ## ($! is set to 2 first so that die exits with status 2, the "error"
  ## return value promised in the header.)
  63. $!=2, die "expecting regex arguments.\n"
  64. if $FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0;
  ## Read the rc file and build the test strings that &dodir relies on.
  65. &prepare_to_search($rc_file);
  66. &import_program if !defined &dodir; ## BIG key to speed.
  67. ## do search while there are directories to be done.
  68. &dodir(shift(@todo)) while @todo;
  ## Wipe any progress message left on the terminal.
  69. &clear_message if $VERBOSE && $STDERR_IS_TTY;
  ## $retval starts at 1 in &init and is set to 0 once something is found.
  70. exit($retval);
  71. ###############################################################################
  ##
  ## Give every option global its default value.  Called once, before the
  ## command line (and later the rc file) get a chance to override them.
  ## Takes no arguments; everything it sets is a package global.
  ##
  sub init
  {
      ## -- switches and values that command-line arguments may change --
      $DOREP   = ($^O eq 'MSWin32') ? 1 : 0;
                              ## -dorep: redo multi-hardlink files
                              ## (always on under MSWin32)
      $DO_SORT = 0;           ## -sort: sort files in a dir before checking
      $FIND_ONLY = 0;         ## -find: don't search files
      $LIST_ONLY = 0;         ## -l: list filenames only
      $NEWER = 0;             ## -newer, "-mtime -###"
      $NICE = 0;              ## -nice: print human-readable output
      $NOLINKS = 0;           ## -nolinks: don't follow symlinks
      $OLDER = 0;             ## -older, "-mtime ###"
      $PREPEND_FILENAME = 1;  ## cleared by -h (no filename prefix on lines)
      $REPORT_LINENUM = 0;    ## -n: show line numbers
      $VERBOSE = 0;           ## -v, -vv, etc.: verbosity level
      $WHY = 0;               ## -why, -vvv+: report why skipped
      $XDEV = 0;              ## -xdev: stay on one filesystem
      $all = 0;               ## -all: don't skip many kinds of files
      $iflag = '';            ## becomes 'i' with -i (ignore case)
      $norc = 0;              ## -norc: don't load rc file
      $showrc = 0;            ## -showrc: show what happens with rc file
      $underlineOK = 0;       ## -u: watch for underline stuff
      $words = 0;             ## -w: match whole-words only
      $DELAY = 0;             ## inter-file delay (seconds)

      $retval = 1;            ## exit status; set to 0 once anything is found

      ## -- offsets into the list returned by stat() --
      ## NOTE(review): perl's stat() list is zero-based (dev is element 0,
      ## ino is element 1), so the first two values look off by one.  The
      ## -xdev test in &dodir depends on the current values -- confirm both
      ## together before changing either.
      $STAT_DEV = 1;
      $STAT_INODE = 2;
      $STAT_MTIME = 9;

      ## -- progress-message throttling for -vv --
      $VV_PRINT_COUNT = 50;       ## report every VV_PRINT_COUNT files, or...
      $VV_SIZE = 1024*1024;       ## ...every VV_SIZE bytes searched
      $vv_size = 0;               ## running totals
      $vv_print = 0;

      ## default option the rc file may test for
      if (-t STDOUT) {
          $opt{'TTY'} = 1;
      }

      ## terminal facts needed by the progress/diagnostic message code
      $STDERR_IS_TTY = (-t STDERR) ? 1 : 0;
      $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

      ## drop any leading path from $0 so diagnostics show just the name
      $0 =~ s{.*/}{};
  }
  ##
  ## Check arguments.
  ##
  ## Shifts leading "-option" words off @ARGV and records each one in the
  ## option globals and/or %opt.  Stops at the first word not starting with
  ## '-', or just after -e.  &read_rc also calls this, with a localized
  ## @ARGV, for "option:" lines of the startup file.
  ##
  ## -version and -help print their text and exit(0).  A missing or bad
  ## option argument, or an unknown option, dies with exit status 2.
  ##
  sub check_args
  {
      while (@ARGV && $ARGV[0] =~ m/^-/)
      {
          $arg = shift(@ARGV);

          ## "-v -help" prints the version banner and then the long help.
          if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
              print qq/Jeffrey's file search, version "$version".\n/;
              exit(0) unless $arg eq '-help';
          }
          if ($arg eq '-help') {
              print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
-dir DIR start search at the named directory (default is current dir).
-xdev stay on starting file system.
-sort sort the files in each directory before processing.
-nolinks don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
-mtime # consider files modified > # days ago (-# for < # days old)
-newer FILE consider files modified more recently than FILE (also -older)
-name GLOB consider files whose name matches pattern (also -regex).
-skip GLOB opposite of -name: identifies files to not consider.
-path GLOB like -name, but for files whose whole path is described.
-dpath/-dregex/-dskip versions for selecting or pruning directories.
-all don't skip any files marked to be skipped by the startup file.
-x<SPECIAL> (see manual, and/or try -showrc).
-why report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
-f | -find just list files (PerlRegex ignored). Default is to grep them.
-ff | -ffind Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
-l | -list only list files with matches, not the lines themselves.
-nice | -nnice print more "human readable" output.
-n prefix each output line with its line number in the file.
-h don't prefix output lines with file name.
-u also look "inside" manpage-style underlined text
-i do case-insensitive searching.
-w match words only (as defined by perl's \\b).
OTHER OPTIONS:
-v, -vv, -vvv various levels of message verbosity.
-e end of options (in case a regex looks like an option).
-showrc show what the rc file sets, then exit.
-norc don't load the rc file.
-dorep check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
              print "Use -v -help for more verbose help.\n" unless $VERBOSE;
              print "This script file is also a man page.\n" unless $stripped;
              print <<INLINE_LITERAL_TEXT if $VERBOSE;
If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.
Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.
-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.
If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.
You can always find the latest version on the World Wide Web in
http://www.wg.omron.co.jp/~jfriedl/perl/
INLINE_LITERAL_TEXT
              exit(0);
          }

          ## simple switches
          $DOREP=1, next if $arg eq '-dorep';            ## do repeats
          $DO_SORT=1, next if $arg eq '-sort';           ## sort files
          $NOLINKS=1, next if $arg eq '-nolinks';        ## no sym. links
          $PREPEND_FILENAME=0, next if $arg eq '-h';     ## no filename prefix
          $REPORT_LINENUM=1, next if $arg eq '-n';       ## show line numbers
          $WHY=1, next if $arg eq '-why';                ## tell why skipped
          $XDEV=1, next if $arg eq '-xdev';              ## don't leave F.S.
          $all=1,$opt{'-all'}=1,next if $arg eq '-all';  ## don't skip *.Z, etc
          $iflag='i', next if $arg eq '-i';              ## ignore case
          $norc=1, next if $arg eq '-norc';              ## don't load rc file
          $showrc=1, next if $arg eq '-showrc';          ## show rc file
          $underlineOK=1, next if $arg eq '-u';          ## look through underlining
          $words=1, next if $arg eq '-w';                ## match "words" only
          &strip if $arg eq '-strip';                    ## dump this program
          last if $arg eq '-e';

          ## Anchored at both ends so that only "-delayNNN" itself matches.
          $DELAY=$1, next if $arg =~ m/^-delay(\d+)$/;
          $FIND_ONLY=1, next if $arg =~/^-f(ind)?$/;     ## do "find" only
          $FIND_ONLY=1, $DOREP=1, $all=1,
              next if $arg =~/^-ff(ind)?$/;              ## fast -find
          $LIST_ONLY=1,$opt{'-list'}=1,
              next if $arg =~/^-l(ist)?$/;               ## only list files

          ## -vvv also counts as -v and -vv for rc-file tags.
          if ($arg =~ m/^-(v+)$/) {                      ## verbosity
              $VERBOSE =length($1);
              foreach $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
              next;
          }
          if ($arg =~ m/^-(n+)ice$/) {                   ## "nice" output
              $NICE =length($1);
              foreach $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
              next;
          }

          ## -skip / -iskip / -dskip / -idskip: whitespace-separated globs.
          if ($arg =~ m/^-(i?)(d?)skip$/) {
              local($i) = $1 eq 'i';
              local($d) = $2 eq 'd';
              $! = 2, die qq/$0: expecting glob arg to $arg\n/ unless @ARGV;
              foreach (split(/\s+/, shift @ARGV)) {
                  if ($d) {
                      $idskip{$_}=1 if $i;
                      $dskip{$_}=1;
                  } else {
                      $iskip{$_}=1 if $i;
                      $skip{$_}=1;
                  }
              }
              next;
          }

          ## -name/-path/-regex and their i/d variants.  The key keeps the
          ## option spelling so &prepare_to_search can tell them apart.
          if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
              local($i) = $1 eq 'i';
              $! = 2, die qq/$0: expecting arg to $arg\n/ unless @ARGV;
              foreach (split(/\s+/, shift @ARGV)) {
                  $iname{join(',', $arg, $_)}=1 if $i;
                  $name{join(',', $arg, $_)}=1;
              }
              next;
          }

          ## -dir adds a starting directory; -ddir first forgets earlier ones.
          if ($arg =~ m/^-d?dir$/) {
              $opt{'-dir'}=1;
              $! = 2, die qq/$0: expecting filename arg to $arg\n/ unless @ARGV;
              $start = shift(@ARGV);
              $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
              $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
              $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
              undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
              push(@todo, $start);
              next;
          }

          ## -newer/-older FILE.  Any upper-case letter in the option makes
          ## the comparison "or equal", so the one-second nudge is skipped.
          if ($arg =~ m/^-(new|old)er$/i) {
              $! = 2, die "$0: expecting filename arg to $arg\n" unless @ARGV;
              local($file, $time) = shift(@ARGV);
              $! = 2, die qq/$0: can't stat ${arg}'s "$file".\n/
                  unless $time = (stat($file))[$STAT_MTIME];
              local($upper) = $arg =~ tr/A-Z//;
              if ($arg =~ m/new/i) {
                  $time++ unless $upper;
                  $NEWER = $time if $NEWER < $time;
              } else {
                  $time-- unless $upper;
                  $OLDER = $time if $OLDER == 0 || $OLDER > $time;
              }
              next;
          }

          ## -mtime DAYS (may be fractional; negative means "newer than").
          ## Anchored so that e.g. -xmtime reaches the -x handler below.
          if ($arg =~ m/^-mtime$/) {
              $! = 2, die "$0: expecting numerical arg to $arg\n" unless @ARGV;
              local($days) = shift(@ARGV);
              $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
              $days *= 3600 * 24;
              if ($days < 0) {
                  local($time) = $^T + $days;
                  $NEWER = $time if $NEWER < $time;
              } else {
                  local($time) = $^T - $days;
                  $OLDER = $time if $OLDER == 0 || $OLDER > $time;
              }
              next;
          }

          ## special user options
          if ($arg =~ m/^-x(.+)/) {
              foreach (split(/[\s,]+/, $1)) { $user_opt{$_} = $opt{$_}= 1; }
              next;
          }

          $! = 2, die "$0: unknown arg [$arg]\n";
      }
  }
  ##
  ## Translate a filename glob into the text of an anchored regex.
  ##
  ## '*' and '?' become '.*' and '.?', a [...] class is passed through
  ## unchanged, and everything else is taken literally (a leading backslash
  ## is dropped, then every non-word character is backslash-quoted).
  ## A glob with no * ? or [...] in it gets an effective leading '*', so
  ## that a plain string matches as a suffix.
  ##
  sub glob_to_regex
  {
      local($glob) = @_;
      local($saw_wildcard) = 0;
      local(@pieces) = $glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g;
      local(@converted, $piece);

      foreach (@pieces) {
          $piece = $_;
          if ($piece eq '*' || $piece eq '?') {
              ## single wildcard character
              $saw_wildcard = 1;
              push(@converted, '.' . $piece);
          } elsif (substr($piece, 0, 1) eq '[') {
              ## character class: already valid regex text
              $saw_wildcard = 1;
              push(@converted, $piece);
          } else {
              ## literal run (or a backslash-escaped character)
              $piece =~ s/^\\//;
              $piece =~ s/(\W)/\\$1/g;
              push(@converted, $piece);
          }
      }

      unshift(@converted, '.*') unless $saw_wildcard;
      join('', '^', @converted, '$');
  }
  ##
  ## Turn the parsed options into the flags and perl-source strings that
  ## &dodir uses.  Takes the rc file name as its only argument.
  ##
  ## Loads the rc file (unless -norc), then builds:
  ##   $SKIP_TEST,  $DO_SKIP_TEST     from -skip/-iskip globs
  ##   $DSKIP_TEST, $DO_DSKIP_TEST    from -dskip/-idskip globs
  ##   $GLOB_TESTS, $DO_GLOB_TESTS    from -name/-path/-regex (and i forms)
  ##   $DGLOB_TESTS,$DO_DGLOB_TESTS   from the -d... forms of the above
  ##   $MAGIC_TESTS,$DO_MAGIC_TESTS   from "magic:" lines of the rc file
  ##   $REGEX_TEST                    from the regexes left in @ARGV
  ##   $CAN_USE_FAST_LISTONLY         whether -l may use multi-line matching
  ## Finally makes sure each starting directory in @todo can be stat'ed and
  ## notes it in %dir_done (and in %xdev when -xdev was given).
  ##
  ## Exits 0 after -showrc; dies with exit status 2 on error.
  ##
  sub prepare_to_search
  {
      local($rc_file) = @_;

      $HEADER_BYTES=0;           ## Might be set nonzero in &read_rc;
      $last_message_length = 0;  ## For &message and &clear_message.

      &read_rc($rc_file, $showrc) unless $norc;
      exit(0) if $showrc;

      $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
      $WHY = 1 if $VERBOSE > 3;     ## Arg -vvvv or above implies -why.
      @todo = ('.') if @todo == 0;  ## Where we'll start looking

      ## see if any user options were specified that weren't accounted for
      foreach $opt (keys %user_opt) {
          next if defined $seen_opt{$opt};
          warn "warning: -x$opt never considered.\n";
      }

      ## $! = 2 so that this error exits with status 2 like the others.
      $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
          if ($NEWER && $OLDER) && ($NEWER > $OLDER);

      ##
      ## Process any -skip/-iskip args that had been given
      ##
      local(@skip_test);
      foreach $glob (keys %skip) {
          $i = defined($iskip{$glob}) ? 'i': '';
          push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
      }
      if (@skip_test) {
          $SKIP_TEST = join('||',@skip_test);
          $DO_SKIP_TEST = 1;
      } else {
          $DO_SKIP_TEST = $SKIP_TEST = 0;
      }

      ##
      ## Process any -dskip/-idskip args that had been given
      ##
      local(@dskip_test);
      foreach $glob (keys %dskip) {
          $i = defined($idskip{$glob}) ? 'i': '';
          push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
      }
      if (@dskip_test) {
          $DSKIP_TEST = join('||',@dskip_test);
          $DO_DSKIP_TEST = 1;
      } else {
          $DO_DSKIP_TEST = $DSKIP_TEST = 0;
      }

      ##
      ## Process any -name, -path, -regex, etc. args that had been given.
      ## Keys of %name look like "-iname,PATTERN" (see &check_args).
      ##
      undef @name_test;
      undef @dname_test;
      foreach $key (keys %name) {
          local($type, $pat) = split(/,/, $key, 2);
          local($i) = defined($iname{$key}) ? 'i' : '';
          if ($type =~ /regex/) {
              $pat =~ s/!/\\!/g;
              $test = "\$name =~ m!^$pat\$!$i";
          } else {
              ## $type still carries its leading dash and any i/d letters
              ## ("-name", "-idname", ...), so look at how it ends:
              ## the -name family tests the bare name, -path the whole path.
              local($var) = ($type =~ /name$/) ? '$name' : '$file';
              $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
          }
          if ($type =~ m/^-i?d/) {
              push(@dname_test, $test);
          } else {
              push(@name_test, $test);
          }
      }
      if (@name_test) {
          $GLOB_TESTS = join('||', @name_test);
          $DO_GLOB_TESTS = 1;
      } else {
          $GLOB_TESTS = $DO_GLOB_TESTS = 0;
      }
      if (@dname_test) {
          $DGLOB_TESTS = join('||', @dname_test);
          $DO_DGLOB_TESTS = 1;
      } else {
          $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
      }

      ##
      ## Process any 'magic' things from the startup file.
      ##
      if (@magic_tests && $HEADER_BYTES) {
          ## the $magic' one is for when &dodir is not inlined
          $tests = join('||',@magic_tests);
          $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
          $DO_MAGIC_TESTS = 1;
      } else {
          $MAGIC_TESTS = 1;
          $DO_MAGIC_TESTS = 0;
      }

      ##
      ## Prepare regular expressions.
      ##
      {
          local(@regex_tests);

          ## Always defined, so the m/.../ strings built below never
          ## interpolate an undefined value.
          $mflag = '';
          if ($LIST_ONLY) {
              ## need to have $* set, but perl5 just won't shut up about it.
              if ($] >= 5) {
                  $mflag = 'm';
              } else {
                  eval ' $* = 1 ';
              }
          }

          ##
          ## Until I figure out a better way to deal with it,
          ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
          ## Such a regex *will* match \n, and if I'm pulling in multiple
          ## lines, it can allow lines to match that would otherwise not match.
          ##
          ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
          ## and use the fast listonly.
          ##
          $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

          local(@extra);
          ## optional "underscore, backspace" between letters; non-capturing
          ## where perl supports it.
          local($underline_glue) = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';
          while (@ARGV) {
              $regex = shift(@ARGV);

              ##
              ## If watching for underlined things too, add another regex.
              ##
              if ($underlineOK) {
                  if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
                      warn "$0: warning, can't underline-safe ``$regex''.\n";
                  } else {
                      $regex = join($underline_glue, split(//, $regex));
                  }
              }

              ## If nothing special in the regex, just use index...
              ## is quite a bit faster.
              if (($iflag eq '') && ($words == 0) &&
                  $regex !~ m/[?*+{}()\\.|^\$[]/)
              {
                  push(@regex_tests, "(index(\$_, q+$regex+)>=0)");
              } else {
                  ## Protect every $x, @x and /x so the text can sit inside
                  ## an m/.../ that is later compiled with eval.
                  $regex =~ s#[\$\@\/]\w#\\$&#g;
                  if ($words) {
                      if ($regex =~ m/\|/) {
                          ## could be dangerous -- see if we can wrap in parens.
                          if ($regex =~ m/\\\d/) {
                              warn "warning: -w and a | in a regex is dangerous.\n"
                          } else {
                              $regex = '(' . $regex . ')';
                          }
                      }
                      $regex = '\b' . $regex . '\b';
                  }
                  ## index() gives -1 when "[^" is absent.
                  $CAN_USE_FAST_LISTONLY = 0 if index($regex, "[^") >= 0;
                  push(@regex_tests, "m/$regex/$iflag$mflag");
              }

              ## If we're done, but still have @extra to do, get set for that.
              if (@ARGV == 0 && @extra) {
                  @ARGV = @extra;   ## now deal with the extra stuff.
                  $underlineOK = 0; ## but no more of this.
                  undef @extra;     ## or this.
              }
          }
          if (@regex_tests) {
              $REGEX_TEST = join('||', @regex_tests);
              ## print STDERR $REGEX_TEST, "\n"; exit;
          } else {
              ## must be doing -find -- just give something syntactically correct.
              $REGEX_TEST = 1;
          }
      }

      ##
      ## Make sure we can read the first item(s).
      ##
      foreach $start (@todo) {
          $! = 2, die qq/$0: can't stat "$start"\n/
              unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

          if (defined $dir_done{"$dev,$inode"}) {
              ## ignore the repeat.
              warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
                  if $VERBOSE;
              next;
          }

          ## if -xdev was given, remember the device.
          $xdev{$dev} = 1 if $XDEV;

          ## Note that we won't want to do it again
          $dir_done{"$dev,$inode"} = $start;
      }
  }
  485. ##
  486. ## See the comment above the __END__ above the 'sub dodir' below.
  487. ##
  ## Define &dodir from the program text that follows the first __END__.
  ## The text is read from DATA, has every uppercase $VARIABLE replaced by
  ## that variable's current value, and is then compiled with eval.
  ## Dies with exit status 2 if the resulting code does not compile.
  488. sub import_program
  489. {
  ## Helper: report a variable that was to be inlined but is undefined,
  ## then exit with status 2.
  490. sub bad {
  491. print STDERR "$0: internal error (@_)\n";
  492. exit 2;
  493. }
  494. ## Read from data, up to next __END__. This will be &dodir.
  ## (The input record separator is changed only for this one read.)
  495. local($/) = "\n__END__";
  496. $prog = <DATA>;
  497. close(DATA);
  498. $prog =~ s/\beval\b//g; ## remove any 'eval'
  499. ## Inline uppercase $-variables by their current values.
  ## A name qualifies if it is an uppercase letter followed by at least two
  ## more characters from A-Z, 0-9 and underscore.  The perl5 branch looks
  ## the variable up through the %main:: symbol table, the older branch
  ## through %_main.
  500. if ($] >= 5) {
  501. $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
  502. &bad($1) if !defined ${$main::{$1}}; ${$main::{$1}};/eg;
  503. } else {
  504. $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
  505. &bad($1) if !defined $VAR; $VAR;/eg;
  506. }
  507. eval $prog; ## now do it. This will define &dodir;
  508. $!=2, die "$0 internal error: $@\n" if $@;
  509. }
  510. ###########################################################################
  511. ##
  512. ## Read the .search file:
  513. ## Blank lines and lines that are only #-comments ignored.
  514. ## Newlines may be escaped to create long lines
  515. ## Other lines are directives.
  516. ##
  517. ## A directive may begin with an optional tag in the form <...>
  518. ## Things inside the <...> are evaluated as with:
  519. ## <(this || that) && must>
  520. ## will be true if
  521. ## -xmust -xthis or -xmust -xthat
  522. ## were specified on the command line (order doesn't matter, though)
  523. ## A directive is not done if there is a tag and it's false.
  524. ## Any characters but whitespace and &|()>,! may appear after an -x
  525. ## (although "-xdev" is special). -xmust,this is the same as -xmust -xthis.
  526. ## Something like -x~ would make <~> true, and <!~> false.
  527. ##
  528. ## Directives are in the form:
  529. ## option: STRING
  530. ## magic : NUMBYTES : EXPR
  531. ##
  532. ## With option:
  533. ## The STRING is parsed like a Bourne shell command line, and the
  534. ## options are used as if given on the command line.
  535. ## No comments are allowed on 'option' lines.
  536. ## Examples:
  537. ## # skip objects and libraries
  538. ## option: -skip '.o .a'
  539. ## # skip emacs *~ and *# files, unless -x~ given:
  540. ## <!~> option: -skip '~ #'
  541. ##
  542. ## With magic:
  543. ## EXPR can be pretty much any perl (comments allowed!).
  544. ## If it evaluates to true for any particular file, it is skipped.
  545. ## The only info you'll have about a file is the variable $H, which
  546. ## will have at least the first NUMBYTES of the file (less if the file
  547. ## is shorter than that, of course, and maybe more). You'll also have
  548. ## any variables you set in previous 'magic' lines.
  549. ## Examples:
  550. ## magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
  551. ## magic: 6 : $x6 eq 'GIF89a'
  552. ##
  553. ## magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \
  554. ## || $x6 eq 'GIF89a' ## new gif
  555. ## (the above two sets are the same)
  556. ## ## Check the first 32 bytes for "binarish" looking bytes.
  557. ## ## Don't blindly dump on any high-bit set, as non-ASCII text
  558. ## ## often has them set. \x80 and \xff seem to be special, though.
  559. ## ## Require two in a row to not get things like perl's $^T.
  560. ## ## This is known to get *.Z, *.gz, pkzip, *.elc and about any
  561. ## ## executable you'll find.
  562. ## magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
  563. ##
  ##
  ## Read the startup file FILE, or a built-in default if FILE can't be
  ## opened.  With a true SHOW (from -showrc) each directive is also printed.
  ##
  ## "option:" lines are split like a Bourne shell command line and handed
  ## to &check_args.  "magic:" lines are compile-checked and then appended to
  ## @magic_tests, raising $HEADER_BYTES to the largest byte count asked for.
  ## With -all (and no SHOW) both kinds of directive are ignored.
  ## Dies with exit status 2 on any malformed line.
  ##
  sub read_rc
  {
      local($file, $show) = @_;
      local($line_num, $ln, $tag) = 0;
      local($use_default, @default) = 0;

      { package magic; $^W = 0; } ## turn off warnings for when we run EXPR's

      unless (open(RC, "$file")) {
          $use_default=1;
          $file = "<internal default startup file>";
          ## no RC file -- use this default.
          @default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
<!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
      }

      ##
      ## Make an eval error pretty.
      ##
      sub clean_eval_error {
          local($_) = @_;
          s/ in file \(eval\) at line \d+,//g;  ## perl4-style error
          s/ at \(eval \d+\) line \d+,//g;      ## perl5-style error
          $_ = $` if m/\n/;                     ## remove all but first line
          "$_\n";
      }

      print "reading RC file: $file\n" if $show;

      while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
          $ln = ++$line_num;  ## note starting line num.

          ## Allow continuations: a trailing backslash joins the next line,
          ## taken from whichever source (file or built-in text) is in use.
          while (s/\\\n?$/\n/) {
              local($more) = $use_default ? shift(@default) : scalar(<RC>);
              last unless defined $more;
              $_ .= $more;
              $line_num++;
          }

          next if /^\s*(#.*)?$/;  ## skip blank or comment-only lines.
          $do = '';

          ## look for an initial <...> tag.
          if (s/^\s*<([^>]*)>//) {
              ## This simple s// will make the tag ready to eval: each
              ## option name is noted in %seen_opt and replaced by a test of
              ## %opt (q>...> is safe quoting, as a name can't contain '>').
              ($tag = $msg = $1) =~
                  s/[^\s&|(!)]+/
                      $seen_opt{$&}=1;
                      "defined(\$opt{q>$&>})"
                  /eg;

              ## see if the tag is true or not, abort this line if not.
              $dothis = (eval $tag);
              $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

              if ($show) {
                  ## Put -x in front of every option name in the tag.
                  $msg =~ s/[^\s&|(!)]+/-x$&/g;
                  $msg =~ s/\s*!\s*/ no /g;
                  $msg =~ s/\s*&&\s*/ and /g;
                  $msg =~ s/\s*\|\|\s*/ or /g;
                  $msg =~ s/^\s+//; $msg =~ s/\s+$//;
                  $do = $dothis ? "(doing because $msg)" :
                                  "(do if $msg)";
              } elsif (!$dothis) {
                  next;
              }
          }

          if (m/^\s*option\s*:\s*/) {
              next if $all && !$show;  ## -all turns off these checks;
              local($_) = $';
              s/\n$//;
              local($orig) = $_;
              print " $do option: $_\n" if $show;
              local($0) = "$0 ($file)";  ## for any error message.
              local(@ARGV);
              local($this);
              ##
              ## Parse $_ as a Bourne shell line -- fill @ARGV
              ##
              while (length) {
                  if (s/^\s+//) {
                      push(@ARGV, $this) if defined $this;
                      undef $this;
                      next;
                  }
                  $this = '' if !defined $this;
                  $this .= $1 while s/^'([^']*)'// ||
                                    s/^"([^"]*)"// ||
                                    s/^([^'"\s\\]+)//||
                                    s/^(\\[\D\d])//;
                  $! = 2, die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
              }
              push(@ARGV, $this) if defined $this;
              &check_args;
              $! = 2, die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
              next;
          }

          if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
              next if $all && !$show;  ## -all turns off these checks;
              local($bytes, $check) = ($1, $');

              if ($show) {
                  $check =~ s/\n?$/\n/;
                  print " $do contents: $check";
              }
              ## Check to make sure the thing at least compiles.
              eval "package magic; (\$H = '1'x \$main'bytes) && (\n$check\n)\n";
              $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

              $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
              push(@magic_tests, "(\n$check\n)");
              next;
          }
          $! = 2, die "$file $ln: unknown command\n";
      }
      close(RC);
  }
  ##
  ## Show a progress message on STDERR.
  ##
  ## When STDERR is not a terminal the text is simply printed on a line of
  ## its own.  On a terminal it is followed by a carriage return rather than
  ## a newline, so the next message overwrites it; enough blanks are added
  ## to cover whatever is left of a longer previous message.  The length
  ## shown is kept in $last_message_length for next time.
  ##
  sub message
  {
      ## plain line-at-a-time output when not on a terminal
      return print STDERR $_[0], "\n" unless $STDERR_IS_TTY;

      local($text) = @_;
      $thislength = length($text);

      ## blanks needed to wipe the tail of the previous message, if any
      local($padding) = $last_message_length - $thislength;
      $padding = 0 if $padding < 0;

      print STDERR $text, ' ' x $padding, "\r";
      $last_message_length = $thislength;
  }
  ##
  ## Blank out the progress message currently on the terminal line (if
  ## there is one) and reset the -vv running totals.
  ##
  sub clear_message
  {
      if ($last_message_length) {
          print STDERR ' ' x $last_message_length, "\r";
      }
      $last_message_length = 0;
      $vv_size = 0;
      $vv_print = 0;
  }
  ##
  ## Output a copy of this program with comments, extra whitespace, and
  ## the trailing man page removed. On an ultra slow machine, such a copy
  ## might load faster (but I can't tell any difference on my machine).
  ##
  ## Re-reads the script through DATA from the very beginning, prints the
  ## reduced text on standard output, then exits with status 0.
  ##
  sub strip {
      seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
      while(<DATA>) {
          ## Here-document text is copied through untouched.  The range test
          ## below opens on any line naming the marker and closes on the next
          ## such line -- and that includes the test itself, which is why the
          ## comment right after it has to name the marker as well.
          print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
          ## must mention INLINE_LITERAL_TEXT on this line!

          ## Remove cruft.  Three separate passes, so that an indented line
          ## loses its comment and its line ending as well as its indent.
          s/\#\#.*//;
          s/^\s+//;
          s/\s+$//;

          last if $_ eq '.00;';  ## the man page starts here
          ## Drop empty lines and the two nroff lead-in statements (which
          ## carry a trailing semicolon in the source).
          next if ($_ eq '') || ($_ eq "'di';") || ($_ eq "'ig00';")
                             || ($_ eq "'di'")  || ($_ eq "'ig00'");
          s/\$stripped=0;/\$stripped=1;/;
          s/\s\s+/ /;  ## squish the first run of whitespace down to one.
          print $_, "\n";
      }
      exit(0);
  }
  707. ##
  708. ## Just to shut up -w. Never executed.
  709. ##
  ## Nothing calls this, and the leading "1 ||" short-circuits the rest of
  ## the expression even if something did.  It exists only so that each
  ## name below is mentioned one more time in the program text, which is
  ## what keeps "perl -w" from complaining about them.
  710. sub dummy {
  711. 1 || &dummy || &dir_done || &bad || &message || $NEXT_DIR_ENTRY ||
  712. $DELAY || $VV_SIZE || $VV_PRINT_COUNT || $STDERR_SCREWS_STDOUT ||
  713. @files || @files || $magic'H || $magic'H || $xdev{''} || &clear_message;
  714. }
  715. ##
  716. ## If the following __END__ is in place, what follows will be
  717. ## inlined when the program first starts up. Any $ variable name
  718. ## all in upper case, specifically, any string matching
  719. ## \$([A-Z][A-Z0-9_]{2,}\b)
  720. ## will have the true value for that variable inlined. Also, any 'eval' is
  721. ## removed
  722. ##
  723. ## The idea is that when the whole thing is then eval'ed to define &dodir,
  724. ## the perl optimizer will make all the decisions that are based upon
  725. ## command-line options (such as $VERBOSE), since they'll be inlined as
  726. ## constants
  727. ##
  728. ## Also, and here's the big win, the tests for matching the regex, and a
  729. ## few others, are all inlined. Should be blinding speed here.
  730. ##
  731. ## See the read from <DATA> above for where all this takes place.
  732. ## But all-in-all, you *want* the __END__ here. Comment it out only for
  733. ## debugging....
  734. ##
  735. __END__
  736. ##
  737. ## Given a directory, check all "appropriate" files in it.
  738. ## Shove any subdirectories into the global @todo, so they'll be done
  739. ## later.
  740. ##
  741. ## Be careful about adding any upper-case variables, as they are subject
  742. ## to being inlined. See comments above the __END__ above.
  743. ##
  744. sub dodir
  745. {
  746. local($dir) = @_;
  747. $dir =~ s,/+$,,; ## remove any trailing slash.
  748. unless (opendir(DIR, "$dir/.")) {
  749. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  750. warn qq($0: can't opendir "$dir/".\n);
  751. return;
  752. }
  753. if ($VERBOSE) {
  754. &message($dir);
  755. $vv_print = $vv_size = 0;
  756. }
  757. @files = sort readdir(DIR) if $DO_SORT;
  758. while (defined($name = eval $NEXT_DIR_ENTRY))
  759. {
  760. next if $name eq '.' || $name eq '..'; ## never follow these.
  761. ## create full relative pathname.
  762. $file = $dir eq '.' ? $name : "$dir/$name";
  763. ## if link and skipping them, do so.
  764. if ($NOLINKS && -l $file) {
  765. warn qq/skip (symlink): $file\n/ if $WHY;
  766. next;
  767. }
  768. ## skip things unless files or directories
  769. unless (-f $file || -d _) {
  770. if ($WHY) {
  771. $why = (-S _ && "socket") ||
  772. (-p _ && "pipe") ||
  773. (-b _ && "block special")||
  774. (-c _ && "char special") || "somekinda special";
  775. warn qq/skip ($why): $file\n/;
  776. }
  777. next;
  778. }
  779. ## skip things we can't read
  780. unless (-r _) {
  781. if ($WHY) {
  782. $why = (-l $file) ? "follow" : "read";
  783. warn qq/skip (can't $why): $file\n/;
  784. }
  785. next;
  786. }
  787. ## skip things that are empty
  788. unless (-s _ || -d _) {
  789. warn qq/skip (empty): $file\n/ if $WHY;
  790. next;
  791. }
  792. ## Note file device & inode. If -xdev, skip if appropriate.
  793. ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE];
  794. if ($XDEV && defined $xdev{$dev}) {
  795. warn qq/skip (other device): $file\n/ if $WHY;
  796. next;
  797. }
  798. $id = "$dev,$inode";
  799. ## special work for a directory
  800. if (-d _) {
  801. ## Do checks for directory file endings.
  802. if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) {
  803. warn qq/skip (-dskip): $file\n/ if $WHY;
  804. next;
  805. }
  806. ## do checks for -name/-regex/-path tests
  807. if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) {
  808. warn qq/skip (dirname): $file\n/ if $WHY;
  809. next;
  810. }
  811. ## _never_ redo a directory
  812. if (defined $dir_done{$id} and $^O ne 'MSWin32') {
  813. warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY;
  814. next;
  815. }
  816. $dir_done{$id} = $file; ## mark it done.
  817. unshift(@todo, $file); ## add to the list to do.
  818. next;
  819. }
  820. if ($WHY == 0 && $VERBOSE > 1) {
  821. if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
  822. &message($file);
  823. $vv_print = $vv_size = 0;
  824. }
  825. }
  826. ## do time-related tests
  827. if ($NEWER || $OLDER) {
  828. $_ = (stat(_))[$STAT_MTIME];
  829. if ($NEWER && $_ < $NEWER) {
  830. warn qq/skip (too old): $file\n/ if $WHY;
  831. next;
  832. }
  833. if ($OLDER && $_ > $OLDER) {
  834. warn qq/skip (too new): $file\n/ if $WHY;
  835. next;
  836. }
  837. }
  838. ## do checks for file endings
  839. if ($DO_SKIP_TEST && (eval $SKIP_TEST)) {
  840. warn qq/skip (-skip): $file\n/ if $WHY;
  841. next;
  842. }
  843. ## do checks for -name/-regex/-path tests
  844. if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) {
  845. warn qq/skip (filename): $file\n/ if $WHY;
  846. next;
  847. }
  848. ## If we're not repeating files,
  849. ## skip this one if we've done it, or note we're doing it.
  850. unless ($DOREP) {
  851. if (defined $file_done{$id}) {
  852. warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY;
  853. next;
  854. }
  855. $file_done{$id} = $file;
  856. }
  857. if ($DO_MAGIC_TESTS) {
  858. if (!open(FILE_IN, $file)) {
  859. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  860. warn qq/$0: can't open: $file\n/;
  861. next;
  862. }
  863. unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) {
  864. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  865. warn qq/$0: can't read from "$file"\n"/;
  866. close(FILE_IN);
  867. next;
  868. }
  869. eval $MAGIC_TESTS;
  870. if ($magic'val) {
  871. close(FILE_IN);
  872. warn qq/skip (magic): $file\n/ if $WHY;
  873. next;
  874. }
  875. seek(FILE_IN, 0, 0); ## reset for later <FILE_IN>
  876. }
  877. if ($WHY != 0 && $VERBOSE > 1) {
  878. if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
  879. &message($file);
  880. $vv_print = $vv_size = 0;
  881. }
  882. }
  883. if ($DELAY) {
  884. sleep($DELAY);
  885. }
  886. if ($FIND_ONLY) {
  887. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  888. print $file, "\n";
  889. $retval=0; ## we've found something
  890. close(FILE_IN) if $DO_MAGIC_TESTS;
  891. next;
  892. } else {
  893. ## if we weren't doing magic tests, file won't be open yet...
  894. if (!$DO_MAGIC_TESTS && !open(FILE_IN, $file)) {
  895. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  896. warn qq/$0: can't open: $file\n/;
  897. next;
  898. }
  899. if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) {
  900. ##
  901. ## This is rather complex, but buys us a LOT when we're just
  902. ## listing files and not the individual internal lines.
  903. ##
  904. local($size) = 4096; ## block-size in which to do reads
  905. local($nl); ## will point to $_'s ending newline.
  906. local($read); ## will be how many bytes read.
  907. local($_) = ''; ## Starts out empty
  908. local($hold); ## (see below)
  909. while (($read = read(FILE_IN,$_,$size,length($_)))||length($_))
  910. {
  911. undef @parts;
  912. ## if read a full block, but no newline, need to read more.
  913. while ($read == $size && ($nl = rindex($_, "\n")) < 0) {
  914. push(@parts, $_); ## save that part
  915. $read = read(FILE_IN, $_, $size); ## keep trying
  916. }
  917. ##
  918. ## If we had to save parts, must now combine them together.
  919. ## adjusting $nl to reflect the now-larger $_. This should
  920. ## be a lot more efficient than using any kind of .= in the
  921. ## loop above.
  922. ##
  923. if (@parts) {
  924. local($lastlen) = length($_); #only need if $nl >= 0
  925. $_ = join('', @parts, $_);
  926. $nl = length($_) - ($lastlen - $nl) if $nl >= 0;
  927. }
  928. ##
  929. ## If we're at the end of the file, then we can use $_ as
  930. ## is. Otherwise, we need to remove the final partial-line
  931. ## and save it so that it'll be at the beginning of the
  932. ## next read (where the rest of the line will be layed in
  933. ## right after it). $hold will be what we should save
  934. ## until next time.
  935. ##
  936. if ($read != $size || $nl < 0) {
  937. $hold = '';
  938. } else {
  939. $hold = substr($_, $nl + 1);
  940. substr($_, $nl + 1) = '';
  941. }
  942. ##
  943. ## Now have a bunch of full lines in $_. Use it.
  944. ##
  945. if (eval $REGEX_TEST) {
  946. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  947. print $file, "\n";
  948. $retval=0; ## we've found something
  949. last;
  950. }
  951. ## Prepare for next read....
  952. $_ = $hold;
  953. }
  954. } else { ## else not using faster block scanning.....
  955. $lines_printed = 0 if $NICE;
  956. while (<FILE_IN>) {
  957. study;
  958. next unless (eval $REGEX_TEST);
  959. ##
  960. ## We found a matching line.
  961. ##
  962. $retval=0;
  963. &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
  964. if ($LIST_ONLY) {
  965. print $file, "\n";
  966. last;
  967. } else {
  968. ## prepare to print line.
  969. if ($NICE && $lines_printed++ == 0) {
  970. print '-' x 70, "\n" if $NICE > 1;
  971. print $file, ":\n";
  972. }
  973. ##
  974. ## Print all the prelim stuff. This looks less efficient
  975. ## than it needs to be, but that's so that when the eval
  976. ## is compiled (and the tests are optimized away), the
  977. ## result will be less actual PRINTs than the more natural
  978. ## way of doing these tests....
  979. ##
  980. if ($NICE) {
  981. if ($REPORT_LINENUM) {
  982. print " line $.: ";
  983. } else {
  984. print " ";
  985. }
  986. } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) {
  987. print "$file,:$.: ";
  988. } elsif ($PREPEND_FILENAME) {
  989. print "$file: ";
  990. } elsif ($REPORT_LINENUM) {
  991. print "$.: ";
  992. }
  993. print $_;
  994. print "\n" unless m/\n$/;
  995. }
  996. }
  997. print "\n" if ($NICE > 1) && $lines_printed;
  998. }
  999. close(FILE_IN);
  1000. }
  1001. }
  1002. closedir(DIR);
  1003. }
  1004. __END__
  1005. .00; ## finish .ig
  1006. 'di \" finish diversion--previous line must be blank
  1007. .nr nl 0-1 \" fake up transition to first page again
  1008. .nr % 0 \" start at page 1
  1009. .\"__________________NORMAL_MAN_PAGE_BELOW_________________
  1010. .ll+10n
  1011. .TH search 1 "Dec 17, 1994"
  1012. .SH SEARCH
  1013. search \- search files (a'la grep) in a whole directory tree.
  1014. .SH SYNOPSIS
  1015. search [ grep-like and find-like options] [regex ....]
  1016. .SH DESCRIPTION
  1017. .I Search
  1018. is more or less a combo of 'find' and 'grep' (although the regular
  1019. expression flavor is that of the perl being used, which is closer to
  1020. egrep's than grep's).
  1021. .I Search
  1022. does generally the same kind of thing that
  1023. .nf
  1024. find <blah blah> | xargs egrep <blah blah>
  1025. .fi
  1026. does, but is
  1027. .I much
  1028. more powerful and efficient (and intuitive, I think).
  1029. This manual describes
  1030. .I search
  1031. as of version "941227.4". You can always find the latest version at
  1032. .nf
  1033. http://www.wg.omron.co.jp/~jfriedl/perl/index.html
  1034. .fi
  1035. .SH "QUICK EXAMPLE"
  1036. Basic use is simple:
  1037. .nf
  1038. % search jeff
  1039. .fi
  1040. will search files in the current directory, and all sub directories, for
  1041. files that have "jeff" in them. The lines will be listed with the
  1042. containing file's name prepended.
  1043. .PP
  1044. If you list more than one regex, such as with
  1045. .nf
  1046. % search jeff Larry Randal+ 'Stoc?k' 'C.*son'
  1047. .fi
  1048. then a line containing any of the regexes will be listed.
  1049. This makes it effectively the same as
  1050. .nf
  1051. % search 'jeff|Larry|Randal+|Stoc?k|C.*son'
  1052. .fi
  1053. However, listing them separately is much more efficient (and is easier
  1054. to type).
  1055. .PP
  1056. Note that in the case of these examples, the
  1057. .B \-w
  1058. (list whole-words only) option would be useful.
  1059. .PP
  1060. Normally, various kinds of files are automatically removed from consideration.
  1061. If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if
  1062. the beginning of the file looks like a binary, it'll be excluded.
  1063. You can control exactly how this works -- see below. One quick way to
  1064. override this is to use the
  1065. .B \-all
  1066. option, which means to consider all the files that would normally be
  1067. automatically excluded.
  1068. Or, if you're curious, you can use
  1069. .B \-why
  1070. to have notes about what files are skipped (and why) printed to stderr.
  1071. .SH "BASIC OVERVIEW"
  1072. Normally, the search starts in the current directory, considering files in
  1073. all subdirectories.
  1074. You can use the
  1075. .I ~/.search
  1076. file to control ways to automatically exclude files.
  1077. If you don't have this file, a default one will kick in, which automatically
  1078. adds
  1079. .nf
  1080. -skip .o .Z .gif
  1081. .fi
  1082. (among others) to exclude those kinds of files (which you probably want to
  1083. skip when searching for text, as is normal).
  1084. Files that look to be binary will also be excluded.
  1085. Files ending with "#" and "~" will also be excluded unless the
  1086. .B -x~
  1087. option is given.
  1088. You can use
  1089. .B -showrc
  1090. to show what kinds of files will normally be skipped.
  1091. See the section on the startup file
  1092. for more info.
  1093. You can use the
  1094. .B -all
  1095. option to indicate you want to consider all files that would otherwise be
  1096. skipped by the startup file.
  1097. Based upon various other flags (see "WHICH FILES TO CONSIDER" below),
  1098. more files might be removed from consideration. For example
  1099. .nf
  1100. -mtime 3
  1101. .fi
  1102. will exclude files that aren't at least three days old (change the 3 to -3
  1103. to exclude files that are more than three days old), while
  1104. .nf
  1105. -skip .*
  1106. .fi
  1107. would exclude any file beginning with a dot (of course, '.' and '..' are
  1108. special and always excluded).
  1109. If you'd like to see what files are being excluded, and why, you can get the
  1110. list via the
  1111. .B \-why
  1112. option.
  1113. If a file makes it past all the checks, it is then "considered".
  1114. This usually means it is grepped for the regular expressions you gave
  1115. on the command line.
  1116. If any of the regexes match a line, the line is printed.
  1117. However, if
  1118. .B -list
  1119. is given, just the filename is printed. Or, if
  1120. .B -nice
  1121. is given, a somewhat more (human-)readable output is generated.
  1122. If you're searching a huge tree and want to keep informed about how
  1123. the search is progressing,
  1124. .B -v
  1125. will print (to stderr) the current directory being searched.
  1126. Using
  1127. .B -vv
  1128. will also print the current file "every so often", which could be useful
  1129. if a directory is huge. Using
  1130. .B -vvv
  1131. will print the update with every file.
  1132. Below is the full listing of options.
  1133. .SH "OPTIONS TELLING *WHERE* TO SEARCH"
  1134. .TP
  1135. .BI -dir " DIR"
  1136. Start searching at the named directory instead of the current directory.
  1137. If multiple
  1138. .B -dir
  1139. arguments are given, multiple trees will be searched.
  1140. .TP
  1141. .BI -ddir " DIR"
  1142. Like
  1143. .B -dir
  1144. except it flushes any previous
  1145. .B -dir
  1146. directories (i.e. "-dir A -dir B -dir C" will search A, B, and C, while
  1147. "-dir A -ddir B -dir C" will search only B and C). This might be of use
  1148. in the startup file (see that section below).
  1149. .TP
  1150. .B -xdev
  1151. Stay on the same filesystem as the starting directory/directories.
  1152. .TP
  1153. .B -sort
  1154. Sort the items in a directory before processing them.
  1155. Normally they are processed in whatever order they happen to be read from
  1156. the directory.
  1157. .TP
  1158. .B -nolinks
  1159. Don't follow symbolic links. Normally they're followed.
  1160. .SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE"
  1161. .TP
  1162. .BI -mtime " NUM"
  1163. Only consider files that were last changed more than
  1164. .I NUM
  1165. days ago
  1166. (less than
  1167. .I NUM
  1168. days if
  1169. .I NUM
  1170. has '-' prepended, i.e. "-mtime -2.5" means to consider files that
  1171. have been changed in the last two and a half days).
  1172. .TP
  1173. .BI -older " FILE"
  1174. Only consider files that have not changed since
  1175. .I FILE
  1176. was last changed.
  1177. If there is any upper case in the "-older", "or equal" is added to the sense
  1178. of the test. Therefore, "search -older ./file regex" will never consider
  1179. "./file", while "search -Older ./file regex" will.
  1180. If a file is a symbolic link, the time used is that of the file and not the
  1181. link.
  1182. .TP
  1183. .BI -newer " FILE"
  1184. Opposite of
  1185. .BR -older .
  1186. .TP
  1187. .BI -name " GLOB"
  1188. Only consider files that match the shell filename pattern
  1189. .IR GLOB .
  1190. The check is only done on a file's name (use
  1191. .B -path
  1192. to check the whole path, and use
  1193. .B -dname
  1194. to check directory names).
  1195. Multiple specifications can be given by separating them with spaces, a'la
  1196. .nf
  1197. -name '*.c *.h'
  1198. .fi
  1199. to consider C source and header files.
  1200. If
  1201. .I GLOB
  1202. doesn't contain any special pattern characters, a '*' is prepended.
  1203. This last example could have been given as
  1204. .nf
  1205. -name '.c .h'
  1206. .fi
  1207. It could also be given as
  1208. .nf
  1209. -name .c -name .h
  1210. .fi
  1211. or
  1212. .nf
  1213. -name '*.c' -name '*.h'
  1214. .fi
  1215. or
  1216. .nf
  1217. -name '*.[ch]'
  1218. .fi
  1219. (among others)
  1220. but in this last case, you have to be sure to supply the leading '*'.
  1221. .TP
  1222. .BI -path " GLOB"
  1223. Like
  1224. .B -name
  1225. except the entire path is checked against the pattern.
  1226. .TP
  1227. .BI -regex " REGEX"
  1228. Considers files whose names (not paths) match the given perl regex
  1229. exactly.
  1230. .TP
  1231. .BI -iname " GLOB"
  1232. Case-insensitive version of
  1233. .BR -name .
  1234. .TP
  1235. .BI -ipath " GLOB"
  1236. Case-insensitive version of
  1237. .BR -path .
  1238. .TP
  1239. .BI -iregex " REGEX"
  1240. Case-insensitive version of
  1241. .BR -regex .
  1242. .TP
  1243. .BI -dpath " GLOB"
  1244. Only search down directories whose path matches the given pattern (this
  1245. doesn't apply to the initial directory given by
  1246. .BR -dir ,
  1247. of course).
  1248. Something like
  1249. .nf
  1250. -dir /usr/man -dpath /usr/man/man*
  1251. .fi
  1252. would completely skip
  1253. "/usr/man/cat1", "/usr/man/cat2", etc.
  1254. .TP
  1255. .BI -dskip " GLOB"
  1256. Skips directories whose name (not path) matches the given pattern.
  1257. Something like
  1258. .nf
  1259. -dir /usr/man -dskip cat*
  1260. .fi
  1261. would completely skip any directory in the tree whose name begins with "cat"
  1262. (including "/usr/man/cat1", "/usr/man/cat2", etc.).
  1263. .TP
  1264. .BI -dregex " REGEX"
  1265. Like
  1266. .BR -dpath ,
  1267. but the pattern is a full perl regex. Note that this is quite different
  1268. from
  1269. .B -regex
  1270. which considers only file names (not paths). This option considers
  1271. full directory paths (not just names). It's much more useful this way.
  1272. Sorry if it's confusing.
  1273. .TP
  1274. .BI -dname " GLOB"
  1275. This option exists, but is probably not very useful. It probably wants to
  1276. be like the '-below' or something I mention in the "TODO" section.
  1277. .TP
  1278. .BI -idpath " GLOB"
  1279. Case-insensitive version of
  1280. .BR -dpath .
  1281. .TP
  1282. .BI -idskip " GLOB"
  1283. Case-insensitive version of
  1284. .BR -dskip .
  1285. .TP
  1286. .BI -idregex " REGEX"
  1287. Case-insensitive version of
  1288. .BR -dregex .
  1289. .TP
  1290. .B -all
  1291. Ignore any 'magic' or 'option' lines in the startup file.
  1292. The effect is that all files that would otherwise be automatically
  1293. excluded are considered.
  1294. .TP
  1295. .BI -x SPECIAL
  1296. Arguments starting with
  1297. .B -x
  1298. (except
  1299. .BR -xdev ,
  1300. explained elsewhere) do special interaction with the
  1301. .I ~/.search
  1302. startup file. Something like
  1303. .nf
  1304. -xflag1 -xflag2
  1305. .fi
  1306. will turn on "flag1" and "flag2" in the startup file (and is
  1307. the same as "-xflag1,flag2"). You can use this to write your own
  1308. rules for what kinds of files are to be considered.
  1309. For example, the internal-default startup file contains the line
  1310. .nf
  1311. <!~> option: -skip '~ #'
  1312. .fi
  1313. This means that if the
  1314. .B -x~
  1315. flag is
  1316. .I not
  1317. seen, the option
  1318. .nf
  1319. -skip '~ #'
  1320. .fi
  1321. should be done.
  1322. The effect is that emacs temp and backup files are not normally
  1323. considered, but you can include them with the -x~ flag.
  1324. You can write your own rules to customize
  1325. .I search
  1326. in powerful ways. See the STARTUP FILE section below.
  1327. .TP
  1328. .B -why
  1329. Print a message (to stderr) when and why a file is not considered.
  1330. .SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED"
  1331. .TP
  1332. .B -find
  1333. (you can use
  1334. .B -f
  1335. as well).
  1336. This option changes the basic action of
  1337. .IR search .
  1338. Normally, if a file is considered, it is searched
  1339. for the regular expressions as described earlier. However, if this option
  1340. is given, the filename is printed and no searching takes place. This turns
  1341. .I search
  1342. into a 'find' of some sorts.
  1343. In this case, no regular expressions are needed on the command line
  1344. (any that are there are silently ignored).
  1345. This is not intended to be a replacement for the 'find' program,
  1346. but to aid
  1347. you in understanding just what files are getting past the exclusion checks.
  1348. If you really want to use it as a sort of replacement for the 'find' program,
  1349. you might want to use
  1350. .B -all
  1351. so that it doesn't waste time checking to see if the file is binary, etc
  1352. (unless you really want that, of course).
  1353. If you use
  1354. .BR -find ,
  1355. none of the "GREP-LIKE OPTIONS" (below) matter.
  1356. As a replacement for 'find',
  1357. .I search
  1358. is probably a bit slower (or in the case of GNU find, a lot slower --
  1359. GNU find is
  1360. .I unbelievably
  1361. fast).
  1362. However, "search -ffind"
  1363. might be more useful than 'find' when options such as
  1364. .B -skip
  1365. are used (at least until 'find' gets such functionality).
  1366. .TP
  1367. .B -ffind
  1368. (or
  1369. .BR -ff )
  1370. A faster more 'find'-like find. Does
  1371. .nf
  1372. -find -all -dorep
  1373. .fi
  1374. .SH "GREP-LIKE OPTIONS"
  1375. These options control how a searched file is accessed,
  1376. and how things are printed.
  1377. .TP
  1378. .B -i
  1379. Ignore letter case when matching.
  1380. .TP
  1381. .B -w
  1382. Consider only whole-word matches ("whole word" as defined by perl's "\\b"
  1383. regex).
  1384. .TP
  1385. .B -u
  1386. If the regex(es) is/are simple, try to modify them so that they'll work
  1387. in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs).
  1388. This is very rudimentary at the moment.
  1389. .TP
  1390. .B -list
  1391. (you can use
  1392. .B -l
  1393. too).
  1394. Don't print matching lines, but the names of files that contain matching
  1395. lines. This will likely be *much* faster, as special optimizations are
  1396. made -- particularly with large files.
  1397. .TP
  1398. .B -n
  1399. Prefix each line with its line number.
  1400. .TP
  1401. .B -nice
  1402. Not a grep-like option, but similar to
  1403. .BR -list ,
  1404. so included here.
  1405. .B -nice
  1406. will have the output be a bit more human-readable, with matching lines printed
  1407. slightly indented after the filename, a'la
  1408. .nf
  1409. % search foo
  1410. somedir/somefile: line with foo in it
  1411. somedir/somefile: some food for thought
  1412. anotherdir/x: don't be a buffoon!
  1413. %
  1414. .fi
  1415. will become
  1416. .nf
  1417. % search -nice foo
  1418. somedir/somefile:
  1419. line with foo in it
  1420. some food for thought
  1421. anotherdir/x:
  1422. don't be a buffoon!
  1423. %
  1424. .fi
  1425. This option due to Lionel Cons.
  1426. .TP
  1427. .B -nnice
  1428. Be a bit nicer than
  1429. .BR -nice .
  1430. Prefix each file's output by a rule line, and follow with an extra blank line.
  1431. .TP
  1432. .B -h
  1433. Don't prepend each output line with the name of the file
  1434. (meaningless when
  1435. .B -find
  1436. or
  1437. .B -l
  1438. are given).
  1439. .SH "OTHER OPTIONS"
  1440. .TP
  1441. .B -help
  1442. Print the usage information.
  1443. .TP
  1444. .B -version
  1445. Print the version information and quit.
  1446. .TP
  1447. .B -v
  1448. Set the level of message verbosity.
  1449. .B -v
  1450. will print a note whenever a new directory is entered.
  1451. .B -vv
  1452. will also print a note "every so often". This can be useful to see
  1453. what's happening when searching huge directories.
  1454. .B -vvv
  1455. will print a new note with every file.
  1456. .B -vvvv
  1457. is
  1458. -vvv
  1459. plus
  1460. .BR -why .
  1461. .TP
  1462. .B -e
  1463. This ends the options, and can be useful if the regex begins with '-'.
  1464. .TP
  1465. .B -showrc
  1466. Shows what is being considered in the startup file, then exits.
  1467. .TP
  1468. .B -dorep
  1469. Normally, an identical file won't be checked twice (even with multiple
  1470. hard or symbolic links). If you're just trying to do a fast
  1471. .BR -find ,
  1472. the bookkeeping to remember which files have been seen is not desirable,
  1473. so you can eliminate the bookkeeping with this flag.
  1474. .SH "STARTUP FILE"
  1475. When
  1476. .I search
  1477. starts up, it processes the directives in
  1478. .IR ~/.search .
  1479. If no such file exists, a default
  1480. internal version is used.
  1481. The internal version looks like:
  1482. .nf
  1483. magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
  1484. option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
  1485. option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
  1486. <!~> option: -skip '~ #'
  1487. .fi
  1488. If you wish to create your own "~/.search",
  1489. you might consider copying the above, and then working from there.
  1490. There are two kinds of directives in a startup file: "magic" and "option".
  1491. .RS 0n
  1492. .TP
  1493. OPTION
  1494. Option lines will automatically do the command-line options given.
  1495. For example, the line
  1496. .nf
  1497. option: -v
  1498. .fi
  1499. in your startup file will turn on -v every time, without needing to type it
  1500. on the command line.
  1501. The text on the line after the "option:" directive is processed
  1502. like the Bourne shell, so make sure to pay attention to quoting.
  1503. .nf
  1504. option: -skip .exe .com
  1505. .fi
  1506. will give an error (".com" by itself isn't a valid option), while
  1507. .nf
  1508. option: -skip ".exe .com"
  1509. .fi
  1510. will properly include it as part of -skip's argument.
  1511. .TP
  1512. MAGIC
  1513. Magic lines are used to determine if a file should be considered a binary
  1514. or not (the term "magic" refers to checking a file's magic number). These
  1515. are described in more detail below.
  1516. .RE
  1517. Blank lines and comments (lines beginning with '#') are allowed.
  1518. If a line begins with <...>, then it's a check to see if the
  1519. directive on the line should be done or not. The stuff inside the <...>
  1520. can contain perl's && (and), || (or), ! (not), and parens for grouping,
  1521. along with "flags" that might be indicated by the user with
  1522. .BI -x flag
  1523. options.
  1524. For example, using "-xfoo" will cause "foo" to be true inside the <...>
  1525. blocks. Therefore, a line beginning with "<foo>" would be done only when
  1526. "-xfoo" had been specified, while a line beginning with "<!foo>" would be
  1527. done only when "-xfoo" is not specified (of course, a line without any <...>
  1528. is done in either case).
  1529. A realistic example might be
  1530. .nf
  1531. <!v> option: -vv
  1532. .fi
  1533. This will cause -vv messages to be the default, but allow "-xv" to override.
  1534. There are a few flags that are set automatically:
  1535. .RS
  1536. .TP
  1537. .B TTY
  1538. true if the output is to the screen (as opposed to being redirected to a file).
  1539. You can force this (as with all the other automatic flags) with -xTTY.
  1540. .TP
  1541. .B -v
  1542. True if -v was specified. If -vv was specified, both
  1543. .B -v
  1544. and
  1545. .B -vv
  1546. flags are true (and so on).
  1547. .TP
  1548. .B -nice
  1549. True if -nice was specified. Same thing about -nnice as for -vv.
  1550. .PP
  1551. .TP
  1552. .B -list
  1553. true if -list (or -l) was given.
  1554. .TP
  1555. .B -dir
  1556. true if -dir was given.
  1557. .RE
  1558. Using this info, you might change the last example to
  1559. .nf
  1560. <!v && !-v> option: -vv
  1561. .fi
  1562. The added "&& !-v" means "and if the '-v' option is not given".
  1563. This will allow you to use "-v" alone on the command line, and not
  1564. have this directive add the more verbose "-vv" automatically.
  1565. .RS 0
  1566. Some other examples:
  1567. .TP
  1568. <!-dir && !here> option: -dir ~/
  1569. Effectively make the default directory your home directory (instead of the
  1570. current directory). Using -dir or -xhere will undo this.
  1571. .TP
  1572. <tex> option: -name .tex -dir ~/pub
  1573. Create '-xtex' to search only "*.tex" files in your ~/pub directory tree.
  1574. Actually, this could be made a bit better. If you combine '-xtex' and '-dir'
  1575. on the command line, this directive will add ~/pub to the list, when you
  1576. probably want to use the -dir directory only. You could do
  1577. .nf
  1578. <tex> option: -name .tex
  1579. <tex && !-dir> option: -dir ~/pub
  1580. .fi
  1581. which will allow '-xtex' to work as before, but allow a command-line "-dir"
  1582. to take precedence with respect to ~/pub.
  1583. .TP
  1584. <fluff> option: -nnice -sort -i -vvv
  1585. Combine a few user-friendly options into one '-xfluff' option.
  1586. .TP
  1587. <man> option: -ddir /usr/man -v -w
  1588. When the '-xman' option is given, search "/usr/man" for whole-words
  1589. (of whatever regex or regexes are given on the command line), with -v.
  1590. .RE
  1591. The lines in the startup file are executed from top to bottom, so something
  1592. like
  1593. .nf
  1594. <both> option: -xflag1 -xflag2
  1595. <flag1> option: ...whatever...
  1596. <flag2> option: ...whatever...
  1597. .fi
  1598. will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2'
  1599. for that matter). However, if you put the "<both>" line below the others,
  1600. they will not be true when encountered, so the result would be different
  1601. (and probably undesired).
  1602. The "magic" directives are used to determine if a file looks to be binary
  1603. or not. The form of a magic line is
  1604. .nf
  1605. magic: \fISIZE\fP : \fIPERLCODE\fP
  1606. .fi
  1607. where
  1608. .I SIZE
  1609. is the number of bytes of the file you need to check, and
  1610. .I PERLCODE
  1611. is the code to do the check. Within
  1612. .IR PERLCODE ,
  1613. the variable $H will hold at least the first
  1614. .I SIZE
  1615. bytes of the file (unless the file is shorter than that, of course).
  1616. It might hold more bytes. The perl should evaluate to true if the file
  1617. should be considered a binary.
  1618. An example might be
  1619. .nf
  1620. magic: 6 : substr($H, 0, 6) eq 'GIF87a'
  1621. .fi
  1622. to test for a GIF ("-iskip .gif" is better, but this might be useful
  1623. if you have images in files without the ".gif" extension).
  1624. Since the startup file is checked from top to bottom, you can be a bit
  1625. efficient:
  1626. .nf
  1627. magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
  1628. magic: 6 : $x6 eq 'GIF89a'
  1629. .fi
  1630. You could also write the same thing as
  1631. .nf
  1632. magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e
  1633. $x6 eq 'GIF89a' ## .. a new one.
  1634. .fi
  1635. since newlines may be escaped.
  1636. The default internal startup file includes
  1637. .nf
  1638. magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
  1639. .fi
  1640. which checks for certain non-printable characters, and catches a large
  1641. number of binary files, including most systems' executables, linkable
  1642. objects, compressed, tarred, and otherwise folded, spindled, and mutilated
  1643. files.
  1644. Another example might be
  1645. .nf
  1646. ## an archive library
  1647. magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF"
  1648. .fi
  1649. .SH "RETURN VALUE"
  1650. .I Search
  1651. returns zero if lines (or files, if appropriate) were found,
  1652. or if no work was requested (such as with
  1653. .BR -help ).
  1654. Returns 1 if no lines (or files) were found.
  1655. Returns 2 on error.
  1656. .SH TODO
  1657. Things I'd like to add some day:
  1658. .nf
  1659. + show surrounding lines (context).
  1660. + highlight matched portions of lines.
  1661. + add '-and', which can go between regexes to override
  1662. the default logical or of the regexes.
  1663. + add something like
  1664. -below GLOB
  1665. which will examine a tree and only consider files that
  1666. lie in a directory deeper than one named by the pattern.
  1667. + add 'warning' and 'error' directives.
  1668. + add 'help' directive.
  1669. .fi
  1670. .SH BUGS
  1671. If -xdev and multiple -dir arguments are given, any file in any of the
  1672. target filesystems is allowed. It would be better to allow each filesystem
  1673. for each separate tree.
  1674. Multiple -dir args might also cause some confusing effects. Doing
  1675. .nf
  1676. -dir some/dir -dir other
  1677. .fi
  1678. will search "some/dir" completely, then search "other" completely. This
  1679. is good. However, something like
  1680. .nf
  1681. -dir some/dir -dir some/dir/more/specific
  1682. .fi
  1683. will search "some/dir" completely *except for* "some/dir/more/specific",
  1684. after which it will return and be searched. Not really a bug, but just sort
  1685. of odd.
  1686. File times (for -newer, etc.) of symbolic links are for the file, not the
  1687. link. This could cause some misunderstandings.
  1688. Probably more. Please let me know.
  1689. .SH AUTHOR
  1690. Jeffrey Friedl, Omron Corp ([email protected])
  1691. .br
  1692. http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html
  1693. .SH "LATEST SOURCE"
  1694. See http://www.wg.omron.co.jp/~jfriedl/perl/index.html
  1695. __END__
  1696. :endofperl