#!/usr/bin/env perl =head1 NAME cgrep - print lines matching a pattern =head1 SYNOPSIS B [I] I [I] =head1 DESCRIPTION B searches for I in text files, and prints matching lines. B reads all files specified in I, or stdin if no I are given. Directories are ignored, unless recursive mode is enabled. Every line is matched with I separately. The pattern can match all or part of the line, but the terminating newline is discarded from the line before the match. I is interpreted as a perl regular expression by default, but as a fixed string if the B<-F> flag is set. In normal operation mode, B prints matching lines. Exit code is 0 if any matches are found, 1 if no matches were found. =head1 OPTIONS Long options can be abbreviated to a unique prefix, and can be negated with a B prefix. A double hyphen stops option processing, allowing you to use filenames starting with a minus. =over =item B<-A> I, B<--after-context=>I; B<-B> I, B<--before-context=>I Print context lines around matches, as with B<-C>. These options, however, set the number of context lines after and before matches to I and I respectively. If only one of the options is given, context lines will be printed only after or only before the match resp. =item B<-C> I, B<--context=>I Print I lines of context before and after each match. Also, print a line with a lone dash between continuous chunks of lines. If any of B<-n>, B<-h>, B<-p>, B<-P> is enabled, the separator before the line and the prefixes will be a colon on matching lines, and a hyphen in context lines. =item B<-e> I, B<--pattern=>I A different way to specify I. If this option is used, all non-option arguments are filenames. If not used, the first non-option argument is the I. =item B<-E>, B<--perl-regexp> Interpret I as a perl regexp. This is the default. Perl regexps are NFA, see L for details of syntax and semantics. You can use escape sequences like C<\t> or C<\011> to match special characters. Note that C<\b> matches word boundaries. 
=item B<-F>, B<--fixed-strings> Interpret I as fixed string, not a regexp. More precisely, it is interpreted as a list of fixed strings separated by newlines. =item B<-h>, B<--with-no-filename> Do not print filename before each line printed. This is the default if there is only one I given. =item B<-H>, B<--with-filename> Print filename before each line. This is the default if more than one I are given. =item B<-i>, B<--fold-case> Interpret I case insensitively. Works with both regexp pattern and fixed strings. =item B<-l>, B<--files-with-match> Print the list of files that have any matching lines. Stop reading the file at the first match. This suppresses normal operation mode, that is, printing matching lines. The options B<-ABChHnpP> do not make sense in combination with B<-l>. =item B<-L>, B<--files-without-match> Prints the list of files having no matching lines. Similar to B<-l> otherwise. =item B<-n>, B<--line-number> Print line number before each line printed. =item B<-p>, B<--show-function> Prints current function name before each line. =item B<-P> I, B<--function-regexp=>I Defines how function names are found. Implies B<-p>. Lines matching I are taken to be function headers. I is a perl regexp, unaffected by B<-F>, B<-i>, B<-v>. The last matching capture in it should return the function name. The default regexp is C<^(\w+)>, suitable for C. =item B<-q>, B<--silent> Do not print anything, just give the return value. Stops execution at the very first match. =item B<-R>, B<-r>, B<--recursive> Recursively search files in the directories given on command line. Note that symlinks to directories are not followed. If this option is not given, directories are ignored. =item B<-s>, B<--no-messages> Do not print warnings about files that can't be opened or read. =item B<-t> I, B<--include=>I Only search in files whose name matches the shell-like pattern I. This is primarily useful with B<--recursive>, when it applies only to the name of files, not that of directories. 
The pattern is matched only to the last path component (the basename of the file), but it must match the basename entirely (not only a part of it). The following seven characters are metacharacters: B<\ | * ? [ ( )>, all other characters in B stand for themselves. You can escape a metacharacter with a backslash. A vertical bar means alternation, that is, IB<|>I matches a string matched either by I or I. A star matches any substring, a question mark matches any single character. Brackets surround a character range like B<[A-Za-z0-9_]>, or a negated character range such as B<[^.]>; closing brackets or backslashes in a character range must be escaped with a backslash, and you can escape a minus sign too so that it doesn't mean a continuous range. The special sequences B<@(>IB<)>, B<?(>IB<)>, B<*(>IB<)>, B<+(>IB<)>, match one, zero or one, at least zero, and at least one substrings each of which are matched by the pattern I; the first one can be useful if there is an alternation inside. Shell-like pattern matching is NFA. =item B<-v>, B<--invert-match> Inverts the sense of the match, that is, searches for lines not matching I =item B<-V>, B<--version> Prints program version. =item B<--help> Prints short help message. =item B<--exclude=>I Skip files whose names match the shell-like pattern I. This option is the opposite of B<--include>; see that option on how I is interpreted. =item B<--verbose> Print the names of the files as they are searched and some additional info. =back =head1 HISTORY B is a clone of the well-known unix utilities B and B. It is, however, not completely compatible with them. The most important differences from other implementations are as follows: =over =item B interprets patterns as perl regular expressions. These are NFA regexps, unlike the POSIX regular expressions used in B, and there are also some syntax differences. 
=item Some options are missing, most importantly B<-c> (count matches),
B<-o> (print only matching part), B<-x> (match full line), and those dealing
with binary files.

=back

=head1 SEE ALSO

L, L, L

=head1 BUGS

Please report bugs to the author by e-mail: L.

=cut

use warnings;
use strict;
use Getopt::Long;
use IO::Handle;
use File::Spec;

{ # shell-like patterns

    # glob_to_re(GLOB): translate a shell-like pattern into a compiled regexp.
    #
    # Metacharacters are: \ | * ? [ ( )
    # Any of these can be escaped by a backslash.  The group openers
    # "@(", "*(", "+(" and "?(" start a group that must match exactly once,
    # zero or more times, one or more times, or at most once respectively.
    #
    # Returns a qr// object anchored at both ends, so it only matches a
    # complete string.  Dies on an unbalanced parenthesis or on a
    # metacharacter that appears where it is not allowed.
    sub glob_to_re {
        my ($glob) = @_;
        my $re = "";      # regexp source built so far
        my @closers;      # text that closes each currently open group
        while ($glob =~ m&\G (?s:
                ([^\\|*?\[\(\)+@]+)|  #1 run of ordinary characters
                \|()|                 #2 alternation bar
                \*() (?!\()|          #3 star, any substring
                \?() (?!\()|          #4 question mark, any one character
                \\(.)|                #5 backslash escape
                (\[(?!\^) (?:[^\\\]]|\\.)+ \] | \[\^ (?:[^\\\]]|\\.)+ \])|  #6 bracket class
                \@\(()|               #7 group matched exactly once
                \*\(()|               #8 group matched zero or more times
                \+\(()|               #9 group matched one or more times
                \?\(()|               #10 group matched zero or one time
                \)()|                 #11 end of group
                ([+@])|               #12 plus or at sign not opening a group
                (.)                   #13 anything else is an error
            ) &gcx
        ) {
            if    (defined($1))  { $re .= quotemeta($1) }
            elsif (defined($2))  { $re .= "|" }
            elsif (defined($3))  { $re .= ".*" }
            elsif (defined($4))  { $re .= "." }
            elsif (defined($5))  { $re .= "\\" . $5 }   # allow letter escapes
            elsif (defined($6))  { $re .= $6 }          # class is passed through verbatim
            elsif (defined($7))  { push @closers, ")";  $re .= "(?:" }
            elsif (defined($8))  { push @closers, ")*"; $re .= "(?:" }
            elsif (defined($9))  { push @closers, ")+"; $re .= "(?:" }
            elsif (defined($10)) { push @closers, ")?"; $re .= "(?:" }
            elsif (defined($11)) {
                @closers
                    or die "error: unbalanced ) in glob at position $-[0]";
                $re .= pop(@closers);
            }
            elsif (defined($12)) { $re .= quotemeta($12) }
            elsif (defined($13)) {
                die qq[error: invalid metacharacter "$13" in glob at position $-[0]];
            }
            else {
                die "internal error parsing glob";
            }
        }
        # A group that was opened but never closed would otherwise surface
        # as an obscure regexp compilation error.
        @closers and die "error: unbalanced ( in glob";
        return qr/\A(?s:$re)\z/;
    }

    # basename(PATH): return the last component of PATH after
    # canonicalizing it with File::Spec.
    sub basename {
        return (File::Spec->splitpath(File::Spec->canonpath($_[0])))[2];
    }

}

{ # cgrep

    my $VERSION_MESSAGE = "cgrep 1.1.0\n";

    my $USAGE_MESSAGE = q{Usage: cgrep [OPTIONS] PATTERN [FILES]
Try `cgrep --help' or `perldoc cgrep' for more information.
};

    my $HELP_MESSAGE = q{Usage: cgrep [OPTIONS] PATTERN [FILES]
Search for lines with a substring matching PATTERN in each FILE or
standard input.  Default operation mode is to print matching lines;
prefixed with filenames if there are multiple files.
Exit status is 0 if there are any matches, 1 if there are none.

Common options:
  -F       PATTERN is a newline-separated list of raw strings
  -E       PATTERN is a perl regular expression (default)
  -i       match case-insensitively
  -v       invert sense of the match
  -n       print line numbers
  -C NUM   print NUM lines of context around matching lines
  -l       print names of files containing a match
  -L       print names of files with no match
  -q       do not output anything, only return exit status
  -R       recursively match in directories
  -t GLOB  only search in files whose name match shell-like pattern GLOB

See `perldoc cgrep' for more options and detailed documentation.
};

    # State shared by all the subs below; filled in by parseopts() and
    # updated while files are processed.
    my ($cregexp, @args, $complement, $found_any,
        $match, $endfile,
        $print_filename, $print_lineno,
        $before_context, $after_context, $context,
        $hush_messages, $print_func, $func_cregexp,
        $file, $filename, $anyoutput, $lastoutput,
        $verbose, $recurse, $include_names, $exclude_names);

    # main(): parse the command line, search every input, and return the
    # process exit status (0 if any line was selected, 1 otherwise).
    sub main {
        parseopts();
        $found_any = 0;
        if (!@args) {
            # No file arguments: search standard input.
            $filename = "-";
            $file     = *STDIN;
            process_file();
        }
        elsif ($recurse) {
            require File::Find;
            File::Find::find({ wanted => \&found_file, no_chdir => 1 }, @args);
        }
        else {
            for (@args) {
                found_file();
            }
        }
        $verbose and warn "finished";
        return $found_any ? 0 : 1;
    }

    # found_file(): handle the path in $_ (set by the loop in main() or by
    # File::Find).  Applies the include/exclude name filters, skips
    # directories (or anything that is not a plain file in recursive mode),
    # then opens the file and searches it.
    sub found_file {
        my $path = $_;
        if (defined($include_names) || defined($exclude_names)) {
            my $basename = basename($path);
            if (defined($include_names) && $basename !~ $include_names) {
                $verbose and warn "not including file $path, basename $basename";
                return;
            }
            if (defined($exclude_names) && $basename =~ $exclude_names) {
                $verbose and warn "excluding file $path, basename $basename";
                return;
            }
        }
        unless ($recurse ? -f $path : !-d $path) {
            $verbose and warn "skipping special file $path\n";
            return;
        }
        $verbose and warn "opening file $path\n";
        $filename = $path;
        unless (open $file, "<", $path) {
            $hush_messages or warn qq[error opening file "$path": $!];
            return;
        }
        process_file();
        close $file;
    }

    # process_file(): read $file line by line and hand every selected line
    # (and, with context enabled, its surrounding lines) to the $match
    # callback as (LINE, LINENO, FUNCTION, IS_MATCH).  Calls $endfile when
    # the whole file has been read; nextfile() skips that call.
    sub process_file {
        my ($line, $func, @before);
        my $after = 0;      # after-context lines still owed
        $lastoutput = -1;
        READ: {
            for (;;) {
                $! = 0;     # so a stale errno is not reported as a read error
                last if eof($file);
                $line = <$file>;
                last unless defined($line);
                chomp $line;
                my $lineno = $file->input_line_number;
                if ($print_func && $line =~ /$func_cregexp/) {
                    $func = $+;     # last capture that matched names the function
                }
                my $hit = $line =~ /$cregexp/;
                if ($hit xor $complement) {
                    $found_any = 1;
                    if ($context) {
                        # Flush the buffered before-context lines first.
                        for my $ctx (@before) {
                            $match->(@$ctx, 0);
                        }
                        @before = ();
                        $after  = $after_context;
                    }
                    $match->($line, $lineno, $func, 1);
                }
                elsif ($context) {
                    if (0 < $after) {
                        $match->($line, $lineno, $func, 0);
                        $after--;
                    }
                    else {
                        # Keep at most $before_context lines for later use.
                        push @before, [$line, $lineno, $func];
                        $before_context < @before and shift @before;
                    }
                }
            }
            if ($! && !$hush_messages) {
                warn qq[error reading file "$filename": $!];
            }
            $endfile->();
        } # nextfile jumps here
    }

    # nextfile(): abandon the current file by leaving the READ block of
    # process_file() from inside a callback.
    sub nextfile {
        no warnings "exiting";
        last READ;
    }

    # print_match(LINE, LINENO, FUNCTION, IS_MATCH): normal output mode.
    # Prefixes are followed by ":" on matching lines and "-" on context
    # lines; "--" separates non-adjacent chunks when context is enabled.
    sub print_match {
        my ($line, $lineno, $func, $is_match) = @_;
        my $sep = $is_match ? ":" : "-";
        if ($context) {
            if ($lastoutput != $lineno - 1 && $anyoutput) {
                print "--\n";
            }
            $anyoutput  = 1;
            $lastoutput = $lineno;
        }
        $print_filename and print $filename, $sep;
        $print_func and defined($func) and print $func, $sep;
        $print_lineno and print $lineno, $sep;
        print $line, "\n";
    }

    # -l mode: print the file name at the first real match, then skip the
    # rest of the file.
    sub print_name_nextfile {
        $_[3] or return;
        print $filename, "\n";
        nextfile();
    }

    # -L mode: a real match means the file must not be listed, so skip it
    # before the end-of-file callback can print its name.
    sub found_nextfile {
        $_[3] or return;
        nextfile();
    }

    # -q mode: the first real match decides the exit status.
    sub found_exit_zero {
        $_[3] or return;
        exit 0;
    }

    # End-of-file callback for -L mode: the file had no selected line.
    sub print_filename {
        print $filename, "\n";
    }

    # Default end-of-file callback.
    sub noop {
    }

    # parseopts(): parse @ARGV, compile the search pattern and the
    # include/exclude globs, and select the $match / $endfile callbacks
    # for the requested mode.  Exits with status 2 on an invalid option
    # and dies if no pattern was given.
    sub parseopts {
        my ($regexp, $plain, $ignorecase, $exclude_glob, $include_glob);
        my $mode = "";
        ($before_context, $after_context, $func_cregexp) = (0, 0, qr/^(\w+)/);
        Getopt::Long::Configure("bundling", "gnu_compat", "prefix_pattern=(--|-)");
        GetOptions(
            "fixed-strings|F!"                 => sub { $plain = $_[1] },
            "extended-regexp|perl-regexp|E!"   => sub { $plain = !$_[1] },
            "pattern|e=s"                      => sub { $regexp = $_[1] },
            "invert-match|complement-match|v!" => sub { $complement = $_[1] },
            "filename|with-filename|H!"        => sub { $print_filename = $_[1] },
            "with-no-filename|h!"              => sub { $print_filename = !$_[1] },
            # "igore-case" is a historical misspelling, kept as an alias.
            "ignore-case|igore-case|fold-case|y|i!" => sub { $ignorecase = $_[1] },
            "line-number|n!"                   => sub { $print_lineno = $_[1] },
            "context|C=n"        => sub { $after_context = $before_context = $_[1] },
            "after-context|A=n"  => sub { $after_context = $_[1] },
            "before-context|B=n" => sub { $before_context = $_[1] },
            "files-with-match|list|l!" => sub { $mode = $_[1] ? "l" : "" },
            # The documentation spells this option --files-without-match.
            "files-without-match|file-without-match|missing|L!"
                                       => sub { $mode = $_[1] ? "L" : "" },
            "quiet|silent|q!"          => sub { $mode = $_[1] ? "q" : "" },
            "no-messages|hush-messages|s!" => sub { $hush_messages = $_[1] },
            "show-function|function|p!"    => sub { $print_func = $_[1] },
            "function-regexp|P=s" => sub {
                $func_cregexp = qr/$_[1]/;
                $print_func   = 1;
            },
            "directories|d=s" => sub {
                $recurse =
                    ($_[1] eq "recurse")                   ? 1
                  : ($_[1] eq "read" || $_[1] eq "skip")   ? 0
                  : die "error: invalid argument to option $_[0]";
            },
            "recursive|recurse|R|r!" => sub { $recurse = $_[1] },
            "include-names|type|t=s" => sub { $include_glob = $_[1] },
            "exclude-names=s"        => sub { $exclude_glob = $_[1] },
            "verbose!"               => sub { $verbose = $_[1] },
            # Getopt::Long traps die() inside a handler and turns it into an
            # option error, so print and exit explicitly instead.
            "help"      => sub { print $HELP_MESSAGE;    exit 0 },
            "version|V" => sub { print $VERSION_MESSAGE; exit 0 },
            "binary|binary-files|byte-offset|color|colour|count|devices|file|label|line-buffered|line-regexp|max-count|mmap|null|null-data|only-matching|text|unix-byte-offsets|word-regexp|"
                . "D|I|U|Z|a|b|c|f|m|o|u|w|x|z!"
                => sub { die "option unimplemented" },
        ) or exit(2);

        defined($regexp) or $regexp = shift @ARGV;
        defined($regexp) or die "required argument missing\n" . $USAGE_MESSAGE;

        # NOTE(review): the original test pattern was lost from the source;
        # this one is reconstructed from the warning text and looks for an
        # unescaped \< or \> -- confirm against the upstream script.
        if (!$plain && $regexp =~ /(?<!\\)(?:\\\\)*\\[<>]/) {
            $hush_messages
                or warn 'warning: \< and \> are not special in perl regexen';
        }
        if ($plain) {
            # Fixed strings: one quoted alternative per input line.
            $regexp = join "|",
                map { "(?:" . quotemeta($_) . ")" } split(/\n/, $regexp);
        }
        my $mods = $ignorecase ? "i" : "";
        $cregexp = qr/(?$mods:$regexp)/;

        @args = @ARGV;
        defined($print_filename)
            or $print_filename = 1 < @args || (@args && $recurse);
        if (0 < $before_context || 0 < $after_context) {
            $context = 1;
        }
        if (defined($include_glob)) {
            $include_names = glob_to_re($include_glob);
            $verbose and warn "include regexp: $include_names";
        }
        if (defined($exclude_glob)) {
            $exclude_names = glob_to_re($exclude_glob);
            $verbose and warn "exclude regexp: $exclude_names";
        }

        $endfile = \&noop;
        if    ($mode eq "l") { $match = \&print_name_nextfile }
        elsif ($mode eq "L") { ($match, $endfile) = (\&found_nextfile, \&print_filename) }
        elsif ($mode eq "q") { $match = \&found_exit_zero }
        elsif ($mode eq "")  { $match = \&print_match }
        else                 { die "internal error: invalid mode: $mode" }
    }

    exit main();

}

__END__