aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim J. Robbins <tjr@FreeBSD.org>2004-07-04 09:52:08 +0000
committerTim J. Robbins <tjr@FreeBSD.org>2004-07-04 09:52:08 +0000
commit6fdbbb54872ec7fac83387296f0165f7ad3400a9 (patch)
tree70ffe417c9436d51a3807610738f2a5993f7f0fc
parent7a39f4da90d53085484e72b7504a3e53dbc9f051 (diff)
downloadsrc-vendor/misc-GNU/grep.tar.gz
src-vendor/misc-GNU/grep.zip
Import GNU grep 2.5.1 (trimmed)vendor/misc-GNU/grep
Notes
Notes: svn path=/vendor/misc-GNU/dist1/; revision=131554
-rw-r--r--gnu/usr.bin/grep/AUTHORS5
-rw-r--r--gnu/usr.bin/grep/COPYING6
-rw-r--r--gnu/usr.bin/grep/ChangeLog984
-rw-r--r--gnu/usr.bin/grep/NEWS70
-rw-r--r--gnu/usr.bin/grep/THANKS122
-rw-r--r--gnu/usr.bin/grep/closeout.c121
-rw-r--r--gnu/usr.bin/grep/closeout.h17
-rw-r--r--gnu/usr.bin/grep/dfa.c1331
-rw-r--r--gnu/usr.bin/grep/dfa.h108
-rw-r--r--gnu/usr.bin/grep/doc/grep.texi1387
-rw-r--r--gnu/usr.bin/grep/doc/version.texi7
-rw-r--r--gnu/usr.bin/grep/error.c276
-rw-r--r--gnu/usr.bin/grep/error.h78
-rw-r--r--gnu/usr.bin/grep/exclude.c128
-rw-r--r--gnu/usr.bin/grep/exclude.h35
-rw-r--r--gnu/usr.bin/grep/getpagesize.h5
-rw-r--r--gnu/usr.bin/grep/grep.1212
-rw-r--r--gnu/usr.bin/grep/grep.c1038
-rw-r--r--gnu/usr.bin/grep/grep.h16
-rw-r--r--gnu/usr.bin/grep/hard-locale.c85
-rw-r--r--gnu/usr.bin/grep/hard-locale.h18
-rw-r--r--gnu/usr.bin/grep/isdir.c42
-rw-r--r--gnu/usr.bin/grep/kwset.c98
-rw-r--r--gnu/usr.bin/grep/kwset.h8
-rw-r--r--gnu/usr.bin/grep/quotearg.c613
-rw-r--r--gnu/usr.bin/grep/quotearg.h110
-rw-r--r--gnu/usr.bin/grep/savedir.c47
-rw-r--r--gnu/usr.bin/grep/savedir.h7
-rw-r--r--gnu/usr.bin/grep/search.c723
-rw-r--r--gnu/usr.bin/grep/system.h19
-rwxr-xr-xgnu/usr.bin/grep/tests/backref.sh38
-rw-r--r--gnu/usr.bin/grep/tests/bre.awk3
-rwxr-xr-xgnu/usr.bin/grep/tests/bre.sh2
-rw-r--r--gnu/usr.bin/grep/tests/bre.tests2
-rwxr-xr-xgnu/usr.bin/grep/tests/empty.sh41
-rw-r--r--gnu/usr.bin/grep/tests/ere.awk3
-rwxr-xr-xgnu/usr.bin/grep/tests/ere.sh2
-rwxr-xr-xgnu/usr.bin/grep/tests/file.sh59
-rw-r--r--gnu/usr.bin/grep/tests/formatbre.awk55
-rw-r--r--gnu/usr.bin/grep/tests/formatere.awk60
-rwxr-xr-xgnu/usr.bin/grep/tests/khadafy.sh2
-rwxr-xr-xgnu/usr.bin/grep/tests/options.sh2
-rw-r--r--gnu/usr.bin/grep/tests/spencer1.awk3
-rwxr-xr-xgnu/usr.bin/grep/tests/spencer1.sh2
-rwxr-xr-xgnu/usr.bin/grep/tests/spencer2.sh13
-rw-r--r--gnu/usr.bin/grep/tests/spencer2.tests317
-rwxr-xr-xgnu/usr.bin/grep/tests/status.sh22
-rw-r--r--gnu/usr.bin/grep/tests/tests475
-rwxr-xr-xgnu/usr.bin/grep/tests/warning.sh2
-rw-r--r--gnu/usr.bin/grep/xalloc.h87
-rw-r--r--gnu/usr.bin/grep/xmalloc.c116
-rw-r--r--gnu/usr.bin/grep/xstrtol.c282
-rw-r--r--gnu/usr.bin/grep/xstrtol.h64
-rw-r--r--gnu/usr.bin/grep/xstrtoumax.c31
54 files changed, 8280 insertions, 1119 deletions
diff --git a/gnu/usr.bin/grep/AUTHORS b/gnu/usr.bin/grep/AUTHORS
index 65ff8a1cd26e..e08a38cf24e6 100644
--- a/gnu/usr.bin/grep/AUTHORS
+++ b/gnu/usr.bin/grep/AUTHORS
@@ -38,4 +38,7 @@ it came straight from gawk-3.0.3 with small editing and fixes.
Many folks contributed, see THANKS; if I omitted someone please
send me email.
-Alain Magloire is the current maintainer.
+Alain Magloire maintained GNU grep until version 2.5e.
+
+Bernhard "Bero" Rosenkränzer <bero@redhat.com> is the current maintainer.
+
diff --git a/gnu/usr.bin/grep/COPYING b/gnu/usr.bin/grep/COPYING
index d60c31a97a54..60549be514af 100644
--- a/gnu/usr.bin/grep/COPYING
+++ b/gnu/usr.bin/grep/COPYING
@@ -2,7 +2,7 @@
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
@@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
+ Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -313,7 +313,7 @@ Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
- Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision version 69, Copyright (C) 19yy name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
diff --git a/gnu/usr.bin/grep/ChangeLog b/gnu/usr.bin/grep/ChangeLog
index c98848aa8bc7..aa50e78c1f40 100644
--- a/gnu/usr.bin/grep/ChangeLog
+++ b/gnu/usr.bin/grep/ChangeLog
@@ -1,3 +1,983 @@
+2002-03-26 Bernhard Rosenkraenzer <bero@redhat.com>
+ * src/grep.c: Don't fail if we don't have an stdout fd and -q
+ is used (happens e.g. on calls from hotplug scripts)
+ * src/grep.c: Don't hang forever if fed with an empty string to
+ grep for and --color enabled
+ * src/grep.c: Fix infinite loop on
+ echo "1 one" | grep -E "[0-9]*" -o
+ echo "1 one" | grep -E "[0-9]*" --color
+ * po/*: Sync with translation project
+ * src/grep.c, src/Makefile.am, configure.in: Add patch from
+ Paul Eggert <eggert@twinsun.com> to comply with ridiculous
+ guidelines (don't act differently if invoked as egrep or fgrep)
+ * configure.in: Bump version number, require a recent autoconf
+
+2002-03-14 Bernhard Rosenkraenzer <bero@redhat.com>
+ * src/Makefile.am, po/Makefile.in.in: Support DESTDIR properly
+ * tests/bre.tests: Add fix from
+ Peter Breitenlohner <peb@mppmu.mpg.de>
+
+2002-03-13 Bernhard Rosenkraenzer <bero@redhat.com>
+ * configure.in, m4/regex.m4, m4/malloc.m4, m4/realloc.m4:
+ Don't set LIBOBJS directly, autoconf 2.53 doesn't like it
+ * intl/*: Sync with gettext 0.11
+ * po/*: Sync with translation project
+ * configure.in, src/Makefile.am: Don't duplicate code - make
+ egrep and fgrep links to grep and set matcher based on
+ application name, suggestion from
+ Guillaume Cottenceau <gc@mandrakesoft.com>
+ * src/grep.c: (prline) Add fix for -i --color from
+ Jim Meyering <meyering@lucent.com>
+ * configure.in: Version 2.5; release
+
+2002-01-23 Bernhard Rosenkraenzer <bero@redhat.com>
+ * configure.in: Version 2.5g
+ * Makefile.cvs, grep.spec: Add packaging tools
+ Merge djgpp changes from Andrew Cottrell <anddjgpp@ihug.coml.au>:
+ * src/grep.c: Added conditional compilation for DJGPP
+ * djgpp: remove directory as it is no longer required with DJGPP 2.03
+ (or 2.04 when released)
+ * README.DOS: Moved djgpp/readme to readme.dos
+ * PATCHES.AC, PATCHES.AM: delete files - redundant
+ * configure.in, Makefile.am: remove djgpp directory from list
+
+2002-01-22 Bernhard Rosenkraenzer <bero@redhat.com>
+ * doc/grep.texi, doc/grep.1, NEWS: Document --label
+ * po/ru.po: Sync with translation project
+ * po/grep.pot: Sync with source
+
+2002-01-18 Bernhard Rosenkraenzer <bero@redhat.com>
+ * src/grep.c: Add --label, based on patch from Stepan Koltsov
+
+2001-11-20 Bernhard Rosenkraenzer <bero@redhat.com>
+ * autogen.sh: Don't hardcode aclocal dir
+
+2001-11-19 Bernhard Rosenkraenzer <bero@redhat.com>
+ * src/grep.c: Add --only-matching (-o) switch (see NEWS)
+ * doc/grep.texi, doc/grep.1, NEWS: Document changes
+ * configure.in, lib/Makefile.am: Don't use internal getopt if
+ we're on a system that provides a working getopt function
+
+2001-09-25 Bernhard Rosenkraenzer <bero@redhat.com>
+ * configure.in: Detect pcre correctly even when it's in
+ non-standard locations, using pcre-config
+ * src/grep.c: Add --color={always,never,tty} argument (like in ls)
+ * src/grep.c: Turn off blinking in the default colorization
+ * src/grep.c: Add --devices (-D) switch (analogous to --directories)
+ * src/dfa.c: Fix an i18n bug: echo "A" | grep '[A-Z0-9]' wouldn't work
+ in non-C-Locales on systems using current versions of glibc.
+ * AUTHORS: Change maintainer, credit Alain for his work until now
+ * configure.in, m4/decl.m4, m4/dosfile.m4, m4/gettext.m4,
+ m4/init.m4, m4/install.m4, m4/largefile.m4, m4/lcmessage.m4,
+ m4/header.m4, m4/isc-posix.m4, m4/missing.m4, m4/progtest.m4,
+ m4/sanity.m4:
+ Fix build with autoconf 2.5x, retain 2.1x compatibility for now
+ * autogen.sh: Add some crude hacks to make it possible to build with
+ both autoconf 2.5x and 2.1x
+ * acconfig.h: removed (no longer required)
+ * Makefile.am: add cvs-clean target
+ * doc/grep.texi, doc/grep.1, NEWS: Document changes
+ (--color, --devices, -D)
+ * src/dfa.c, src/grep.c: Add vim modelines
+
+2001-08-30 Alain Magloire
+
+ * configure.in: Add gl in ALL_LINGUAS.
+
+2001-08-30 Kurt D Schwehr
+
+ * doc/grep.1: Warn that grep inserts a "--" between groups of matches,
+ when using the context options.
+ * doc/grep.texi: Likewise.
+
+2001-08-25 Heikki Korpela
+
+ * doc/grep.texi: Point out that some Platforms do not support
+ reading of directories and silently ignore them.
+
+2001-08-21 Alain Magloire
+
+ * lib/malloc.c: New file:
+ * lib/realloc.c: New file:
+ * lib/Makefile.am: Add malloc.c and realloc.c in EXTRA_DIST.
+
+2001-07-31 Alain Magloire
+
+ * po/*.po: New files from the translation team:
+ grep-2.5e.de.po grep-2.5e.el.po grep-2.5e.eo.po grep-2.5e.es.po
+ grep-2.5e.et.po grep-2.5e.fr.po grep-2.5e.gl.po grep-2.5e.it.po
+ grep-2.5e.pl.po grep-2.5e.sl.po
+
+2001-07-31 Andreas Schwab
+
+ * src/grep.c: Fix all uses of error to pass a proper format
+ string.
+
+2001-07-29 Alain Magloire
+
+ * grep/src/grep.c (usage): Typos corrected.
+ Patches from Santiago Vila.
+
+2001-07-29 Alain Magloire
+
+ David Clissold, wrote:
+ a small bug in the GNU grep 2.4.2, which may have gone unnoticed
+ because it only causes a failure if building on a system with large
+ files enabled (e.g. an "off_t" is a "long long" rather than a "long").
+ savedir() takes on off_t argument, but in grepdir() the parameter
+ is cast to an (unsigned). Well, if an off_t is larger than an int,
+ the value gets truncated. This would not normally have an effect on a
+ little-endian platform (unless the file is >2GB), but on a big-endian
+ system it will always fail. The external effect is that
+ "grep -r foo dir_name" fails with ENOMEM (from malloc() within
+ savedir()).
+
+ * grep/src/grep.c (grepdir): Remove the (unsigned) cast when calling
+ savedir().
+ Patch from David Clissold.
+
+2001-07-29 Alain Magloire
+
+ * grep/doc/grep.texi: In Bugs report use {n,m} for consistency.
+ * grep/doc/grep.1: Likewise.
+ Noted by Steven Lucy.
+
+2001-04-27 Isamu Hasegawa
+
+ * dfa.c (mblen_buf) : New variable contains the amount of remain
+ byte of corresponding multibyte character in the input string.
+ (SKIP_REMAIN_MB_IF_INITIAL_STATE) : Use mblen_buf.
+ (match_anychar) : Use mblen_buf.
+ (match_mb_charset) : Use mblen_buf.
+ (transit_state_consume_1char) : Use mblen_buf.
+ (transit_state) : Use inputwcs to get current (multibyte) character.
+ (dfaexec) : Add initialization of mblen_buf.
+
+2001-04-27 Isamu Hasegawa
+
+ * dfa.c (addtok) : Set appropriate value to multibyte_prop.
+ (dfastate) : Add the initialization of the variable.
+ (dfaexec) : Call transit_state if d->fail may transit by
+ multibyte characters.
+ (transit_state_singlebyte) : Clean up unnecessary code.
+ (transit_state_consume_1char) : Likewise.
+ (transit_state) : Add checking for word and newline.
+
+2001-04-19 Isamu Hasegawa
+
+ * search.c (check_multibyte_string) : Check the case when mbclen == 0.
+
+2001-04-11 Isamu Hasegawa
+
+ * search.c (check_multibyte_string) : Check the head of multibyte
+ characters, and optimize a bit.
+ (EGexecute) : Optimize a bit.
+ (Fexecute) : Fix the index.
+
+2001-04-02 Alain Magloire
+
+ * lib/regex.c: Update from GNU lib C, with the changes
+ provided by Paul Eggert.
+ * lib/posix/regex.h: Likewise.
+
+2001-02-17 Paul Eggert
+
+ Stop trying to support hosts that have nonstandard declarations for
+ mbrtowc and/or mbstate_t. It's not worth the portability hassle.
+
+ * lib/quotearg.c (mbrtowc, mbsinit): Remove workaround macros
+ for hosts that have mbrtowc but not mbstate_t, as we now
+ insist on proper declarations for both before using mbrtowc.
+
+2001-03-18 Alain Magloire
+
+ * configure.in: Call AC_MBSTATE_T.
+ * Makefile.am: Add mbstate_t.m4
+ * m4/Makefile.am: Add mbstate_t.m4
+ * m4/mbstate_t.m4: New m4 macro.
+ * lib/strtol.c: Define CHAR_BITS.
+ Uwe H. Steinfeld, Ruslan Ermilov, Volkert Bochert, noted
+ that mbstate_t was not defined for certain platforms.
+
+2001-03-18 Paul Eggert
+
+ * src/grep.c (fillbuf): Fix storage allocation performance
+ bug: buffer was doubling in size in many cases where it didn't
+ have to.
+
+2001-03-17 Paul Eggert
+
+ * src/grep.c (fillbuf): Avoid unnecessary division by 2.
+ Don't check xrealloc return value; it's guaranteed to be nonzero.
+ (fillbuf, grepdir): Use xalloc_die rather than error; it's shorter.
+
+2001-03-17 Alain Magloire
+
+ * src/grep.c (context_length_arg): error () passing wrong format.
+ Spotted by Jim Meyering.
+
+2001-03-07 Alain Magloire
+
+ * README-alpha: Removed reference to GNU tar, add the location
+ of the CVSROOT.
+
+2001-03-06 Alain Magloire
+
+ Only the Regex patterns should be split in an array, patterns[].
+ The dfa and KWset compiled patterns should remain global and the
+ patterns compiled all at once.
+
+ * src/search.c: include "error.h" and "xalloc.h" to get prototyping
+ of x*alloc() and error().
+ (kwsinit): Reverse to previous behaviour and takes no argument.
+ (kwsmusts): Likewise.
+ (Gcompile): For the regex pattern, split them and each pattern
+ is put in different compiled structure patterns[]. The patterns
+ are given to dfacomp() and kwsmusts() as is.
+ (Ecompile): Likewise.
+ (Fcompile): Reverse to the old behaviour of compiling the entire
+ patterns in one shot.
+ (EGexecute): If falling to GNU regex for the matching, loop in the
+ array of compile patterns[] to find a match.
+ (error): Many error () were called with arguments in the wrong order.
+ * tests/file.sh: Simple test to check for pattern in files.
+
+ Reaction to bug report fired by Greg Louis <glouis@dynamicro.on.ca>
+
+2001-03-06 Isamu Hasegawa
+
+ In multibyte environments, handle multibyte characters as single
+ characters in bracket expressions.
+
+ * src/dfa.h (mb_char_classes) : new structure.
+ (mbcsets): new variable.
+ (nmbcsets): new variable.
+ (mbcsets_alloc) : new variable.
+ * src/dfa.c (prtok) : handle MBCSET.
+ (fetch_wc): new function to fetch a wide character.
+ (parse_bracket_exp_mb) : new function to handle multibyte character
+ in lex().
+ (lex): invoke parse_bracket_exp_mb() for multibyte bracket expression.
+ (atom): handle MBCSET.
+ (epsclosure): likewise.
+ (dfaanalyze): likewise.
+ (dfastate): likewise.
+ (match_mb_charset): new function to judge whether a bracket match
+ with a multibyte character.
+ (check_matching_with_multibyte_ops) : handle MBCSET.
+ (dfainit): initialize new variables.
+ (dfafree): free new variables.
+
+2001-03-04 Alain Magloire
+
+ To get more in sync with other GNU utilities like GNU tar and fetish
+ all the supporting functions are now under lib.
+ Thanks to Jim Meyering, Volkert Bochert and Paul Eggert for
+ the code and the reminders.
+
+ * src/grep.c (fatal): Function removed, using error () from
+ lib/error.c instead.
+ (usage): Copyright updated.
+ (error): Function removed, using error () from lib/error.c instead,
+ adjust prototypes.
+ (prog): Global variable rename to program_name, to work with new
+ lib/error.c.
+ (xrealloc): Removed using lib/xmalloc.c.
+ (xmalloc): Removed using lib/xmalloc.c
+ (main): Register with atexit() to check for error on stdout.
+ * configure.in: Check for atexit(), call jm_MALLOC, jm_RELLOC and
+ jm_PREREQ_ERROR.
+ * tests/bre.awk: Removed the hack to drain the buffer since we
+ always fclose(stdout) atexit.
+ * tests/ere.awk: Likewise.
+ * tests/spencer1.awk: Likewise.
+ * bootstrap/Makefile.try: Update the Makefile to reflect the changes
+ in the new hierarchy.
+
+ * README-alpha: New File.
+ * m4/realloc.m4: New File.
+ * m4/malloc.m4: New File.
+ * m4/error.m4: New File.
+ * m4/Makefile.am: Updated.
+ * lib: New directory.
+ * lib/Makefile.am: New file.
+ * lib/closeout.c: New file.
+ * lib/closeout.h: New file.
+ * lib/fnmatch.c: New file.
+ * lib/fnmatch.h: New file.
+ * lib/atexit.c: New file.
+ * lib/error.c: New file.
+ * lib/error.h: New file.
+ * lib/quotearg.h: New file.
+ * lib/quotearg.c: New file.
+ * lib/xmalloc.c: New file.
+ * lib/posix: New directory.
+ * lib/posix/Makefile.am: New file.
+ * src/getopt.c: Moved to lib.
+ * src/getopt1.c: Moved to lib.
+ * src/getopt.h: Moved to lib.
+ * src/alloca.c: Moved to lib.
+ * src/exclude.c: Moved to lib.
+ * src/exclude.h: Moved to lib.
+ * src/hard-locale.h: Moved to lib.
+ * src/hard-locale.c: Moved to lib.
+ * src/isdir.c: Moved to lib.
+ * src/mechr.c: Moved to lib.
+ * src/obstack.c: Moved to lib.
+ * src/obstack.h: Moved to lib.
+ * src/regex.c: Moved to lib.
+ * src/regex.h: Moved to lib.
+ * src/posix: Moved to lib.
+ * src/posix/regex.h: Moved to lib.
+ * src/savedir.h: Moved to lib.
+ * src/savedir.c: Moved to lib.
+ * src/stpcpy.c: Moved to lib.
+ * src/strtoul.c: Moved to lib.
+ * src/strtol.c: Moved to lib.
+ * src/strtoull.c: Moved to lib.
+ * src/strtoumax.c: Moved to lib.
+ * src/xstrtol.c: Moved to lib.
+ * src/xstrtol.h: Moved to lib.
+ * src/xstrtoumax.c: Moved to lib.
+
+2001-03-01 Isamu Hasegawa
+
+ Implement the mechanism to match with multibyte characters,
+ and use it for `period' in multibyte environments.
+
+ * dfa.h (mbps): new variable.
+ * dfa.c (prtok): handle ANYCHAR.
+ (lex): use ANYCHAR for `period' in multibyte environments.
+ (atom): handle ANYCHAR.
+ (state_index): initialize mbps in multibyte environments.
+ (epsclosure): handle ANYCHAR.
+ (dfaanalyze): handle ANYCHAR.
+ (dfastate): handle ANYCHAR.
+ (realloc_trans_if_necessary): new function.
+ (transit_state_singlebyte): new function.
+ (match_anychar): new function.
+ (check_matching_with_multibyte_ops): new function.
+ (transit_state_consume_1char): new function.
+ (transit_state): new function.
+ (dfaexec): invoke transit_state if expression can match with
+ a multibyte character in multibyte environments.
+ (dfamust): handle ANYCHAR.
+
+2001-03-01 Alain Magloire
+
+ * src/exclude.c: New file.
+ * src/exclude.h: New file.
+ * src/grep.c (main): Took the GNU tar code to handle
+ the option --include, --exclude, --exclude-from.
+ Files are checked for a match, with exclude_filename ().
+ New option --exclude-from.
+ * src/savedir.c: Call exclude_filename() to check for
+ file pattern exclusion or inclusion.
+ * configure.in: --disable-pcre rename to --disable-perl-regexp.
+
+
+2001-02-25 Alain Magloire
+
+ * src/dfa.c: Typo corrected.
+ Noted by Isamu Hasegawa.
+ * src/savedir.c: Typos corrected.
+
+2001-02-22 Alain Magloire
+
+ * src/savedir.c (isdir1): New function, calling isdir with
+ the correct pathname.
+
+2001-02-19 Isamu Hasegawa
+
+ Avoid incorrect state transition in multibyte environments.
+
+ * dfa.h (nmultibyte_prop): new variable.
+ (multibyte_prop): new variable.
+ * dfa.c (addtok): set inputwcs.
+ (dfastate): avoid incorrect state transition in multibyte
+ environments.
+ (dfaexec): likewise.
+ (dfainit): init multibyte_prop.
+ (dfafree): free multibyte_prop.
+ (inputwcs): new variable.
+
+2001-02-19 Isamu Hasegawa
+
+ Handle a multibyte character followed by '*', '+', and '{n,m}'
+ correctly.
+
+ * dfa.c (update_mb_len_index): new function.
+ Support for multibyte string.
+ (FETCH): call update_mb_len_index.
+ (lex): check cur_mb_index not to misunderstand multibyte characters.
+ (atom): make a tree from a multibyte character.
+ (dfaparse): initialize new variables.
+ (mbs): new variable.
+ (cur_mb_len): new variable.
+ (cur_mb_index): new variable.
+
+2001-02-18 Jim Meyering
+
+ * m4/dosfile.m4 (AC_DOSFILE): Move AC_DEFINEs out of AC_CACHE_CHECK.
+
+2001-02-17 Alain Magloire
+
+ * doc/grep.texi: Document the new options and the new behaviour
+ back-references are local. Use excerpt from Karl Berry regex
+ texinfo.
+
+ * bootstrap/Makefile.try: Added xstrtoumax.o xstrtoul.o hard-local.o
+
+2001-02-17 Alain Magloire
+
+ From Guglielmo 'bond' Bondioni :
+ The bug was that using a multi line file that contained REs (one per
+ line), backreferences in the REs were considered global (to the file)
+ and not local (to the line).
+ That is, \1 in line n refers to the first \(.\) in the whole file,
+ rather than in the line itself.
+
+ From Tapani Tarvainen :
+ # Re: grep -e '\(a\)\1' -e '\(b\)\1'
+ That's not the way it should work: multiple -e arguments
+ should be treated as independent patterns and back references
+ should not refer to previous ones.
+
+ From Paul Eggert :
+ GNU grep currently does not issue
+ diagnostics for the following two cases, both of which are erroneous:
+ grep -e '[' -e ']'
+ grep '[
+ ]'
+ POSIX requires a diagnostic in both cases because '[' is not a valid
+ regular expression.
+
+ To overcome those problems, grep no longer pass the concatenate
+ patterns to GNU regex but rather compile each patterns separately
+ and keep the result in an array.
+
+ * src/search.c (patterns): New global variable; a structure array
+ holding the compiled patterns.
+ Declare function prototypes to minimize error.
+ (dfa, kswset, regexbuf, regs): Removed, no longer static globals, but
+ rather fields in patterns[] structure per motif.
+ (Fcompile): Alloc an entry in patterns[] to hold the regex.
+ (Ecompile): Alloc an entry per motif in the patterns[] array.
+ (Gcompile): Likewise.
+ (EGexecute): Loop through of array of patterns[] for a match.
+
+2001-02-17 Alain Magloire
+
+ From Bernd Strieder :
+ # tail -f logfile | grep important | do_something_urgent
+ # tail -f logfile | grep important | do_something_taking_very_long
+ If grep does full buffering in these cases then the urgent operation
+ does not happen as it should in the first case, and in the second case
+ time is lost due to waiting for the buffer to be filled.
+ This is clearly spoken not grep's fault in the first place, but libc's.
+ There is a heuristic in libc that make a stream line-buffered only if a
+ terminal is on the other end. This doesn't take care of the cases where
+ this connection is somehow indirect.
+
+ * src/grep.c (line_buffered): new option variable.
+ (prline): if line_buffered is set fflush() is called.
+ (usage): line_buffered new option.
+ Input from Paul Eggert, doing setvbuf() may not be portable
+ and breaks grep -z.
+
+2001-02-16 Alain Magloire
+
+ Patch from Isamu Hasegawa, for multibyte support.
+ This patch prevent kwset_matcher from following problems.
+ For example, in SJIS encoding, one character has the codepoint 0x895c.
+ So the second byte of the character can match with '\' incorrectly.
+ And in eucJP encoding, there are the characters whose codepoints are
+ 0xa5b9, 0xa5c8. On the other hand, there is one character whose
+ codepoint is 0xb9a5. So 0xb9a5 can match with 2nd byte of 0xa5b9
+ and 1st byte of 0xa5c8.
+
+ * configure.in: Add check for mbrtowc.
+ * src/search.c (check_multibyte_string): new function.
+ Support for multibyte string.
+ (EGexecute): call check_multibyte_string when kwset is set.
+ (Fexecute): call to check_multibyte_string.
+ (MBS_SUPPORT): new macro.
+ (MB_CUR_MAX): new macro.
+
+2001-02-16 Alain Magloire
+
+ * djgpp/config.bat: Fix for 4dos.com.
+ * m4/dosfile.m4 (HAVE_DOS_FILE_CONTENTS): Was not set.
+ Bugs noted and patched by Juan Manuel Guerrero.
+
+2001-02-16 Alain Magloire
+
+ A much requested feature, the possibility to select
+ files when doing recurse :
+ # find . -name "*.c" | xargs grep main {}
+ # grep --include=*.c main .
+ # find . -not -name "*.c" | xargs grep main {}
+ # grep --exclude=*.c main .
+
+ * src/grep.c (short_options): -R equivalent to -r.
+ (#ifdef) : Fix some inconsistencies in the use of #ifdefs, prefer
+ #if defined() when possible.
+ (long_options): Add --color, --include and exclude.
+ (Usage): Description of new options.
+ (color): Rename color variable to color_option.
+ Removed 'always|never|auto' arguments, not necessary for grep.
+ (exclude_pattern): new variable, holder for the file pattern.
+ (include_pattern): new variable, holder for the file pattern.
+ * src/savedir.c: Signature change, take two new arguments.
+ * doc/grep.texi: Document, new options.
+ * doc/grep.man: Document, new options.
+
+2001-02-09 Alain Magloire
+
+ * src/grep.c (long_options): Added equivalent to -r with -R.
+ * src/grep.c (usage): added --color and --colour.
+ Noted with patch from, H.Merijn Brand and Wichert Akkerman.
+
+2001-02-09 Alain Magloire
+
+ Patch from Ulrich Drepper to provide highlighting.
+
+ * src/grep.c: New option --color.
+ (color): New static var.
+ (COLOR_OPTION): new constant.
+ (grep_color): new static var.
+ (prline): Now when color is set prline() will call the current matcher
+ to find the offset of the matching string.
+ * src/savedir.c: Take advantage of _DIRENT_HAVE_TYPE if supported.
+ * src/search.c (EGexecute, Fexecute, Pexecute): Take a new argument
+ when doing exact match for the color highlighting.
+
+2000-09-01 Brian Youmans
+
+ * doc/grep.texi: Typo fixes.
+
+2000-08-30 Paul Eggert
+
+ * doc/grep.texi (Usage): Talk about what "grep -r hello *.c"
+ means.
+
+2000-08-20 Paul Eggert
+
+ Handle range expressions correctly even when they match
+ strings with two or more characters.
+
+ * src/dfa.h (CRANGE): New enum value. Comment fix.
+
+ * src/dfa.c: Include <locale.h> if HAVE_SETLOCALE.
+ Include "hard-locale.h".
+ (prtok): Print CRANGE.
+ (hard_LC_COLLATE): New static var.
+ (lex): Return CRANGE when parsing a character range in a hard locale.
+ Don't use strcoll; it's no longer needed and wasn't correct anyway.
+ Use unsigned rather than token to hold unsigned chars.
+ (addtok): Comment fix.
+ (atom): Treat a CRANGE as if it were (.\1), approximately.
+ (dfaparse): Initialize hard_LC_COLLATE.
+
+ * src/Makefile.am (base_sources): Add hard-locale.c, hard-locale.h.
+
+ * src/hard-locale.c, src/hard-locale.h: New files, taken from
+ textutils.
+
+2000-08-20 Paul Eggert
+
+ * tests/Makefile.am (TESTS_ENVIRONMENT): Add LC_ALL=C, since
+ some of the tests assume the C locale.
+
+2000-08-16 Paul Eggert
+
+ * src/search.c (Gcompile, Ecompile): -x overrides -w, for
+ consistency with fgrep. Don't assume that sizes fit in 'int'.
+ Fix comments to match code.
+
+2000-06-06 Paul Eggert
+
+ * src/grep.c (grepdir): Don't look at st_dev when testing for
+ Mingw32 bug.
+
+2000-06-05 Paul Eggert
+
+ Port to Mingw32, based on suggestions from Christian Groessler
+ <cpg@aladdin.de>.
+
+ * src/isdir.c: New file, taken from fileutils.
+
+ * src/Makefile.am (base_sources): Add isdir.c.
+
+ * src/grep.c (grepfile): Use isdir instead of doing it inline.
+ (grepdir): Suppress ancestor check if the directory's inode and device
+ are both zero, as that occurs only on Mingw32 which doesn't support
+ inode or device.
+
+ * src/system.h (isdir): New decl.
+ (is_EISDIR): Depend on HAVE_DIR_EACCES_BUG, not D_OK.
+ Use isdir, not access.
+
+2000-06-02 Paul Eggert
+
+ Problem noted by Gerald Stoller <gerald_stoller@hotmail.com>
+
+ * src/grep.c (main): POSIX.2 says that -q overrides -l, which
+ in turn overrides the other output options. Fix grep to
+ behave that way.
+
+2000-05-27 Paul Eggert
+
+ Simplify and tune the buffer allocation strategy. Do not reserve a
+ large save area: reserve only enough bytes to hold the residue, plus
+ page alignment. Put a newline sentinel before the buffer, for speed
+ when searching backwards for newline.
+
+ * src/grep.c (ubuffer, bufsalloc, PREFERRED_SAVE_FACTOR, page_alloc):
+ Remove. All uses changed.
+ (INITIAL_BUFSIZE): New macro.
+ (reset, fillbuf): Use simpler buffer allocation strategy.
+ (reset): Check for preposterously large pagesize that would cause
+ later calculations to overflow.
+ (fillbuf): Do not resize buffer if there's room at the end for
+ at least one more page. This greatly increases performance when
+ reading from non-regular files that contain no newlines.
+ When growing the buffer, double its size instead of using a
+ more complicated algorithm.
+ (prtext, grep): Speed up by relying on the newline sentinel before the
+ start of the buffer.
+ (grep): When looking backwards for the last newline in a buffer,
+ stop when we hit the residue, since it can't contain a newline.
+ This avoids an O(N**2) algorithm when reading binary data from
+ a pipe. Use a sentinel to speed up the backward search for newline.
+ (nlscan): Undo previous change; it wasn't needed and just complicates
+ and slows down the code a tad.
+
+2000-05-24 Paul Eggert
+
+ Handle very large input counts better. Bug noted by Jim Meyering.
+
+ * src/grep.c (totalcc, totalnl): Use uintmax_t, not off_t.
+ (add_count): New function.
+ (nlscan, prline, grep): Use it to check line and byte count overflows.
+ (nlscan, grep): Don't keep track of counts when not asked to; this
+ avoids unnecessary overflow diagnostics.
+ (print_offset_sep): Now takes args of type uintmax_t and char,
+ not off_t and int.
+
+2000-05-16 Paul Eggert
+
+ Problem reported by Bob Proulx <rwp@hprwp.fc.hp.com>, this patch
+ is based on his finding, with appropriate corrections.
+
+ * src/grep.c (main): Fix bug: -x and -w matched even when no
+ patterns were specified.
+ * tests/empty.sh: Test for -x and -w bug in grep 2.4.2.
+
+2000-04-24 Paul Eggert
+
+ POSIX.2 conformance fixes: grep -q now exits with status zero
+ if an input line is selected, even if an error also occurs.
+ grep -s no longer affects exit status.
+
+ * src/grep.c (suppress_errors): Move definition earlier so
+ that suppressible_error can use it.
+ (suppressible_error): New function.
+ (exit_on_match): New var.
+ (grepbuf): If exit_on_match is nonzero, exit with status zero
+ immediately.
+ (grep, grepfile, grepdir): Invoke suppressible_error.
+ (main): -q sets exit_on_match.
+
+ * doc/grep.1, doc/grep.texi, NEWS:
+ Document -q's behavior as required by POSIX.2.
+
+ * tests/status.sh:
+ Test for -q and -s behavior as conforming to POSIX.2.
+
+2000-04-20 Paul Eggert
+
+ * tests/Makefile.am (TESTS_ENVIRONMENT):
+ Set GREP_OPTIONS to the empty string.
+
+2000-04-20 Paul Eggert
+
+ * tests/status.sh: Fix typo: test -b -> test -r.
+
+2000-04-20 Paul Eggert
+
+ * src/dfa.c (lex):
+ Do not assume that [c] is equivalent to [c-c]; this isn't true
+ if LC_COLLATE specifies that some characters are equivalent.
+ (setbit_case_fold): New function.
+ (lex): Use it to simplify the code a bit.
+
+2000-04-17 Paul Eggert
+
+ Do CRLF munging only if HAVE_DOS_FILE_CONTENTS, instead of
+ having it depend on O_BINARY (which leads to incorrect results
+ on BeOS, VMS, and MacOS).
+
+ * bootstrap/Makefile.try (DEFS): Add -DHAVE_DOS_FILE_CONTENTS.
+ * src/system.h (SET_BINARY): Define only if HAVE_DOS_FILE_CONTENTS.
+ (O_BINARY): Do not define.
+
+ * m4/dosfile.m4: Define HAVE_DOS_FILE_CONTENTS if it appears we're
+ using DOS.
+
+ * src/grep.c (undossify_input, fillbuf, dosbuf.c, prline, main):
+ Depend on HAVE_DOS_FILE_CONTENTS, not O_BINARY, when handling CRLF
+ matters.
+ (grepfile, main): Depend on SET_BINARY, not O_BINARY, when
+ handling binary files on hosts that care about text versus binary.
+
+2000-04-17 Paul Eggert
+
+ * lib/getpagesize.h (getpagesize): Define to B_PAGE_SIZE if
+ __BEOS__ is defined. Based on a fix by Bruno Haible
+ <haible@clisp.cons.org>.
+
+2000-04-17 Bruno Haible
+
+ * src/system.h [BeOS]: Ignore O_BINARY.
+ * src/getpagesize.h [BeOS]: Define getpagesize() as B_PAGE_SIZE.
+
+2000-04-10 Paul Eggert
+
+ * doc/grep.1, doc/grep.texi, NEWS: -C now requires an operand.
+ * src/grep.c (short_options, long_options, main, usage): Likewise.
+ (context_length_arg): Renamed from ck_atoi. Now reports an error
+ and exits if the number is out of range for a context length.
+ (get_nondigit_option): New function, which checks for overflow
+ correctly, and which does not parse nonadjacent strings of digits
+ into a single number.
+ (main): Use get_nondigit_option instead of doing the code inline.
+ With -A, -B, and -C, optarg is now guaranteed to be nonzero.
+
+2000-04-08 Paul Eggert
+
+ Now that we know that the input is always terminated by a
+ newline before the matching algorithms see it, clean up the
+ matching algorithms so that they no longer need to modify the
+ input by inserting a sentinel newline, and no longer worry
+ about running off the end of the buffer due to a missing sentinel.
+
+ * src/grep.c (nlscan, prpending, prtext, grepbuf): Do not
+ worry about running off the end of the input buffer, since
+ it's now guaranteed to end in the sentinel newline.
+ * src/search.c (EGexecute, Pexecute): Likewise.
+
+ * src/dfa.c (prtok, dfasyntax, dfaparse, copy, merge, state_index,
+ epsclosure, dfaexec, dfacomp):
+ Change many instances of "T *" to "T const *", to catch
+ any inadvertent programming errors made during this conversion.
+ * src/dfa.h (dfacomp, dfaexec, dfaparse): Likewise.
+ * src/grep.c (struct stats.parent, long_options, grepdir,
+ compile, execute, fillbuf, lastnl, lastout, nlscan, prline,
+ prpending, prtext, grepbuf, grep, grepfile, grepdir): Likewise.
+ * src/grep.h (struct matcher.compile, struct matcher.execute):
+ Likewise.
+ * src/kwset.c (struct kwset.trans, kwsalloc, kwsincr, treefails,
+ treedelta, hasevery, treenext, bmexec, cwexec, kwsexec): Likewise.
+ * src/kwset.h (kwsalloc, kwsincr, kwsexec): Likewise.
+ * src/search.c (kwsmusts, Gcompile, Ecompile, EGexecute, Pcompile,
+ Pexecute): Likewise.
+
+ * src/dfa.c (dfaexec):
+ Use size_t, not char *, to avoid worrisome casts to convert
+ char const * to char *.
+ * src/dfa.h (dfaexec): Likewise.
+ * src/grep.c (execute): Likewise.
+ * src/grep.h (execute): Likewise.
+ * src/kwset.c (bmexec, cwexec, kwsexec): Likewise.
+ * src/kwset.h (struct kwsmatch.offset, kwsalloc, kwsincr,
+ kwsexec): Likewise.
+ * src/search.c (EGexecute, Fexecute, Pexecute): Likewise.
+
+ * src/dfa.h (_PTR_T): Depend on defined __STDC__, not __STDC__.
+ (PARAMS): Depend on PROTOTYPES, not __STDC__.
+
+ * src/dfa.c (dfasyntax): Last arg is unsigned char, not int.
+ * src/dfa.h (dfasyntax): Likewise.
+
+ * src/dfa.h (struct dfa): Remove member newlines; no longer needed.
+ * src/dfa.c (build_state, dfaexec, dfafree): Do not worry
+ about special newline state.
+
+ * src/search.c (matchers): Move definition to end of file, so
+ that we don't need forward decls.
+ (lastexact): Remove.
+ (kwset_exact_matches): New var; subsumes old lastexact var.
+ All uses changed.
+
+ * src/dfa.c (index): Remove macro.
+ (REALLOC_IF_NECESSARY): Skip unnecessary test.
+ (tstbit, setbit, clrbit): Declare arg to be unsigned, to help compiler.
+ (copyset, zeroset, equal): Use C builtin primitives, to help compiler.
+ (dfaexec): Do not modify input string.
+ Remove newline parameter; no longer needed.
+ (comsubs): Use strchr, not index.
+
+ * src/grep.h (matchers): Use fixed name size, not pointer (as
+ there's no need for the extra flexibility). All uses changed.
+
+ * src/kwset.h (struct kwsmatch.offset): Renamed from beg, with
+ change of type to size_t. All uses changed.
+
+ * src/grep.c (reset): No longer need kludge for dfaexec. Simplify.
+ (reset, grepbuf): Adjust to new interface for 'execute'.
+ (install_matcher): List is now terminated by null compile,
+ not null name.
+ Do not invoke setrlimit if that wouldn't change the limit.
+
+ * src/dfa.c (xcalloc, xmalloc, xrealloc, prtok, tstbit, setbit,
+ clrbit, copyset, zeroset, notset, equal, charclass_index,
+ looking_at, lex, addtok, atom, nsubtoks, copytoks, closure,
+ branch, regexp, copy, insert, merge, delete, state_index,
+ build_state, build_state_zero, icatalloc, icpyalloc, istrstr,
+ ifree, freelist, enlist, comsubs, addlists, inboth):
+ Remove forward decls; no longer needed.
+ * src/grep.c (ck_atoi, usage, error, setmatcher,
+ install_matcher, prepend_args, prepend_default_options,
+ page_alloc, reset, fillbuf, grepbuf, prtext, prpending, prline,
+ print_offset_sep, nlscan, grep, grepfile): Likewise.
+ * src/kwset.c (enqueue, treefails, treedelta, hasevery,
+ treenext, bmexec, cwexec): Likewise.
+ * src/search.c (Gcompile, Ecompile, EGexecute, Fcompile, Fexecute,
+ Pcompile, Pexecute, kwsinit): Likewise.
+
+ * src/search.c (Pcompile): Do not assume newly allocated
+ storage is zeroed.
+
+2000-04-06 Paul Eggert
+
+ * doc/grep.1, doc/grep.texi, NEWS: Improve the explanation of
+ locale-dependent behavior of range expressions. Mention
+ LC_COLLATE, since this affects range expressions.
+
+2000-03-26 Paul Eggert
+
+ * Makefile.am (ACINCLUDE_INPUTS): Add decl.m4, inttypes_h.m4,
+ uintmax_t.m4, ulonglong.m4, xstrtoumax.m4.
+ * m4/Makefile.am (EXTRA_DIST): Likewise.
+
+ * src/Makefile.am (base_sources):
+ Add xstrtol.c, xstrtol.h, xstrtoumax.c.
+ (EXTRA_DIST): Add strtol.c.
+
+ * configure.in (jm_AC_TYPE_UINTMAX_T, jm_AC_PREREQ_XSTRTOUMAX,
+ HAVE_DECL_STRTOUL, HAVE_DECL_STRTOULL): Add.
+ (AC_REPLACE_FUNCS): Add strtoul.
+
+ * src/grep.c: Include xstrtol.h.
+ (ck_atoi): Use xstrtoumax and do proper overflow checking.
+ (max_count, outleft): Now off_t, not int.
+ (main): Likewise. Use xstrtoumax to convert max_count from string.
+
+ * acconfig.h (HAVE_DECL_STRTOUL, HAVE_DECL_STRTOULL): New #undefs.
+ (HAVE_STPCPY, ENABLE_NLS, HAVE_CATGETS, HAVE_GETTEXT,
+ HAVE_LC_MESSAGES): Remove.
+
+ * m4/decl.m4, m4/inttypes_h.m4, m4/uintmax_t.m4, m4/ulonglong.m4,
+ m4/xstrtoumax.m4, src/strtol.c, src/strtoul.c, src/strtoull.c,
+ src/strtoumax.c, src/xstrtol.c, src/xstrtol.h, src/xstrtoumax.c:
+ New files, taken unchanged from textutils, fileutils, sh-utils
+ and/or tar.
+
+2000-03-23 Paul Eggert
+
+ * src/search.c (Pcompile): Add support for NUL bytes in
+ Perl regular expressions.
+
+2000-03-23 Paul Eggert
+
+ * NEWS, doc/grep.1, doc/grep.texi: Change --pcre to --perl-regexp.
+ * src/grep.c (long_options, usage): Likewise.
+
+ * doc/grep.1, doc/grep.texi: Remove pgrep program.
+ * src/Makefile.am (bin_PROGRAMS): Likewise.
+ (pgrep_SOURCES): Remove.
+
+ * src/grep.c (main): Rename matcher from "pgrep" to "perl".
+ * src/search.c (matchers): Likewise.
+
+ * src/search.c: Do not include stdio.h; no longer needed.
+ (NILP): Remove.
+ (sub): No longer static.
+ (n_pcre): Remove.
+ (cre): No longer an array. Present only if HAVE_LIBPCRE.
+ (extra): New variable.
+ (Pcompile): Use fatal to report errors.
+ This also removes a possible core dump.
+ Add checks (marked FIXME) for restrictions in pcre.
+ Use pcre_maketables for proper localized behavior.
+ (Pcompile, Pexecute): Use GNU coding style.
+ The argument is a single pattern, not a list of patterns separated
+ by newlines; this is for consistency with grep and egrep.
+ Use pcre_study for speed.
+ (Pexecute): Abort if we lack pcre.
+ Abort if pcre_exec reports an impossible error.
+ Use code similar to the rest of search.c
+ to narrow down to the line we've found.
+
+2000-03-21 Alain Magloire
+
+ * configure.in: added AC_CHECK_LIB(pcre, pcre_exec)
+ * ChangeLog: Typos corrected.
+ * src/search.c: new MACRO HAVE_LIBPCRE
+
+2000-03-21 H.Merijn Brand
+
+ * src/Makefile.am(bin_PROGRAMS): added pgrep and new macro
+ pgrep_SOURCES.
+ * src/search.c: new functions Pcompile() and Pexecute()
+ to support PCRE. Update matcher[] array for pgrep.
+ * src/grep.c: new short and long option --pcre and -P.
+ usage() updated.
+
+2000-03-21 Bastiaan Stougie
+
+ Improvement of the -m or --max-count option. Now works for NUM > 1 and
+ prints trailing context for the last matching line.
+
+ * src/grep.c
+ (after_last_match): Is a new off_t variable that replaces inputhwm
+ to retain the correct input offset even after a call to fillbuf. Note
+ that after_last_match has a different meaning than inputhwm:
+ it always points to the offset in the input of the first byte after
+ the last matching line, and is 0 if no matching line has been found
+ yet.
+ (grep): Print trailing context after the NUMth match when the -m NUM
+ option is used.
+ (grep): Added comment. Should have been commented already.
+ (grepbuf): Now updates outleft correctly. This fixes the bug that the
+ -m NUM option did not stop after NUM lines for NUM greater than 1.
+ (grepbuf, prtext): Now update after_last_match instead of inputhwm.
+ (fillbuf): No longer updates inputhwm.
+ (prpending): When outputting trailing context of the max_count-th
+ matching line, stop at the first matching line.
+ (grepfile): Seek to after_last_match or eof, depending on the values
+ of outleft and bufmapped.
+ (usage): added the -m or --max-count option to the help message.
+ * doc/grep.texi, doc/grep.1: Document the change of the -m option.
+
+2000-03-17 Paul Eggert
+
+ Add new -m or --max-count option, based on a suggestion by
+ Bastiaan Stougie.
+
+ * doc/grep.texi, doc/grep.1: Document it.
+ * src/grep.c (short_options, long_options, main): Add it.
+ (inputhwm): New variable.
+ (fillbuf, prtext, grepbuf): Set it.
+ (bufmapped): Now a macro (defined to zero) if HAVE_MMAP is not defined.
+ (max_count, outleft): New variables.
+ (prtext, grepbuf, grep): Don't output more than outleft lines.
+ (grepfile): If grepping standard input, seek to the limit of what
+ we've read before exiting. This fixes a bug with mmapped input,
+ and is needed for proper -m support.
+ (main): Exit immediately if -m 0 is specified.
+
2000-03-08 Alain Magloire
* configure.in: version 2.4.2
@@ -45,7 +1025,7 @@
2000-01-30 Alain Magloire
* doc/grep.1: corrected typo.
- Noted by Ruslan Ermilob.
+ Noted by Ruslan Ermilov.
2000-01-30 Alain Magloire
@@ -171,7 +1151,7 @@
2000-01-04 Paul Eggert
- Inititial patch from David O'Brien.
+ Initial patch by Ruslan Ermilov.
Add --binary-files option.
* NEWS, doc/grep.1, doc/grep.texi: Document it.
diff --git a/gnu/usr.bin/grep/NEWS b/gnu/usr.bin/grep/NEWS
index 115952f63cc0..9bb821937eb3 100644
--- a/gnu/usr.bin/grep/NEWS
+++ b/gnu/usr.bin/grep/NEWS
@@ -1,3 +1,73 @@
+Version 2.5.1
+ - This is a bugfix release. No new features.
+
+Version 2.5
+ - The new option --label allows specifying a different name for input
+ from stdin. See the man or info pages for details.
+
+ - The internal lib/getopt* files are no longer used on systems providing
+ getopt functionality in their libc (e.g. glibc 2.2.x).
+ If you need the old getopt files, use --with-included-getopt.
+
+ - The new option --only-matching (-o) will print only the part of matching
+ lines that matches the pattern. This is useful, for example, to extract
+ IP addresses from log files.
+
+ - i18n bug fixed ([A-Z0-9] wouldn't match A in locales other than C on
+ systems using recent glibc builds).
+
+ - GNU grep can now be built with autoconf 2.52.
+
+ - The new option --devices controls how grep handles device files. Its usage
+ is analogous to --directories.
+
+ - The new option --line-buffered flushes the output after every line.
+ There is a noticeable slowdown when forcing line buffering.
+
+ - Back references are now local to the regex.
+ grep -e '\(a\)\1' -e '\(b\)\1'
+ The last backref \1 in the second expression refers to \(b\).
+
+ - The new option --include=PATTERN will only search matching files
+ when recursing in directories
+
+ - The new option --exclude=PATTERN will skip matching files when
+ recursing in directories.
+
+ - The new option --color will use the environment variable GREP_COLOR
+ (default is red) to highlight the matching string.
+ --color takes an optional argument specifying when to colorize a line:
+ --color=always, --color=tty, --color=never
+
+ - The following changes are for POSIX.2 conformance:
+
+ . The -q or --quiet or --silent option now causes grep to exit
+ with zero status when an input line is selected, even if an error
+ also occurs.
+
+ . The -s or --no-messages option no longer affects the exit status.
+
+ . Bracket regular expressions like [a-z] are now locale-dependent.
+ For example, many locales sort characters in dictionary order,
+ and in these locales the regular expression [a-d] is not
+ equivalent to [abcd]; it might be equivalent to [aBbCcDd], for
+ example. To obtain the traditional interpretation of bracket
+ expressions, you can use the C locale by setting the LC_ALL
+ environment variable to the value "C".
+
+ - The -C or --context option now requires an argument, partly for
+ consistency, and partly because POSIX.2 recommends against
+ optional arguments.
+
+ - The new -P or --perl-regexp option tells grep to interpret the pattern as
+ a Perl regular expression.
+
+ - The new option --max-count=num makes grep stop reading a file after num
+ matching lines.
+ New option -m; equivalent to --max-count.
+
+ - Translations for bg, ca, da, nb and tr have been added.
+
Version 2.4.2
- Added more check in configure to default the grep-${version}/src/regex.c
diff --git a/gnu/usr.bin/grep/THANKS b/gnu/usr.bin/grep/THANKS
index 40295fc8d7e6..93dd31c1b06e 100644
--- a/gnu/usr.bin/grep/THANKS
+++ b/gnu/usr.bin/grep/THANKS
@@ -1,50 +1,72 @@
-Aharon Robbins <arnold@gnu.org>
-Akim Demaille <akim@epita.fr>
-Alain Magloire <alainm@gnu.org>
-Andreas Schwab <schwab@suse.de>
-Andreas Ley <andy@rz.uni-karlsruhe.de>
-Ben Elliston <bje@cygnus.com>
-David J MacKenzie <djm@catapult.va.pubnix.com>
-David O'Brien <obrien@freebsd.org>
-Eli Zaretskii <eliz@is.elta.co.il>
-Florian La Roche <florian@knorke.saar.de>
-Franc,ois Pinard <pinard@IRO.UMontreal.CA>
-Grant McDorman <grant@isgtec.com>
-Harald Hanche-Olsen <hanche@math.ntnu.no>
-Jeff Bailey <jbailey@nisa.net>
-Jim Hand <jhand@austx.tandem.com>
-Jim Meyering <meyering@asic.sc.ti.com>
-Jochen Hein <jochen.hein@delphi.central.de>
-Joel N. Weber II <devnull@gnu.org>
-John Hughes <john@nitelite.calvacom.fr>
-Jorge Stolfi <stolfi@dcc.unicamp.br>
-Karl Berry <karl@cs.umb.edu>
-Karl Heuer <kwzh@gnu.org>
-Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
-Kazuro Furukawa <furukawa@apricot.kek.jp>
-Keith Bostic <bostic@bsdi.com>
-Krishna Sethuraman <krishna@sgihub.corp.sgi.com>
-Mark Waite <markw@mddmew.fc.hp.com>
-Martin P.J. Zinser <zinser@decus.de>
-Martin Rex <martin.rex@sap-ag.de>
-Michael Aichlmayr <mikla@nx.com>
-Miles Bader <miles@ccs.mt.nec.co.jp>
-Olaf Kirch <okir@ns.lst.de>
-Paul Eggert <eggert@twinsun.com>
-Paul Kimoto <kimoto@spacenet.tn.cornell.edu>
-Phillip C. Brisco <phillip.craig.brisco@ccmail.census.gov>
-Philippe Defert <Philippe.Defert@cern.ch>
-Philippe De Muyter <phdm@info.ucl.ac.be>
-Roland Roberts <rroberts@muller.com>
-Ruslan Ermilov <ru@freebsd.org>
-Shannon Hill <hill@synnet.com>
-Sotiris Vassilopoulos <Sotiris.Vassilopoulos@betatech.gr>
-Stewart Levin <stew@sep.stanford.edu>
-Sydoruk Stepan <step@unitex.kiev.ua>
-Tom 'moof' Spindler <dogcow@ccs.neu.edu>
-Tom Tromey <tromey@creche.cygnus.com>
-Ulrich Drepper <drepper@cygnus.com>
-UEBAYASHI Masao <masao@nf.enveng.titech.ac.jp>
-Volker Borchert <bt@teknon.de>
-Wichert Akkerman <wakkerma@wi.leidenuniv.nl>
-William Bader <william@nscs.fast.net>
+Aharon Robbins <arnold@gnu.org>
+Akim Demaille <akim@epita.fr>
+Alain Magloire <alainm@gnu.org>
+Andreas Schwab <schwab@suse.de>
+Andreas Ley <andy@rz.uni-karlsruhe.de>
+Bastiaan "Darquan" Stougie <darquan@zonnet.nl>
+Ben Elliston <bje@cygnus.com>
+Bernd Strieder <strieder@student.uni-kl.de>
+Bernhard Rosenkraenzer <bero@redhat.com>
+Bob Proulx <rwp@hprwp.fc.hp.com>
+Brian Youmans <3diff@gnu.org>
+Bruno Haible <haible@ilog.fr>
+Christian Groessler <cpg@aladdin.de>
+David Clissold <cliss@austin.ibm.com>
+David J MacKenzie <djm@catapult.va.pubnix.com>
+David O'Brien <obrien@freebsd.org>
+Eli Zaretskii <eliz@is.elta.co.il>
+Florian La Roche <laroche@redhat.com>
+Franc,ois Pinard <pinard@IRO.UMontreal.CA>
+Gerald Stoller <gerald_stoller@hotmail.com>
+Grant McDorman <grant@isgtec.com>
+Greg Louis <glouis@dynamicro.on.ca>
+Guglielmo 'bond' Bondioni <g.bondioni@libero.it>
+H. Merijn Brand <h.m.brand@hccnet.nl>
+Harald Hanche-Olsen <hanche@math.ntnu.no>
+Hans-Bernhard Broeker <broeker@physik.rwth-aachen.de>
+Heikki Korpela <heko@iki.fi>
+Isamu Hasegawa <isamu@yamato.ibm.com>
+Jeff Bailey <jbailey@nisa.net>
+Jim Hand <jhand@austx.tandem.com>
+Jim Meyering <meyering@asic.sc.ti.com>
+Jochen Hein <jochen.hein@delphi.central.de>
+Joel N. Weber II <devnull@gnu.org>
+John Hughes <john@nitelite.calvacom.fr>
+Jorge Stolfi <stolfi@dcc.unicamp.br>
+Juan Manuel Guerrero <ST001906@HRZ1.HRZ.TU-Darmstadt.De>
+Karl Berry <karl@cs.umb.edu>
+Karl Heuer <kwzh@gnu.org>
+Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
+Kazuro Furukawa <furukawa@apricot.kek.jp>
+Keith Bostic <bostic@bsdi.com>
+Krishna Sethuraman <krishna@sgihub.corp.sgi.com>
+Kurt D Schwehr <kdschweh@insci14.ucsd.edu>
+Mark Waite <markw@mddmew.fc.hp.com>
+Martin P.J. Zinser <zinser@decus.de>
+Martin Rex <martin.rex@sap-ag.de>
+Michael Aichlmayr <mikla@nx.com>
+Miles Bader <miles@ccs.mt.nec.co.jp>
+Olaf Kirch <okir@ns.lst.de>
+Paul Eggert <eggert@twinsun.com>
+Paul Kimoto <kimoto@spacenet.tn.cornell.edu>
+Phillip C. Brisco <phillip.craig.brisco@ccmail.census.gov>
+Philippe Defert <Philippe.Defert@cern.ch>
+Philippe De Muyter <phdm@info.ucl.ac.be>
+Philip Hazel <ph10@cus.cam.ac.uk>
+Roland Roberts <rroberts@muller.com>
+Ruslan Ermilov <ru@freebsd.org>
+Santiago Vila <sanvila@unex.es>
+Shannon Hill <hill@synnet.com>
+Sotiris Vassilopoulos <Sotiris.Vassilopoulos@betatech.gr>
+Stewart Levin <stew@sep.stanford.edu>
+Sydoruk Stepan <step@unitex.kiev.ua>
+Tapani Tarvainen <tt@mit.jyu.fi>
+Tom 'moof' Spindler <dogcow@ccs.neu.edu>
+Tom Tromey <tromey@creche.cygnus.com>
+Ulrich Drepper <drepper@cygnus.com>
+UEBAYASHI Masao <masao@nf.enveng.titech.ac.jp>
+Uwe H. Steinfeld <usteinfeld@gmx.net>
+Volker Borchert <bt@teknon.de>
+Wichert Akkerman <wichert@cistron.nl>
+William Bader <william@nscs.fast.net>
+Wolfgang Schludi <schludi@syscomp.de>
diff --git a/gnu/usr.bin/grep/closeout.c b/gnu/usr.bin/grep/closeout.c
new file mode 100644
index 000000000000..4fdac32b2e85
--- /dev/null
+++ b/gnu/usr.bin/grep/closeout.c
@@ -0,0 +1,121 @@
+/* closeout.c - close standard output
+ Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(Text) gettext (Text)
+#else
+# define _(Text) Text
+#endif
+
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#ifndef EXIT_FAILURE
+# define EXIT_FAILURE 1
+#endif
+
+#include <stdio.h>
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+
+#include "closeout.h"
+#include "error.h"
+#include "quotearg.h"
+#if 0
+#include "__fpending.h"
+#endif
+
+static int default_exit_status = EXIT_FAILURE;
+static const char *file_name;
+
+/* Set the value to be used for the exit status when close_stdout is called.
+ This is useful when it is not convenient to call close_stdout_status,
+ e.g., when close_stdout is called via atexit. Until this function is
+ called, the status used is EXIT_FAILURE. */
+void
+close_stdout_set_status (int status)
+{
+ default_exit_status = status;
+}
+
+/* Set the file name to be reported in the event an error is detected
+ by close_stdout_status. Only the pointer FILE is stored; the string
+ is not copied, so it must stay valid for as long as it may be
+ reported. While no name is set, the diagnostic omits the file name. */
+void
+close_stdout_set_file_name (const char *file)
+{
+ file_name = file;
+}
+
+/* Close standard output, exiting with status STATUS on failure.
+ If a program writes *anything* to stdout, that program should `fflush'
+ stdout and make sure that it succeeds before exiting. Otherwise,
+ suppose that you go to the extreme of checking the return status
+ of every function that does an explicit write to stdout. The last
+ printf can succeed in writing to the internal stream buffer, and yet
+ the fclose(stdout) could still fail (due e.g., to a disk full error)
+ when it tries to write out that buffered data. Thus, you would be
+ left with an incomplete output file and the offending program would
+ exit successfully.
+
+ FIXME: note the fflush suggested above is implicit in the fclose
+ we actually do below. Consider doing only the fflush and/or using
+ setvbuf to inhibit buffering.
+
+ Besides, it's wasteful to check the return value from every call
+ that writes to stdout -- just let the internal stream state record
+ the failure. That's what the ferror test is checking below.
+
+ It's important to detect such failures and exit nonzero because many
+ tools (most notably `make' and other build-management systems) depend
+ on being able to detect failure in other tools via their exit status. */
+
+void
+close_stdout_status (int status)
+{
+  /* E is -1 while nothing has gone wrong, 0 when an earlier write
+     failed (the stream's error flag is set, but the errno of that
+     failure is no longer known), or a positive errno from fclose.  */
+  int e = ferror (stdout) ? 0 : -1;
+
+#if 0
+  if (__fpending (stdout) == 0)
+    return;
+#endif
+
+  if (fclose (stdout) != 0)
+    e = errno;
+
+  /* Report both kinds of failure.  Testing `0 < e' here would silently
+     drop an earlier write error whenever the final fclose succeeds.
+     With E == 0, error prints the message without an errno string.  */
+  if (0 <= e)
+    {
+      char const *write_error = _("write error");
+      if (file_name)
+        error (status, e, "%s: %s", quotearg_colon (file_name), write_error);
+      else
+        error (status, e, "%s", write_error);
+    }
+}
+
+/* Close standard output via close_stdout_status, passing it the status
+ last set with close_stdout_set_status (EXIT_FAILURE if never set). */
+void
+close_stdout (void)
+{
+ close_stdout_status (default_exit_status);
+}
diff --git a/gnu/usr.bin/grep/closeout.h b/gnu/usr.bin/grep/closeout.h
new file mode 100644
index 000000000000..80f24e45f9da
--- /dev/null
+++ b/gnu/usr.bin/grep/closeout.h
@@ -0,0 +1,17 @@
+#ifndef CLOSEOUT_H
+# define CLOSEOUT_H 1
+
+# ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+void close_stdout_set_status PARAMS ((int status));
+void close_stdout_set_file_name PARAMS ((const char *file));
+void close_stdout PARAMS ((void));
+void close_stdout_status PARAMS ((int status));
+
+#endif
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c
index 048e901c5e81..590bfa7d1109 100644
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@@ -36,12 +36,24 @@ extern void free();
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
-#undef index
-#define index strchr
#else
#include <strings.h>
#endif
+#if HAVE_SETLOCALE
+# include <locale.h>
+#endif
+
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+/* We can handle multibyte strings. */
+# define MBS_SUPPORT
+#endif
+
+#ifdef MBS_SUPPORT
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
#ifndef DEBUG /* use the same approach as regex.c */
#undef assert
#define assert(e)
@@ -102,6 +114,7 @@ extern void free();
#include "regex.h"
#include "dfa.h"
+#include "hard-locale.h"
/* HPUX, define those as macros in sys/param.h */
#ifdef setbit
@@ -112,47 +125,7 @@ extern void free();
#endif
static void dfamust PARAMS ((struct dfa *dfa));
-
-static ptr_t xcalloc PARAMS ((size_t n, size_t s));
-static ptr_t xmalloc PARAMS ((size_t n));
-static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
-#ifdef DEBUG
-static void prtok PARAMS ((token t));
-#endif
-static int tstbit PARAMS ((int b, charclass c));
-static void setbit PARAMS ((int b, charclass c));
-static void clrbit PARAMS ((int b, charclass c));
-static void copyset PARAMS ((charclass src, charclass dst));
-static void zeroset PARAMS ((charclass s));
-static void notset PARAMS ((charclass s));
-static int equal PARAMS ((charclass s1, charclass s2));
-static int charclass_index PARAMS ((charclass s));
-static int looking_at PARAMS ((const char *s));
-static token lex PARAMS ((void));
-static void addtok PARAMS ((token t));
-static void atom PARAMS ((void));
-static int nsubtoks PARAMS ((int tindex));
-static void copytoks PARAMS ((int tindex, int ntokens));
-static void closure PARAMS ((void));
-static void branch PARAMS ((void));
static void regexp PARAMS ((int toplevel));
-static void copy PARAMS ((position_set *src, position_set *dst));
-static void insert PARAMS ((position p, position_set *s));
-static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
-static void delete PARAMS ((position p, position_set *s));
-static int state_index PARAMS ((struct dfa *d, position_set *s,
- int newline, int letter));
-static void build_state PARAMS ((int s, struct dfa *d));
-static void build_state_zero PARAMS ((struct dfa *d));
-static char *icatalloc PARAMS ((char *old, char *new));
-static char *icpyalloc PARAMS ((char *string));
-static char *istrstr PARAMS ((char *lookin, char *lookfor));
-static void ifree PARAMS ((char *cp));
-static void freelist PARAMS ((char **cpp));
-static char **enlist PARAMS ((char **cpp, char *new, size_t len));
-static char **comsubs PARAMS ((char *left, char *right));
-static char **addlists PARAMS ((char **old, char **new));
-static char **inboth PARAMS ((char **left, char **right));
static ptr_t
xcalloc (size_t n, size_t s)
@@ -194,8 +167,9 @@ xrealloc (ptr_t p, size_t n)
#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
if ((index) >= (nalloc)) \
{ \
- while ((index) >= (nalloc)) \
+ do \
(nalloc) *= 2; \
+ while ((index) >= (nalloc)); \
REALLOC(p, t, nalloc); \
}
@@ -204,7 +178,7 @@ xrealloc (ptr_t p, size_t n)
static void
prtok (token t)
{
- char *s;
+ char const *s;
if (t < 0)
fprintf(stderr, "END");
@@ -230,6 +204,11 @@ prtok (token t)
case ORTOP: s = "ORTOP"; break;
case LPAREN: s = "LPAREN"; break;
case RPAREN: s = "RPAREN"; break;
+ case CRANGE: s = "CRANGE"; break;
+#ifdef MBS_SUPPORT
+ case ANYCHAR: s = "ANYCHAR"; break;
+ case MBCSET: s = "MBCSET"; break;
+#endif /* MBS_SUPPORT */
default: s = "CSET"; break;
}
fprintf(stderr, "%s", s);
@@ -240,19 +219,19 @@ prtok (token t)
/* Stuff pertaining to charclasses. */
static int
-tstbit (int b, charclass c)
+tstbit (unsigned b, charclass c)
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
-setbit (int b, charclass c)
+setbit (unsigned b, charclass c)
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
-clrbit (int b, charclass c)
+clrbit (unsigned b, charclass c)
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
@@ -260,19 +239,13 @@ clrbit (int b, charclass c)
static void
copyset (charclass src, charclass dst)
{
- int i;
-
- for (i = 0; i < CHARCLASS_INTS; ++i)
- dst[i] = src[i];
+ memcpy (dst, src, sizeof (charclass));
}
static void
zeroset (charclass s)
{
- int i;
-
- for (i = 0; i < CHARCLASS_INTS; ++i)
- s[i] = 0;
+ memset (s, 0, sizeof (charclass));
}
static void
@@ -287,12 +260,7 @@ notset (charclass s)
static int
equal (charclass s1, charclass s2)
{
- int i;
-
- for (i = 0; i < CHARCLASS_INTS; ++i)
- if (s1[i] != s2[i])
- return 0;
- return 1;
+ return memcmp (s1, s2, sizeof (charclass)) == 0;
}
/* A pointer to the current dfa is kept here during parsing. */
@@ -324,7 +292,7 @@ static unsigned char eolbyte;
/* Entry point to set syntax options. */
void
-dfasyntax (reg_syntax_t bits, int fold, int eol)
+dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
{
syntax_bits_set = 1;
syntax_bits = bits;
@@ -332,22 +300,145 @@ dfasyntax (reg_syntax_t bits, int fold, int eol)
eolbyte = eol;
}
+/* Set bit B in charclass C.  When case folding is in effect and B is a
+   letter, also set the bit of its opposite-case counterpart.  */
+static void
+setbit_case_fold (unsigned b, charclass c)
+{
+  setbit (b, c);
+
+  /* Without case folding the single bit is all there is to do.  */
+  if (! case_fold)
+    return;
+
+  if (ISUPPER (b))
+    setbit (tolower (b), c);
+  else if (ISLOWER (b))
+    setbit (toupper (b), c);
+}
+
/* Lexical analyzer. All the dross that deals with the obnoxious
GNU Regex syntax bits is located here. The poor, suffering
reader is referred to the GNU Regex documentation for the
meaning of the @#%!@#%^!@ syntax bits. */
-static char *lexstart; /* Pointer to beginning of input string. */
-static char *lexptr; /* Pointer to next input character. */
+static char const *lexstart; /* Pointer to beginning of input string. */
+static char const *lexptr; /* Pointer to next input character. */
static int lexleft; /* Number of characters remaining. */
static token lasttok; /* Previous token returned; initially END. */
static int laststart; /* True if we're separated from beginning or (, |
only by zero-width characters. */
static int parens; /* Count of outstanding left parens. */
static int minrep, maxrep; /* Repeat counts for {m,n}. */
+static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
+
+#ifdef MBS_SUPPORT
+/* These variables are used only if (MB_CUR_MAX > 1). */
+static mbstate_t mbs; /* Mbstate for mbrlen(). */
+static int cur_mb_len; /* Byte length of the current scanning
+ multibyte character. */
+static int cur_mb_index; /* Byte index of the current scanning multibyte
+ character.
+
+ singlebyte character : cur_mb_index = 0
+ multibyte character
+ 1st byte : cur_mb_index = 1
+ 2nd byte : cur_mb_index = 2
+ ...
+ nth byte : cur_mb_index = n */
+static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
+ Each element stores the number of bytes
+ remaining in the corresponding multibyte
+ character of the input string. An element's
+ value is 0 if the corresponding character
+ is a single-byte character.
+ e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
+ mblen_buf : 0, 3, 2, 1
+ */
+static wchar_t *inputwcs; /* Wide character representation of input
+ string in dfaexec().
+ The length of this array is same as
+ the length of input string(char array).
+ inputstring[i] is a single-byte char,
+ or 1st byte of a multibyte char.
+ And inputwcs[i] is the codepoint. */
+static unsigned char const *buf_begin;/* reference to begin in dfaexec(). */
+static unsigned char const *buf_end; /* reference to end in dfaexec(). */
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+/* Update cur_mb_len and cur_mb_index for the byte about to be scanned.
+   P points to that byte (the current lexptr) and LEN is the number of
+   bytes remaining in the buffer.  */
+static void
+update_mb_len_index (unsigned char const *p, int len)
+{
+  /* If the previous byte was part of a multibyte character, either
+     step to the next byte of that character or, if it was the last
+     one, fall back to index 0 so that a new character is examined.  */
+  if (cur_mb_index)
+    cur_mb_index = (cur_mb_index >= cur_mb_len) ? 0 : cur_mb_index + 1;
+
+  /* The previous byte was a single-byte character or the last byte of
+     a multibyte character: find out what starts at P.  */
+  if (! cur_mb_index)
+    {
+      /* Keep the result in a size_t so that the error values (size_t) -1
+         and (size_t) -2 are tested explicitly instead of relying on
+         their conversion to int.  */
+      size_t nbytes = mbrlen ((char const *) p, len, &mbs);
+
+      if (nbytes == (size_t) -1 || nbytes == (size_t) -2)
+        {
+          /* Invalid or incomplete sequence: treat the byte as a
+             single-byte character.  The conversion state is unspecified
+             (or holds a partial character) at this point, so reset it
+             before the next byte is examined.  cur_mb_index is already 0.  */
+          memset (&mbs, 0, sizeof mbs);
+          cur_mb_len = 1;
+        }
+      else if (nbytes > 1)
+        {
+          /* A multibyte character; P is its first byte.  */
+          cur_mb_len = nbytes;
+          cur_mb_index = 1;
+        }
+      else
+        /* A NUL byte (0) or an ordinary single-byte character (1).
+           cur_mb_index is already 0.  */
+        cur_mb_len = 1;
+    }
+}
+#endif /* MBS_SUPPORT */
+#ifdef MBS_SUPPORT
/* Note that characters become unsigned here. */
-#define FETCH(c, eoferr) \
+# define FETCH(c, eoferr) \
+ { \
+ if (! lexleft) \
+ { \
+ if (eoferr != 0) \
+ dfaerror (eoferr); \
+ else \
+ return lasttok = END; \
+ } \
+ if (MB_CUR_MAX > 1) \
+ update_mb_len_index(lexptr, lexleft); \
+ (c) = (unsigned char) *lexptr++; \
+ --lexleft; \
+ }
+
+/* Fetch the next wide character of the pattern, advance lexptr and
+   lexleft past it, and set cur_mb_len to the number of bytes consumed.
+   Used only if the current locale is a multibyte environment.
+
+   At end of input, report EOFERR through dfaerror if EOFERR is nonnull
+   (NOTE(review): presumably dfaerror does not return -- confirm);
+   otherwise return -1.  A NUL byte, or a byte that does not begin a
+   valid multibyte character, is consumed on its own and returned as
+   its unsigned byte value.  */
+static wchar_t
+fetch_wc (char const *eoferr)
+{
+  wchar_t wc;
+  size_t nbytes;
+
+  if (! lexleft)
+    {
+      if (eoferr != 0)
+        dfaerror (eoferr);
+      else
+        return -1;
+    }
+
+  nbytes = mbrtowc (&wc, lexptr, lexleft, &mbs);
+  if (nbytes == 0 || nbytes == (size_t) -1 || nbytes == (size_t) -2)
+    {
+      /* After a failed conversion the state is unspecified (or holds a
+         partial character), so start afresh for the next call.  */
+      if (nbytes != 0)
+        memset (&mbs, 0, sizeof mbs);
+      cur_mb_len = 1;
+      /* Go through unsigned char: where plain char is signed, a byte
+         with the high bit set would sign-extend, and 0xFF would even
+         collide with the -1 end-of-input value.  */
+      wc = (unsigned char) *lexptr;
+    }
+  else
+    cur_mb_len = nbytes;
+
+  lexptr += cur_mb_len;
+  lexleft -= cur_mb_len;
+  return wc;
+}
+#else
+/* Note that characters become unsigned here. */
+# define FETCH(c, eoferr) \
{ \
if (! lexleft) \
{ \
@@ -359,6 +450,202 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+/* Multibyte character handling subroutine for lex.
+   Parse a bracket expression whose opening `[' has already been consumed
+   and append the resulting struct mb_char_classes to dfa->mbcsets.
+   Plain characters, ranges, character classes ([:name:]), equivalence
+   classes ([=x=]) and collating elements ([.x.]) are stored in separate
+   arrays of the new entry.  Errors are reported through dfaerror().  */
+static void
+parse_bracket_exp_mb ()
+{
+  wchar_t wc, wc1, wc2;
+
+  /* Work area to build a mb_char_classes.  */
+  struct mb_char_classes *work_mbc;
+  int chars_al, range_sts_al, range_ends_al, ch_classes_al,
+    equivs_al, coll_elems_al;
+
+  REALLOC_IF_NECESSARY(dfa->mbcsets, struct mb_char_classes,
+		       dfa->mbcsets_alloc, dfa->nmbcsets + 1);
+  /* dfa->multibyte_prop[] holds the index into dfa->mbcsets.
+     It is updated in addtok(), because the index in dfa->tokens[]
+     cannot be decided here.  */
+
+  /* Initialize the work area.  */
+  work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]);
+
+  chars_al = 1;
+  range_sts_al = range_ends_al = 0;
+  ch_classes_al = equivs_al = coll_elems_al = 0;
+
+  work_mbc->nchars = work_mbc->nranges = work_mbc->nch_classes = 0;
+  work_mbc->nequivs = work_mbc->ncoll_elems = 0;
+  work_mbc->ch_classes = NULL;
+  work_mbc->range_sts = work_mbc->range_ends = NULL;
+  work_mbc->equivs = work_mbc->coll_elems = NULL;
+  /* Allocate `chars' only after the pointers have been cleared, so the
+     fresh allocation is not overwritten with NULL (and leaked).  */
+  MALLOC(work_mbc->chars, wchar_t, chars_al);
+
+  wc = fetch_wc(_("Unbalanced ["));
+  if (wc == L'^')
+    {
+      wc = fetch_wc(_("Unbalanced ["));
+      work_mbc->invert = 1;
+    }
+  else
+    work_mbc->invert = 0;
+  do
+    {
+      wc1 = -1; /* Mark wc1 as not fetched yet.  */
+
+      /* Note that if we're looking at some other [:...:] construct,
+	 we just treat it as a bunch of ordinary characters.  We can do
+	 this because we assume regex has checked for syntax errors before
+	 dfa is ever called.  */
+      if (wc == L'[' && (syntax_bits & RE_CHAR_CLASSES))
+	{
+#define BRACKET_BUFFER_SIZE 128
+	  char str[BRACKET_BUFFER_SIZE];
+	  wc1 = wc;
+	  wc = fetch_wc(_("Unbalanced ["));
+
+	  /* If pattern contains `[[:', `[[.', or `[[='.  */
+	  if (cur_mb_len == 1 && (wc == L':' || wc == L'.' || wc == L'='))
+	    {
+	      unsigned char c;
+	      unsigned char delim = (unsigned char)wc;
+	      int len = 0;
+	      for (;;)
+		{
+		  if (! lexleft)
+		    dfaerror (_("Unbalanced ["));
+		  c = (unsigned char) *lexptr++;
+		  --lexleft;
+
+		  /* Test lexleft first so that *lexptr is never read
+		     once the pattern is exhausted.  */
+		  if (lexleft == 0 || (c == delim && *lexptr == ']'))
+		    break;
+		  /* Keep one byte free for the terminating NUL that is
+		     stored after the loop.  */
+		  if (len < BRACKET_BUFFER_SIZE - 1)
+		    str[len++] = c;
+		  else
+		    /* This is in any case an invalid class name.  */
+		    str[0] = '\0';
+		}
+	      str[len] = '\0';
+
+	      if (lexleft == 0)
+		{
+		  /* The pattern ended inside the construct: store `[' and
+		     the delimiter as ordinary characters and stop.  */
+		  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
+				       work_mbc->nchars + 2);
+		  work_mbc->chars[work_mbc->nchars++] = L'[';
+		  work_mbc->chars[work_mbc->nchars++] = delim;
+		  break;
+		}
+
+	      if (--lexleft, *lexptr++ != ']')
+		dfaerror (_("Unbalanced ["));
+	      if (delim == ':')
+		/* build character class.  */
+		{
+		  wctype_t wt;
+		  /* Query the character class as wctype_t.  */
+		  wt = wctype (str);
+
+		  if (ch_classes_al == 0)
+		    {
+		      /* The elements are wctype_t, not wchar_t.  */
+		      MALLOC(work_mbc->ch_classes, wctype_t, ++ch_classes_al);
+		    }
+		  REALLOC_IF_NECESSARY(work_mbc->ch_classes, wctype_t,
+				       ch_classes_al,
+				       work_mbc->nch_classes + 1);
+		  work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
+
+		}
+	      else if (delim == '=' || delim == '.')
+		{
+		  char *elem;
+		  MALLOC(elem, char, len + 1);
+		  strncpy(elem, str, len + 1);
+
+		  if (delim == '=')
+		    /* build equivalent class.  */
+		    {
+		      if (equivs_al == 0)
+			{
+			  MALLOC(work_mbc->equivs, char*, ++equivs_al);
+			}
+		      REALLOC_IF_NECESSARY(work_mbc->equivs, char*,
+					   equivs_al,
+					   work_mbc->nequivs + 1);
+		      work_mbc->equivs[work_mbc->nequivs++] = elem;
+		    }
+
+		  if (delim == '.')
+		    /* build collating element.  */
+		    {
+		      if (coll_elems_al == 0)
+			{
+			  MALLOC(work_mbc->coll_elems, char*, ++coll_elems_al);
+			}
+		      REALLOC_IF_NECESSARY(work_mbc->coll_elems, char*,
+					   coll_elems_al,
+					   work_mbc->ncoll_elems + 1);
+		      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
+		    }
+		}
+	      /* Nothing is left in wc to be stored as a plain character.  */
+	      wc = -1;
+	    }
+	  else
+	    /* We treat '[' as a normal character here.  */
+	    {
+	      wc2 = wc1; wc1 = wc; wc = wc2; /* swap */
+	    }
+	}
+      else
+	{
+	  if (wc == L'\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+	    wc = fetch_wc(_("Unbalanced ["));
+	}
+
+      if (wc1 == -1)
+	wc1 = fetch_wc(_("Unbalanced ["));
+
+      if (wc1 == L'-')
+	/* build range characters.  */
+	{
+	  wc2 = fetch_wc(_("Unbalanced ["));
+	  if (wc2 == L']')
+	    {
+	      /* In the case [x-], the - is an ordinary hyphen,
+		 which is left in wc1, the lookahead character.
+		 Push the `]' back so it is fetched again.  */
+	      lexptr -= cur_mb_len;
+	      lexleft += cur_mb_len;
+	      wc2 = wc;
+	    }
+	  else
+	    {
+	      if (wc2 == L'\\'
+		  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+		wc2 = fetch_wc(_("Unbalanced ["));
+	      wc1 = fetch_wc(_("Unbalanced ["));
+	    }
+
+	  if (range_sts_al == 0)
+	    {
+	      MALLOC(work_mbc->range_sts, wchar_t, ++range_sts_al);
+	      MALLOC(work_mbc->range_ends, wchar_t, ++range_ends_al);
+	    }
+	  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+			       range_sts_al, work_mbc->nranges + 1);
+	  work_mbc->range_sts[work_mbc->nranges] = wc;
+	  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+			       range_ends_al, work_mbc->nranges + 1);
+	  work_mbc->range_ends[work_mbc->nranges++] = wc2;
+	}
+      else if (wc != -1)
+	/* build normal characters.  */
+	{
+	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
+			       work_mbc->nchars + 1);
+	  work_mbc->chars[work_mbc->nchars++] = wc;
+	}
+    }
+  while ((wc = wc1) != L']');
+}
+#endif /* MBS_SUPPORT */
#ifdef __STDC__
#define FUNC(F, P) static int F(int c) { return P(c); }
@@ -390,7 +677,7 @@ is_blank (int c)
static struct {
const char *name;
int (*pred) PARAMS ((int));
-} prednames[] = {
+} const prednames[] = {
{ ":alpha:]", is_alpha },
{ ":upper:]", is_upper },
{ ":lower:]", is_lower },
@@ -423,12 +710,10 @@ looking_at (char const *s)
static token
lex (void)
{
- token c, c1, c2;
+ unsigned c, c1, c2;
int backslash = 0, invert;
charclass ccl;
int i;
- char lo[2];
- char hi[2];
/* Basic plan: We fetch a character. If it's a backslash,
we set the backslash flag and go through the loop again.
@@ -439,6 +724,14 @@ lex (void)
for (i = 0; i < 2; ++i)
{
FETCH(c, 0);
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && cur_mb_index)
+ /* If this is a part of a multi-byte character, we must treat
+ this byte data as a normal character.
+ e.g. In case of SJIS encoding, some character contains '\',
+ but they must not be backslash. */
+ goto normal_char;
+#endif /* MBS_SUPPORT */
switch (c)
{
case '\\':
@@ -659,6 +952,15 @@ lex (void)
case '.':
if (backslash)
goto normal_char;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ /* In multibyte environment period must match with a single
+ character not a byte. So we use ANYCHAR. */
+ laststart = 0;
+ return lasttok = ANYCHAR;
+ }
+#endif /* MBS_SUPPORT */
zeroset(ccl);
notset(ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
@@ -684,6 +986,17 @@ lex (void)
case '[':
if (backslash)
goto normal_char;
+ laststart = 0;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ /* In multibyte environment a bracket expression may contain
+ multibyte characters, which must be treated as characters
+ (not bytes). So we parse it by parse_bracket_exp_mb(). */
+ parse_bracket_exp_mb();
+ return lasttok = MBCSET;
+ }
+#endif
zeroset(ccl);
FETCH(c, _("Unbalanced ["));
if (c == '^')
@@ -705,14 +1018,11 @@ lex (void)
for (c1 = 0; prednames[c1].name; ++c1)
if (looking_at(prednames[c1].name))
{
- int (*pred)() = prednames[c1].pred;
- if (case_fold
- && (pred == is_upper || pred == is_lower))
- pred = is_alpha;
+ int (*pred) PARAMS ((int)) = prednames[c1].pred;
for (c2 = 0; c2 < NOTCHAR; ++c2)
if ((*pred)(c2))
- setbit(c2, ccl);
+ setbit_case_fold (c2, ccl);
lexptr += strlen(prednames[c1].name);
lexleft -= strlen(prednames[c1].name);
FETCH(c1, _("Unbalanced ["));
@@ -730,7 +1040,6 @@ lex (void)
which is left in c1, the lookahead character. */
--lexptr;
++lexleft;
- c2 = c;
}
else
{
@@ -738,30 +1047,30 @@ lex (void)
&& (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH(c2, _("Unbalanced ["));
FETCH(c1, _("Unbalanced ["));
- }
- }
- else
- c2 = c;
-
- lo[0] = c; lo[1] = '\0';
- hi[0] = c2; hi[1] = '\0';
- for (c = 0; c < NOTCHAR; c++)
- {
- char ch[2];
- ch[0] = c; ch[1] = '\0';
- if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
- {
- setbit (c, ccl);
- if (case_fold)
- {
- if (ISUPPER (c))
- setbit (tolower (c), ccl);
- else if (ISLOWER (c))
- setbit (toupper (c), ccl);
+ if (!hard_LC_COLLATE) {
+ for (; c <= c2; c++)
+ setbit_case_fold (c, ccl);
+ } else {
+ /* POSIX locales are painful - leave the decision to libc */
+ char expr[6] = { '[', c, '-', c2, ']', '\0' };
+ regex_t re;
+ if (regcomp (&re, expr, case_fold ? REG_ICASE : 0) == REG_NOERROR) {
+ for (c = 0; c < NOTCHAR; ++c) {
+ char buf[2] = { c, '\0' };
+ regmatch_t mat;
+ if (regexec (&re, buf, 1, &mat, 0) == REG_NOERROR
+ && mat.rm_so == 0 && mat.rm_eo == 1)
+ setbit_case_fold (c, ccl);
+ }
+ regfree (&re);
}
+ }
+ continue;
}
}
+ setbit_case_fold (c, ccl);
+
skip:
;
}
@@ -772,7 +1081,6 @@ lex (void)
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit(eolbyte, ccl);
}
- laststart = 0;
return lasttok = CSET + charclass_index(ccl);
default:
@@ -781,11 +1089,7 @@ lex (void)
if (case_fold && ISALPHA(c))
{
zeroset(ccl);
- setbit(c, ccl);
- if (isupper(c))
- setbit(tolower(c), ccl);
- else
- setbit(toupper(c), ccl);
+ setbit_case_fold (c, ccl);
return lasttok = CSET + charclass_index(ccl);
}
return c;
@@ -812,6 +1116,26 @@ static int depth; /* Current depth of a hypothetical stack
static void
addtok (token t)
{
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ REALLOC_IF_NECESSARY(dfa->multibyte_prop, int, dfa->nmultibyte_prop,
+ dfa->tindex);
+ /* Set dfa->multibyte_prop. See struct dfa in dfa.h. */
+ if (t == MBCSET)
+ dfa->multibyte_prop[dfa->tindex] = ((dfa->nmbcsets - 1) << 2) + 3;
+ else if (t < NOTCHAR)
+ dfa->multibyte_prop[dfa->tindex]
+ = (cur_mb_len == 1)? 3 /* single-byte char */
+ : (((cur_mb_index == 1)? 1 : 0) /* 1st-byte of multibyte char */
+ + ((cur_mb_index == cur_mb_len)? 2 : 0)); /* last-byte */
+ else
+ /* It may be unnecessary, but it is safer to treat other
+ symbols as single-byte characters. */
+ dfa->multibyte_prop[dfa->tindex] = 3;
+ }
+#endif
+
REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
dfa->tokens[dfa->tindex++] = t;
@@ -852,10 +1176,14 @@ addtok (token t)
closure QMARK
closure STAR
closure PLUS
+ closure REPMN
atom
atom:
<normal character>
+ <multibyte character>
+ ANYCHAR
+ MBCSET
CSET
BACKREF
BEGLINE
@@ -864,6 +1192,8 @@ addtok (token t)
ENDWORD
LIMWORD
NOTLIMWORD
+ CRANGE
+ LPAREN regexp RPAREN
<empty>
The parser builds a parse tree in postfix form in an array of tokens. */
@@ -873,10 +1203,47 @@ atom (void)
{
if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
+#ifdef MBS_SUPPORT
+ || tok == ANYCHAR || tok == MBCSET /* MB_CUR_MAX > 1 */
+#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok(tok);
tok = lex();
+#ifdef MBS_SUPPORT
+ /* We treat a multibyte character as a single atom, so that DFA
+ can treat a multibyte character as a single expression.
+
+ e.g. We construct following tree from "<mb1><mb2>".
+ <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
+ <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT>
+ */
+ if (MB_CUR_MAX > 1)
+ {
+ while (cur_mb_index > 1 && tok >= 0 && tok < NOTCHAR)
+ {
+ addtok(tok);
+ addtok(CAT);
+ tok = lex();
+ }
+ }
+#endif /* MBS_SUPPORT */
+ }
+ else if (tok == CRANGE)
+ {
+ /* A character range like "[a-z]" in a locale other than "C" or
+ "POSIX". This range might match any sequence of one or more
+ characters. Unfortunately the POSIX locale primitives give
+ us no practical way to find what character sequences might be
+ matched. Treat this approximately like "(.\1)" -- i.e. match
+ one character, and then punt to the full matcher. */
+ charclass ccl;
+ zeroset (ccl);
+ notset (ccl);
+ addtok (CSET + charclass_index (ccl));
+ addtok (BACKREF);
+ addtok (CAT);
+ tok = lex ();
}
else if (tok == LPAREN)
{
@@ -987,7 +1354,7 @@ regexp (int toplevel)
length of the string, so s can include NUL characters. D is a pointer to
the struct dfa to parse into. */
void
-dfaparse (char *s, size_t len, struct dfa *d)
+dfaparse (char const *s, size_t len, struct dfa *d)
{
dfa = d;
lexstart = lexptr = s;
@@ -995,6 +1362,17 @@ dfaparse (char *s, size_t len, struct dfa *d)
lasttok = END;
laststart = 1;
parens = 0;
+#if ENABLE_NLS
+ hard_LC_COLLATE = hard_locale (LC_COLLATE);
+#endif
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ cur_mb_index = 0;
+ cur_mb_len = 0;
+ memset(&mbs, 0, sizeof(mbstate_t));
+ }
+#endif /* MBS_SUPPORT */
if (! syntax_bits_set)
dfaerror(_("No syntax specified"));
@@ -1020,7 +1398,7 @@ dfaparse (char *s, size_t len, struct dfa *d)
/* Copy one set to another; the destination must be large enough. */
static void
-copy (position_set *src, position_set *dst)
+copy (position_set const *src, position_set *dst)
{
int i;
@@ -1059,7 +1437,7 @@ insert (position p, position_set *s)
/* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */
static void
-merge (position_set *s1, position_set *s2, position_set *m)
+merge (position_set const *s1, position_set const *s2, position_set *m)
{
int i = 0, j = 0;
@@ -1099,7 +1477,7 @@ delete (position p, position_set *s)
state. Newline and letter tell whether we got here on a newline or
letter, respectively. */
static int
-state_index (struct dfa *d, position_set *s, int newline, int letter)
+state_index (struct dfa *d, position_set const *s, int newline, int letter)
{
int hash = 0;
int constraint;
@@ -1136,6 +1514,10 @@ state_index (struct dfa *d, position_set *s, int newline, int letter)
d->states[i].backref = 0;
d->states[i].constraint = 0;
d->states[i].first_end = 0;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ d->states[i].mbps.nelem = 0;
+#endif
for (j = 0; j < s->nelem; ++j)
if (d->tokens[s->elems[j].index] < 0)
{
@@ -1165,7 +1547,7 @@ state_index (struct dfa *d, position_set *s, int newline, int letter)
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void
-epsclosure (position_set *s, struct dfa *d)
+epsclosure (position_set *s, struct dfa const *d)
{
int i, j;
int *visited;
@@ -1178,6 +1560,10 @@ epsclosure (position_set *s, struct dfa *d)
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
&& d->tokens[s->elems[i].index] != BACKREF
+#ifdef MBS_SUPPORT
+ && d->tokens[s->elems[i].index] != ANYCHAR
+ && d->tokens[s->elems[i].index] != MBCSET
+#endif
&& d->tokens[s->elems[i].index] < CSET)
{
old = s->elems[i];
@@ -1459,6 +1845,10 @@ dfaanalyze (struct dfa *d, int searchflag)
it with its epsilon closure. */
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
+#ifdef MBS_SUPPORT
+ || d->tokens[i] == ANYCHAR
+ || d->tokens[i] == MBCSET
+#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
@@ -1560,6 +1950,9 @@ dfastate (int s, struct dfa *d, int trans[])
int wants_letter; /* New state wants to know letter context. */
int state_letter; /* New state on a letter transition. */
static int initialized; /* Flag for static initialization. */
+#ifdef MBS_SUPPORT
+ int next_isnt_1st_byte = 0; /* Flag If we can't add state0. */
+#endif
int i, j, k;
/* Initialize the set of letters, if necessary. */
@@ -1581,6 +1974,23 @@ dfastate (int s, struct dfa *d, int trans[])
setbit(d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
+#ifdef MBS_SUPPORT
+ else if (d->tokens[pos.index] == ANYCHAR
+ || d->tokens[pos.index] == MBCSET)
+ /* MB_CUR_MAX > 1 */
+ {
+ /* ANYCHAR and MBCSET must match with a single character, so we
+ must put it to d->states[s].mbps, which contains the positions
+ which can match with a single character not a byte. */
+ if (d->states[s].mbps.nelem == 0)
+ {
+ MALLOC(d->states[s].mbps.elems, position,
+ d->states[s].elems.nelem);
+ }
+ insert(pos, &(d->states[s].mbps));
+ continue;
+ }
+#endif /* MBS_SUPPORT */
else
continue;
@@ -1717,9 +2127,46 @@ dfastate (int s, struct dfa *d, int trans[])
for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ /* If some token in follows.elems is not the 1st byte of a multibyte
+ character, then the states of follows must accept bytes which
+ are not the 1st byte of a multibyte character.
+ So, when a state of follows encounters a byte, that byte is neither
+ the 1st byte of a multibyte character nor a single-byte character.
+ We cancel adding state[0].follows to the next state, because
+ state[0] must accept a 1st byte.
+
+ For example, assume <sb a> is a certain single-byte
+ character, <mb A> is a certain multibyte character, and the
+ codepoint of <sb a> equals the 2nd byte of the codepoint of
+ <mb A>.
+ When state[0] accepts <sb a>, state[i] transits to state[i+1]
+ by accepting the 1st byte of <mb A>, and state[i+1]
+ accepts the 2nd byte of <mb A>; if state[i+1] encounters the
+ codepoint of <sb a>, it must not be <sb a> but the 2nd byte of
+ <mb A>, so we cannot add state[0]. */
+
+ next_isnt_1st_byte = 0;
+ for (j = 0; j < follows.nelem; ++j)
+ {
+ if (!(d->multibyte_prop[follows.elems[j].index] & 1))
+ {
+ next_isnt_1st_byte = 1;
+ break;
+ }
+ }
+ }
+#endif
+
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
+#ifdef MBS_SUPPORT
+ if (d->searchflag && (MB_CUR_MAX == 1 || !next_isnt_1st_byte))
+#else
if (d->searchflag)
+#endif
for (j = 0; j < d->states[0].elems.nelem; ++j)
insert(d->states[0].elems.elems[j], &follows);
@@ -1836,7 +2283,6 @@ build_state (int s, struct dfa *d)
d->trans = d->realtrans + 1;
REALLOC(d->fails, int *, d->tralloc);
REALLOC(d->success, int, d->tralloc);
- REALLOC(d->newlines, int, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
@@ -1844,9 +2290,7 @@ build_state (int s, struct dfa *d)
}
}
- /* Keep the newline transition in a special place so we can use it as
- a sentinel. */
- d->newlines[s] = trans[eolbyte];
+ /* Newline is a sentinel. */
trans[eolbyte] = -1;
if (ACCEPTING(s, *d))
@@ -1864,29 +2308,450 @@ build_state_zero (struct dfa *d)
d->trans = d->realtrans + 1;
CALLOC(d->fails, int *, d->tralloc);
MALLOC(d->success, int, d->tralloc);
- MALLOC(d->newlines, int, d->tralloc);
build_state(0, d);
}
+#ifdef MBS_SUPPORT
+/* Multibyte character handling subroutines for dfaexec. */
+
+/* The initial state may encounter a byte which is neither a single-byte
+ character nor the 1st byte of a multibyte character, but it is incorrect
+ for the initial state to accept such a byte.
+ For example, in SJIS encoding the regular expression "\\" accepts
+ the codepoint 0x5c, but should not accept the 2nd byte of the codepoint
+ 0x815c. So the initial state must skip the bytes which are neither a
+ single-byte character nor the 1st byte of a multibyte character.
+
+ When S is 0, advance P while inputwcs[] is 0 and mblen_buf[] is nonzero
+ at P's offset from buf_begin (how dfaexec() marks the bytes after the
+ first one of a multibyte character) and P is before buf_end.
+ NOTE: if P then reaches `end' -- a variable that must exist in the
+ function expanding this macro -- the macro frees mblen_buf and inputwcs
+ and makes that function return (size_t) -1. */
+#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p) \
+ if (s == 0) \
+ { \
+ while (inputwcs[p - buf_begin] == 0 \
+ && mblen_buf[p - buf_begin] > 0 \
+ && p < buf_end) \
+ ++p; \
+ if (p >= end) \
+ { \
+ free(mblen_buf); \
+ free(inputwcs); \
+ return (size_t) -1; \
+ } \
+ }
+
+/* Grow d->realtrans/d->trans, d->fails and d->success (doubling
+   d->tralloc as often as needed) so that they can be indexed by
+   NEW_STATE.  Newly added trans and fails slots are set to NULL.  */
+static void
+realloc_trans_if_necessary(struct dfa *d, int new_state)
+{
+  int slot;
+  int const old_size = d->tralloc;
+
+  /* Already large enough: nothing to do.  */
+  if (new_state < old_size)
+    return;
+
+  do
+    d->tralloc *= 2;
+  while (new_state >= d->tralloc);
+
+  /* realtrans has one extra leading slot; trans points just past it.  */
+  REALLOC(d->realtrans, int *, d->tralloc + 1);
+  d->trans = d->realtrans + 1;
+  REALLOC(d->fails, int *, d->tralloc);
+  REALLOC(d->success, int, d->tralloc);
+
+  for (slot = old_size; slot < d->tralloc; ++slot)
+    {
+      d->trans[slot] = NULL;
+      d->fails[slot] = NULL;
+    }
+}
+
+/* Return values of transit_state_singlebyte() and
+ transit_state_consume_1char(). */
+typedef enum
+{
+ TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
+ TRANSIT_STATE_DONE, /* State transition has finished. */
+ TRANSIT_STATE_END_BUFFER /* Reached the end of the buffer. */
+} status_transit_state;
+
+/* Consume the single byte at P and move from state S to *NEXT_STATE.
+   This mirrors the state transition code in dfaexec(), except that
+   exactly one transition is performed.  Returns TRANSIT_STATE_DONE
+   after a transition, or TRANSIT_STATE_END_BUFFER (leaving *NEXT_STATE
+   untouched) when a negative state is reached with P at buf_end.  */
+static status_transit_state
+transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
+			  int *next_state)
+{
+  int cur = s;
+
+  for (;;)
+    {
+      int *row = d->trans[cur];
+
+      if (row != NULL)
+	{
+	  /* A transition table exists for this state; negative
+	     targets are mapped to state 0.  */
+	  int target = row[*p];
+	  *next_state = (target < 0) ? 0 : target;
+	  return TRANSIT_STATE_DONE;
+	}
+
+      if (cur < 0)
+	{
+	  if (p == buf_end)
+	    /* At the moment, it must not happen.  */
+	    return TRANSIT_STATE_END_BUFFER;
+	  cur = 0;
+	  continue;
+	}
+
+      if (d->fails[cur])
+	{
+	  *next_state = d->fails[cur][*p];
+	  return TRANSIT_STATE_DONE;
+	}
+
+      /* No table yet for this state: build it and look again.  */
+      build_state(cur, d);
+    }
+}
+
+/* Decide whether a period can match in the current context.  Returns
+   the number of bytes the period matches, or 0 if it cannot match.
+   POS is the position of the period; INDEX is the offset of the current
+   input position from buf_begin.  */
+static int
+match_anychar (struct dfa *d, int s, position pos, int index)
+{
+  wchar_t const ch = inputwcs[index];
+  int at_newline = 0;
+  int is_letter;
+
+  /* Check context.  */
+  if (ch == (wchar_t)eolbyte)
+    {
+      if (!(syntax_bits & RE_DOT_NEWLINE))
+	return 0;
+      at_newline = 1;
+    }
+  else if (ch == (wchar_t)'\0')
+    {
+      if (syntax_bits & RE_DOT_NOT_NULL)
+	return 0;
+      at_newline = 1;
+    }
+
+  is_letter = (iswalnum(ch) || ch == L'_') ? 1 : 0;
+
+  if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
+			   at_newline, d->states[s].letter, is_letter))
+    return 0;
+
+  /* A zero entry in mblen_buf means one byte.  */
+  return (mblen_buf[index] == 0) ? 1 : mblen_buf[index];
+}
+
+/* Check whether a bracket expression can match in the current context.
+   Returns the number of bytes the expression matches, or 0 if it cannot
+   match.  POS is the position of the bracket expression; INDEX is the
+   offset of the current input position from buf_begin.
+   The members of the set are tried in this order: character classes,
+   equivalence classes, collating elements, ranges, plain characters.  */
+int
+match_mb_charset (struct dfa *d, int s, position pos, int index)
+{
+  int i;
+  int match;		/* Flag which represents that matching succeeded.  */
+  int match_len;	/* Length of the character (or collating element)
+			   with which this operator matches.  */
+  int op_len;		/* Length of the operator.  */
+  char buffer[128];
+  wchar_t wcbuf[6];
+
+  /* Pointer to the structure we are currently referring to.  */
+  struct mb_char_classes *work_mbc;
+
+  int newline = 0;
+  int letter = 0;
+  wchar_t wc;		/* Character currently referred to.  */
+
+  wc = inputwcs[index];
+
+  /* Check context.  */
+  if (wc == (wchar_t)eolbyte)
+    {
+      if (!(syntax_bits & RE_DOT_NEWLINE))
+	return 0;
+      newline = 1;
+    }
+  else if (wc == (wchar_t)'\0')
+    {
+      if (syntax_bits & RE_DOT_NOT_NULL)
+	return 0;
+      newline = 1;
+    }
+  if (iswalnum(wc) || wc == L'_')
+    letter = 1;
+  if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].newline,
+			   newline, d->states[s].letter, letter))
+    return 0;
+
+  /* Assign the operator currently referred to to work_mbc.  The upper
+     bits of multibyte_prop hold the index into d->mbcsets.  */
+  work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
+  match = !work_mbc->invert;
+  match_len = (mblen_buf[index] == 0)? 1 : mblen_buf[index];
+
+  /* match with a character class?  */
+  for (i = 0; i<work_mbc->nch_classes; i++)
+    {
+      if (iswctype((wint_t)wc, work_mbc->ch_classes[i]))
+	goto charset_matched;
+    }
+
+  /* match with an equivalent class?  */
+  for (i = 0; i<work_mbc->nequivs; i++)
+    {
+      op_len = strlen(work_mbc->equivs[i]);
+      /* An operand that does not fit in `buffer' together with its
+	 terminating NUL cannot be compared safely; skip it instead of
+	 writing past the end of the buffer.  */
+      if (op_len >= (int) sizeof buffer)
+	continue;
+      strncpy(buffer, (char const *) buf_begin + index, op_len);
+      buffer[op_len] = '\0';
+      if (strcoll(work_mbc->equivs[i], buffer) == 0)
+	{
+	  match_len = op_len;
+	  goto charset_matched;
+	}
+    }
+
+  /* match with a collating element?  */
+  for (i = 0; i<work_mbc->ncoll_elems; i++)
+    {
+      op_len = strlen(work_mbc->coll_elems[i]);
+      /* Same bound as for equivalence classes above.  */
+      if (op_len >= (int) sizeof buffer)
+	continue;
+      strncpy(buffer, (char const *) buf_begin + index, op_len);
+      buffer[op_len] = '\0';
+
+      if (strcoll(work_mbc->coll_elems[i], buffer) == 0)
+	{
+	  match_len = op_len;
+	  goto charset_matched;
+	}
+    }
+
+  /* wcbuf holds three one-character wide strings: the input character
+     at [0], the range start at [2] and the range end at [4].  */
+  wcbuf[0] = wc;
+  wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
+
+  /* match with a range?  */
+  for (i = 0; i<work_mbc->nranges; i++)
+    {
+      wcbuf[2] = work_mbc->range_sts[i];
+      wcbuf[4] = work_mbc->range_ends[i];
+
+      if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
+	  wcscoll(wcbuf+4, wcbuf) >= 0)
+	goto charset_matched;
+    }
+
+  /* match with a character?  */
+  for (i = 0; i<work_mbc->nchars; i++)
+    {
+      if (wc == work_mbc->chars[i])
+	goto charset_matched;
+    }
+
+  /* Nothing in the set matched: the result is the opposite of what a
+     hit would have produced.  */
+  match = !match;
+
+ charset_matched:
+  return match ? match_len : 0;
+}
+
+/* Check whether each element of `d->states[s].mbps.elems' can match.
+   Returns a newly allocated array parallel to `d->states[s].mbps.elems';
+   each entry holds the number of bytes the corresponding element can
+   match (0 when it cannot match).
+   `index' is the offset of the current input position from buf_begin.
+   The caller MUST free the array which this function returns.  */
+static int*
+check_matching_with_multibyte_ops (struct dfa *d, int s, int index)
+{
+  int i;
+  int* rarray;
+
+  MALLOC(rarray, int, d->states[s].mbps.nelem);
+  for (i = 0; i < d->states[s].mbps.nelem; ++i)
+    {
+      position pos = d->states[s].mbps.elems[i];
+      switch(d->tokens[pos.index])
+	{
+	case ANYCHAR:
+	  rarray[i] = match_anychar(d, s, pos, index);
+	  break;
+	case MBCSET:
+	  rarray[i] = match_mb_charset(d, s, pos, index);
+	  break;
+	default:
+	  /* Can not happen; still, record "no match" so that callers,
+	     which read every slot, never see an indeterminate value.  */
+	  rarray[i] = 0;
+	  break;
+	}
+    }
+  return rarray;
+}
+
+/* Consume a single character and enumerate all of the positions which can
+ be the next position from the state `s'.
+ `match_lens' is an input. It can be NULL, but it can also be the output
+ of check_matching_with_multibyte_ops(), as an optimization.
+ `mbclen' and `pps' are outputs. `mbclen' is the length of the
+ character consumed, and `pps' is the set this function enumerates.
+ On return `*pp' has been advanced by `*mbclen' bytes. The return value
+ is the status of the last single-byte transition performed. */
+static status_transit_state
+transit_state_consume_1char (struct dfa *d, int s, unsigned char const **pp,
+ int *match_lens, int *mbclen, position_set *pps)
+{
+ int i, j;
+ int s1, s2;
+ int* work_mbls;
+ status_transit_state rs = TRANSIT_STATE_DONE;
+
+ /* Calculate the length of the (single/multi byte) character
+ to which *pp points. A zero entry in mblen_buf means one byte. */
+ *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1
+ : mblen_buf[*pp - buf_begin];
+
+ /* Calculate the state which can be reached from the state `s' by
+ consuming `*mbclen' single bytes from the buffer. Each iteration
+ advances *pp by one byte; only the last status is kept in rs. */
+ s1 = s;
+ for (i = 0; i < *mbclen; i++)
+ {
+ s2 = s1;
+ rs = transit_state_singlebyte(d, s2, (*pp)++, &s1);
+ }
+ /* Copy the positions contained by `s1' to the set `pps'. */
+ copy(&(d->states[s1].elems), pps);
+
+ /* Use the caller-supplied match_lens, or compute them if it is NULL.
+ NOTE(review): *pp has already been advanced past the consumed
+ character here, so the index passed below is the position after it,
+ not the position where the character started -- confirm this is
+ intended. */
+ if (match_lens == NULL && d->states[s].mbps.nelem != 0)
+ work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+ else
+ work_mbls = match_lens;
+
+ /* Add all of the positions which can be reached from `s' by consuming
+ a single character: the follows of every multibyte operator whose
+ match length equals the length of the consumed character. */
+ for (i = 0; i < d->states[s].mbps.nelem ; i++)
+ {
+ if (work_mbls[i] == *mbclen)
+ for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
+ j++)
+ insert(d->follows[d->states[s].mbps.elems[i].index].elems[j],
+ pps);
+ }
+
+ /* Free the array only if it was allocated here. */
+ if (match_lens == NULL && work_mbls != NULL)
+ free(work_mbls);
+ return rs;
+}
+
+/* Transit from state `s', return the new state and update the buffer
+ pointer `*pp'. This function is for operators which can match a
+ multibyte character or a collating element (which may consist of
+ several characters). */
+static int
+transit_state (struct dfa *d, int s, unsigned char const **pp)
+{
+ int s1;
+ int mbclen; /* The length of current input multibyte character. */
+ int maxlen = 0;
+ int i, j;
+ int *match_lens = NULL;
+ int nelem = d->states[s].mbps.nelem; /* Just an alias. */
+ position_set follows;
+ unsigned char const *p1 = *pp;
+ status_transit_state rs;
+ wchar_t wc;
+
+ if (nelem > 0)
+ /* This state has (a) multibyte operator(s).
+ We check whether each of them can match or not. */
+ {
+ /* Note: the array returned here must be freed by this function. */
+ match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+
+ for (i = 0; i < nelem; i++)
+ /* Search for the operator which matches the longest string
+ in this state. */
+ {
+ if (match_lens[i] > maxlen)
+ maxlen = match_lens[i];
+ }
+ }
+
+ if (nelem == 0 || maxlen == 0)
+ /* This state has no multibyte operator which can match.
+ We need to check only one single-byte character. */
+ {
+ /* This declaration shadows the function-level `rs'. */
+ status_transit_state rs;
+ rs = transit_state_singlebyte(d, s, *pp, &s1);
+
+ /* We must update the pointer if state transition succeeded. */
+ if (rs == TRANSIT_STATE_DONE)
+ ++*pp;
+
+ if (match_lens != NULL)
+ free(match_lens);
+ /* NOTE(review): transit_state_singlebyte() does not store to s1 when
+ it returns TRANSIT_STATE_END_BUFFER, so s1 would be returned
+ unset in that case -- confirm it cannot happen here. */
+ return s1;
+ }
+
+ /* This state has some operators which can match a multibyte character. */
+ follows.nelem = 0;
+ MALLOC(follows.elems, position, d->nleaves);
+
+ /* `maxlen' may be longer than the length of a character, because it may
+ not be a character but a (multi character) collating element.
+ We enumerate all of the positions which `s' can reach by consuming
+ `maxlen' bytes. */
+ rs = transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows);
+
+ /* wc is the character just consumed: *pp was advanced by mbclen. */
+ wc = inputwcs[*pp - mbclen - buf_begin];
+ s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
+ realloc_trans_if_necessary(d, s1);
+
+ /* Keep consuming characters until `maxlen' bytes have been consumed
+ since the starting position p1. */
+ while (*pp - p1 < maxlen)
+ {
+ follows.nelem = 0;
+ rs = transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
+
+ /* NOTE(review): i is bounded by nelem, the operator count of the
+ original state `s', but indexes d->states[s1].mbps.elems --
+ confirm that s1 always has at least nelem entries. */
+ for (i = 0; i < nelem ; i++)
+ {
+ if (match_lens[i] == *pp - p1)
+ for (j = 0;
+ j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
+ insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j],
+ &follows);
+ }
+
+ wc = inputwcs[*pp - mbclen - buf_begin];
+ s1 = state_index(d, &follows, wc == L'\n', iswalnum(wc));
+ realloc_trans_if_necessary(d, s1);
+ }
+ free(match_lens);
+ free(follows.elems);
+ return s1;
+}
+
+#endif
+
/* Search through a buffer looking for a match to the given struct dfa.
Find the first occurrence of a string matching the regexp in the buffer,
- and the shortest possible version thereof. Return a pointer to the first
- character after the match, or NULL if none is found. Begin points to
- the beginning of the buffer, and end points to the first character after
- its end. We store a newline in *end to act as a sentinel, so end had
- better point somewhere valid. Newline is a flag indicating whether to
- allow newlines to be in the matching string. If count is non-
- NULL it points to a place we're supposed to increment every time we
- see a newline. Finally, if backref is non-NULL it points to a place
+ and the shortest possible version thereof. Return the offset of the first
+ character after the match, or (size_t) -1 if none is found. BEGIN points to
+ the beginning of the buffer, and SIZE is the size of the buffer. If SIZE
+ is nonzero, BEGIN[SIZE - 1] must be a newline. BACKREF points to a place
where we're supposed to store a 1 if backreferencing happened and the
match needs to be verified by a backtracking matcher. Otherwise
we store a 0 in *backref. */
-char *
-dfaexec (struct dfa *d, char *begin, char *end,
- int newline, int *count, int *backref)
+size_t
+dfaexec (struct dfa *d, char const *begin, size_t size, int *backref)
{
- register int s, s1, tmp; /* Current state. */
- register unsigned char *p; /* Current input character. */
+ register int s; /* Current state. */
+ register unsigned char const *p; /* Current input character. */
+ register unsigned char const *end; /* One past the last input character. */
register int **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
@@ -1906,58 +2771,145 @@ dfaexec (struct dfa *d, char *begin, char *end,
if (! d->tralloc)
build_state_zero(d);
- s = s1 = 0;
- p = (unsigned char *) begin;
+ s = 0;
+ p = (unsigned char const *) begin;
+ end = p + size;
trans = d->trans;
- *end = eol;
+
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ int remain_bytes, i;
+ buf_begin = begin;
+ buf_end = end;
+
+ /* initialize mblen_buf, and inputwcs. */
+ MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2);
+ MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2);
+ memset(&mbs, 0, sizeof(mbstate_t));
+ remain_bytes = 0;
+ for (i = 0; i < end - (unsigned char const *)begin + 1; i++)
+ {
+ if (remain_bytes == 0)
+ {
+ remain_bytes
+ = mbrtowc(inputwcs + i, begin + i,
+ end - (unsigned char const *)begin - i + 1, &mbs);
+ if (remain_bytes <= 1)
+ {
+ remain_bytes = 0;
+ inputwcs[i] = (wchar_t)begin[i];
+ mblen_buf[i] = 0;
+ }
+ else
+ {
+ mblen_buf[i] = remain_bytes;
+ remain_bytes--;
+ }
+ }
+ else
+ {
+ mblen_buf[i] = remain_bytes;
+ inputwcs[i] = 0;
+ remain_bytes--;
+ }
+ }
+ mblen_buf[i] = 0;
+ inputwcs[i] = 0; /* sentinel */
+ }
+#endif /* MBS_SUPPORT */
for (;;)
{
- while ((t = trans[s]) != 0) { /* hand-optimized loop */
- s1 = t[*p++];
- if ((t = trans[s1]) == 0) {
- tmp = s ; s = s1 ; s1 = tmp ; /* swap */
- break;
- }
- s = t[*p++];
- }
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ while ((t = trans[s]))
+ {
+ if (d->states[s].mbps.nelem != 0)
+ {
+ /* Can match a multibyte character (and a multi-character
+ collating element). */
+ unsigned char const *nextp;
+
+ SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
- if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
+ nextp = p;
+ s = transit_state(d, s, &nextp);
+ p = nextp;
+
+ /* Trans table might be updated. */
+ trans = d->trans;
+ }
+ else
+ {
+ SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
+ s = t[*p++];
+ }
+ }
+ else
+#endif /* MBS_SUPPORT */
+ while ((t = trans[s]))
+ s = t[*p++];
+
+ if (s < 0)
+ {
+ if (p == end)
+ {
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ free(mblen_buf);
+ free(inputwcs);
+ }
+#endif /* MBS_SUPPORT */
+ return (size_t) -1;
+ }
+ s = 0;
+ }
+ else if ((t = d->fails[s]))
{
if (d->success[s] & sbit[*p])
{
if (backref)
*backref = (d->states[s].backref != 0);
- return (char *) p;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ free(mblen_buf);
+ free(inputwcs);
+ }
+#endif /* MBS_SUPPORT */
+ return (char const *) p - begin;
}
- s1 = s;
- s = d->fails[s][*p++];
- continue;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
+ if (d->states[s].mbps.nelem != 0)
+ {
+ /* Can match a multibyte character (and a multi-character
+ collating element). */
+ unsigned char const *nextp;
+ nextp = p;
+ s = transit_state(d, s, &nextp);
+ p = nextp;
+
+ /* Trans table might be updated. */
+ trans = d->trans;
+ }
+ else
+ s = t[*p++];
+ }
+ else
+#endif /* MBS_SUPPORT */
+ s = t[*p++];
}
-
- /* If the previous character was a newline, count it. */
- if (count && (char *) p <= end && p[-1] == eol)
- ++*count;
-
- /* Check if we've run off the end of the buffer. */
- if ((char *) p > end)
- return NULL;
-
- if (s >= 0)
+ else
{
build_state(s, d);
trans = d->trans;
- continue;
- }
-
- if (p[-1] == eol && newline)
- {
- s = d->newlines[s1];
- continue;
}
-
- s = 0;
}
}
@@ -1973,6 +2925,16 @@ dfainit (struct dfa *d)
d->talloc = 1;
MALLOC(d->tokens, token, d->talloc);
d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ d->nmultibyte_prop = 1;
+ MALLOC(d->multibyte_prop, int, d->nmultibyte_prop);
+ d->nmbcsets = 0;
+ d->mbcsets_alloc = 1;
+ MALLOC(d->mbcsets, struct mb_char_classes, d->mbcsets_alloc);
+ }
+#endif
d->searchflag = 0;
d->tralloc = 0;
@@ -1982,7 +2944,7 @@ dfainit (struct dfa *d)
/* Parse and analyze a single string of the given length. */
void
-dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
+dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
{
if (case_fold) /* dummy folding in service of dfamust() */
{
@@ -2028,6 +2990,38 @@ dfafree (struct dfa *d)
free((ptr_t) d->charclasses);
free((ptr_t) d->tokens);
+
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ {
+ free((ptr_t) d->multibyte_prop);
+ for (i = 0; i < d->nmbcsets; ++i)
+ {
+ int j;
+ struct mb_char_classes *p = &(d->mbcsets[i]);
+ if (p->chars != NULL)
+ free(p->chars);
+ if (p->ch_classes != NULL)
+ free(p->ch_classes);
+ if (p->range_sts != NULL)
+ free(p->range_sts);
+ if (p->range_ends != NULL)
+ free(p->range_ends);
+
+ for (j = 0; j < p->nequivs; ++j)
+ free(p->equivs[j]);
+ if (p->equivs != NULL)
+ free(p->equivs);
+
+ for (j = 0; j < p->ncoll_elems; ++j)
+ free(p->coll_elems[j]);
+ if (p->coll_elems != NULL)
+ free(p->coll_elems);
+ }
+ free((ptr_t) d->mbcsets);
+ }
+#endif /* MBS_SUPPORT */
+
for (i = 0; i < d->sindex; ++i)
free((ptr_t) d->states[i].elems.elems);
free((ptr_t) d->states);
@@ -2042,7 +3036,6 @@ dfafree (struct dfa *d)
free((ptr_t) d->fails[i]);
if (d->realtrans) free((ptr_t) d->realtrans);
if (d->fails) free((ptr_t) d->fails);
- if (d->newlines) free((ptr_t) d->newlines);
if (d->success) free((ptr_t) d->success);
for (dm = d->musts; dm; dm = ndm)
{
@@ -2083,6 +3076,10 @@ dfafree (struct dfa *d)
---- ---- ----- -- --
char c # c # c # c # c
+ ANYCHAR ZERO ZERO ZERO ZERO
+
+ MBCSET ZERO ZERO ZERO ZERO
+
CSET ZERO ZERO ZERO ZERO
STAR ZERO ZERO ZERO ZERO
@@ -2255,14 +3252,14 @@ comsubs (char *left, char *right)
for (lcp = left; *lcp != '\0'; ++lcp)
{
len = 0;
- rcp = index(right, *lcp);
+ rcp = strchr (right, *lcp);
while (rcp != NULL)
{
for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
continue;
if (i > len)
len = i;
- rcp = index(rcp + 1, *lcp);
+ rcp = strchr (rcp + 1, *lcp);
}
if (len == 0)
continue;
@@ -2528,7 +3525,12 @@ dfamust (struct dfa *dfa)
/* not on *my* shift */
goto done;
}
- else if (t >= CSET)
+ else if (t >= CSET
+#ifdef MBS_SUPPORT
+ || t == ANYCHAR
+ || t == MBCSET
+#endif /* MBS_SUPPORT */
+ )
{
/* easy enough */
resetmust(mp);
@@ -2578,3 +3580,4 @@ dfamust (struct dfa *dfa)
}
free((char *) mp);
}
+/* vim:set shiftwidth=2: */
diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h
index f2fef4b7edbc..bbc0457afef8 100644
--- a/gnu/usr.bin/grep/dfa.h
+++ b/gnu/usr.bin/grep/dfa.h
@@ -22,18 +22,24 @@
In addition to clobbering modularity, we eat up valuable
name space. */
-# undef PARAMS
-#if __STDC__
+#ifdef __STDC__
# ifndef _PTR_T
# define _PTR_T
typedef void * ptr_t;
# endif
-# define PARAMS(x) x
#else
# ifndef _PTR_T
# define _PTR_T
typedef char * ptr_t;
# endif
+#endif
+
+#ifdef PARAMS
+# undef PARAMS
+#endif
+#if PROTOTYPES
+# define PARAMS(x) x
+#else
# define PARAMS(x) ()
#endif
@@ -136,6 +142,21 @@ typedef enum
RPAREN, /* RPAREN never appears in the parse tree. */
+ CRANGE, /* CRANGE never appears in the parse tree.
+ It stands for a character range that can
+ match a string of one or more characters.
+ For example, [a-z] can match "ch" in
+ a Spanish locale. */
+
+#ifdef MBS_SUPPORT
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ any multibyte (or single-byte) character.
+ It is used only if MB_CUR_MAX > 1. */
+
+ MBCSET, /* MBCSET is similar to CSET, but for
+ multibyte characters. */
+#endif /* MBS_SUPPORT */
+
CSET /* CSET and (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
@@ -223,6 +244,12 @@ typedef struct
char backref; /* True if this state matches a \<digit>. */
unsigned char constraint; /* Constraint for this state to accept. */
int first_end; /* Token value of the first END in elems. */
+#ifdef MBS_SUPPORT
+ position_set mbps; /* Positions which can match multibyte
+ characters. e.g. period.
+ This field is used only if
+ MB_CUR_MAX > 1. */
+#endif
} dfa_state;
/* Element of a list of strings, at least one of which is known to
@@ -234,6 +261,26 @@ struct dfamust
struct dfamust *next;
};
+#ifdef MBS_SUPPORT
+/* A bracket operator.
+ e.g. [a-c], [[:alpha:]], etc. */
+struct mb_char_classes
+{
+ int invert;
+ wchar_t *chars; /* Normal characters. */
+ int nchars;
+ wctype_t *ch_classes; /* Character classes. */
+ int nch_classes;
+ wchar_t *range_sts; /* Range characters (start of the range). */
+ wchar_t *range_ends; /* Range characters (end of the range). */
+ int nranges;
+ char **equivs; /* Equivalent classes. */
+ int nequivs;
+ char **coll_elems;
+ int ncoll_elems; /* Collating elements. */
+};
+#endif
+
/* A compiled regular expression. */
struct dfa
{
@@ -252,6 +299,32 @@ struct dfa
int nleaves; /* Number of leaves on the parse tree. */
int nregexps; /* Count of parallel regexps being built
with dfaparse(). */
+#ifdef MBS_SUPPORT
+ /* These fields are used only if MB_CUR_MAX > 1 (multibyte environments). */
+ int nmultibyte_prop;
+ int *multibyte_prop;
+ /* The value of multibyte_prop[i] is defined by following rule.
+ if tokens[i] < NOTCHAR
+ bit 1 : tokens[i] is a singlebyte character, or the last-byte of
+ a multibyte character.
+ bit 0 : tokens[i] is a singlebyte character, or the 1st-byte of
+ a multibyte character.
+ if tokens[i] = MBCSET
+ ("the index of mbcsets corresponding to this operator" << 2) + 3
+
+ e.g.
+ tokens
+ = 'single_byte_a', 'multi_byte_A', 'single_byte_b'
+ = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+ multibyte_prop
+ = 3 , 1 , 0 , 2 , 3
+ */
+
+ /* Array of the bracket expressions in the DFA. */
+ struct mb_char_classes *mbcsets;
+ int nmbcsets;
+ int mbcsets_alloc;
+#endif
/* Stuff owned by the state builder. */
dfa_state *states; /* States of the dfa. */
@@ -290,13 +363,6 @@ struct dfa
on a state that potentially could do so. */
int *success; /* Table of acceptance conditions used in
dfaexec and computed in build_state. */
- int *newlines; /* Transitions on newlines. The entry for a
- newline in any transition table is always
- -1 so we can count lines without wasting
- too many cycles. The transition for a
- newline is stored separately and handled
- as a special case. Newline is also used
- as a sentinel at the end of the buffer. */
struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
@@ -323,26 +389,21 @@ struct dfa
/* dfasyntax() takes three arguments; the first sets the syntax bits described
earlier in this file, the second sets the case-folding flag, and the
third specifies the line terminator. */
-extern void dfasyntax PARAMS ((reg_syntax_t, int, int));
+extern void dfasyntax PARAMS ((reg_syntax_t, int, unsigned char));
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
exact matcher. */
-extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int));
+extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int));
/* Execute the given struct dfa on the buffer of characters. The
- first char * points to the beginning, and the second points to the
- first character after the end of the buffer, which must be a writable
- place so a sentinel end-of-buffer marker can be stored there. The
- second-to-last argument is a flag telling whether to allow newlines to
- be part of a string matching the regexp. The next-to-last argument,
- if non-NULL, points to a place to increment every time we see a
- newline. The final argument, if non-NULL, points to a flag that will
+ last byte of the buffer must equal the end-of-line byte.
+ The final argument points to a flag that will
be set if further examination by a backtracking matcher is needed in
order to verify backreferencing; otherwise the flag will be cleared.
- Returns NULL if no match is found, or a pointer to the first
+ Returns (size_t) -1 if no match is found, or the offset of the first
character after the first & shortest matching string in the buffer. */
-extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *));
+extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *));
/* Free the storage held by the components of a struct dfa. */
extern void dfafree PARAMS ((struct dfa *));
@@ -353,7 +414,7 @@ extern void dfafree PARAMS ((struct dfa *));
extern void dfainit PARAMS ((struct dfa *));
/* Incrementally parse a string of given length into a struct dfa. */
-extern void dfaparse PARAMS ((char *, size_t, struct dfa *));
+extern void dfaparse PARAMS ((char const *, size_t, struct dfa *));
/* Analyze a parsed regexp; second argument tells whether to build a searching
or an exact matcher. */
@@ -367,6 +428,5 @@ extern void dfastate PARAMS ((int, struct dfa *, int []));
/* dfaerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
- The default dfaerror() prints the error message to stderr and exits.
- The user can provide a different dfafree() if so desired. */
+ The user must supply a dfaerror. */
extern void dfaerror PARAMS ((const char *));
diff --git a/gnu/usr.bin/grep/doc/grep.texi b/gnu/usr.bin/grep/doc/grep.texi
index beb2410f956d..32b27c419926 100644
--- a/gnu/usr.bin/grep/doc/grep.texi
+++ b/gnu/usr.bin/grep/doc/grep.texi
@@ -35,7 +35,8 @@ Published by the Free Software Foundation,
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
-Copyright 2000 Free Software Foundation, Inc.
+@c man begin COPYRIGHT
+Copyright @copyright{} 2000, 2001 Free Software Foundation, Inc.
Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
@@ -43,20 +44,19 @@ are preserved on all copies.
@ignore
Permission is granted to process this file through TeX and print the
-results, provided the printed document carries copying permission
+results, provided the printed document carries a copying permission
notice identical to this one except for the removal of this paragraph
(this paragraph not being relevant to the printed manual).
@end ignore
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.1 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being ``GNU General Public License'' and ``GNU Free
+Documentation License'', with no Front-Cover Texts, and with no
+Back-Cover Texts. A copy of the license is included in the section
+entitled ``GNU Free Documentation License'' (@pxref{Copying}).
+@c man end
@end ifinfo
@setchapternewpage off
@@ -68,29 +68,21 @@ by the Foundation.
@page
@vskip 0pt plus 1filll
-Copyright @copyright{} 2000 Free Software Foundation, Inc.
+Copyright @copyright{} 2000, 2001 Free Software Foundation, Inc.
@sp 2
Published by the Free Software Foundation, @*
59 Temple Place - Suite 330, @*
Boston, MA 02111-1307, USA
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
-
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.1 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being ``GNU General Public License'' and ``GNU Free
+Documentation License'', with no Front-Cover Texts, and with no
+Back-Cover Texts. A copy of the license is included in the section
+entitled ``GNU Free Documentation License''.
@end titlepage
-@page
@ifnottex
@@ -111,13 +103,14 @@ This document was produced for version @value{VERSION} of @sc{gnu}
* Regular Expressions:: Regular Expressions.
* Usage:: Examples.
* Reporting Bugs:: Reporting Bugs.
+* Copying:: License terms.
* Concept Index:: A menu with all the topics in this manual.
* Index:: A menu with all @command{grep} commands
and command-line options.
@end menu
-@node Introduction
+@node Introduction, Invoking, Top, Top
@chapter Introduction
@cindex Searching for a pattern.
@@ -147,7 +140,7 @@ extensions.
@item -c
@itemx --count
@opindex -c
-@opindex -count
+@opindex --count
@cindex counting lines
Suppress normal output; instead print a count of matching
lines for each input file. With the @samp{-v}, @samp{--invert-match} option,
@@ -192,6 +185,13 @@ The scanning of every file will stop on the first match.
@cindex line numbering
Prefix each line of output with the line number within its input file.
+@item -o
+@itemx --only-matching
+@opindex -o
+@opindex --only-matching
+@cindex only matching
+Print only the part of matching lines that actually matches @var{pattern}.
+
@item -q
@itemx --quiet
@itemx --silent
@@ -199,8 +199,9 @@ Prefix each line of output with the line number within its input file.
@opindex --quiet
@opindex --silent
@cindex quiet, silent
-Quiet; suppress normal output. The scanning of every file will stop on
-the first match. Also see the @samp{-s} or @samp{--no-messages} option.
+Quiet; do not write anything to standard output. Exit immediately with
+zero status if any match is found, even if an error was detected. Also
+see the @samp{-s} or @samp{--no-messages} option.
@item -s
@itemx --no-messages
@@ -254,12 +255,18 @@ Print @var{num} lines of trailing context after matching lines.
Print @var{num} lines of leading context before matching lines.
@item -C @var{num}
-@itemx --context=[@var{num}]
+@itemx --context=@var{num}
@opindex -C
@opindex --context
@cindex context
-Print @var{num} lines (default 2) of output context.
+Print @var{num} lines of output context.
+@item --colour[=@var{WHEN}]
+@itemx --color[=@var{WHEN}]
+@opindex --colour
+@cindex highlight, color, colour
+The matching string is surrounded by the marker specified in @var{GREP_COLOR}.
+@var{WHEN} may be `never', `always', or `auto'.
@item -@var{num}
@opindex -NUM
@@ -308,6 +315,17 @@ byte offsets
depend on whether the @samp{-u} (@samp{--unix-byte-offsets}) option is
used; see below.
+@item -D @var{action}
+@itemx --devices=@var{action}
+@opindex -D
+@opindex --devices
+@cindex device search
+If an input file is a device, FIFO or socket, use @var{action} to process it.
+By default, @var{action} is @samp{read}, which means that devices are
+read just as if they were ordinary files.
+If @var{action} is @samp{skip}, devices, FIFOs and sockets are silently
+skipped.
+
@item -d @var{action}
@itemx --directories=@var{action}
@opindex -d
@@ -317,10 +335,10 @@ If an input file is a directory, use @var{action} to process it.
By default, @var{action} is @samp{read}, which means that directories are
read just as if they were ordinary files (some operating systems
and filesystems disallow this, and will cause @command{grep} to print error
-messages for every directory). If @var{action} is @samp{skip},
-directories are silently skipped. If @var{action} is @samp{recurse},
-@command{grep} reads all files under each directory, recursively; this is
-equivalent to the @samp{-r} option.
+messages for every directory or silently skip them). If @var{action} is
+@samp{skip}, directories are silently skipped. If @var{action} is
+@samp{recurse}, @command{grep} reads all files under each directory,
+recursively; this is equivalent to the @samp{-r} option.
@item -H
@itemx --with-filename
@@ -336,6 +354,18 @@ Print the filename for each match.
@cindex no filename prefix
Suppress the prefixing of filenames on output when multiple files are searched.
+@item --line-buffered
+@opindex --line-buffered
+@cindex line buffering
+Set the line buffering policy; this can incur a performance penalty.
+
+@item --label=@var{LABEL}
+@opindex --label
+@cindex changing name of standard input
+Displays input actually coming from standard input as input coming from file
+@var{LABEL}. This is especially useful for tools like zgrep, e.g.
+@command{gzip -cd foo.gz | grep --label=foo something}
+
@item -L
@itemx --files-without-match
@opindex -L
@@ -372,14 +402,69 @@ a non-word constituent character. Word-constituent
characters are letters, digits, and the underscore.
@item -r
+@itemx -R
@itemx --recursive
@opindex -r
@opindex --recursive
@cindex recursive search
@cindex searching directory trees
For each directory mentioned in the command line, read and process all
-files in that directory, recursively. This is the same as the @samp{-d
-recurse} option.
+files in that directory, recursively. This is the same as the
+@samp{--directories=recurse} option.
+
+@item --include=@var{file_pattern}
+@opindex --include
+@cindex include files
+@cindex searching directory trees
+When processing directories recursively, only files matching @var{file_pattern}
+will be searched.
+
+@item --exclude=@var{file_pattern}
+@opindex --exclude
+@cindex exclude files
+@cindex searching directory trees
+When processing directories recursively, skip files matching @var{file_pattern}.
+
+@item -m @var{num}
+@itemx --max-count=@var{num}
+@opindex -m
+@opindex --max-count
+@cindex max-count
+Stop reading a file after @var{num} matching lines. If the input is
+standard input from a regular file, and @var{num} matching lines are
+output, @command{grep} ensures that the standard input is positioned to
+just after the last matching line before exiting, regardless of the
+presence of trailing context lines. This enables a calling process
+to resume a search. For example, the following shell script makes use
+of it:
+
+@example
+while grep -m 1 PATTERN
+do
+ echo xxxx
+done < FILE
+@end example
+
+But the following probably will not work because a pipe is not a regular
+file:
+
+@example
+# This probably will not work.
+cat FILE |
+while grep -m 1 PATTERN
+do
+ echo xxxx
+done
+@end example
+
+When @command{grep} stops after @var{num} matching lines, it outputs
+any trailing context lines. Since context does not include matching
+lines, @command{grep} will stop when it encounters another matching line.
+When the @samp{-c} or @samp{--count} option is also used,
+@command{grep} does not output a count greater than @var{num}.
+When the @samp{-v} or @samp{--invert-match} option is
+also used, @command{grep} stops after outputting @var{num}
+non-matching lines.
@item -y
@opindex -y
@@ -460,9 +545,20 @@ matching engine is used. @xref{Grep Programs}.
@section Environment Variables
Grep's behavior is affected by the following environment variables.
+
+A locale @code{LC_@var{foo}} is specified by examining the three
+environment variables @env{LC_ALL}, @env{LC_@var{foo}}, and @env{LANG},
+in that order. The first of these variables that is set specifies the
+locale. For example, if @env{LC_ALL} is not set, but @env{LC_MESSAGES}
+is set to @samp{pt_BR}, then Brazilian Portuguese is used for the
+@code{LC_MESSAGES} locale. The C locale is used if none of these
+environment variables are set, or if the locale catalog is not
+installed, or if @command{grep} was not compiled with national language
+support (@sc{nls}).
+
@cindex environment variables
-@table @code
+@table @env
@item GREP_OPTIONS
@vindex GREP_OPTIONS
@@ -476,23 +572,24 @@ any explicit options. Option specifications are separated by
whitespace. A backslash escapes the next character, so it can be used to
specify an option containing whitespace or a backslash.
+@item GREP_COLOR
+@vindex GREP_COLOR
+@cindex highlight markers
+This variable specifies the surrounding markers used to highlight the matching
+text. The default is control ascii red.
+
@item LC_ALL
-@itemx LC_MESSAGES
+@itemx LC_COLLATE
@itemx LANG
@vindex LC_ALL
-@vindex LC_MESSAGES
+@vindex LC_COLLATE
@vindex LANG
-@cindex language of messages
-@cindex message language
+@cindex character type
@cindex national language support
@cindex NLS
-@cindex translation of message language
-These variables specify the @code{LC_MESSAGES} locale, which determines
-the language that @command{grep} uses for messages. The locale is determined
-by the first of these variables that is set. American English is used
-if none of these environment variables are set, or if the message
-catalog is not installed, or if @command{grep} was not compiled with national
-language support (@sc{nls}).
+These variables specify the @code{LC_COLLATE} locale, which determines
+the collating sequence used to interpret range expressions like
+@samp{[a-z]}.
@item LC_ALL
@itemx LC_CTYPE
@@ -504,11 +601,22 @@ language support (@sc{nls}).
@cindex national language support
@cindex NLS
These variables specify the @code{LC_CTYPE} locale, which determines the
-type of characters, e.g., which characters are whitespace. The locale is
-determined by the first of these variables that is set. The @sc{posix}
-locale is used if none of these environment variables are set, or if the
-locale catalog is not installed, or if @command{grep} was not compiled with
-national language support (@sc{nls}).
+type of characters, e.g., which characters are whitespace.
+
+@item LC_ALL
+@itemx LC_MESSAGES
+@itemx LANG
+@vindex LC_ALL
+@vindex LC_MESSAGES
+@vindex LANG
+@cindex language of messages
+@cindex message language
+@cindex national language support
+@cindex NLS
+@cindex translation of message language
+These variables specify the @code{LC_MESSAGES} locale, which determines
+the language that @command{grep} uses for messages. The default C
+locale uses American English messages.
@item POSIXLY_CORRECT
@vindex POSIXLY_CORRECT
@@ -539,10 +647,10 @@ only when @code{POSIXLY_CORRECT} is not set.
@node Diagnostics
@chapter Diagnostics
-Normally, exit status is 0 if matches were found, and 1 if no matches
-were found (the @samp{-v} option inverts the sense of the exit status).
-Exit status is 2 if there were syntax errors in the pattern,
-inaccessible input files, or other system errors.
+Normally, exit status is 0 if selected lines are found and 1 otherwise.
+But the exit status is 2 if an error occurred, unless the @option{-q} or
+@option{--quiet} or @option{--silent} option is used and a selected line
+is found.
@node Grep Programs
@chapter @command{grep} programs
@@ -550,7 +658,7 @@ inaccessible input files, or other system errors.
@command{grep} searches the named input files (or standard input if no
files are named, or the file name @file{-} is given) for lines containing
a match to the given pattern. By default, @command{grep} prints the
-matching lines. There are three major variants of @command{grep},
+matching lines. There are four major variants of @command{grep},
controlled by the following options.
@table @samp
@@ -560,24 +668,30 @@ controlled by the following options.
@opindex -G
@opindex --basic-regexp
@cindex matching basic regular expressions
-Interpret pattern as a basic regular expression. This is the default.
+Interpret the pattern as a basic regular expression. This is the default.
@item -E
@itemx --extended-regexp
@opindex -E
@opindex --extended-regexp
@cindex matching extended regular expressions
-Interpret pattern as an extended regular expression.
-
+Interpret the pattern as an extended regular expression.
@item -F
@itemx --fixed-strings
@opindex -F
@opindex --fixed-strings
@cindex matching fixed strings
-Interpret pattern as a list of fixed strings, separated
+Interpret the pattern as a list of fixed strings, separated
by newlines, any of which is to be matched.
+@item -P
+@itemx --perl-regexp
+@opindex -P
+@opindex --perl-regexp
+@cindex matching Perl regular expressions
+Interpret the pattern as a Perl regular expression.
+
@end table
In addition, two variant programs @sc{egrep} and @sc{fgrep} are available.
@@ -592,8 +706,8 @@ A @dfn{regular expression} is a pattern that describes a set of strings.
Regular expressions are constructed analogously to arithmetic expressions,
by using various operators to combine smaller expressions.
@command{grep} understands two different versions of regular expression
-syntax: ``basic'' and ``extended''. In @sc{gnu} @command{grep}, there is no
-difference in available functionality using either syntax.
+syntax: ``basic'' (BRE) and ``extended'' (ERE). In @sc{gnu} @command{grep},
+there is no difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
differences for basic regular expressions are summarized afterwards.
@@ -602,17 +716,92 @@ The fundamental building blocks are the regular expressions that match
a single character. Most characters, including all letters and digits,
are regular expressions that match themselves. Any metacharacter
with special meaning may be quoted by preceding it with a backslash.
-A list of characters enclosed by @samp{[} and @samp{]} matches any
-single character in that list; if the first character of the list is the
-caret @samp{^}, then it
-matches any character @strong{not} in the list. For example, the regular
-expression @samp{[0123456789]} matches any single digit.
-A range of characters may be specified by giving the first
-and last characters, separated by a hyphen.
-
-Finally, certain named classes of characters are predefined, as follows.
+
+A regular expression may be followed by one of several
+repetition operators:
+
+@table @samp
+
+@item .
+@opindex .
+@cindex dot
+@cindex period
+The period @samp{.} matches any single character.
+
+@item ?
+@opindex ?
+@cindex question mark
+@cindex match sub-expression at most once
+The preceding item is optional and will be matched at most once.
+
+@item *
+@opindex *
+@cindex asterisk
+@cindex match sub-expression zero or more times
+The preceding item will be matched zero or more times.
+
+@item +
+@opindex +
+@cindex plus sign
+The preceding item will be matched one or more times.
+
+@item @{@var{n}@}
+@opindex @{n@}
+@cindex braces, one argument
+@cindex match sub-expression n times
+The preceding item is matched exactly @var{n} times.
+
+@item @{@var{n},@}
+@opindex @{n,@}
+@cindex braces, second argument omitted
+@cindex match sub-expression n or more times
+The preceding item is matched @var{n} or more times.
+
+@item @{@var{n},@var{m}@}
+@opindex @{n,m@}
+@cindex braces, two arguments
+The preceding item is matched at least @var{n} times, but not more than
+@var{m} times.
+
+@end table
+
+Two regular expressions may be concatenated; the resulting regular
+expression matches any string formed by concatenating two substrings
+that respectively match the concatenated subexpressions.
+
+Two regular expressions may be joined by the infix operator @samp{|}; the
+resulting regular expression matches any string matching either subexpression.
+
+Repetition takes precedence over concatenation, which in turn
+takes precedence over alternation. A whole subexpression may be
+enclosed in parentheses to override these precedence rules.
+
+@section Character Class
+
+@cindex bracket expression
+@cindex character class
+A @dfn{bracket expression} is a list of characters enclosed by @samp{[} and
+@samp{]}. It matches any single character in that list; if the first
+character of the list is the caret @samp{^}, then it matches any character
+@strong{not} in the list. For example, the regular expression
+@samp{[0123456789]} matches any single digit.
+
+@cindex range expression
+Within a bracket expression, a @dfn{range expression} consists of two
+characters separated by a hyphen. It matches any single character that
+sorts between the two characters, inclusive, using the locale's
+collating sequence and character set. For example, in the default C
+locale, @samp{[a-d]} is equivalent to @samp{[abcd]}. Many locales sort
+characters in dictionary order, and in these locales @samp{[a-d]} is
+typically not equivalent to @samp{[abcd]}; it might be equivalent to
+@samp{[aBbCcDd]}, for example. To obtain the traditional interpretation
+of bracket expressions, you can use the C locale by setting the
+@env{LC_ALL} environment variable to the value @samp{C}.
+
+Finally, certain named classes of characters are predefined within
+bracket expressions, as follows.
Their interpretation depends on the @code{LC_CTYPE} locale; the
-interpretation below is that of the @sc{posix} locale, which is the default
+interpretation below is that of the C locale, which is the default
if no @code{LC_CTYPE} locale is specified.
@cindex classes of characters
@@ -696,86 +885,100 @@ Hexadecimal digits:
@end table
For example, @samp{[[:alnum:]]} means @samp{[0-9A-Za-z]}, except the latter
-depends upon the @sc{posix} locale and the @sc{ascii} character
+depends upon the C locale and the @sc{ascii} character
encoding, whereas the former is independent of locale and character set.
(Note that the brackets in these class names are
part of the symbolic names, and must be included in addition to
-the brackets delimiting the bracket list.) Most metacharacters lose
-their special meaning inside lists. To include a literal @samp{]}, place it
-first in the list. Similarly, to include a literal @samp{^}, place it anywhere
-but first. Finally, to include a literal @samp{-}, place it last.
+the brackets delimiting the bracket list.)
-The period @samp{.} matches any single character. The symbol @samp{\w}
-is a synonym for @samp{[[:alnum:]]} and @samp{\W} is a synonym for
-@samp{[^[:alnum]]}.
+Most metacharacters lose their special meaning inside lists.
-The caret @samp{^} and the dollar sign @samp{$} are metacharacters that
-respectively match the empty string at the beginning and end
-of a line. The symbols @samp{\<} and @samp{\>} respectively match the
-empty string at the beginning and end of a word. The symbol
-@samp{\b} matches the empty string at the edge of a word, and @samp{\B}
-matches the empty string provided it's not at the edge of a word.
+@table @samp
+@item ]
+ends the list if it's not the first list item. So, if you want to make
+the @samp{]} character a list item, you must put it first.
-A regular expression may be followed by one of several
-repetition operators:
+@item [.
+represents the open collating symbol.
+
+@item .]
+represents the close collating symbol.
+
+@item [=
+represents the open equivalence class.
+@item =]
+represents the close equivalence class.
+
+@item [:
+represents the open character class followed by a valid character class name.
+
+@item :]
+represents the close character class, preceded by a valid character class name.
+
+@item -
+represents the range if it's not first or last in a list or the ending point
+of a range.
+
+@item ^
+represents the characters not in the list. If you want to make the @samp{^}
+character a list item, place it anywhere but first.
+
+@end table
+
+@section Backslash Character
+@cindex backslash
+
+The @samp{\}, when followed by certain ordinary characters, takes a special
+meaning:
@table @samp
-@item ?
-@opindex ?
-@cindex question mark
-@cindex match sub-expression at most once
-The preceding item is optional and will be matched at most once.
+@item @samp{\b}
+Match the empty string at the edge of a word.
-@item *
-@opindex *
-@cindex asterisk
-@cindex match sub-expression zero or more times
-The preceding item will be matched zero or more times.
+@item @samp{\B}
+Match the empty string provided it's not at the edge of a word.
-@item +
-@opindex +
-@cindex plus sign
-The preceding item will be matched one or more times.
+@item @samp{\<}
+Match the empty string at the beginning of a word.
-@item @{@var{n}@}
-@opindex @{n@}
-@cindex braces, one argument
-@cindex match sub-expression n times
-The preceding item is matched exactly @var{n} times.
+@item @samp{\>}
+Match the empty string at the end of a word.
-@item @{@var{n},@}
-@opindex @{n,@}
-@cindex braces, second argument omitted
-@cindex match sub-expression n or more times
-The preceding item is matched n or more times.
+@item @samp{\w}
+Match a word constituent; it is a synonym for @samp{[_[:alnum:]]}.
-@item @{@var{n},@var{m}@}
-@opindex @{n,m@}
-@cindex braces, two arguments
-The preceding item is matched at least @var{n} times, but not more than
-@var{m} times.
+@item @samp{\W}
+Match a non-word constituent; it is a synonym for @samp{[^_[:alnum:]]}.
@end table
-Two regular expressions may be concatenated; the resulting regular
-expression matches any string formed by concatenating two substrings
-that respectively match the concatenated subexpressions.
+For example, @samp{\brat\b} matches the separate word @samp{rat},
+@samp{c\Brat\Be} matches @samp{crate}, but @samp{dirty \Brat} doesn't
+match @samp{dirty rat}.
-Two regular expressions may be joined by the infix operator @samp{|}; the
-resulting regular expression matches any string matching either
-subexpression.
+@section Anchoring
+@cindex anchoring
-Repetition takes precedence over concatenation, which in turn
-takes precedence over alternation. A whole subexpression may be
-enclosed in parentheses to override these precedence rules.
+The caret @samp{^} and the dollar sign @samp{$} are metacharacters that
+respectively match the empty string at the beginning and end of a line.
+
+@section Back-reference
+@cindex back-reference
-The backreference @samp{\@var{n}}, where @var{n} is a single digit, matches the
-substring previously matched by the @var{n}th parenthesized subexpression
-of the regular expression.
+The back-reference @samp{\@var{n}}, where @var{n} is a single digit, matches
+the substring previously matched by the @var{n}th parenthesized subexpression
+of the regular expression. For example, @samp{(a)\1} matches @samp{aa}.
+When used with alternation, if the group does not participate in the match, then
+the back-reference makes the whole match fail. For example, @samp{a(.)|b\1}
+will not match @samp{ba}. When multiple regular expressions are given with
+@samp{-e} or from a file @samp{-f file}, the back-references are local to
+each expression.
+@section Basic vs Extended
@cindex basic regular expressions
+
In basic regular expressions the metacharacters @samp{?}, @samp{+},
@samp{@{}, @samp{|}, @samp{(}, and @samp{)} lose their special meaning;
instead use the backslashed versions @samp{\?}, @samp{\+}, @samp{\@{},
@@ -825,7 +1028,7 @@ How can I list just the names of matching files?
@example
grep -l 'main' *.c
-@end example
+@end example
@noindent
lists the names of all C files in the current directory whose contents
@@ -848,12 +1051,23 @@ the following command searches only C files:
find /home/gigi -name '*.c' -print | xargs grep 'hello' /dev/null
@end smallexample
+This differs from the command:
+
+@example
+grep -r 'hello' *.c
+@end example
+
+which merely looks for @samp{hello} in all files in the current
+directory whose names end in @samp{.c}. Here the @option{-r} is
+probably unnecessary, as recursion occurs only in the unlikely event
+that one of the @samp{.c} files is a directory.
+
@item
What if a pattern has a leading @samp{-}?
@example
grep -e '--cut here--' *
-@end example
+@end example
@noindent
searches for all lines matching @samp{--cut here--}. Without @samp{-e},
@@ -899,6 +1113,12 @@ Append @file{/dev/null}:
grep 'eli' /etc/passwd /dev/null
@end example
+gets you:
+
+@smallexample
+/etc/passwd:eli:DNGUTF58.IMe.:98:11:Eli Smith:/home/do/eli:/bin/bash
+@end smallexample
+
@item
Why do people use strange regular expressions on @command{ps} output?
@@ -909,6 +1129,9 @@ ps -ef | grep '[c]ron'
If the pattern had been written without the square brackets, it would
have matched not only the @command{ps} output line for @command{cron},
but also the @command{ps} output line for @command{grep}.
+Note that on some platforms @command{ps} limits the output to the width
+of the screen; @command{grep} does not have any limit on the length of a line
+except the available memory.
@item
Why does @command{grep} report ``Binary file matches''?
@@ -948,23 +1171,905 @@ Use the special file name @samp{-}:
@example
cat /etc/passwd | grep 'alain' - /etc/motd
@end example
+
+@item
+@cindex palindromes
+How to express palindromes in a regular expression?
+
+It can be done by using back-references; for example, a palindrome
+of 5 characters can be written with a BRE:
+
+@example
+grep -w -e '\(.\)\(.\).\2\1' file
+@end example
+
+It matches the word "radar" or "civic".
+
+Guglielmo Bondioni proposed a single RE that finds all the palindromes up to 19
+characters long.
+
+@example
+egrep -e '^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\9\8\7\6\5\4\3\2\1$' file
+@end example
+
+Note this is done by using GNU ERE extensions; it might not be portable to
+other greps.
+
+@item
+Why do my expressions with the vertical bar fail?
+
+@example
+/bin/echo "ba" | egrep '(a)\1|(b)\1'
+@end example
+
+The first alternate branch fails, so the first group does not participate in
+the match; this makes the second alternate branch fail. For example, "aaba" will
+match: the first group participates in the match and can be reused in the
+second branch.
+
+@item
+What do @command{grep, fgrep, egrep} stand for?
+
+grep comes from the way line editing was done on Unix. For example,
+@command{ed} uses this syntax to print a list of matching lines on the screen.
+
+@example
+global/regular expression/print
+g/re/p
+@end example
+
+@command{fgrep} stands for Fixed @command{grep}, and @command{egrep} for Extended
+@command{grep}.
+
@end enumerate
-@node Reporting Bugs
+@node Reporting Bugs, Copying, Usage, Top
@chapter Reporting bugs
@cindex Bugs, reporting
Email bug reports to @email{bug-gnu-utils@@gnu.org}.
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
-Large repetition counts in the @samp{@{m,n@}} construct may cause
+Large repetition counts in the @samp{@{n,m@}} construct may cause
@command{grep} to use lots of memory. In addition, certain other
obscure regular expressions require exponential time and
space, and may cause grep to run out of memory.
-Backreferences are very slow, and may require exponential time.
+Back-references are very slow, and may require exponential time.
+
+@node Copying, GNU General Public License, Reporting Bugs, Top
+@chapter Copying
+@cindex Copying
+GNU grep is licensed under the GNU GPL, which makes it @dfn{free
+software}.
+
+Please note that ``free'' in ``free software'' refers to liberty, not
+price. As some GNU project advocates like to point out, think of ``free
+speech'' rather than ``free beer''. The exact and legally binding
+distribution terms are spelled out below; in short, you have the right
+(freedom) to run and change grep and distribute it to other people, and
+even---if you want---charge money for doing either. The important
+restriction is that you have to grant your recipients the same rights
+and impose the same restrictions.
+
+This method of licensing software is also known as @dfn{open source}
+because, among other things, it makes sure that all recipients will
+receive the source code along with the program, and be able to improve
+it. The GNU project prefers the term ``free software'' for reasons
+outlined at
+@url{http://www.gnu.org/philosophy/free-software-for-freedom.html}.
+
+The exact license terms are defined by this paragraph and the GNU
+General Public License it refers to:
+
+@quotation
+GNU grep is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+GNU grep is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+A copy of the GNU General Public License is included as part of this
+manual; if you did not receive it, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end quotation
+
+In addition to this, this manual is free in the same sense:
+
+@quotation
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.1 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being ``GNU General Public License'' and ``GNU Free
+Documentation License'', with no Front-Cover Texts, and with no
+Back-Cover Texts. A copy of the license is included in the section
+entitled ``GNU Free Documentation License''.
+@end quotation
+
+@c #### Maybe we should wrap these licenses in ifinfo? Stallman says
+@c that the GFDL needs to be present in the manual, and to me it would
+@c suck to include the license for the manual and not the license for
+@c the program.
+
+The full texts of the GNU General Public License and of the GNU Free
+Documentation License are available below.
+
+@menu
+* GNU General Public License:: GNU GPL
+* GNU Free Documentation License:: GNU FDL
+@end menu
+
+@node GNU General Public License, GNU Free Documentation License, Copying, Copying
+@section GNU General Public License
+@center Version 2, June 1991
+@cindex GPL, GNU General Public License
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+@cindex no warranty
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@unnumberedsec How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and an idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
+type `show w'. This is free software, and you are welcome
+to redistribute it under certain conditions; type `show c'
+for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@smallexample
+@group
+Yoyodyne, Inc., hereby disclaims all copyright
+interest in the program `Gnomovision'
+(which makes passes at compilers) written
+by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end group
+@end smallexample
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+@node GNU Free Documentation License, Concept Index, GNU General Public License, Copying
+@section GNU Free Documentation License
+@center Version 1.1, March 2000
+@cindex FDL, GNU Free Documentation License
+
+@display
+Copyright (C) 2000 Free Software Foundation, Inc.
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+@sp 1
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+written document ``free'' in the sense of freedom: to assure everyone
+the effective freedom to copy and redistribute it, with or without
+modifying it, either commercially or noncommercially. Secondarily,
+this License preserves for the author and publisher a way to get
+credit for their work, while not being considered responsible for
+modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+@sp 1
+@item
+APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work that contains a
+notice placed by the copyright holder saying it can be distributed
+under the terms of this License. The ``Document'', below, refers to any
+such manual or work. Any member of the public is a licensee, and is
+addressed as ``you''.
+
+A ``Modified Version'' of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A ``Secondary Section'' is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject. (For example, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The ``Invariant Sections'' are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License.
+
+The ``Cover Texts'' are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License.
+
+A ``Transparent'' copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, whose contents can be viewed and edited directly and
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup has been designed to thwart or discourage
+subsequent modification by readers is not Transparent. A copy that is
+not ``Transparent'' is called ``Opaque''.
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML designed for human modification. Opaque formats include
+PostScript, PDF, proprietary formats that can be read and edited only
+by proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML produced by some word processors for output
+purposes only.
+
+The ``Title Page'' means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, ``Title Page'' means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+@sp 1
+@item
+VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+@sp 1
+@item
+COPYING IN QUANTITY
+
+If you publish printed copies of the Document numbering more than 100,
+and the Document's license notice requires Cover Texts, you must enclose
+the copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a publicly-accessible computer-network location containing a complete
+Transparent copy of the Document, free of added material, which the
+general network-using public has access to download anonymously at no
+charge using public-standard network protocols. If you use the latter
+option, you must take reasonably prudent steps, when you begin
+distribution of Opaque copies in quantity, to ensure that this
+Transparent copy will remain thus accessible at the stated location
+until at least one year after the last time you distribute an Opaque
+copy (directly or through your agents or retailers) of that edition to
+the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+@sp 1
+@item
+MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+ from that of the Document, and from those of previous versions
+ (which should, if there were any, be listed in the History section
+ of the Document). You may use the same title as a previous version
+ if the original publisher of that version gives permission.@*
+B. List on the Title Page, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the Modified
+ Version, together with at least five of the principal authors of the
+ Document (all of its principal authors, if it has less than five).@*
+C. State on the Title page the name of the publisher of the
+ Modified Version, as the publisher.@*
+D. Preserve all the copyright notices of the Document.@*
+E. Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.@*
+F. Include, immediately after the copyright notices, a license notice
+ giving the public permission to use the Modified Version under the
+ terms of this License, in the form shown in the Addendum below.@*
+G. Preserve in that license notice the full lists of Invariant Sections
+ and required Cover Texts given in the Document's license notice.@*
+H. Include an unaltered copy of this License.@*
+I. Preserve the section entitled ``History'', and its title, and add to
+ it an item stating at least the title, year, new authors, and
+ publisher of the Modified Version as given on the Title Page. If
+ there is no section entitled ``History'' in the Document, create one
+ stating the title, year, authors, and publisher of the Document as
+ given on its Title Page, then add an item describing the Modified
+ Version as stated in the previous sentence.@*
+J. Preserve the network location, if any, given in the Document for
+ public access to a Transparent copy of the Document, and likewise
+ the network locations given in the Document for previous versions
+ it was based on. These may be placed in the ``History'' section.
+ You may omit a network location for a work that was published at
+ least four years before the Document itself, or if the original
+ publisher of the version it refers to gives permission.@*
+K. In any section entitled ``Acknowledgements'' or ``Dedications'',
+ preserve the section's title, and preserve in the section all the
+ substance and tone of each of the contributor acknowledgements
+ and/or dedications given therein.@*
+L. Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers
+ or the equivalent are not considered part of the section titles.@*
+M. Delete any section entitled ``Endorsements''. Such a section
+ may not be included in the Modified Version.@*
+N. Do not retitle any existing section as ``Endorsements''
+ or to conflict in title with any Invariant Section.@*
+@sp 1
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section entitled ``Endorsements'', provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+@sp 1
+@item
+COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections entitled ``History''
+in the various original documents, forming one section entitled
+``History''; likewise combine any sections entitled ``Acknowledgements'',
+and any sections entitled ``Dedications''. You must delete all sections
+entitled ``Endorsements.''
+@sp 1
+@item
+COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+@sp 1
+@item
+AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, does not as a whole count as a Modified Version
+of the Document, provided no compilation copyright is claimed for the
+compilation. Such a compilation is called an ``aggregate'', and this
+License does not apply to the other self-contained works thus compiled
+with the Document, on account of their being thus compiled, if they
+are not themselves derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one quarter
+of the entire aggregate, the Document's Cover Texts may be placed on
+covers that surround only the Document within the aggregate.
+Otherwise they must appear on covers around the whole aggregate.
+@sp 1
+@item
+TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License provided that you also include the
+original English version of this License. In case of a disagreement
+between the translation and the original English version of this
+License, the original English version will prevail.
+@sp 1
+@item
+TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License. Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+@sp 1
+@item
+FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+http://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License ``or any later version'' applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+@end enumerate
+
+@unnumberedsec ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+@smallexample
+@group
+
+ Copyright (C) @var{year} @var{your name}.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.1
+ or any later version published by the Free Software Foundation;
+ with the Invariant Sections being @var{list their titles}, with the
+ Front-Cover Texts being @var{list}, and with the Back-Cover Texts being @var{list}.
+ A copy of the license is included in the section entitled ``GNU
+ Free Documentation License''.
+@end group
+@end smallexample
+If you have no Invariant Sections, write ``with no Invariant Sections''
+instead of saying which ones are invariant. If you have no
+Front-Cover Texts, write ``no Front-Cover Texts'' instead of
+``Front-Cover Texts being @var{list}''; likewise for Back-Cover Texts.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
@page
-@node Concept Index
+@node Concept Index, Index, GNU Free Documentation License, Top
@unnumbered Concept Index
This is a general index of all issues discussed in this manual, with the
@@ -973,7 +2078,7 @@ exception of the @command{grep} commands and command-line options.
@printindex cp
@page
-@node Index
+@node Index,, Concept Index, Top
@unnumbered Index
This is an alphabetical list of all @command{grep} commands, command-line
diff --git a/gnu/usr.bin/grep/doc/version.texi b/gnu/usr.bin/grep/doc/version.texi
index c87a09f007c2..4350a83ba1fc 100644
--- a/gnu/usr.bin/grep/doc/version.texi
+++ b/gnu/usr.bin/grep/doc/version.texi
@@ -1,3 +1,4 @@
-@set UPDATED 2 February 2000
-@set EDITION 2.4.2
-@set VERSION 2.4.2
+@set UPDATED 23 January 2002
+@set UPDATED-MONTH January 2002
+@set EDITION 2.5.1
+@set VERSION 2.5.1
diff --git a/gnu/usr.bin/grep/error.c b/gnu/usr.bin/grep/error.c
new file mode 100644
index 000000000000..c78b6cffb94a
--- /dev/null
+++ b/gnu/usr.bin/grep/error.c
@@ -0,0 +1,276 @@
+/* Error handler for noninteractive utilities
+ Copyright (C) 1990-1998, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#if HAVE_LIBINTL_H
+# include <libintl.h>
+#endif
+
+#if HAVE_VPRINTF || HAVE_DOPRNT || _LIBC
+# if __STDC__
+# include <stdarg.h>
+# define VA_START(args, lastarg) va_start(args, lastarg)
+# else
+# include <varargs.h>
+# define VA_START(args, lastarg) va_start(args)
+# endif
+#else
+# define va_alist a1, a2, a3, a4, a5, a6, a7, a8
+# define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
+#endif
+
+#if STDC_HEADERS || _LIBC
+# include <stdlib.h>
+# include <string.h>
+#else
+void exit ();
+#endif
+
+#include "error.h"
+
+#ifndef HAVE_DECL_STRERROR_R
+"this configure-time declaration test was not run"
+#endif
+#if !HAVE_DECL_STRERROR_R
+char *strerror_r ();
+#endif
+
+#ifndef _
+# define _(String) String
+#endif
+
+/* If NULL, error will flush stdout, then print on stderr the program
+ name, a colon and a space. Otherwise, error will call this
+ function without parameters instead. */
+void (*error_print_progname) (
+#if __STDC__ - 0
+ void
+#endif
+ );
+
+/* This variable is incremented each time `error' is called. */
+unsigned int error_message_count;
+
+#ifdef _LIBC
+/* In the GNU C library, there is a predefined variable for this. */
+
+# define program_name program_invocation_name
+# include <errno.h>
+
+/* In GNU libc we do not want to use the common name `error' directly.
+ Instead make it a weak alias. */
+# define error __error
+# define error_at_line __error_at_line
+
+# ifdef USE_IN_LIBIO
+# include <libio/iolibio.h>
+# define fflush(s) _IO_fflush (s)
+# endif
+
+#else /* not _LIBC */
+
+/* The calling program should define program_name and set it to the
+ name of the executing program. */
+extern char *program_name;
+
+# ifdef HAVE_STRERROR_R
+# define __strerror_r strerror_r
+# else
+# if HAVE_STRERROR
+# ifndef strerror /* On some systems, strerror is a macro */
+char *strerror ();
+# endif
+# else
+/* Fallback used when the system provides no strerror: translate ERRNUM
+ into a message through the traditional sys_errlist table.
+ sys_errlist holds sys_nerr entries, so the valid indices are
+ 0 .. sys_nerr - 1; any ERRNUM outside 1 .. sys_nerr - 1 yields a
+ generic message instead of reading past the end of the table. */
+static char *
+private_strerror (errnum)
+ int errnum;
+{
+ extern char *sys_errlist[];
+ extern int sys_nerr;
+
+ if (errnum > 0 && errnum < sys_nerr)
+ return _(sys_errlist[errnum]);
+ return _("Unknown system error");
+}
+# define strerror private_strerror
+# endif /* HAVE_STRERROR */
+# endif /* HAVE_STRERROR_R */
+#endif /* not _LIBC */
+
+/* Print the program name and error message MESSAGE, which is a printf-style
+ format string with optional args.
+ If error_print_progname is non-NULL it is called to emit the prefix;
+ otherwise stdout is flushed and "PROGRAM_NAME: " is written to stderr.
+ If ERRNUM is nonzero, print its corresponding system error message
+ after a ": " separator.
+ The message is always terminated with a newline, stderr is flushed,
+ and error_message_count is incremented once per call.
+ Exit with status STATUS if it is nonzero. */
+/* VARARGS */
+
+void
+#if defined VA_START && __STDC__
+error (int status, int errnum, const char *message, ...)
+#else
+error (status, errnum, message, va_alist)
+ int status;
+ int errnum;
+ char *message;
+ va_dcl
+#endif
+{
+#ifdef VA_START
+ va_list args;
+#endif
+
+ if (error_print_progname)
+ (*error_print_progname) ();
+ else
+ {
+ fflush (stdout); /* push out pending stdout text before writing to stderr */
+ fprintf (stderr, "%s: ", program_name);
+ }
+
+#ifdef VA_START
+ VA_START (args, message);
+# if HAVE_VPRINTF || _LIBC
+ vfprintf (stderr, message, args);
+# else
+ _doprnt (message, args, stderr);
+# endif
+ va_end (args);
+#else
+ fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8);
+#endif
+
+ ++error_message_count;
+ if (errnum)
+ {
+#if defined HAVE_STRERROR_R || _LIBC
+ char errbuf[1024];
+# if HAVE_WORKING_STRERROR_R || _LIBC
+ fprintf (stderr, ": %s", __strerror_r (errnum, errbuf, sizeof errbuf));
+# else
+ /* Don't use __strerror_r's return value because on some systems
+ (at least DEC UNIX 4.0[A-D]) strerror_r returns `int'. */
+ __strerror_r (errnum, errbuf, sizeof errbuf);
+ fprintf (stderr, ": %s", errbuf);
+# endif
+#else
+ fprintf (stderr, ": %s", strerror (errnum));
+#endif
+ }
+ putc ('\n', stderr);
+ fflush (stderr);
+ if (status)
+ exit (status);
+}
+
+/* Sometimes we want to have at most one error per line. This
+ variable controls whether this mode is selected or not. */
+int error_one_per_line;
+
+void
+#if defined VA_START && __STDC__
+error_at_line (int status, int errnum, const char *file_name,
+ unsigned int line_number, const char *message, ...)
+#else
+error_at_line (status, errnum, file_name, line_number, message, va_alist)
+ int status;
+ int errnum;
+ const char *file_name;
+ unsigned int line_number;
+ char *message;
+ va_dcl
+#endif
+{
+#ifdef VA_START
+ va_list args;
+#endif
+
+ if (error_one_per_line)
+ {
+ static const char *old_file_name;
+ static unsigned int old_line_number;
+
+ if (old_line_number == line_number &&
+ (file_name == old_file_name || !strcmp (old_file_name, file_name)))
+ /* Simply return and print nothing. */
+ return;
+
+ old_file_name = file_name;
+ old_line_number = line_number;
+ }
+
+ if (error_print_progname)
+ (*error_print_progname) ();
+ else
+ {
+ fflush (stdout);
+ fprintf (stderr, "%s:", program_name);
+ }
+
+ if (file_name != NULL)
+ fprintf (stderr, "%s:%d: ", file_name, line_number);
+
+#ifdef VA_START
+ VA_START (args, message);
+# if HAVE_VPRINTF || _LIBC
+ vfprintf (stderr, message, args);
+# else
+ _doprnt (message, args, stderr);
+# endif
+ va_end (args);
+#else
+ fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8);
+#endif
+
+ ++error_message_count;
+ if (errnum)
+ {
+#if defined HAVE_STRERROR_R || _LIBC
+ char errbuf[1024];
+# if HAVE_WORKING_STRERROR_R || _LIBC
+ fprintf (stderr, ": %s", __strerror_r (errnum, errbuf, sizeof errbuf));
+# else
+ /* Don't use __strerror_r's return value because on some systems
+ (at least DEC UNIX 4.0[A-D]) strerror_r returns `int'. */
+ __strerror_r (errnum, errbuf, sizeof errbuf);
+ fprintf (stderr, ": %s", errbuf);
+# endif
+#else
+ fprintf (stderr, ": %s", strerror (errnum));
+#endif
+ }
+ putc ('\n', stderr);
+ fflush (stderr);
+ if (status)
+ exit (status);
+}
+
+#ifdef _LIBC
+/* Make the weak alias. */
+# undef error
+# undef error_at_line
+weak_alias (__error, error)
+weak_alias (__error_at_line, error_at_line)
+#endif
diff --git a/gnu/usr.bin/grep/error.h b/gnu/usr.bin/grep/error.h
new file mode 100644
index 000000000000..20f75824d615
--- /dev/null
+++ b/gnu/usr.bin/grep/error.h
@@ -0,0 +1,78 @@
+/* Declaration for error-reporting function
+ Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifndef _ERROR_H
+#define _ERROR_H 1
+
+#ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__
+# define __attribute__(Spec) /* empty */
+# endif
+/* The __-protected variants of `format' and `printf' attributes
+ are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7)
+# define __format__ format
+# define __printf__ printf
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined (__STDC__) && __STDC__
+
+/* Print a message with `fprintf (stderr, FORMAT, ...)';
+ if ERRNUM is nonzero, follow it with ": " and strerror (ERRNUM).
+ If STATUS is nonzero, terminate the program with `exit (STATUS)'. */
+
+extern void error (int status, int errnum, const char *format, ...)
+ __attribute__ ((__format__ (__printf__, 3, 4)));
+
+extern void error_at_line (int status, int errnum, const char *fname,
+ unsigned int lineno, const char *format, ...)
+ __attribute__ ((__format__ (__printf__, 5, 6)));
+
+/* If NULL, error will flush stdout, then print on stderr the program
+ name, a colon and a space. Otherwise, error will call this
+ function without parameters instead. */
+extern void (*error_print_progname) (void);
+
+#else
+void error ();
+void error_at_line ();
+extern void (*error_print_progname) ();
+#endif
+
+/* This variable is incremented each time `error' is called. */
+extern unsigned int error_message_count;
+
+/* Sometimes we want to have at most one error per line. This
+ variable controls whether this mode is selected or not. */
+extern int error_one_per_line;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* error.h */
diff --git a/gnu/usr.bin/grep/exclude.c b/gnu/usr.bin/grep/exclude.c
new file mode 100644
index 000000000000..1925a398a01c
--- /dev/null
+++ b/gnu/usr.bin/grep/exclude.c
@@ -0,0 +1,128 @@
+/* exclude.c -- exclude file names
+ Copyright 1992, 1993, 1994, 1997, 1999, 2000 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING.
+ If not, write to the Free Software Foundation,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+#include <exclude.h>
+#include <fnmatch.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+void *xmalloc PARAMS ((size_t));
+void *xrealloc PARAMS ((void *, size_t));
+
+/* Keep track of excluded file name patterns. */
+
+struct exclude
+ {
+ char const **exclude; /* array of pattern strings; entries are stored by pointer */
+ int exclude_alloc; /* number of slots allocated in EXCLUDE */
+ int exclude_count; /* number of slots currently in use */
+ };
+
+/* Allocate and return an empty exclusion list whose pattern array
+ initially has room for 64 entries. Storage comes from xmalloc. */
+struct exclude *
+new_exclude (void)
+{
+ struct exclude *list;
+
+ list = (struct exclude *) xmalloc (sizeof (struct exclude));
+ list->exclude_alloc = 64;
+ list->exclude_count = 0;
+ list->exclude
+ = (char const **) xmalloc (list->exclude_alloc * sizeof (char *));
+
+ return list;
+}
+
+/* Return 1 if file name F matches at least one pattern stored in EX,
+ 0 otherwise. Patterns are tried in insertion order with fnmatch,
+ and OPTIONS is passed through as the fnmatch flags argument. */
+int
+excluded_filename (struct exclude const *ex, char const *f, int options)
+{
+ int total = ex->exclude_count;
+ int k = 0;
+
+ while (k < total)
+ {
+ if (fnmatch (ex->exclude[k], f, options) == 0)
+ return 1;
+ k++;
+ }
+
+ return 0;
+}
+
+/* Append PATTERN to the exclusion list EX, doubling the capacity of the
+ pattern array first when it is full. PATTERN is stored by pointer,
+ not copied. */
+void
+add_exclude (struct exclude *ex, char const *pattern)
+{
+ if (ex->exclude_count >= ex->exclude_alloc)
+ {
+ ex->exclude_alloc *= 2;
+ ex->exclude = (char const **) xrealloc (ex->exclude,
+ ex->exclude_alloc * sizeof (char *));
+ }
+
+ ex->exclude[ex->exclude_count] = pattern;
+ ex->exclude_count++;
+}
+
+/* Read patterns from FILENAME (standard input when FILENAME is "-"),
+ split the contents at each LINE_END byte, and hand every pattern to
+ ADD_FUNC together with EX. A final pattern that lacks a terminator
+ is still added. The buffer holding the patterns is not freed here:
+ ADD_FUNC receives pointers into it.
+ Return 0 on success. Return -1 if FILENAME cannot be opened, or,
+ with errno set, if reading or closing the stream fails; in the latter
+ case whatever was read has still been passed to ADD_FUNC. */
+int
+add_exclude_file (void (*add_func) PARAMS ((struct exclude *, char const *)),
+ struct exclude *ex, char const *filename, char line_end)
+{
+ int use_stdin = filename[0] == '-' && !filename[1];
+ FILE *in;
+ char *buf;
+ char *p;
+ char const *pattern;
+ char const *lim;
+ size_t buf_alloc = 1024;
+ size_t buf_count = 0;
+ int c;
+ int e = 0;
+
+ if (use_stdin)
+ in = stdin;
+ else if (! (in = fopen (filename, "r")))
+ return -1;
+
+ buf = xmalloc (buf_alloc);
+
+ while ((c = getc (in)) != EOF)
+ {
+ buf[buf_count++] = c;
+ if (buf_count == buf_alloc)
+ buf = xrealloc (buf, buf_alloc *= 2);
+ }
+
+ /* Record a read error right away: any later library call, including
+ the allocator behind xrealloc, is free to overwrite errno. */
+ if (ferror (in))
+ e = errno;
+
+ if (!use_stdin && fclose (in) != 0)
+ e = errno;
+
+ /* Resize to the data plus one byte so the terminator written at LIM
+ below stays inside the buffer. */
+ buf = xrealloc (buf, buf_count + 1);
+
+ /* At P == LIM there is no byte to test; a pattern is emitted there only
+ if the data did not already end in a terminator (which the loop has
+ by then overwritten with '\0'). */
+ for (pattern = p = buf, lim = buf + buf_count; p <= lim; p++)
+ if (p < lim ? *p == line_end : buf < p && p[-1])
+ {
+ *p = '\0';
+ (*add_func) (ex, pattern);
+ pattern = p + 1;
+ }
+
+ errno = e;
+ return e ? -1 : 0;
+}
diff --git a/gnu/usr.bin/grep/exclude.h b/gnu/usr.bin/grep/exclude.h
new file mode 100644
index 000000000000..8a48a00ecea2
--- /dev/null
+++ b/gnu/usr.bin/grep/exclude.h
@@ -0,0 +1,35 @@
+/* exclude.h -- declarations for excluding file names
+ Copyright 1992, 1993, 1994, 1997, 1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING.
+ If not, write to the Free Software Foundation,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+#endif
+
+/* List of file-name patterns.  Declared incomplete here; callers only
+   handle pointers to it.  */
+struct exclude;
+
+/* Return a newly allocated exclusion list containing no patterns.  */
+struct exclude *new_exclude PARAMS ((void));
+/* Append a pattern to the list.  The pointer is stored, not copied.  */
+void add_exclude PARAMS ((struct exclude *, char const *));
+/* Read patterns from the named file ("-" means standard input), split
+   them at the given separator byte, and pass each one to the callback
+   together with the list.  Return 0 on success, -1 on failure.  */
+int add_exclude_file PARAMS ((void (*) (struct exclude *, char const *),
+ struct exclude *, char const *, char));
+/* Return 1 if the file name matches any pattern in the list under the
+   given fnmatch flags, 0 otherwise.  */
+int excluded_filename PARAMS ((struct exclude const *, char const *, int));
diff --git a/gnu/usr.bin/grep/getpagesize.h b/gnu/usr.bin/grep/getpagesize.h
index c7632e388021..198967b91973 100644
--- a/gnu/usr.bin/grep/getpagesize.h
+++ b/gnu/usr.bin/grep/getpagesize.h
@@ -2,6 +2,11 @@
#ifndef HAVE_GETPAGESIZE
+#if !defined getpagesize && defined __BEOS__
+# include <OS.h>
+# define getpagesize() B_PAGE_SIZE
+#endif
+
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
diff --git a/gnu/usr.bin/grep/grep.1 b/gnu/usr.bin/grep/grep.1
index 37460a6cb30e..a8320178287b 100644
--- a/gnu/usr.bin/grep/grep.1
+++ b/gnu/usr.bin/grep/grep.1
@@ -12,7 +12,7 @@
.de Id
.ds Dt \\$4
..
-.Id $Id: grep.1,v 1.11 2000/02/26 03:18:40 alainm Exp $
+.Id $Id: grep.1,v 1.23 2002/01/22 13:20:04 bero Exp $
.TH GREP 1 \*(Dt "GNU Project"
.SH NAME
grep, egrep, fgrep \- print lines matching a pattern
@@ -62,6 +62,9 @@ is the same as
Print
.I NUM
lines of trailing context after matching lines.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
.TP
.BR \-a ", " \-\^\-text
Process a binary file as if it were text; this is equivalent to the
@@ -72,11 +75,17 @@ option.
Print
.I NUM
lines of leading context before matching lines.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
.TP
-\fB\-C\fP [\fINUM\fP], \fB\-\fP\fINUM\fP, \fB\-\^\-context\fP[\fB=\fP\fINUM\fP]
+.BI \-C " NUM" "\fR,\fP \-\^\-context=" NUM
Print
.I NUM
-lines (default 2) of output context.
+lines of output context.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
.TP
.BR \-b ", " \-\^\-byte-offset
Print the byte offset within the input file before
@@ -117,6 +126,11 @@ might output binary garbage,
which can have nasty side effects if the output is a terminal and if the
terminal driver interprets some of it as commands.
.TP
+.BI \-\^\-colour[=\fIWHEN\fR] ", " \-\^\-color[=\fIWHEN\fR]
+Surround the matching string with the marker found in the
+.B GREP_COLOR
+environment variable. WHEN may be `never', `always', or `auto'.
+.TP
.BR \-c ", " \-\^\-count
Suppress normal output; instead print a count of
matching lines for each input file.
@@ -124,6 +138,20 @@ With the
.BR \-v ", " \-\^\-invert-match
option (see below), count non-matching lines.
.TP
+.BI \-D " ACTION" "\fR,\fP \-\^\-devices=" ACTION
+If an input file is a device, FIFO or socket, use
+.I ACTION
+to process it. By default,
+.I ACTION
+is
+.BR read ,
+which means that devices are read just as if they were ordinary files.
+If
+.I ACTION
+is
+.BR skip ,
+devices are silently skipped.
+.TP
.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
If an input file is a directory, use
.I ACTION
@@ -163,6 +191,10 @@ Interpret
.I PATTERN
as a list of fixed strings, separated by newlines,
any of which is to be matched.
+.TP
+.BR \-P ", " \-\^\-perl-regexp
+Interpret
+.I PATTERN
+as a Perl regular expression.
.TP
.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
Obtain patterns from
@@ -208,6 +240,39 @@ the name of each input file from which output
would normally have been printed. The scanning will
stop on the first match.
.TP
+.BI \-m " NUM" "\fR,\fP \-\^\-max-count=" NUM
+Stop reading a file after
+.I NUM
+matching lines. If the input is standard input from a regular file,
+and
+.I NUM
+matching lines are output,
+.B grep
+ensures that the standard input is positioned to just after the last
+matching line before exiting, regardless of the presence of trailing
+context lines. This enables a calling process to resume a search.
+When
+.B grep
+stops after
+.I NUM
+matching lines, it outputs any trailing context lines. When the
+.B \-c
+or
+.B \-\^\-count
+option is also used,
+.B grep
+does not output a count greater than
+.IR NUM .
+When the
+.B \-v
+or
+.B \-\^\-invert-match
+option is also used,
+.B grep
+stops after outputting
+.I NUM
+non-matching lines.
+.TP
.B \-\^\-mmap
If possible, use the
.BR mmap (2)
@@ -227,21 +292,43 @@ is operating, or if an I/O error occurs.
Prefix each line of output with the line number
within its input file.
.TP
+.BR \-o ", " \-\^\-only-matching
+Show only the part of a matching line that matches
+.I PATTERN.
+.TP
+.BI \-\^\-label= LABEL
+Displays input actually coming from standard input as input coming from file
+.I LABEL.
+This is especially useful for tools like zgrep, e.g.
+.B "gzip -cd foo.gz |grep --label=foo something"
+.TP
+.BR \-\^\-line-buffered
+Use line buffering; this can incur a performance penalty.
+.TP
.BR \-q ", " \-\^\-quiet ", " \-\^\-silent
-Quiet; suppress normal output. The scanning will stop
-on the first match.
+Quiet; do not write anything to standard output.
+Exit immediately with zero status if any match is found,
+even if an error was detected.
Also see the
.B \-s
or
.B \-\^\-no-messages
-option below.
+option.
.TP
-.BR \-r ", " \-\^\-recursive
+.BR \-R ", " \-r ", " \-\^\-recursive
Read all files under each directory, recursively;
this is equivalent to the
.B "\-d recurse"
option.
.TP
+.BR "\fR \fP \-\^\-include=" PATTERN
+When recursing in directories, search only files matching
+.I PATTERN.
+.TP
+.BR "\fR \fP \-\^\-exclude=" PATTERN
+When recursing in directories, skip files matching
+.I PATTERN.
+.TP
.BR \-s ", " \-\^\-no-messages
Suppress error messages about nonexistent or unreadable files.
Portability note: unlike \s-1GNU\s0
@@ -358,11 +445,13 @@ a single character. Most characters, including all letters and digits,
are regular expressions that match themselves. Any metacharacter with
special meaning may be quoted by preceding it with a backslash.
.PP
-A list of characters enclosed by
+A
+.I "bracket expression"
+is a list of characters enclosed by
.B [
and
-.B ]
-matches any single
+.BR ] .
+It matches any single
character in that list; if the first character of the list
is the caret
.B ^
@@ -371,10 +460,32 @@ then it matches any character
in the list.
For example, the regular expression
.B [0123456789]
-matches any single digit. A range of characters
-may be specified by giving the first and last characters, separated
-by a hyphen.
-Finally, certain named classes of characters are predefined.
+matches any single digit.
+.PP
+Within a bracket expression, a
+.I "range expression"
+consists of two characters separated by a hyphen.
+It matches any single character that sorts between the two characters,
+inclusive, using the locale's collating sequence and character set.
+For example, in the default C locale,
+.B [a\-d]
+is equivalent to
+.BR [abcd] .
+Many locales sort characters in dictionary order, and in these locales
+.B [a\-d]
+is typically not equivalent to
+.BR [abcd] ;
+it might be equivalent to
+.BR [aBbCcDd] ,
+for example.
+To obtain the traditional interpretation of bracket expressions,
+you can use the C locale by setting the
+.B LC_ALL
+environment variable to the value
+.BR C .
+.PP
+Finally, certain named classes of characters are predefined within
+bracket expressions, as follows.
Their names are self explanatory, and they are
.BR [:alnum:] ,
.BR [:alpha:] ,
@@ -391,8 +502,8 @@ and
For example,
.B [[:alnum:]]
means
-.BR [0-9A-Za-z] ,
-except the latter form depends upon the \s-1POSIX\s0 locale and the
+.BR [0\-9A\-Za\-z] ,
+except the latter form depends upon the C locale and the
\s-1ASCII\s0 character encoding, whereas the former is independent
of locale and character set.
(Note that the brackets in these class names are part of the symbolic
@@ -539,6 +650,29 @@ instead of reporting a syntax error in the regular expression.
\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
should avoid it.
.SH "ENVIRONMENT VARIABLES"
+Grep's behavior is affected by the following environment variables.
+.PP
+A locale
+.BI LC_ foo
+is specified by examining the three environment variables
+.BR LC_ALL ,
+.BR LC_\fIfoo\fP ,
+.BR LANG ,
+in that order.
+The first of these variables that is set specifies the locale.
+For example, if
+.B LC_ALL
+is not set, but
+.B LC_MESSAGES
+is set to
+.BR pt_BR ,
+then Brazilian Portuguese is used for the
+.B LC_MESSAGES
+locale.
+The C locale is used if none of these environment variables are set,
+or if the locale catalog is not installed, or if
+.B grep
+was not compiled with national language support (\s-1NLS\s0).
.TP
.B GREP_OPTIONS
This variable specifies default options to be placed in front of any
@@ -556,28 +690,29 @@ Option specifications are separated by whitespace.
A backslash escapes the next character,
so it can be used to specify an option containing whitespace or a backslash.
.TP
-\fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP
+.B GREP_COLOR
+Specifies the marker for highlighting.
+.TP
+\fBLC_ALL\fP, \fBLC_COLLATE\fP, \fBLANG\fP
These variables specify the
-.B LC_MESSAGES
-locale, which determines the language that
-.B grep
-uses for messages.
-The locale is determined by the first of these variables that is set.
-American English is used if none of these environment variables are set,
-or if the message catalog is not installed, or if
-.B grep
-was not compiled with national language support (\s-1NLS\s0).
+.B LC_COLLATE
+locale, which determines the collating sequence used to interpret
+range expressions like
+.BR [a\-z] .
.TP
\fBLC_ALL\fP, \fBLC_CTYPE\fP, \fBLANG\fP
These variables specify the
.B LC_CTYPE
locale, which determines the type of characters, e.g., which
characters are whitespace.
-The locale is determined by the first of these variables that is set.
-The \s-1POSIX\s0 locale is used if none of these environment variables
-are set, or if the locale catalog is not installed, or if
+.TP
+\fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP
+These variables specify the
+.B LC_MESSAGES
+locale, which determines the language that
.B grep
-was not compiled with national language support (\s-1NLS\s0).
+uses for messages.
+The default C locale uses American English messages.
.TP
.B POSIXLY_CORRECT
If set,
@@ -618,13 +753,14 @@ when
is not set.
.SH DIAGNOSTICS
.PP
-Normally, exit status is 0 if matches were found,
-and 1 if no matches were found. (The
-.B \-v
-option inverts the sense of the exit status.)
-Exit status is 2 if there were syntax errors
-in the pattern, inaccessible input files, or
-other system errors.
+Normally, exit status is 0 if selected lines are found and 1 otherwise.
+But the exit status is 2 if an error occurred, unless the
+.B \-q
+or
+.B \-\^\-quiet
+or
+.B \-\^\-silent
+option is used and a selected line is found.
.SH BUGS
.PP
Email bug reports to
@@ -633,7 +769,7 @@ Be sure to include the word \*(lqgrep\*(rq somewhere in the
\*(lqSubject:\*(rq field.
.PP
Large repetition counts in the
-.BI { m , n }
+.BI { n , m }
construct may cause grep to use lots of memory.
In addition,
certain other obscure regular expressions require exponential time
diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c
index 0966a29c82e1..638663454f22 100644
--- a/gnu/usr.bin/grep/grep.c
+++ b/gnu/usr.bin/grep/grep.c
@@ -36,13 +36,18 @@
#include "getpagesize.h"
#include "grep.h"
#include "savedir.h"
+#include "xstrtol.h"
+#include "xalloc.h"
+#include "error.h"
+#include "exclude.h"
+#include "closeout.h"
#undef MAX
#define MAX(A,B) ((A) > (B) ? (A) : (B))
struct stats
{
- struct stats *parent;
+ struct stats const *parent;
struct stat stat;
};
@@ -55,47 +60,80 @@ static int show_help;
/* If non-zero, print the version on standard output and exit. */
static int show_version;
+/* If nonzero, suppress diagnostics for nonexistent or unreadable files. */
+static int suppress_errors;
+
/* If nonzero, use mmap if possible. */
static int mmap_option;
+/* If nonzero, use grep_color marker. */
+static int color_option;
+
+/* If nonzero, show only the part of a line matching the expression. */
+static int only_matching;
+
+/* The color string used. The user can overwrite it using the environment
+ variable GREP_COLOR. The default is to print red. */
+static const char *grep_color = "01;31";
+
+static struct exclude *excluded_patterns;
+static struct exclude *included_patterns;
/* Short options. */
static char const short_options[] =
-"0123456789A:B:C::EFGHIUVX:abcd:e:f:hiLlnqrsuvwxyZz";
+"0123456789A:B:C:D:EFGHIPUVX:abcd:e:f:hiKLlm:noqRrsuvwxyZz";
/* Non-boolean long options that have no corresponding short equivalents. */
enum
{
- BINARY_FILES_OPTION = CHAR_MAX + 1
+ BINARY_FILES_OPTION = CHAR_MAX + 1,
+ COLOR_OPTION,
+ INCLUDE_OPTION,
+ EXCLUDE_OPTION,
+ EXCLUDE_FROM_OPTION,
+ LINE_BUFFERED_OPTION,
+ LABEL_OPTION
};
/* Long options equivalences. */
-static struct option long_options[] =
+static struct option const long_options[] =
{
{"after-context", required_argument, NULL, 'A'},
{"basic-regexp", no_argument, NULL, 'G'},
{"before-context", required_argument, NULL, 'B'},
{"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
{"byte-offset", no_argument, NULL, 'b'},
- {"context", optional_argument, NULL, 'C'},
+ {"context", required_argument, NULL, 'C'},
+ {"color", optional_argument, NULL, COLOR_OPTION},
+ {"colour", optional_argument, NULL, COLOR_OPTION},
{"count", no_argument, NULL, 'c'},
+ {"devices", required_argument, NULL, 'D'},
{"directories", required_argument, NULL, 'd'},
{"extended-regexp", no_argument, NULL, 'E'},
+ {"exclude", required_argument, NULL, EXCLUDE_OPTION},
+ {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
{"file", required_argument, NULL, 'f'},
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
{"fixed-regexp", no_argument, NULL, 'F'},
{"fixed-strings", no_argument, NULL, 'F'},
{"help", no_argument, &show_help, 1},
+ {"include", required_argument, NULL, INCLUDE_OPTION},
{"ignore-case", no_argument, NULL, 'i'},
+ {"label", required_argument, NULL, LABEL_OPTION},
+ {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
{"line-number", no_argument, NULL, 'n'},
{"line-regexp", no_argument, NULL, 'x'},
+ {"max-count", required_argument, NULL, 'm'},
{"mmap", no_argument, &mmap_option, 1},
{"no-filename", no_argument, NULL, 'h'},
{"no-messages", no_argument, NULL, 's'},
{"null", no_argument, NULL, 'Z'},
{"null-data", no_argument, NULL, 'z'},
+ {"only-matching", no_argument, NULL, 'o'},
+ {"perl-regexp", no_argument, NULL, 'P'},
{"quiet", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
+ {"recursive", no_argument, NULL, 'R'},
{"regexp", required_argument, NULL, 'e'},
{"invert-match", no_argument, NULL, 'v'},
{"silent", no_argument, NULL, 'q'},
@@ -115,7 +153,8 @@ int match_lines;
unsigned char eolbyte;
/* For error messages. */
-static char *prog;
+/* The name the program was run with, stripped of any leading path. */
+char *program_name;
static char const *filename;
static int errseen;
@@ -125,115 +164,70 @@ static enum
READ_DIRECTORIES,
RECURSE_DIRECTORIES,
SKIP_DIRECTORIES
- } directories;
-
-static int ck_atoi PARAMS ((char const *, int *));
-static void usage PARAMS ((int)) __attribute__((noreturn));
-static void error PARAMS ((const char *, int));
-static void setmatcher PARAMS ((char const *));
-static int install_matcher PARAMS ((char const *));
-static int prepend_args PARAMS ((char const *, char *, char **));
-static void prepend_default_options PARAMS ((char const *, int *, char ***));
-static char *page_alloc PARAMS ((size_t, char **));
-static int reset PARAMS ((int, char const *, struct stats *));
-static int fillbuf PARAMS ((size_t, struct stats *));
-static int grepbuf PARAMS ((char *, char *));
-static void prtext PARAMS ((char *, char *, int *));
-static void prpending PARAMS ((char *));
-static void prline PARAMS ((char *, char *, int));
-static void print_offset_sep PARAMS ((off_t, int));
-static void nlscan PARAMS ((char *));
-static int grep PARAMS ((int, char const *, struct stats *));
-static int grepdir PARAMS ((char const *, struct stats *));
-static int grepfile PARAMS ((char const *, struct stats *));
-#if O_BINARY
+ } directories = READ_DIRECTORIES;
+
+/* How to handle devices. */
+static enum
+ {
+ READ_DEVICES,
+ SKIP_DEVICES
+ } devices = READ_DEVICES;
+
+static int grepdir PARAMS ((char const *, struct stats const *));
+#if defined(HAVE_DOS_FILE_CONTENTS)
static inline int undossify_input PARAMS ((register char *, size_t));
#endif
/* Functions we'll use to search. */
-static void (*compile) PARAMS ((char *, size_t));
-static char *(*execute) PARAMS ((char *, size_t, char **));
+static void (*compile) PARAMS ((char const *, size_t));
+static size_t (*execute) PARAMS ((char const *, size_t, size_t *, int));
-/* Print a message and possibly an error string. Remember
- that something awful happened. */
+/* Like error, but suppress the diagnostic if requested: MESG and
+   ERRNUM are passed to error with status 0 unless suppress_errors is
+   nonzero.  Either way, errseen is set to record that something went
+   wrong.  */
static void
-error (const char *mesg, int errnum)
+suppressible_error (char const *mesg, int errnum)
{
- if (errnum)
- fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
- else
- fprintf (stderr, "%s: %s\n", prog, mesg);
+ if (! suppress_errors)
+ error (0, errnum, "%s", mesg);
errseen = 1;
}
-/* Like error (), but die horribly after printing. */
-void
-fatal (const char *mesg, int errnum)
-{
- error (mesg, errnum);
- exit (2);
-}
-
-/* Interface to handle errors and fix library lossage. */
-char *
-xmalloc (size_t size)
-{
- char *result;
-
- result = malloc (size);
- if (size && !result)
- fatal (_("memory exhausted"), 0);
- return result;
-}
-
-/* Interface to handle errors and fix some library lossage. */
-char *
-xrealloc (char *ptr, size_t size)
-{
- char *result;
-
- if (ptr)
- result = realloc (ptr, size);
- else
- result = malloc (size);
- if (size && !result)
- fatal (_("memory exhausted"), 0);
- return result;
-}
-
/* Convert STR to a positive integer, storing the result in *OUT.
- If STR is not a valid integer, return -1 (otherwise 0). */
-static int
-ck_atoi (char const *str, int *out)
+ STR must be a valid context length argument; report an error if it
+ isn't. */
+static void
+context_length_arg (char const *str, int *out)
{
- char const *p;
- for (p = str; *p; p++)
- if (*p < '0' || *p > '9')
- return -1;
-
- *out = atoi (optarg);
- return 0;
+ uintmax_t value;
+ if (! (xstrtoumax (str, 0, 10, &value, "") == LONGINT_OK
+ && 0 <= (*out = value)
+ && *out == value))
+ {
+ error (2, 0, "%s: %s", str, _("invalid context length argument"));
+ }
}
/* Hairy buffering mechanism for grep. The intent is to keep
all reads aligned on a page boundary and multiples of the
- page size. */
+ page size, unless a read yields a partial page. */
-static char *ubuffer; /* Unaligned base of buffer. */
static char *buffer; /* Base of buffer. */
-static size_t bufsalloc; /* Allocated size of buffer save region. */
-static size_t bufalloc; /* Total buffer size. */
-#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */
+static size_t bufalloc; /* Allocated buffer size, counting slop. */
+#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */
static off_t bufoffset; /* Read offset; defined on regular files. */
+static off_t after_last_match; /* Pointer after last matching line that
+ would have been output if we were
+ outputting characters. */
#if defined(HAVE_MMAP)
static int bufmapped; /* True if buffer is memory-mapped. */
static off_t initial_bufoffset; /* Initial value of bufoffset. */
+#else
+# define bufmapped 0
#endif
/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
@@ -243,66 +237,37 @@ static off_t initial_bufoffset; /* Initial value of bufoffset. */
? (val) \
: (val) + ((alignment) - (size_t) (val) % (alignment)))
-/* Return the address of a page-aligned buffer of size SIZE,
- reallocating it from *UP. Set *UP to the newly allocated (but
- possibly unaligned) buffer used to build the aligned buffer. To
- free the buffer, free (*UP). */
-static char *
-page_alloc (size_t size, char **up)
-{
- size_t asize = size + pagesize - 1;
- if (size <= asize)
- {
- char *p = *up ? realloc (*up, asize) : malloc (asize);
- if (p)
- {
- *up = p;
- return ALIGN_TO (p, pagesize);
- }
- }
- return NULL;
-}
-
/* Reset the buffer for a new file, returning zero if we should skip it.
Initialize on the first time through. */
static int
reset (int fd, char const *file, struct stats *stats)
{
- if (pagesize)
- bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
- else
+ if (! pagesize)
{
- size_t ubufsalloc;
pagesize = getpagesize ();
- if (pagesize == 0)
+ if (pagesize == 0 || 2 * pagesize + 1 <= pagesize)
abort ();
-#ifndef BUFSALLOC
- ubufsalloc = MAX (8192, pagesize);
-#else
- ubufsalloc = BUFSALLOC;
-#endif
- bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
- bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
- /* The 1 byte of overflow is a kludge for dfaexec(), which
- inserts a sentinel newline at the end of the buffer
- being searched. There's gotta be a better way... */
- if (bufsalloc < ubufsalloc
- || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
- || bufalloc + 1 < bufalloc
- || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
- fatal (_("memory exhausted"), 0);
+ bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1;
+ buffer = xmalloc (bufalloc);
}
- buflim = buffer;
+ bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
+ bufbeg[-1] = eolbyte;
bufdesc = fd;
if (fstat (fd, &stats->stat) != 0)
{
- error ("fstat", errno);
+ error (0, errno, "fstat");
return 0;
}
if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
return 0;
+#ifndef DJGPP
+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode)))
+#else
+ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode)))
+#endif
+ return 0;
if (S_ISREG (stats->stat.st_mode))
{
if (file)
@@ -312,18 +277,18 @@ reset (int fd, char const *file, struct stats *stats)
bufoffset = lseek (fd, 0, SEEK_CUR);
if (bufoffset < 0)
{
- error ("lseek", errno);
+ error (0, errno, "lseek");
return 0;
}
}
-#ifdef HAVE_MMAP
+#if defined(HAVE_MMAP)
initial_bufoffset = bufoffset;
bufmapped = mmap_option && bufoffset % pagesize == 0;
#endif
}
else
{
-#ifdef HAVE_MMAP
+#if defined(HAVE_MMAP)
bufmapped = 0;
#endif
}
@@ -335,73 +300,68 @@ reset (int fd, char const *file, struct stats *stats)
to the beginning of the buffer contents, and 'buflim'
points just after the end. Return zero if there's an error. */
static int
-fillbuf (size_t save, struct stats *stats)
+fillbuf (size_t save, struct stats const *stats)
{
size_t fillsize = 0;
int cc = 1;
+ char *readbuf;
size_t readsize;
- /* Offset from start of unaligned buffer to start of old stuff
+ /* Offset from start of buffer to start of old stuff
that we want to save. */
- size_t saved_offset = buflim - ubuffer - save;
+ size_t saved_offset = buflim - save - buffer;
- if (bufsalloc < save)
+ if (pagesize <= buffer + bufalloc - buflim)
+ {
+ readbuf = buflim;
+ bufbeg = buflim - save;
+ }
+ else
{
- size_t aligned_save = ALIGN_TO (save, pagesize);
- size_t maxalloc = (size_t) -1;
+ size_t minsize = save + pagesize;
+ size_t newsize;
size_t newalloc;
-
+ char *newbuf;
+
+ /* Grow newsize until it is at least as great as minsize. */
+ for (newsize = bufalloc - pagesize - 1; newsize < minsize; newsize *= 2)
+ if (newsize * 2 < newsize || newsize * 2 + pagesize + 1 < newsize * 2)
+ xalloc_die ();
+
+ /* Try not to allocate more memory than the file size indicates,
+ as that might cause unnecessary memory exhaustion if the file
+ is large. However, do not use the original file size as a
+ heuristic if we've already read past the file end, as most
+ likely the file is growing. */
if (S_ISREG (stats->stat.st_mode))
{
- /* Calculate an upper bound on how much memory we should allocate.
- We can't use ALIGN_TO here, since off_t might be longer than
- size_t. Watch out for arithmetic overflow. */
off_t to_be_read = stats->stat.st_size - bufoffset;
- size_t slop = to_be_read % pagesize;
- off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
- off_t maxalloc_off = aligned_save + aligned_to_be_read;
- if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
- maxalloc = maxalloc_off;
+ off_t maxsize_off = save + to_be_read;
+ if (0 <= to_be_read && to_be_read <= maxsize_off
+ && maxsize_off == (size_t) maxsize_off
+ && minsize <= (size_t) maxsize_off
+ && (size_t) maxsize_off < newsize)
+ newsize = maxsize_off;
}
- /* Grow bufsalloc until it is at least as great as `save'; but
- if there is an overflow, just grow it to the next page boundary. */
- while (bufsalloc < save)
- if (bufsalloc < bufsalloc * 2)
- bufsalloc *= 2;
- else
- {
- bufsalloc = aligned_save;
- break;
- }
+ /* Add enough room so that the buffer is aligned and has room
+ for byte sentinels fore and aft. */
+ newalloc = newsize + pagesize + 1;
- /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
- bufsalloc.... */
- newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
- if (maxalloc < newalloc)
+ newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
+ readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
+ bufbeg = readbuf - save;
+ memmove (bufbeg, buffer + saved_offset, save);
+ bufbeg[-1] = eolbyte;
+ if (newbuf != buffer)
{
- /* ... except don't grow it more than a pagesize past the
- file size, as that might cause unnecessary memory
- exhaustion if the file is large. */
- newalloc = maxalloc;
- bufsalloc = aligned_save;
+ free (buffer);
+ buffer = newbuf;
}
-
- /* Check that the above calculations made progress, which might
- not occur if there is arithmetic overflow. If there's no
- progress, or if the new buffer size is larger than the old
- and buffer reallocation fails, report memory exhaustion. */
- if (bufsalloc < save || newalloc < save
- || (newalloc == save && newalloc != maxalloc)
- || (bufalloc < newalloc
- && ! (buffer
- = page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
- fatal (_("memory exhausted"), 0);
}
- bufbeg = buffer + bufsalloc - save;
- memmove (bufbeg, ubuffer + saved_offset, save);
- readsize = bufalloc - bufsalloc;
+ readsize = buffer + bufalloc - readbuf;
+ readsize -= readsize % pagesize;
#if defined(HAVE_MMAP)
if (bufmapped)
@@ -417,7 +377,7 @@ fillbuf (size_t save, struct stats *stats)
}
if (mmapsize
- && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
+ && (mmap ((caddr_t) readbuf, mmapsize,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
bufdesc, bufoffset)
!= (caddr_t) -1))
@@ -438,7 +398,7 @@ fillbuf (size_t save, struct stats *stats)
if (bufoffset != initial_bufoffset
&& lseek (bufdesc, bufoffset, SEEK_SET) < 0)
{
- error ("lseek", errno);
+ error (0, errno, "lseek");
cc = 0;
}
}
@@ -448,7 +408,7 @@ fillbuf (size_t save, struct stats *stats)
if (! fillsize)
{
ssize_t bytesread;
- while ((bytesread = read (bufdesc, buffer + bufsalloc, readsize)) < 0
+ while ((bytesread = read (bufdesc, readbuf, readsize)) < 0
&& errno == EINTR)
continue;
if (bytesread < 0)
@@ -458,21 +418,22 @@ fillbuf (size_t save, struct stats *stats)
}
bufoffset += fillsize;
-#if O_BINARY
+#if defined(HAVE_DOS_FILE_CONTENTS)
if (fillsize)
- fillsize = undossify_input (buffer + bufsalloc, fillsize);
+ fillsize = undossify_input (readbuf, fillsize);
#endif
- buflim = buffer + bufsalloc + fillsize;
+ buflim = readbuf + fillsize;
return cc;
}
/* Flags controlling the style of output. */
static enum
- {
- BINARY_BINARY_FILES,
- TEXT_BINARY_FILES,
- WITHOUT_MATCH_BINARY_FILES
- } binary_files; /* How to handle binary files. */
+{
+ BINARY_BINARY_FILES,
+ TEXT_BINARY_FILES,
+ WITHOUT_MATCH_BINARY_FILES
+} binary_files; /* How to handle binary files. */
+
static int filename_mask; /* If zero, output nulls after filenames. */
static int out_quiet; /* Suppress all normal output. */
static int out_invert; /* Print nonmatching stuff. */
@@ -484,36 +445,58 @@ static int out_after; /* Lines of trailing context. */
static int count_matches; /* Count matching lines. */
static int list_files; /* List matching files. */
static int no_filenames; /* Suppress file names. */
-static int suppress_errors; /* Suppress diagnostics. */
+static off_t max_count; /* Stop after outputting this many
+ lines from an input file. */
+static int line_buffered; /* If nonzero, use line buffering, i.e.
+ fflush after every line of output. */
+static char *label = NULL; /* Fake filename for stdin */
+
/* Internal variables to keep track of byte count, context, etc. */
-static off_t totalcc; /* Total character count before bufbeg. */
-static char *lastnl; /* Pointer after last newline counted. */
-static char *lastout; /* Pointer after last character output;
+static uintmax_t totalcc; /* Total character count before bufbeg. */
+static char const *lastnl; /* Pointer after last newline counted. */
+static char const *lastout; /* Pointer after last character output;
NULL if no character has been output
or if it's conceptually before bufbeg. */
-static off_t totalnl; /* Total newline count before lastnl. */
-static int pending; /* Pending lines of output. */
-static int done_on_match; /* Stop scanning file on first match */
-
-#if O_BINARY
+static uintmax_t totalnl; /* Total newline count before lastnl. */
+static off_t outleft; /* Maximum number of lines to be output. */
+static int pending; /* Pending lines of output.
+ Always kept 0 if out_quiet is true. */
+static int done_on_match; /* Stop scanning file on first match. */
+static int exit_on_match; /* Exit on first match. */
+
+#if defined(HAVE_DOS_FILE_CONTENTS)
# include "dosbuf.c"
#endif
+/* Return A + B, where both count input bytes or lines.  If the sum
+   does not fit in uintmax_t, call error with status 2.  */
+static uintmax_t
+add_count (uintmax_t a, uintmax_t b)
+{
+  uintmax_t total = a + b;
+
+  /* Unsigned addition wraps around, so a wrapped total is below A.  */
+  if (total >= a)
+    return total;
+
+  error (2, 0, _("input is too large to count"));
+  return total;
+}
+
+/* Count the lines between lastnl and LIM, add that count to totalnl
+   through add_count, and advance lastnl to LIM.
+   NOTE(review): the loop ends only when a scan lands exactly on LIM,
+   so LIM presumably always follows an eolbyte; otherwise memchr could
+   return NULL -- confirm against the callers.  */
static void
-nlscan (char *lim)
+nlscan (char const *lim)
{
- char *beg;
- for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++)
- totalnl++;
+ size_t newlines = 0;
+ char const *beg;
+ for (beg = lastnl; beg != lim; beg = memchr (beg, eolbyte, lim - beg), beg++)
+ newlines++;
+ totalnl = add_count (totalnl, newlines);
lastnl = lim;
}
+/* Print a byte offset, followed by a character separator. */
static void
-print_offset_sep (off_t pos, int sep)
+print_offset_sep (uintmax_t pos, char sep)
{
- /* Do not rely on printf to print pos, since off_t may be longer than long,
- and long long is not portable. */
+ /* Do not rely on printf to print pos, since uintmax_t may be longer
+ than long, and long long is not portable. */
char buf[sizeof pos * CHAR_BIT];
char *p = buf + sizeof buf - 1;
@@ -527,56 +510,134 @@ print_offset_sep (off_t pos, int sep)
}
static void
-prline (char *beg, char *lim, int sep)
+prline (char const *beg, char const *lim, int sep) /* Write the text from BEG
+ up to LIM to stdout. Depending on out_file, out_line and out_byte
+ it is preceded by the file name, the line number and the byte
+ offset, each followed by SEP (masked with filename_mask after the
+ file name). Every path sets lastout to LIM. */
{
if (out_file)
printf ("%s%c", filename, sep & filename_mask);
if (out_line)
{
nlscan (beg);
- print_offset_sep (++totalnl, sep);
+ totalnl = add_count (totalnl, 1); /* Count this line, checking for overflow. */
+ print_offset_sep (totalnl, sep);
lastnl = lim;
}
if (out_byte)
{
- off_t pos = totalcc + (beg - bufbeg);
-#if O_BINARY
+ uintmax_t pos = add_count (totalcc, beg - bufbeg); /* totalcc counts the bytes before bufbeg. */
+#if defined(HAVE_DOS_FILE_CONTENTS)
pos = dossified_pos (pos);
#endif
print_offset_sep (pos, sep);
}
+ if (only_matching) /* Print just the matched parts, one per line, then return. */
+ {
+ size_t match_size;
+ size_t match_offset;
+ while ((match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+ != (size_t) -1)
+ {
+ char const *b = beg + match_offset;
+ if (b == lim)
+ break;
+ if (match_size == 0) /* Stop at an empty match: BEG could not move past it. */
+ break;
+ if(color_option)
+ printf("\33[%sm", grep_color);
+ fwrite(b, sizeof (char), match_size, stdout);
+ if(color_option)
+ fputs("\33[00m", stdout);
+ fputs("\n", stdout); /* Always a newline here, whatever eolbyte is. */
+ beg = b + match_size;
+ }
+ lastout = lim;
+ if(line_buffered)
+ fflush(stdout);
+ return; /* NOTE(review): this path skips the ferror (stdout) check
+ made at the end of the function. */
+ }
+ if (color_option) /* Bracket each match with the grep_color escape sequence. */
+ {
+ size_t match_size;
+ size_t match_offset;
+ if(match_icase)
+ {
+ /* Yuck, this is tricky: the matcher runs on BUF, a copy of the
+ text passed through tolower, while the bytes written to stdout
+ are taken from the original text at the same offsets. */
+ char *buf = (char*) xmalloc (lim - beg);
+ char *ibeg = buf;
+ char *ilim = ibeg + (lim - beg);
+ int i;
+ for (i = 0; i < lim - beg; i++)
+ ibeg[i] = tolower (beg[i]); /* NOTE(review): beg[i] is a plain char, so
+ where char is signed a byte above 0x7f reaches tolower
+ as a negative value; a cast to unsigned char looks
+ warranted -- confirm. */
+ while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
+ != (size_t) -1) /* NOTE(review): unlike the two sibling loops in
+ this function, this one has no match_size == 0 guard. An
+ empty match at offset 0 leaves BEG and IBEG unchanged, so
+ the loop looks able to spin -- confirm. */
+ {
+ char const *b = beg + match_offset;
+ if (b == lim)
+ break;
+ fwrite (beg, sizeof (char), match_offset, stdout);
+ printf ("\33[%sm", grep_color);
+ fwrite (b, sizeof (char), match_size, stdout);
+ fputs ("\33[00m", stdout);
+ beg = b + match_size;
+ ibeg = ibeg + match_offset + match_size;
+ }
+ fwrite (beg, 1, lim - beg, stdout);
+ free (buf);
+ lastout = lim;
+ return; /* NOTE(review): returns without the ferror check and the
+ line_buffered flush done at the end of the function. */
+ }
+ while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+ != (size_t) -1)
+ {
+ char const *b = beg + match_offset;
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (b == lim)
+ break;
+ /* Avoid hanging on grep --color "" foo */
+ if (match_size == 0)
+ break;
+ fwrite (beg, sizeof (char), match_offset, stdout);
+ printf ("\33[%sm", grep_color);
+ fwrite (b, sizeof (char), match_size, stdout);
+ fputs ("\33[00m", stdout);
+ beg = b + match_size;
+ }
+ }
fwrite (beg, 1, lim - beg, stdout);
if (ferror (stdout))
- error (_("writing output"), errno);
+ error (0, errno, _("writing output"));
lastout = lim;
+ if (line_buffered) /* Flush after each call when line_buffered is set. */
+ fflush (stdout);
}
-/* Print pending lines of trailing context prior to LIM. */
+/* Print pending lines of trailing context prior to LIM. Trailing context ends
at the next matching line when OUTLEFT is 0.
Printing starts at LASTOUT (or at BUFBEG when LASTOUT is null); each
line is written through prline with '-' as its separator and uses up
one unit of PENDING.
NOTE(review): NL is used without a null check, so every line between
LASTOUT and LIM is assumed to end in EOLBYTE -- confirm for all callers. */
static void
-prpending (char *lim)
+prpending (char const *lim)
{
- char *nl;
-
if (!lastout)
lastout = bufbeg;
while (pending > 0 && lastout < lim)
{
+ char const *nl = memchr (lastout, eolbyte, lim - lastout);
+ size_t match_size;
--pending;
- if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
- ++nl;
+ if (outleft
+ || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1)
+ == !out_invert))
+ prline (lastout, nl + 1, '-');
else
- nl = lim;
- prline (lastout, nl, '-');
+ pending = 0; /* OUTLEFT is 0 and this line is a selected one: drop
+ the remaining context. */
}
}
/* Print the lines between BEG and LIM. Deal with context crap.
- If NLINESP is non-null, store a count of lines between BEG and LIM. */
+ If NLINESP is non-null, store a count of lines between BEG and LIM. */
static void
-prtext (char *beg, char *lim, int *nlinesp)
+prtext (char const *beg, char const *lim, int *nlinesp)
{
static int used; /* avoid printing "--" before any output */
- char *bp, *p, *nl;
+ char const *bp, *p;
char eol = eolbyte;
int i, n;
@@ -594,7 +655,7 @@ prtext (char *beg, char *lim, int *nlinesp)
if (p > bp)
do
--p;
- while (p > bp && p[-1] != eol);
+ while (p[-1] != eol);
/* We only print the "--" separator if our output is
discontiguous from the last output in the file. */
@@ -603,26 +664,28 @@ prtext (char *beg, char *lim, int *nlinesp)
while (p < beg)
{
- nl = memchr (p, eol, beg - p);
- prline (p, nl + 1, '-');
- p = nl + 1;
+ char const *nl = memchr (p, eol, beg - p);
+ nl++;
+ prline (p, nl, '-');
+ p = nl;
}
}
if (nlinesp)
{
/* Caller wants a line count. */
- for (n = 0; p < lim; ++n)
+ for (n = 0; p < lim && n < outleft; n++)
{
- if ((nl = memchr (p, eol, lim - p)) != 0)
- ++nl;
- else
- nl = lim;
+ char const *nl = memchr (p, eol, lim - p);
+ nl++;
if (!out_quiet)
prline (p, nl, ':');
p = nl;
}
*nlinesp = n;
+
+ /* This relies on prtext never being called when outleft is 0. */
+ after_last_match = bufoffset - (buflim - p);
}
else
if (!out_quiet)
@@ -636,31 +699,42 @@ prtext (char *beg, char *lim, int *nlinesp)
between matching lines if OUT_INVERT is true). Return a count of
lines printed. */
static int
-grepbuf (char *beg, char *lim)
+grepbuf (char const *beg, char const *lim)
{
int nlines, n;
- register char *p, *b;
- char *endp;
- char eol = eolbyte;
+ register char const *p;
+ size_t match_offset;
+ size_t match_size;
nlines = 0;
p = beg;
- while ((b = (*execute)(p, lim - p, &endp)) != 0)
+ while ((match_offset = (*execute) (p, lim - p, &match_size, 0)) != (size_t) -1)
{
+ char const *b = p + match_offset;
+ char const *endp = b + match_size;
/* Avoid matching the empty line at the end of the buffer. */
- if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
+ if (b == lim)
break;
if (!out_invert)
{
prtext (b, endp, (int *) 0);
- nlines += 1;
- if (done_on_match)
- return nlines;
+ nlines++;
+ outleft--;
+ if (!outleft || done_on_match)
+ {
+ if (exit_on_match)
+ exit (0);
+ after_last_match = bufoffset - (buflim - endp);
+ return nlines;
+ }
}
else if (p < b)
{
prtext (p, b, &n);
nlines += n;
+ outleft -= n;
+ if (!outleft)
+ return nlines;
}
p = endp;
}
@@ -668,6 +742,7 @@ grepbuf (char *beg, char *lim)
{
prtext (p, lim, &n);
nlines += n;
+ outleft -= n;
}
return nlines;
}
@@ -681,7 +756,9 @@ grep (int fd, char const *file, struct stats *stats)
int nlines, i;
int not_text;
size_t residue, save;
- char *beg, *lim;
+ char oldc;
+ char *beg;
+ char *lim;
char eol = eolbyte;
if (!reset (fd, file, stats))
@@ -693,13 +770,15 @@ grep (int fd, char const *file, struct stats *stats)
/* Close fd now, so that we don't open a lot of file descriptors
when we recurse deeply. */
if (close (fd) != 0)
- error (file, errno);
+ error (0, errno, "%s", file);
return grepdir (file, stats) - 2;
}
totalcc = 0;
lastout = 0;
totalnl = 0;
+ outleft = max_count;
+ after_last_match = 0;
pending = 0;
nlines = 0;
@@ -708,8 +787,8 @@ grep (int fd, char const *file, struct stats *stats)
if (! fillbuf (save, stats))
{
- if (! (is_EISDIR (errno, file) && suppress_errors))
- error (filename, errno);
+ if (! is_EISDIR (errno, file))
+ suppressible_error (filename, errno);
return 0;
}
@@ -726,20 +805,38 @@ grep (int fd, char const *file, struct stats *stats)
lastnl = bufbeg;
if (lastout)
lastout = bufbeg;
- if (buflim - bufbeg == save)
+
+ beg = bufbeg + save;
+
+ /* No more data to scan (EOF), except possibly a residue: leave the loop. */
+ if (beg == buflim)
break;
- beg = bufbeg + save - residue;
- for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
- ;
+
+ /* Determine new residue (the length of an incomplete line at the end of
+ the buffer, 0 means there is no incomplete last line). */
+ oldc = beg[-1];
+ beg[-1] = eol;
+ for (lim = buflim; lim[-1] != eol; lim--)
+ continue;
+ beg[-1] = oldc;
+ if (lim == beg)
+ lim = beg - residue;
+ beg -= residue;
residue = buflim - lim;
+
if (beg < lim)
{
- nlines += grepbuf (beg, lim);
+ if (outleft)
+ nlines += grepbuf (beg, lim);
if (pending)
prpending (lim);
- if (nlines && done_on_match && !out_invert)
+ if((!outleft && !pending) || (nlines && done_on_match && !out_invert))
goto finish_grep;
}
+
+ /* The last OUT_BEFORE lines at the end of the buffer will be needed as
leading context if there is a matching line at the beginning of the
next data. Make beg point to the start of those lines. */
i = 0;
beg = lim;
while (i < out_before && beg > bufbeg && beg != lastout)
@@ -747,27 +844,33 @@ grep (int fd, char const *file, struct stats *stats)
++i;
do
--beg;
- while (beg > bufbeg && beg[-1] != eol);
+ while (beg[-1] != eol);
}
+
+ /* Detect whether the leading context is discontiguous with the last printed line. */
if (beg != lastout)
lastout = 0;
+
+ /* Handle some details and read more data to scan. */
save = residue + lim - beg;
- totalcc += buflim - bufbeg - save;
+ if (out_byte)
+ totalcc = add_count (totalcc, buflim - bufbeg - save);
if (out_line)
nlscan (beg);
if (! fillbuf (save, stats))
{
- if (! (is_EISDIR (errno, file) && suppress_errors))
- error (filename, errno);
+ if (! is_EISDIR (errno, file))
+ suppressible_error (filename, errno);
goto finish_grep;
}
}
if (residue)
{
*buflim++ = eol;
- nlines += grepbuf (bufbeg + save - residue, buflim);
+ if (outleft)
+ nlines += grepbuf (bufbeg + save - residue, buflim);
if (pending)
- prpending (buflim);
+ prpending (buflim);
}
finish_grep:
@@ -788,7 +891,7 @@ grepfile (char const *file, struct stats *stats)
if (! file)
{
desc = 0;
- filename = _("(standard input)");
+ filename = label ? label : _("(standard input)");
}
else
{
@@ -798,46 +901,44 @@ grepfile (char const *file, struct stats *stats)
if (desc < 0)
{
int e = errno;
-
+
if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
{
if (stat (file, &stats->stat) != 0)
{
- error (file, errno);
+ error (0, errno, "%s", file);
return 1;
}
return grepdir (file, stats);
}
-
+
if (!suppress_errors)
{
if (directories == SKIP_DIRECTORIES)
switch (e)
{
-#ifdef EISDIR
+#if defined(EISDIR)
case EISDIR:
return 1;
#endif
case EACCES:
/* When skipping directories, don't worry about
directories that can't be opened. */
- if (stat (file, &stats->stat) == 0
- && S_ISDIR (stats->stat.st_mode))
+ if (isdir (file))
return 1;
break;
}
-
- error (file, e);
}
+ suppressible_error (file, e);
return 1;
}
filename = file;
}
-#if O_BINARY
+#if defined(SET_BINARY)
/* Set input to binary mode. Pipes are simulated with files
on DOS, so this includes the case of "foo | grep bar". */
if (!isatty (desc))
@@ -860,11 +961,19 @@ grepfile (char const *file, struct stats *stats)
if (list_files == 1 - 2 * status)
printf ("%s%c", filename, '\n' & filename_mask);
- if (file)
+ if (! file)
+ {
+ off_t required_offset = outleft ? bufoffset : after_last_match;
+ if ((bufmapped || required_offset != bufoffset)
+ && lseek (desc, required_offset, SEEK_SET) < 0
+ && S_ISREG (stats->stat.st_mode))
+ error (0, errno, "%s", filename);
+ }
+ else
while (close (desc) != 0)
if (errno != EINTR)
{
- error (file, errno);
+ error (0, errno, "%s", file);
break;
}
}
@@ -873,33 +982,34 @@ grepfile (char const *file, struct stats *stats)
}
static int
-grepdir (char const *dir, struct stats *stats)
+grepdir (char const *dir, struct stats const *stats)
{
int status = 1;
- struct stats *ancestor;
+ struct stats const *ancestor;
char *name_space;
- for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
- if (ancestor->stat.st_ino == stats->stat.st_ino
- && ancestor->stat.st_dev == stats->stat.st_dev)
- {
- if (!suppress_errors)
- fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
+ /* Mingw32 does not support st_ino. No known working hosts use zero
+ for st_ino, so assume that the Mingw32 bug applies if it's zero. */
+ if (stats->stat.st_ino)
+ for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
+ if (ancestor->stat.st_ino == stats->stat.st_ino
+ && ancestor->stat.st_dev == stats->stat.st_dev)
+ {
+ if (!suppress_errors)
+ error (0, 0, _("warning: %s: %s\n"), dir,
_("recursive directory loop"));
- return 1;
- }
+ return 1;
+ }
- name_space = savedir (dir, (unsigned) stats->stat.st_size);
+ name_space = savedir (dir, stats->stat.st_size, included_patterns,
+ excluded_patterns);
if (! name_space)
{
if (errno)
- {
- if (!suppress_errors)
- error (dir, errno);
- }
+ suppressible_error (dir, errno);
else
- fatal (_("Memory exhausted"), 0);
+ xalloc_die ();
}
else
{
@@ -907,7 +1017,7 @@ grepdir (char const *dir, struct stats *stats)
int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
|| IS_SLASH (dir[dirlen - 1]));
char *file = NULL;
- char *namep = name_space;
+ char const *namep = name_space;
struct stats child;
child.parent = stats;
out_file += !no_filenames;
@@ -935,21 +1045,24 @@ usage (int status)
{
if (status != 0)
{
- fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
- fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
+ fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
+ program_name);
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
+ program_name);
}
else
{
- printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
+ printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), program_name);
printf (_("\
Search for PATTERN in each FILE or standard input.\n\
Example: %s -i 'hello world' menu.h main.c\n\
\n\
-Regexp selection and interpretation:\n"), prog);
+Regexp selection and interpretation:\n"), program_name);
printf (_("\
-E, --extended-regexp PATTERN is an extended regular expression\n\
-F, --fixed-strings PATTERN is a set of newline-separated strings\n\
- -G, --basic-regexp PATTERN is a basic regular expression\n"));
+ -G, --basic-regexp PATTERN is a basic regular expression\n\
+ -P, --perl-regexp PATTERN is a Perl regular expression\n"));
printf (_("\
-e, --regexp=PATTERN use PATTERN as a regular expression\n\
-f, --file=FILE obtain PATTERN from FILE\n\
@@ -968,18 +1081,27 @@ Miscellaneous:\n\
printf (_("\
\n\
Output control:\n\
+ -m, --max-count=NUM stop after NUM matches\n\
-b, --byte-offset print the byte offset with output lines\n\
-n, --line-number print line number with output lines\n\
+ --line-buffered flush output on every line\n\
-H, --with-filename print the filename for each match\n\
-h, --no-filename suppress the prefixing filename on output\n\
+ --label=LABEL print LABEL as filename for standard input\n\
+ -o, --only-matching show only the part of a line matching PATTERN\n\
-q, --quiet, --silent suppress all normal output\n\
--binary-files=TYPE assume that binary files are TYPE\n\
- TYPE is 'binary', 'text', or 'without-match'.\n\
+ TYPE is 'binary', 'text', or 'without-match'\n\
-a, --text equivalent to --binary-files=text\n\
-I equivalent to --binary-files=without-match\n\
-d, --directories=ACTION how to handle directories\n\
- ACTION is 'read', 'recurse', or 'skip'.\n\
- -r, --recursive equivalent to --directories=recurse.\n\
+ ACTION is 'read', 'recurse', or 'skip'\n\
+ -D, --devices=ACTION how to handle devices, FIFOs and sockets\n\
+ ACTION is 'read' or 'skip'\n\
+ -R, -r, --recursive equivalent to --directories=recurse\n\
+ --include=PATTERN files that match PATTERN will be examined\n\
+ --exclude=PATTERN files that match PATTERN will be skipped.\n\
+ --exclude-from=FILE files that match PATTERN in FILE will be skipped.\n\
-L, --files-without-match only print FILE names containing no match\n\
-l, --files-with-matches only print FILE names containing matches\n\
-c, --count only print a count of matching lines per FILE\n\
@@ -989,9 +1111,11 @@ Output control:\n\
Context control:\n\
-B, --before-context=NUM print NUM lines of leading context\n\
-A, --after-context=NUM print NUM lines of trailing context\n\
- -C, --context[=NUM] print NUM (default 2) lines of output context\n\
- unless overridden by -A or -B\n\
+ -C, --context=NUM print NUM lines of output context\n\
-NUM same as --context=NUM\n\
+ --color[=WHEN],\n\
+ --colour[=WHEN] use markers to distinguish the matching string\n\
+ WHEN may be `always', `never' or `auto'.\n\
-U, --binary do not strip CR characters at EOL (MSDOS)\n\
-u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
\n\
@@ -1009,7 +1133,7 @@ static void
setmatcher (char const *m)
{
if (matcher && strcmp (matcher, m) != 0)
- fatal (_("conflicting matchers specified"), 0);
+ error (2, 0, _("conflicting matchers specified"));
matcher = m;
}
@@ -1019,16 +1143,16 @@ static int
install_matcher (char const *name)
{
int i;
-#ifdef HAVE_SETRLIMIT
+#if defined(HAVE_SETRLIMIT)
struct rlimit rlim;
#endif
- for (i = 0; matchers[i].name; ++i)
+ for (i = 0; matchers[i].compile; i++)
if (strcmp (name, matchers[i].name) == 0)
{
compile = matchers[i].compile;
execute = matchers[i].execute;
-#if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
+#if defined(HAVE_SETRLIMIT) && defined(RLIMIT_STACK)
/* I think every platform needs to do this, so that regex.c
doesn't oveflow the stack. The default value of
`re_max_failures' is too large for some platforms: it needs
@@ -1048,9 +1172,10 @@ install_matcher (char const *name)
re_max_failures = newlim / (2 * 20 * sizeof (char *));
}
if (rlim.rlim_cur < newlim)
- rlim.rlim_cur = newlim;
-
- setrlimit (RLIMIT_STACK, &rlim);
+ {
+ rlim.rlim_cur = newlim;
+ setrlimit (RLIMIT_STACK, &rlim);
+ }
}
#endif
return 1;
@@ -1111,6 +1236,47 @@ prepend_default_options (char const *options, int *pargc, char ***pargv)
}
}
+/* Get the next non-digit option from ARGC and ARGV.
+ Return -1 if there are no more options.
+ Process any digit options that were encountered on the way,
+ and store the resulting integer into *DEFAULT_CONTEXT.
+ The digits are collected as text in BUF and, if there were any,
+ passed to context_length_arg together with DEFAULT_CONTEXT. */
+static int
+get_nondigit_option (int argc, char *const *argv, int *default_context)
+{
+ int opt;
+ char buf[sizeof (uintmax_t) * CHAR_BIT + 4]; /* The final 4 bytes are kept
+ free for "..." plus the terminating NUL. */
+ char *p = buf;
+
+ /* Set buf[0] to anything but '0', for the leading-zero test below. */
+ buf[0] = '\0';
+
+ while (opt = getopt_long (argc, argv, short_options, long_options, NULL),
+ '0' <= opt && opt <= '9')
+ {
+ /* Suppress trivial leading zeros, to avoid incorrect
+ diagnostic on strings like 00000000000.
+ When buf[0] is '0', P is stepped back by one so that the
+ digit stored next overwrites that zero. */
+ p -= buf[0] == '0';
+
+ *p++ = opt;
+ if (p == buf + sizeof buf - 4)
+ {
+ /* Too many digits. Append "..." to make context_length_arg
+ complain about "X...", where X contains the digits seen
+ so far. */
+ strcpy (p, "...");
+ p += 3;
+ break;
+ }
+ }
+ if (p != buf) /* At least one digit option was seen. */
+ {
+ *p = '\0';
+ context_length_arg (buf, default_context);
+ }
+
+ return opt;
+}
+
int
main (int argc, char **argv)
{
@@ -1119,29 +1285,33 @@ main (int argc, char **argv)
int with_filenames;
int opt, cc, status;
int default_context;
- unsigned digit_args_val;
FILE *fp;
extern char *optarg;
extern int optind;
initialize_main (&argc, &argv);
- prog = argv[0];
- if (prog && strrchr (prog, '/'))
- prog = strrchr (prog, '/') + 1;
+ program_name = argv[0];
+ if (program_name && strrchr (program_name, '/'))
+ program_name = strrchr (program_name, '/') + 1;
+
+ if (!strcmp(program_name, "egrep"))
+ setmatcher ("egrep");
+ if (!strcmp(program_name, "fgrep"))
+ setmatcher ("fgrep");
#if defined(__MSDOS__) || defined(_WIN32)
/* DOS and MS-Windows use backslashes as directory separators, and usually
have an .exe suffix. They also have case-insensitive filesystems. */
- if (prog)
+ if (program_name)
{
- char *p = prog;
+ char *p = program_name;
char *bslash = strrchr (argv[0], '\\');
- if (bslash && bslash >= prog) /* for mixed forward/backslash case */
- prog = bslash + 1;
- else if (prog == argv[0]
+ if (bslash && bslash >= program_name) /* for mixed forward/backslash case */
+ program_name = bslash + 1;
+ else if (program_name == argv[0]
&& argv[0][0] && argv[0][1] == ':') /* "c:progname" */
- prog = argv[0] + 2;
+ program_name = argv[0] + 2;
/* Collapse the letter-case, so `strcmp' could be used hence. */
for ( ; *p; p++)
@@ -1149,7 +1319,7 @@ main (int argc, char **argv)
*p += 'a' - 'A';
/* Remove the .exe extension, if any. */
- if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
+ if ((p = strrchr (program_name, '.')) && strcmp (p, ".exe") == 0)
*p = '\0';
}
#endif
@@ -1160,108 +1330,110 @@ main (int argc, char **argv)
eolbyte = '\n';
filename_mask = ~0;
+ max_count = TYPE_MAXIMUM (off_t);
+
/* The value -1 means to use DEFAULT_CONTEXT. */
out_after = out_before = -1;
/* Default before/after context: chaged by -C/-NUM options */
default_context = 0;
- /* Accumulated value of individual digits in a -NUM option */
- digit_args_val = 0;
-
+ /* Changed by -o option */
+ only_matching = 0;
-/* Internationalization. */
-#if HAVE_SETLOCALE
+ /* Internationalization. */
+#if defined(HAVE_SETLOCALE)
setlocale (LC_ALL, "");
#endif
-#if ENABLE_NLS
+#if defined(ENABLE_NLS)
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
#endif
+ atexit (close_stdout);
+
prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
- while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
- != -1)
+ while ((opt = get_nondigit_option (argc, argv, &default_context)) != -1)
switch (opt)
{
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- digit_args_val = 10 * digit_args_val + opt - '0';
- default_context = digit_args_val;
- break;
case 'A':
- if (optarg)
- {
- if (ck_atoi (optarg, &out_after))
- fatal (_("invalid context length argument"), 0);
- }
+ context_length_arg (optarg, &out_after);
break;
+
case 'B':
- if (optarg)
- {
- if (ck_atoi (optarg, &out_before))
- fatal (_("invalid context length argument"), 0);
- }
+ context_length_arg (optarg, &out_before);
break;
+
case 'C':
/* Set output match context, but let any explicit leading or
trailing amount specified with -A or -B stand. */
- if (optarg)
- {
- if (ck_atoi (optarg, &default_context))
- fatal (_("invalid context length argument"), 0);
- }
+ context_length_arg (optarg, &default_context);
+ break;
+
+ case 'D':
+ if (strcmp (optarg, "read") == 0)
+ devices = READ_DEVICES;
+ else if (strcmp (optarg, "skip") == 0)
+ devices = SKIP_DEVICES;
else
- default_context = 2;
+ error (2, 0, _("unknown devices method"));
break;
+
case 'E':
setmatcher ("egrep");
break;
+
case 'F':
setmatcher ("fgrep");
break;
+
+ case 'P':
+ setmatcher ("perl");
+ break;
+
case 'G':
setmatcher ("grep");
break;
+
case 'H':
with_filenames = 1;
break;
+
case 'I':
binary_files = WITHOUT_MATCH_BINARY_FILES;
break;
+
case 'U':
-#if O_BINARY
+#if defined(HAVE_DOS_FILE_CONTENTS)
dos_use_file_type = DOS_BINARY;
#endif
break;
+
case 'u':
-#if O_BINARY
+#if defined(HAVE_DOS_FILE_CONTENTS)
dos_report_unix_offset = 1;
#endif
break;
+
case 'V':
show_version = 1;
break;
+
case 'X':
setmatcher (optarg);
break;
+
case 'a':
binary_files = TEXT_BINARY_FILES;
break;
+
case 'b':
out_byte = 1;
break;
+
case 'c':
- out_quiet = 1;
count_matches = 1;
break;
+
case 'd':
if (strcmp (optarg, "read") == 0)
directories = READ_DIRECTORIES;
@@ -1270,8 +1442,9 @@ main (int argc, char **argv)
else if (strcmp (optarg, "recurse") == 0)
directories = RECURSE_DIRECTORIES;
else
- fatal (_("unknown directories method"), 0);
+ error (2, 0, _("unknown directories method"));
break;
+
case 'e':
cc = strlen (optarg);
keys = xrealloc (keys, keycc + cc + 1);
@@ -1279,10 +1452,11 @@ main (int argc, char **argv)
keycc += cc;
keys[keycc++] = '\n';
break;
+
case 'f':
fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
if (!fp)
- fatal (optarg, errno);
+ error (2, errno, "%s", optarg);
for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
;
keys = xrealloc (keys, keyalloc);
@@ -1300,53 +1474,88 @@ main (int argc, char **argv)
if (oldcc != keycc && keys[keycc - 1] != '\n')
keys[keycc++] = '\n';
break;
+
case 'h':
no_filenames = 1;
break;
+
case 'i':
case 'y': /* For old-timers . . . */
match_icase = 1;
break;
+
case 'L':
/* Like -l, except list files that don't contain matches.
Inspired by the same option in Hume's gre. */
- out_quiet = 1;
list_files = -1;
- done_on_match = 1;
break;
+
case 'l':
- out_quiet = 1;
list_files = 1;
- done_on_match = 1;
break;
+
+ case 'm':
+ {
+ uintmax_t value;
+ switch (xstrtoumax (optarg, 0, 10, &value, ""))
+ {
+ case LONGINT_OK:
+ max_count = value;
+ if (0 <= max_count && max_count == value)
+ break;
+ /* Fall through. */
+ case LONGINT_OVERFLOW:
+ max_count = TYPE_MAXIMUM (off_t);
+ break;
+
+ default:
+ error (2, 0, _("invalid max count"));
+ }
+ }
+ break;
+
case 'n':
out_line = 1;
break;
+
+ case 'o':
+ only_matching = 1;
+ break;
+
case 'q':
- done_on_match = 1;
- out_quiet = 1;
+ exit_on_match = 1;
+ close_stdout_set_status(0);
break;
+
+ case 'R':
case 'r':
directories = RECURSE_DIRECTORIES;
break;
+
case 's':
suppress_errors = 1;
break;
+
case 'v':
out_invert = 1;
break;
+
case 'w':
match_words = 1;
break;
+
case 'x':
match_lines = 1;
break;
+
case 'Z':
filename_mask = 0;
break;
+
case 'z':
eolbyte = '\0';
break;
+
case BINARY_FILES_OPTION:
if (strcmp (optarg, "binary") == 0)
binary_files = BINARY_BINARY_FILES;
@@ -1355,21 +1564,96 @@ main (int argc, char **argv)
else if (strcmp (optarg, "without-match") == 0)
binary_files = WITHOUT_MATCH_BINARY_FILES;
else
- fatal (_("unknown binary-files type"), 0);
+ error (2, 0, _("unknown binary-files type"));
+ break;
+
+ case COLOR_OPTION:
+ if(optarg) {
+ if(!strcasecmp(optarg, "always") || !strcasecmp(optarg, "yes") ||
+ !strcasecmp(optarg, "force"))
+ color_option = 1;
+ else if(!strcasecmp(optarg, "never") || !strcasecmp(optarg, "no") ||
+ !strcasecmp(optarg, "none"))
+ color_option = 0;
+ else if(!strcasecmp(optarg, "auto") || !strcasecmp(optarg, "tty") ||
+ !strcasecmp(optarg, "if-tty"))
+ color_option = 2;
+ else
+ show_help = 1;
+ } else
+ color_option = 2;
+ if(color_option == 2) {
+ if(isatty(STDOUT_FILENO) && getenv("TERM") &&
+ strcmp(getenv("TERM"), "dumb"))
+ color_option = 1;
+ else
+ color_option = 0;
+ }
break;
+
+ case EXCLUDE_OPTION:
+ if (!excluded_patterns)
+ excluded_patterns = new_exclude ();
+ add_exclude (excluded_patterns, optarg);
+ break;
+
+ case EXCLUDE_FROM_OPTION:
+ if (!excluded_patterns)
+ excluded_patterns = new_exclude ();
+ if (add_exclude_file (add_exclude, excluded_patterns, optarg, '\n')
+ != 0)
+ {
+ error (2, errno, "%s", optarg);
+ }
+ break;
+
+ case INCLUDE_OPTION:
+ if (!included_patterns)
+ included_patterns = new_exclude ();
+ add_exclude (included_patterns, optarg);
+ break;
+
+ case LINE_BUFFERED_OPTION:
+ line_buffered = 1;
+ break;
+
+ case LABEL_OPTION:
+ label = optarg;
+ break;
+
case 0:
/* long options */
break;
+
default:
usage (2);
break;
+
}
+ /* POSIX.2 says that -q overrides -l, which in turn overrides the
+ other output options. */
+ if (exit_on_match)
+ list_files = 0;
+ if (exit_on_match | list_files)
+ {
+ count_matches = 0;
+ done_on_match = 1;
+ }
+ out_quiet = count_matches | done_on_match;
+
if (out_after < 0)
out_after = default_context;
if (out_before < 0)
out_before = default_context;
+ if (color_option)
+ {
+ char *userval = getenv ("GREP_COLOR");
+ if (userval != NULL && *userval != '\0')
+ grep_color = userval;
+ }
+
if (! matcher)
matcher = "grep";
@@ -1378,7 +1662,7 @@ main (int argc, char **argv)
printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
printf ("\n");
printf (_("\
-Copyright 1988, 1992-1999, 2000 Free Software Foundation, Inc.\n"));
+Copyright 1988, 1992-1999, 2000, 2001 Free Software Foundation, Inc.\n"));
printf (_("\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
@@ -1392,8 +1676,11 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
if (keys)
{
if (keycc == 0)
- /* No keys were specified (e.g. -f /dev/null). Match nothing. */
- out_invert ^= 1;
+ {
+ /* No keys were specified (e.g. -f /dev/null). Match nothing. */
+ out_invert ^= 1;
+ match_lines = match_words = 0;
+ }
else
/* Strip trailing newline. */
--keycc;
@@ -1415,13 +1702,15 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
if ((argc - optind > 1 && !no_filenames) || with_filenames)
out_file = 1;
-#if O_BINARY
+#ifdef SET_BINARY
/* Output is set to binary mode because we shouldn't convert
NL to CR-LF pairs, especially when grepping binary files. */
if (!isatty (1))
SET_BINARY (1);
#endif
+ if (max_count == 0)
+ exit (1);
if (optind < argc)
{
@@ -1429,6 +1718,16 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
do
{
char *file = argv[optind];
+ if ((included_patterns || excluded_patterns)
+ && !isdir (file))
+ {
+ if (included_patterns &&
+ ! excluded_filename (included_patterns, file, 0))
+ continue;
+ if (excluded_patterns &&
+ excluded_filename (excluded_patterns, file, 0))
+ continue;
+ }
status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
&stats_base);
}
@@ -1437,8 +1736,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
else
status = grepfile ((char *) NULL, &stats_base);
- if (fclose (stdout) == EOF)
- error (_("writing output"), errno);
-
+ /* We register via atexit() to test stdout. */
exit (errseen ? 2 : status);
}
+/* vim:set shiftwidth=2: */
diff --git a/gnu/usr.bin/grep/grep.h b/gnu/usr.bin/grep/grep.h
index 13f55a230f1b..c13f2120be28 100644
--- a/gnu/usr.bin/grep/grep.h
+++ b/gnu/usr.bin/grep/grep.h
@@ -1,5 +1,5 @@
/* grep.h - interface to grep driver for searching subroutines.
- Copyright (C) 1992, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1992, 1998, 2001 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,20 +20,16 @@
# define __attribute__(x)
#endif
-extern void fatal PARAMS ((const char *, int)) __attribute__((noreturn));
-extern char *xmalloc PARAMS ((size_t size));
-extern char *xrealloc PARAMS ((char *ptr, size_t size));
-
/* Grep.c expects the matchers vector to be terminated
- by an entry with a NULL name, and to contain at least
+ by an entry with a NULL compile, and to contain at least
an entry named "default". */
extern struct matcher
{
- char *name;
- void (*compile) PARAMS ((char *, size_t));
- char *(*execute) PARAMS ((char *, size_t, char **));
-} matchers[];
+ char name[8]; /* Matcher name; install_matcher compares it with strcmp. */
+ void (*compile) PARAMS ((char const *, size_t));
+ size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); /* Callers
+ treat a result of (size_t) -1 as "no match" and any other
+ result as the offset of the match, whose length they read
+ back through the size_t * argument. */
+} const matchers[];
/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */
extern char const *matcher;
diff --git a/gnu/usr.bin/grep/hard-locale.c b/gnu/usr.bin/grep/hard-locale.c
new file mode 100644
index 000000000000..1c75b390a684
--- /dev/null
+++ b/gnu/usr.bin/grep/hard-locale.c
@@ -0,0 +1,85 @@
+/* hard-locale.c -- Determine whether a locale is hard.
+ Copyright 1997, 1998, 1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifndef __GNUC__
+# ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ # pragma alloca
+# else
+# ifdef _WIN32
+# include <malloc.h>
+# include <io.h>
+# else
+# ifndef alloca
+char *alloca ();
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#if HAVE_LOCALE_H
+# include <locale.h>
+#endif
+
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+
+/* Return nonzero if the current CATEGORY locale is hard, i.e. if you
+ can't get away with assuming traditional C or POSIX behavior.
+ The result is 0 when the locale name is that of the "C" or "POSIX"
+ locale, and 1 otherwise, including when setlocale returns a null
+ pointer for the query. Without ENABLE_NLS and HAVE_SETLOCALE the
+ result is always 0. */
+int
+hard_locale (int category)
+{
+#if ! (defined ENABLE_NLS && HAVE_SETLOCALE)
+ return 0;
+#else
+
+ int hard = 1; /* Stays 1 unless the name checks below clear it. */
+ char const *p = setlocale (category, 0); /* Query only; nothing is changed. */
+
+ if (p)
+ {
+# if defined __GLIBC__ && __GLIBC__ >= 2
+ if (strcmp (p, "C") == 0 || strcmp (p, "POSIX") == 0)
+ hard = 0;
+# else
+ char *locale = alloca (strlen (p) + 1); /* Copy of the current name; P
+ is reassigned by the setlocale calls below. */
+ strcpy (locale, p);
+
+ /* Temporarily set the locale to the "C" and "POSIX" locales to
+ find their names, so that we can determine whether one or the
+ other is the caller's locale. */
+ if (((p = setlocale (category, "C")) && strcmp (p, locale) == 0)
+ || ((p = setlocale (category, "POSIX")) && strcmp (p, locale) == 0))
+ hard = 0;
+
+ /* Restore the caller's locale. */
+ setlocale (category, locale);
+# endif
+ }
+
+ return hard;
+
+#endif
+}
diff --git a/gnu/usr.bin/grep/hard-locale.h b/gnu/usr.bin/grep/hard-locale.h
new file mode 100644
index 000000000000..5b054d9a5cb9
--- /dev/null
+++ b/gnu/usr.bin/grep/hard-locale.h
@@ -0,0 +1,18 @@
+#ifndef HARD_LOCALE_H_
+# define HARD_LOCALE_H_ 1
+
+# if HAVE_CONFIG_H
+# include <config.h>
+# endif
+
+# ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+int hard_locale PARAMS ((int));
+
+#endif /* HARD_LOCALE_H_ */
diff --git a/gnu/usr.bin/grep/isdir.c b/gnu/usr.bin/grep/isdir.c
new file mode 100644
index 000000000000..01cf8d5e4af2
--- /dev/null
+++ b/gnu/usr.bin/grep/isdir.c
@@ -0,0 +1,42 @@
+/* isdir.c -- determine whether a directory exists
+ Copyright (C) 1990, 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if STAT_MACROS_BROKEN
+# undef S_ISDIR
+#endif
+
+#if !defined S_ISDIR && defined S_IFDIR
+# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR)
+#endif
+
+/* If PATH is an existing directory or symbolic link to a directory,
+ return nonzero, else 0. */
+
+int
+isdir (const char *path)
+{
+  struct stat info;
+
+  /* A PATH that cannot be stat'ed is reported as "not a directory".  */
+  if (stat (path, &info) != 0)
+    return 0;
+
+  /* Normalize to 0/1 regardless of how S_ISDIR is implemented.  */
+  return S_ISDIR (info.st_mode) != 0;
+}
diff --git a/gnu/usr.bin/grep/kwset.c b/gnu/usr.bin/grep/kwset.c
index 61eff7bf72ba..97134bf6907c 100644
--- a/gnu/usr.bin/grep/kwset.c
+++ b/gnu/usr.bin/grep/kwset.c
@@ -81,22 +81,13 @@ struct kwset
struct trie *next[NCHAR]; /* Table of children of the root. */
char *target; /* Target string if there's only one. */
int mind2; /* Used in Boyer-Moore search for one string. */
- char *trans; /* Character translation table. */
+ char const *trans; /* Character translation table. */
};
-/* prototypes */
-static void enqueue PARAMS((struct tree *, struct trie **));
-static void treefails PARAMS((register struct tree *, struct trie *, struct trie *));
-static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *));
-static int hasevery PARAMS((register struct tree *, register struct tree *));
-static void treenext PARAMS((struct tree *, struct trie **));
-static char * bmexec PARAMS((kwset_t, char *, size_t));
-static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
-
/* Allocate and initialize a keyword set object, returning an opaque
pointer to it. Return NULL if memory is not available. */
kwset_t
-kwsalloc (char *trans)
+kwsalloc (char const *trans)
{
struct kwset *kwset;
@@ -131,7 +122,7 @@ kwsalloc (char *trans)
/* Add the given string to the contents of the keyword set. Return NULL
for success, an error message otherwise. */
char *
-kwsincr (kwset_t kws, char *text, size_t len)
+kwsincr (kwset_t kws, char const *text, size_t len)
{
struct kwset *kwset;
register struct trie *trie;
@@ -301,7 +292,8 @@ enqueue (struct tree *tree, struct trie **last)
from the given tree, given the failure function for their parent as
well as a last resort failure node. */
static void
-treefails (register struct tree *tree, struct trie *fail, struct trie *recourse)
+treefails (register struct tree const *tree, struct trie const *fail,
+ struct trie *recourse)
{
register struct tree *link;
@@ -335,7 +327,7 @@ treefails (register struct tree *tree, struct trie *fail, struct trie *recourse)
/* Set delta entries for the links of the given tree such that
the preexisting delta value is larger than the current depth. */
static void
-treedelta (register struct tree *tree,
+treedelta (register struct tree const *tree,
register unsigned int depth,
unsigned char delta[])
{
@@ -349,7 +341,7 @@ treedelta (register struct tree *tree,
/* Return true if A has every label in B. */
static int
-hasevery (register struct tree *a, register struct tree *b)
+hasevery (register struct tree const *a, register struct tree const *b)
{
if (!b)
return 1;
@@ -368,7 +360,7 @@ hasevery (register struct tree *a, register struct tree *b)
/* Compute a vector, indexed by character code, of the trie nodes
referenced from the given tree. */
static void
-treenext (struct tree *tree, struct trie *next[])
+treenext (struct tree const *tree, struct trie *next[])
{
if (!tree)
return;
@@ -385,7 +377,7 @@ kwsprep (kwset_t kws)
register struct kwset *kwset;
register int i;
register struct trie *curr, *fail;
- register char *trans;
+ register char const *trans;
unsigned char delta[NCHAR];
struct trie *last, *next[NCHAR];
@@ -497,23 +489,26 @@ kwsprep (kwset_t kws)
#define U(C) ((unsigned char) (C))
/* Fast boyer-moore search. */
-static char *
-bmexec (kwset_t kws, char *text, size_t size)
+static size_t
+bmexec (kwset_t kws, char const *text, size_t size)
{
- struct kwset *kwset;
- register unsigned char *d1;
- register char *ep, *sp, *tp;
+ struct kwset const *kwset;
+ register unsigned char const *d1;
+ register char const *ep, *sp, *tp;
register int d, gc, i, len, md2;
- kwset = (struct kwset *) kws;
+ kwset = (struct kwset const *) kws;
len = kwset->mind;
if (len == 0)
- return text;
- if (len > size)
return 0;
+ if (len > size)
+ return -1;
if (len == 1)
- return memchr(text, kwset->target[0], size);
+ {
+ tp = memchr (text, kwset->target[0], size);
+ return tp ? tp - text : -1;
+ }
d1 = kwset->delta;
sp = kwset->target + len;
@@ -552,7 +547,7 @@ bmexec (kwset_t kws, char *text, size_t size)
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
;
if (i > len)
- return tp - len;
+ return tp - len - text;
}
tp += md2;
}
@@ -571,26 +566,29 @@ bmexec (kwset_t kws, char *text, size_t size)
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
;
if (i > len)
- return tp - len;
+ return tp - len - text;
}
d = md2;
}
- return 0;
+ return -1;
}
/* Hairy multiple string search. */
-static char *
-cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
+static size_t
+cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
{
- struct kwset *kwset;
- struct trie **next, *trie, *accept;
- char *beg, *lim, *mch, *lmch;
- register unsigned char c, *delta;
+ struct kwset const *kwset;
+ struct trie * const *next;
+ struct trie const *trie;
+ struct trie const *accept;
+ char const *beg, *lim, *mch, *lmch;
+ register unsigned char c;
+ register unsigned char const *delta;
register int d;
- register char *end, *qlim;
- register struct tree *tree;
- register char *trans;
+ register char const *end, *qlim;
+ register struct tree const *tree;
+ register char const *trans;
#ifdef lint
accept = NULL;
@@ -599,7 +597,7 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
/* Initialize register copies and look for easy ways out. */
kwset = (struct kwset *) kws;
if (len < kwset->mind)
- return 0;
+ return -1;
next = kwset->next;
delta = kwset->delta;
trans = kwset->trans;
@@ -668,7 +666,7 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
if (mch)
goto match;
}
- return 0;
+ return -1;
match:
/* Given a known match, find the longest possible match anchored
@@ -728,10 +726,10 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
if (kwsmatch)
{
kwsmatch->index = accept->accepting / 2;
- kwsmatch->beg[0] = mch;
+ kwsmatch->offset[0] = mch - text;
kwsmatch->size[0] = accept->depth;
}
- return mch;
+ return mch - text;
}
/* Search through the given text for a match of any member of the
@@ -741,20 +739,18 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
matching substring. Similarly, if FOUNDIDX is non-NULL, store
in the referenced location the index number of the particular
keyword matched. */
-char *
-kwsexec (kwset_t kws, char *text, size_t size, struct kwsmatch *kwsmatch)
+size_t
+kwsexec (kwset_t kws, char const *text, size_t size,
+ struct kwsmatch *kwsmatch)
{
- struct kwset *kwset;
- char *ret;
-
- kwset = (struct kwset *) kws;
+ struct kwset const *kwset = (struct kwset *) kws;
if (kwset->words == 1 && kwset->trans == 0)
{
- ret = bmexec(kws, text, size);
- if (kwsmatch != 0 && ret != 0)
+ size_t ret = bmexec (kws, text, size);
+ if (kwsmatch != 0 && ret != (size_t) -1)
{
kwsmatch->index = 0;
- kwsmatch->beg[0] = ret;
+ kwsmatch->offset[0] = ret;
kwsmatch->size[0] = kwset->mind;
}
return ret;
diff --git a/gnu/usr.bin/grep/kwset.h b/gnu/usr.bin/grep/kwset.h
index e699258019d1..33ee261a972c 100644
--- a/gnu/usr.bin/grep/kwset.h
+++ b/gnu/usr.bin/grep/kwset.h
@@ -23,7 +23,7 @@
struct kwsmatch
{
int index; /* Index number of matching keyword. */
- char *beg[1]; /* Begin pointer for each submatch. */
+ size_t offset[1]; /* Offset of each submatch. */
size_t size[1]; /* Length of each submatch. */
};
@@ -33,12 +33,12 @@ typedef ptr_t kwset_t;
if enough memory cannot be obtained. The argument if non-NULL
specifies a table of character translations to be applied to all
pattern and search text. */
-extern kwset_t kwsalloc PARAMS((char *));
+extern kwset_t kwsalloc PARAMS((char const *));
/* Incrementally extend the keyword set to include the given string.
Return NULL for success, or an error message. Remember an index
number for each keyword included in the set. */
-extern char *kwsincr PARAMS((kwset_t, char *, size_t));
+extern char *kwsincr PARAMS((kwset_t, char const *, size_t));
/* When the keyword set has been completely built, prepare it for
use. Return NULL for success, or an error message. */
@@ -50,7 +50,7 @@ extern char *kwsprep PARAMS((kwset_t));
the matching substring in the integer it points to. Similarly,
if foundindex is non-NULL, store the index of the particular
keyword found therein. */
-extern char *kwsexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
+extern size_t kwsexec PARAMS((kwset_t, char const *, size_t, struct kwsmatch *));
/* Deallocate the given keyword set and all its associated storage. */
extern void kwsfree PARAMS((kwset_t));
diff --git a/gnu/usr.bin/grep/quotearg.c b/gnu/usr.bin/grep/quotearg.c
new file mode 100644
index 000000000000..2a7ba4c445ba
--- /dev/null
+++ b/gnu/usr.bin/grep/quotearg.c
@@ -0,0 +1,613 @@
+/* quotearg.c - quote arguments for output
+ Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if HAVE_STDDEF_H
+# include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
+#endif
+#include <sys/types.h>
+#include <quotearg.h>
+#include <xalloc.h>
+
+#include <ctype.h>
+
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(text) gettext (text)
+#else
+# define _(text) text
+#endif
+#define N_(text) text
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+#ifndef CHAR_BIT
+# define CHAR_BIT 8
+#endif
+#ifndef UCHAR_MAX
+# define UCHAR_MAX ((unsigned char) -1)
+#endif
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif
+
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+#if !HAVE_MBRTOWC
+/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
+ other macros are defined only for documentation and to satisfy C
+ syntax. */
+# undef MB_CUR_MAX
+# define MB_CUR_MAX 1
+# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
+# define mbsinit(ps) 1
+# define iswprint(wc) ISPRINT ((unsigned char) (wc))
+#endif
+
+#ifndef iswprint
+# if HAVE_WCTYPE_H
+# include <wctype.h>
+# endif
+# if !defined iswprint && !HAVE_ISWPRINT
+# define iswprint(wc) 1
+# endif
+#endif
+
+#define INT_BITS (sizeof (int) * CHAR_BIT)
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+# define IN_CTYPE_DOMAIN(c) 1
+#else
+# define IN_CTYPE_DOMAIN(c) isascii(c)
+#endif
+
+/* Undefine to protect against the definition in wctype.h of solaris2.6. */
+#undef ISPRINT
+#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
+
+struct quoting_options
+{
+ /* Basic quoting style. */
+ enum quoting_style style;
+
+ /* Quote the characters indicated by this bit vector even if the
+ quoting style would not normally require them to be quoted. */
+ int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
+};
+
+/* Names of quoting styles, in the same order as the entries of
+   quoting_style_vals.  The list is terminated by a null pointer.  */
+char const *const quoting_style_args[] =
+{
+ "literal",
+ "shell",
+ "shell-always",
+ "c",
+ "escape",
+ "locale",
+ "clocale",
+ 0
+};
+
+/* Correspondences to quoting style names: entry I is the style named
+   by quoting_style_args[I].  Unlike the name table, this array has no
+   terminating sentinel.  */
+enum quoting_style const quoting_style_vals[] =
+{
+ literal_quoting_style,
+ shell_quoting_style,
+ shell_always_quoting_style,
+ c_quoting_style,
+ escape_quoting_style,
+ locale_quoting_style,
+ clocale_quoting_style
+};
+
+/* The default quoting options. */
+static struct quoting_options default_quoting_options;
+
+/* Allocate a new set of quoting options, with contents initially identical
+ to O if O is not null, or to the default if O is null.
+ It is the caller's responsibility to free the result. */
+struct quoting_options *
+clone_quoting_options (struct quoting_options *o)
+{
+  struct quoting_options const *source = o;
+  struct quoting_options *copy;
+
+  /* A null O selects the file-wide default options as the template.  */
+  if (! source)
+    source = &default_quoting_options;
+
+  copy = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
+  *copy = *source;
+  return copy;
+}
+
+/* Get the value of O's quoting style. If O is null, use the default. */
+enum quoting_style
+get_quoting_style (struct quoting_options *o)
+{
+  /* A null O means "read the default options".  */
+  if (! o)
+    return default_quoting_options.style;
+  return o->style;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting style to S. */
+void
+set_quoting_style (struct quoting_options *o, enum quoting_style s)
+{
+  /* A null O means "modify the default options".  */
+  struct quoting_options *target = o;
+
+  if (! target)
+    target = &default_quoting_options;
+  target->style = s;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting options for character C to I.
+ Return the old value. Currently, the only values defined for I are
+ 0 (the default) and 1 (which means to quote the character even if
+ it would not otherwise be quoted). */
+int
+set_char_quoting (struct quoting_options *o, char c, int i)
+{
+  /* Index by the unsigned value of C so that chars with the high bit
+     set do not produce a negative index.  */
+  unsigned char uc = c;
+  int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
+  int shift = uc % INT_BITS;
+
+  /* Do the bit manipulation on an unsigned copy of the word: SHIFT can
+     be INT_BITS - 1 (e.g. for '?' and '_'), and shifting a signed int
+     into or out of the sign bit is not portable.  */
+  unsigned int word = (unsigned int) *p;
+  int r = (int) ((word >> shift) & 1u);
+
+  /* Flip the bit only when the requested value (low bit of I) differs
+     from the old value R.  */
+  word ^= (unsigned int) ((i & 1) ^ r) << shift;
+  *p = (int) word;
+
+  return r;
+}
+
+/* MSGID approximates a quotation mark. Return its translation if it
+ has one; otherwise, return either it or "\"", depending on S. */
+static char const *
+gettext_quote (char const *msgid, enum quoting_style s)
+{
+  char const *translated = _(msgid);
+
+  /* A pointer different from MSGID means a translation was found.  */
+  if (translated != msgid)
+    return translated;
+
+  /* Untranslated: clocale style falls back to a double quote, every
+     other style keeps MSGID itself.  */
+  if (s == clocale_quoting_style)
+    return "\"";
+  return msgid;
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
+ non-quoting-style part of O to control quoting.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
+
+ This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
+ ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
+ style specified by O, and O may not be null. */
+
+static size_t
+quotearg_buffer_restyled (char *buffer, size_t buffersize,
+                          char const *arg, size_t argsize,
+                          enum quoting_style quoting_style,
+                          struct quoting_options const *o)
+{
+  size_t i;
+  size_t len = 0;
+  char const *quote_string = 0;
+  size_t quote_string_len = 0;
+  int backslash_escapes = 0;
+  int unibyte_locale = MB_CUR_MAX == 1;
+
+  /* Append byte C to the output.  LEN always advances, but the byte is
+     written only while it still fits, so LEN ends up as the size the
+     complete output needs even when BUFFER is too small.  */
+#define STORE(c) \
+    do \
+      { \
+        if (len < buffersize) \
+          buffer[len] = (c); \
+        len++; \
+      } \
+    while (0)
+
+  /* Emit the opening quote (if any) and remember the closing quote in
+     QUOTE_STRING so that it can be escaped inside ARG and appended at
+     the end.  */
+  switch (quoting_style)
+    {
+    case c_quoting_style:
+      STORE ('"');
+      backslash_escapes = 1;
+      quote_string = "\"";
+      quote_string_len = 1;
+      break;
+
+    case escape_quoting_style:
+      backslash_escapes = 1;
+      break;
+
+    case locale_quoting_style:
+    case clocale_quoting_style:
+      {
+        /* Get translations for open and closing quotation marks.
+
+           The message catalog should translate "`" to a left
+           quotation mark suitable for the locale, and similarly for
+           "'".  If the catalog has no translation,
+           locale_quoting_style quotes `like this', and
+           clocale_quoting_style quotes "like this".
+
+           For example, an American English Unicode locale should
+           translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
+           should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
+           MARK).  A British English Unicode locale should instead
+           translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
+           U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
+
+        char const *left = gettext_quote (N_("`"), quoting_style);
+        char const *right = gettext_quote (N_("'"), quoting_style);
+        for (quote_string = left; *quote_string; quote_string++)
+          STORE (*quote_string);
+        backslash_escapes = 1;
+        quote_string = right;
+        quote_string_len = strlen (quote_string);
+      }
+      break;
+
+    case shell_always_quoting_style:
+      STORE ('\'');
+      quote_string = "'";
+      quote_string_len = 1;
+      break;
+
+    default:
+      break;
+    }
+
+  /* Walk ARG up to ARGSIZE bytes, or up to the terminating null when
+     ARGSIZE is (size_t) -1.  */
+  for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
+    {
+      unsigned char c;
+      unsigned char esc;
+
+      /* Backslash-escape an occurrence of the closing quote in ARG.  */
+      if (backslash_escapes
+          && quote_string_len
+          && i + quote_string_len <= argsize
+          && memcmp (arg + i, quote_string, quote_string_len) == 0)
+        STORE ('\\');
+
+      c = arg[i];
+      switch (c)
+        {
+        case '?':
+          switch (quoting_style)
+            {
+            case shell_quoting_style:
+              goto use_shell_always_quoting_style;
+
+            case c_quoting_style:
+              if (i + 2 < argsize && arg[i + 1] == '?')
+                switch (arg[i + 2])
+                  {
+                  case '!': case '\'':
+                  case '(': case ')': case '-': case '/':
+                  case '<': case '=': case '>':
+                    /* Escape the second '?' in what would otherwise be
+                       a trigraph.  Fetch the trigraph's third byte
+                       before advancing I past the two '?' characters;
+                       fetching arg[i + 2] after the advance would read
+                       two bytes too far, possibly beyond the end of
+                       ARG.  C is stored at store_c below.  */
+                    c = arg[i + 2];
+                    i += 2;
+                    STORE ('?');
+                    STORE ('\\');
+                    STORE ('?');
+                    break;
+                  }
+              break;
+
+            default:
+              break;
+            }
+          break;
+
+        case ALERT_CHAR: esc = 'a'; goto c_escape;
+        case '\b': esc = 'b'; goto c_escape;
+        case '\f': esc = 'f'; goto c_escape;
+        case '\n': esc = 'n'; goto c_and_shell_escape;
+        case '\r': esc = 'r'; goto c_and_shell_escape;
+        case '\t': esc = 't'; goto c_and_shell_escape;
+        case '\v': esc = 'v'; goto c_escape;
+        case '\\': esc = c; goto c_and_shell_escape;
+
+        c_and_shell_escape:
+          if (quoting_style == shell_quoting_style)
+            goto use_shell_always_quoting_style;
+        c_escape:
+          if (backslash_escapes)
+            {
+              c = esc;
+              goto store_escape;
+            }
+          break;
+
+        case '#': case '~':
+          /* These two are handled as shell-special only at the very
+             start of ARG.  */
+          if (i != 0)
+            break;
+          /* Fall through.  */
+        case ' ':
+        case '!': /* special in bash */
+        case '"': case '$': case '&':
+        case '(': case ')': case '*': case ';':
+        case '<': case '>': case '[':
+        case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
+        case '`': case '|':
+          /* A shell special character.  In theory, '$' and '`' could
+             be the first bytes of multibyte characters, which means
+             we should check them with mbrtowc, but in practice this
+             doesn't happen so it's not worth worrying about.  */
+          if (quoting_style == shell_quoting_style)
+            goto use_shell_always_quoting_style;
+          break;
+
+        case '\'':
+          switch (quoting_style)
+            {
+            case shell_quoting_style:
+              goto use_shell_always_quoting_style;
+
+            case shell_always_quoting_style:
+              /* Emit '\' here; store_c below adds the final quote, so
+                 the apostrophe comes out as '\''.  */
+              STORE ('\'');
+              STORE ('\\');
+              STORE ('\'');
+              break;
+
+            default:
+              break;
+            }
+          break;
+
+        case '%': case '+': case ',': case '-': case '.': case '/':
+        case '0': case '1': case '2': case '3': case '4': case '5':
+        case '6': case '7': case '8': case '9': case ':': case '=':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+        case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
+        case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
+        case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+        case 'o': case 'p': case 'q': case 'r': case 's': case 't':
+        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+        case '{': case '}':
+          /* These characters don't cause problems, no matter what the
+             quoting style is.  They cannot start multibyte sequences.  */
+          break;
+
+        default:
+          /* If we have a multibyte sequence, copy it until we reach
+             its end, find an error, or come back to the initial shift
+             state.  For C-like styles, if the sequence has
+             unprintable characters, escape the whole sequence, since
+             we can't easily escape single characters within it.  */
+          {
+            /* Length of multibyte sequence found so far.  */
+            size_t m;
+
+            int printable;
+
+            if (unibyte_locale)
+              {
+                m = 1;
+                printable = ISPRINT (c);
+              }
+            else
+              {
+                mbstate_t mbstate;
+                memset (&mbstate, 0, sizeof mbstate);
+
+                m = 0;
+                printable = 1;
+                /* mbrtowc needs a real byte count, so resolve the
+                   "use the string length" convention now.  */
+                if (argsize == (size_t) -1)
+                  argsize = strlen (arg);
+
+                do
+                  {
+                    wchar_t w;
+                    size_t bytes = mbrtowc (&w, &arg[i + m],
+                                            argsize - (i + m), &mbstate);
+                    if (bytes == 0)
+                      break;
+                    else if (bytes == (size_t) -1)
+                      {
+                        /* Invalid sequence.  */
+                        printable = 0;
+                        break;
+                      }
+                    else if (bytes == (size_t) -2)
+                      {
+                        /* Incomplete sequence: take the rest of ARG.  */
+                        printable = 0;
+                        while (i + m < argsize && arg[i + m])
+                          m++;
+                        break;
+                      }
+                    else
+                      {
+                        if (! iswprint (w))
+                          printable = 0;
+                        m += bytes;
+                      }
+                  }
+                while (! mbsinit (&mbstate));
+              }
+
+            if (1 < m || (backslash_escapes && ! printable))
+              {
+                /* Output a multibyte sequence, or an escaped
+                   unprintable unibyte character.  */
+                size_t ilim = i + m;
+
+                for (;;)
+                  {
+                    if (backslash_escapes && ! printable)
+                      {
+                        /* Three-digit octal escape; the last digit is
+                           left in C to be stored below.  */
+                        STORE ('\\');
+                        STORE ('0' + (c >> 6));
+                        STORE ('0' + ((c >> 3) & 7));
+                        c = '0' + (c & 7);
+                      }
+                    if (ilim <= i + 1)
+                      break;
+                    STORE (c);
+                    c = arg[++i];
+                  }
+
+                goto store_c;
+              }
+          }
+        }
+
+      /* Backslash-escape C only if the caller asked for it through
+         O->quote_these_too.  The mask is unsigned so that bit
+         INT_BITS - 1 can be tested without shifting into the sign bit
+         of an int.  */
+      if (! (backslash_escapes
+             && o->quote_these_too[c / INT_BITS] & (1u << (c % INT_BITS))))
+        goto store_c;
+
+    store_escape:
+      STORE ('\\');
+
+    store_c:
+      STORE (c);
+    }
+
+  /* Append the closing quote, if the style has one.  */
+  if (quote_string)
+    for (; *quote_string; quote_string++)
+      STORE (*quote_string);
+
+  if (len < buffersize)
+    buffer[len] = '\0';
+  return len;
+
+ use_shell_always_quoting_style:
+  /* Plain shell style met a character that needs quoting: start over
+     from the beginning of ARG with shell-always style.  */
+  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+                                   shell_always_quoting_style, o);
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using O to control quoting.
+ If O is null, use the default.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
+size_t
+quotearg_buffer (char *buffer, size_t buffersize,
+                 char const *arg, size_t argsize,
+                 struct quoting_options const *o)
+{
+  struct quoting_options const *effective = o;
+
+  /* Fall back to the default options when the caller passes none.  */
+  if (! effective)
+    effective = &default_quoting_options;
+
+  /* The options supply both the style and the per-character flags.  */
+  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+                                   effective->style, effective);
+}
+
+/* Use storage slot N to return a quoted version of the string ARG.
+ OPTIONS specifies the quoting options.
+ The returned value points to static storage that can be
+ reused by the next call to this function with the same value of N.
+ N must be nonnegative. N is deliberately declared with type "int"
+ to allow for future extensions (using negative values). */
+static char *
+quotearg_n_options (int n, char const *arg,
+                    struct quoting_options const *options)
+{
+  /* Preallocate a slot 0 buffer, so that the caller can always quote
+     one small component of a "memory exhausted" message in slot 0.  */
+  static char slot0[256];
+  static unsigned int nslots = 1;
+  struct slotvec
+    {
+      size_t size;
+      char *val;
+    };
+  static struct slotvec slotvec0 = {sizeof slot0, slot0};
+  static struct slotvec *slotvec = &slotvec0;
+
+  /* Grow the slot vector so that slot N exists.  */
+  if (nslots <= n)
+    {
+      int n1 = n + 1;
+      size_t s = n1 * sizeof (struct slotvec);
+
+      /* Reject a nonpositive slot count and a byte count that
+         overflowed size_t.  */
+      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
+        abort ();
+
+      /* The initial one-element vector is static storage and cannot be
+         handed to xrealloc, so move it to the heap first.  */
+      if (slotvec == &slotvec0)
+        {
+          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
+          *slotvec = slotvec0;
+        }
+      slotvec = (struct slotvec *) xrealloc (slotvec, s);
+      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
+
+      /* The vector now holds N1 slots.  Recording N instead would make
+         the next call with the same N take this path again and zero
+         slot N, leaking the buffer it held.  */
+      nslots = n1;
+    }
+
+  {
+    size_t size = slotvec[n].size;
+    char *val = slotvec[n].val;
+
+    /* First attempt: quote into whatever buffer the slot already has.
+       The return value is the size needed, whether or not it fit.  */
+    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
+
+    if (size <= qsize)
+      {
+        /* Too small: resize to exactly what is needed and quote again.
+           slot0 is static, so it is replaced rather than reallocated.  */
+        slotvec[n].size = size = qsize + 1;
+        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
+        quotearg_buffer (val, size, arg, (size_t) -1, options);
+      }
+
+    return val;
+  }
+}
+
+char *
+quotearg_n (unsigned int n, char const *arg)
+{
+  /* Quote ARG into slot N with the default quoting options.  */
+  struct quoting_options const *defaults = &default_quoting_options;
+
+  return quotearg_n_options (n, arg, defaults);
+}
+
+char *
+quotearg (char const *arg)
+{
+  /* Slot 0 with the default options.  */
+  char *quoted = quotearg_n (0, arg);
+
+  return quoted;
+}
+
+char *
+quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
+{
+  /* Build throwaway options: style S and no extra characters flagged
+     for quoting.  */
+  struct quoting_options style_only;
+
+  memset (style_only.quote_these_too, 0, sizeof style_only.quote_these_too);
+  style_only.style = s;
+
+  return quotearg_n_options (n, arg, &style_only);
+}
+
+char *
+quotearg_style (enum quoting_style s, char const *arg)
+{
+  /* Same as quotearg_n_style, fixed to slot 0.  */
+  char *quoted = quotearg_n_style (0, s, arg);
+
+  return quoted;
+}
+
+char *
+quotearg_char (char const *arg, char ch)
+{
+  /* Work on a private copy so the default options stay untouched.  */
+  struct quoting_options with_ch = default_quoting_options;
+
+  /* Additionally flag CH for quoting in the copy.  */
+  set_char_quoting (&with_ch, ch, 1);
+
+  return quotearg_n_options (0, arg, &with_ch);
+}
+
+char *
+quotearg_colon (char const *arg)
+{
+  /* Default quoting, with ':' additionally flagged for quoting.  */
+  char const extra = ':';
+
+  return quotearg_char (arg, extra);
+}
diff --git a/gnu/usr.bin/grep/quotearg.h b/gnu/usr.bin/grep/quotearg.h
new file mode 100644
index 000000000000..f6463b1aa3bb
--- /dev/null
+++ b/gnu/usr.bin/grep/quotearg.h
@@ -0,0 +1,110 @@
+/* quotearg.h - quote arguments for output
+ Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+/* Basic quoting styles. */
+enum quoting_style
+ {
+ literal_quoting_style, /* --quoting-style=literal */
+ shell_quoting_style, /* --quoting-style=shell */
+ shell_always_quoting_style, /* --quoting-style=shell-always */
+ c_quoting_style, /* --quoting-style=c */
+ escape_quoting_style, /* --quoting-style=escape */
+ locale_quoting_style, /* --quoting-style=locale */
+ clocale_quoting_style /* --quoting-style=clocale */
+ };
+
+/* For now, --quoting-style=literal is the default, but this may change. */
+#ifndef DEFAULT_QUOTING_STYLE
+# define DEFAULT_QUOTING_STYLE literal_quoting_style
+#endif
+
+/* Names of quoting styles and their corresponding values. */
+extern char const *const quoting_style_args[];
+extern enum quoting_style const quoting_style_vals[];
+
+struct quoting_options;
+
+#ifndef PARAMS
+# if defined PROTOTYPES || defined __STDC__
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+#endif
+
+/* The functions listed below set and use a hidden variable
+ that contains the default quoting style options. */
+
+/* Allocate a new set of quoting options, with contents initially identical
+ to O if O is not null, or to the default if O is null.
+ It is the caller's responsibility to free the result. */
+struct quoting_options *clone_quoting_options
+ PARAMS ((struct quoting_options *o));
+
+/* Get the value of O's quoting style. If O is null, use the default. */
+enum quoting_style get_quoting_style PARAMS ((struct quoting_options *o));
+
+/* In O (or in the default if O is null),
+ set the value of the quoting style to S. */
+void set_quoting_style PARAMS ((struct quoting_options *o,
+ enum quoting_style s));
+
+/* In O (or in the default if O is null),
+ set the value of the quoting options for character C to I.
+ Return the old value. Currently, the only values defined for I are
+ 0 (the default) and 1 (which means to quote the character even if
+ it would not otherwise be quoted). */
+int set_char_quoting PARAMS ((struct quoting_options *o, char c, int i));
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using O to control quoting.
+ If O is null, use the default.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
+size_t quotearg_buffer PARAMS ((char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ struct quoting_options const *o));
+
+/* Use storage slot N to return a quoted version of the string ARG.
+ Use the default quoting options.
+ The returned value points to static storage that can be
+ reused by the next call to this function with the same value of N.
+ N must be nonnegative. */
+char *quotearg_n PARAMS ((unsigned int n, char const *arg));
+
+/* Equivalent to quotearg_n (0, ARG). */
+char *quotearg PARAMS ((char const *arg));
+
+/* Use style S and storage slot N to return a quoted version of the string ARG.
+ This is like quotearg_n (N, ARG), except that it uses S with no other
+ options to specify the quoting method. */
+char *quotearg_n_style PARAMS ((unsigned int n, enum quoting_style s,
+ char const *arg));
+
+/* Equivalent to quotearg_n_style (0, S, ARG). */
+char *quotearg_style PARAMS ((enum quoting_style s, char const *arg));
+
+/* Like quotearg (ARG), except also quote any instances of CH. */
+char *quotearg_char PARAMS ((char const *arg, char ch));
+
+/* Equivalent to quotearg_char (ARG, ':'). */
+char *quotearg_colon PARAMS ((char const *arg));
diff --git a/gnu/usr.bin/grep/savedir.c b/gnu/usr.bin/grep/savedir.c
index e969407d32ee..9357cada9dfc 100644
--- a/gnu/usr.bin/grep/savedir.c
+++ b/gnu/usr.bin/grep/savedir.c
@@ -1,5 +1,5 @@
/* savedir.c -- save the list of files in a directory in a string
- Copyright (C) 1990, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1990, 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -66,17 +66,41 @@ char *realloc ();
char *stpcpy ();
#endif
+#include <fnmatch.h>
#include "savedir.h"
+char *path;
+size_t pathlen;
+
+/* Return nonzero if FILE, taken relative to directory DIR, is itself a
+   directory (as judged by isdir).  The combined name is built in the
+   file-scope scratch buffer PATH, whose capacity PATHLEN is grown here on
+   demand.  If the buffer cannot be grown, answer 0 ("not a directory")
+   instead of writing through a null pointer.  */
+static int
+isdir1 (const char *dir, const char *file)
+{
+  size_t dirlen = strlen (dir);
+  size_t needed = dirlen + 1 + strlen (file) + 1;  /* DIR '/' FILE NUL */
+
+  if (needed > pathlen)
+    {
+      /* Release the outgrown buffer first; it used to be leaked.  */
+      free (path);
+      path = calloc (needed, sizeof (*path));
+      pathlen = path ? needed : 0;
+      if (!path)
+        return 0;
+    }
+
+  strcpy (path, dir);
+  /* A separator is always inserted, even when DIR already ends in '/';
+     the resulting doubled slash is harmless in a file name.  */
+  path[dirlen] = '/';
+  strcpy (path + dirlen + 1, file);
+  return isdir (path);
+}
+
/* Return a freshly allocated string containing the filenames
in directory DIR, separated by '\0' characters;
the end is marked by two '\0' characters in a row.
NAME_SIZE is the number of bytes to initially allocate
for the string; it will be enlarged as needed.
Return NULL if DIR cannot be opened or if out of memory. */
-
char *
-savedir (const char *dir, off_t name_size)
+savedir (const char *dir, off_t name_size, struct exclude *included_patterns,
+ struct exclude *excluded_patterns)
{
DIR *dirp;
struct dirent *dp;
@@ -109,6 +133,17 @@ savedir (const char *dir, off_t name_size)
{
off_t size_needed = (namep - name_space) + NAMLEN (dp) + 2;
+ if ((included_patterns || excluded_patterns)
+ && !isdir1 (dir, dp->d_name))
+ {
+ if (included_patterns
+ && !excluded_filename (included_patterns, dp->d_name, 0))
+ continue;
+ if (excluded_patterns
+ && excluded_filename (excluded_patterns, dp->d_name, 0))
+ continue;
+ }
+
if (size_needed > name_size)
{
char *new_name_space;
@@ -134,5 +169,11 @@ savedir (const char *dir, off_t name_size)
free (name_space);
return NULL;
}
+ if (path)
+ {
+ free (path);
+ path = NULL;
+ pathlen = 0;
+ }
return name_space;
}
diff --git a/gnu/usr.bin/grep/savedir.h b/gnu/usr.bin/grep/savedir.h
index 89be04d0238b..49c331b587b6 100644
--- a/gnu/usr.bin/grep/savedir.h
+++ b/gnu/usr.bin/grep/savedir.h
@@ -1,6 +1,8 @@
#if !defined SAVEDIR_H_
# define SAVEDIR_H_
+#include "exclude.h"
+
# ifndef PARAMS
# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
# define PARAMS(Args) Args
@@ -9,7 +11,8 @@
# endif
# endif
-char *
-savedir PARAMS ((const char *dir, off_t name_size));
+extern char *
+savedir PARAMS ((const char *dir, off_t name_size,
+ struct exclude *, struct exclude *));
#endif
diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c
index 9763dba5f5bd..7bd233fbcaf1 100644
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@@ -22,54 +22,71 @@
# include <config.h>
#endif
#include <sys/types.h>
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+/* We can handle multibyte string. */
+# define MBS_SUPPORT
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
#include "system.h"
#include "grep.h"
#include "regex.h"
#include "dfa.h"
#include "kwset.h"
+#include "error.h"
+#include "xalloc.h"
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
#define NCHAR (UCHAR_MAX + 1)
-static void Gcompile PARAMS((char *, size_t));
-static void Ecompile PARAMS((char *, size_t));
-static char *EGexecute PARAMS((char *, size_t, char **));
-static void Fcompile PARAMS((char *, size_t));
-static char *Fexecute PARAMS((char *, size_t, char **));
-static void kwsinit PARAMS((void));
-
-/* Here is the matchers vector for the main program. */
-struct matcher matchers[] = {
- { "default", Gcompile, EGexecute },
- { "grep", Gcompile, EGexecute },
- { "egrep", Ecompile, EGexecute },
- { "awk", Ecompile, EGexecute },
- { "fgrep", Fcompile, Fexecute },
- { 0, 0, 0 },
-};
-
/* For -w, we also consider _ to be word constituent. */
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
/* DFA compiled regexp. */
static struct dfa dfa;
-/* Regex compiled regexp. */
-static struct re_pattern_buffer regexbuf;
+/* The Regex compiled patterns. */
+static struct patterns
+{
+ /* Regex compiled regexp. */
+ struct re_pattern_buffer regexbuf;
+ struct re_registers regs; /* This is here on account of a BRAIN-DEAD
+ Q@#%!# library interface in regex.c. */
+} patterns0;
+
+struct patterns *patterns;
+size_t pcount;
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
a list of strings, at least one of which is known to occur in
any string matching the regexp. */
static kwset_t kwset;
-/* Last compiled fixed string known to exactly match the regexp.
- If kwsexec() returns < lastexact, then we don't need to
+/* Number of compiled fixed strings known to exactly match the regexp.
+ If kwsexec returns < kwset_exact_matches, then we don't need to
call the regexp matcher at all. */
-static int lastexact;
+static int kwset_exact_matches;
+
+#if defined(MBS_SUPPORT)
+static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+#endif
+static void kwsinit PARAMS ((void));
+static void kwsmusts PARAMS ((void));
+static void Gcompile PARAMS ((char const *, size_t));
+static void Ecompile PARAMS ((char const *, size_t));
+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
+static void Fcompile PARAMS ((char const *, size_t));
+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
+static void Pcompile PARAMS ((char const *, size_t ));
+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+/* Error callback used by the DFA code: report MESG through error with
+   status 2.  MESG is passed as an argument, never as the format string,
+   so a '%' inside it cannot be interpreted as a conversion.  */
void
dfaerror (char const *mesg)
{
- fatal(mesg, 0);
+  error (2, 0, "%s", mesg);
}
static void
@@ -80,10 +97,10 @@ kwsinit (void)
if (match_icase)
for (i = 0; i < NCHAR; ++i)
- trans[i] = TOLOWER(i);
+ trans[i] = TOLOWER (i);
- if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
- fatal("memory exhausted", 0);
+ if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
+ error (2, 0, _("memory exhausted"));
}
/* If the DFA turns out to have some set of fixed strings one of
@@ -93,12 +110,12 @@ kwsinit (void)
static void
kwsmusts (void)
{
- struct dfamust *dm;
- char *err;
+ struct dfamust const *dm;
+ char const *err;
if (dfa.musts)
{
- kwsinit();
+ kwsinit ();
/* First, we compile in the substrings known to be exact
matches. The kwset matcher will return the index
of the matching string that it chooses. */
@@ -106,9 +123,9 @@ kwsmusts (void)
{
if (!dm->exact)
continue;
- ++lastexact;
- if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
- fatal(err, 0);
+ ++kwset_exact_matches;
+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+ error (2, 0, err);
}
/* Now, we compile the substrings that will require
the use of the regexp matcher. */
@@ -116,24 +133,90 @@ kwsmusts (void)
{
if (dm->exact)
continue;
- if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
- fatal(err, 0);
+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+ error (2, 0, err);
+ }
+ if ((err = kwsprep (kwset)) != 0)
+ error (2, 0, err);
+ }
+}
+
+#ifdef MBS_SUPPORT
+/* Allocate and return an array parallel to BUF (SIZE bytes).  The entry for
+   a single-byte character, or for the first byte of a multibyte character,
+   holds that character's length in bytes; the entry for every other byte
+   is 0.  The caller must free the array.  Allocation failure is reported
+   through error with status 2, as kwsinit does; NULL is returned only when
+   SIZE is 0 and malloc chose to return NULL for an empty request.  */
+static char*
+check_multibyte_string(char const *buf, size_t size)
+{
+  char *mb_properties = malloc(size);
+  mbstate_t cur_state;
+  size_t i;  /* Same type as SIZE, so the loop bound compares unsigned.  */
+  if (mb_properties == NULL)
+    {
+      /* Do not hand a null pointer to memset below.  */
+      if (size != 0)
+        error (2, 0, _("memory exhausted"));
+      return NULL;
+    }
+  memset(&cur_state, 0, sizeof(mbstate_t));
+  memset(mb_properties, 0, sizeof(char)*size);
+  for (i = 0; i < size ;)
+    {
+      size_t mbclen;
+      mbclen = mbrlen(buf + i, size - i, &cur_state);
+
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+        {
+          /* An invalid sequence, a truncated multibyte character, or a
+             NUL.  We treat it as a singlebyte character.  */
+          mbclen = 1;
}
- if ((err = kwsprep(kwset)) != 0)
- fatal(err, 0);
+      mb_properties[i] = mbclen;
+      i += mbclen;
}
+
+  return mb_properties;
}
+#endif
static void
-Gcompile (char *pattern, size_t size)
+Gcompile (char const *pattern, size_t size)
{
const char *err;
+ char const *sep;
+ size_t total = size;
+ char const *motif = pattern;
+
+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+
+ /* The GNU regex compiler must be given each pattern separately in order to
+ detect errors such as "[\nallo\n]\n": the patterns here are "[", "allo"
+ and "]", and GNU regex should raise a syntax error for them. The same
+ holds for back-references, which must be local to each pattern. */
+ do
+ {
+ size_t len;
+ sep = memchr (motif, '\n', total);
+ if (sep)
+ {
+ len = sep - motif;
+ sep++;
+ total -= (len + 1);
+ }
+ else
+ {
+ len = total;
+ total = 0;
+ }
+
+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+ if (patterns == NULL)
+ error (2, errno, _("memory exhausted"));
+
+ patterns[pcount] = patterns0;
- re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
- dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+ if ((err = re_compile_pattern (motif, len,
+ &(patterns[pcount].regexbuf))) != 0)
+ error (2, 0, err);
+ pcount++;
- if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
- fatal(err, 0);
+ motif = sep;
+ } while (sep && total != 0);
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
@@ -142,49 +225,42 @@ Gcompile (char *pattern, size_t size)
if (match_words || match_lines)
{
/* In the whole-word case, we use the pattern:
- (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
+ \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
In the whole-line case, we use the pattern:
- ^(userpattern)$.
- BUG: Using [A-Za-z_] is locale-dependent!
- So will use [:alnum:] */
-
- char *n = malloc(size + 50);
- int i = 0;
-
- strcpy(n, "");
-
- if (match_lines)
- strcpy(n, "^\\(");
- if (match_words)
- strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");
-
- i = strlen(n);
- memcpy(n + i, pattern, size);
+ ^\(userpattern\)$. */
+
+ static char const line_beg[] = "^\\(";
+ static char const line_end[] = "\\)$";
+ static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+ static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+ size_t i;
+ strcpy (n, match_lines ? line_beg : word_beg);
+ i = strlen (n);
+ memcpy (n + i, pattern, size);
i += size;
-
- if (match_words)
- strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
- if (match_lines)
- strcpy(n + i, "\\)$");
-
- i += strlen(n + i);
- dfacomp(n, i, &dfa, 1);
+ strcpy (n + i, match_lines ? line_end : word_end);
+ i += strlen (n + i);
+ pattern = n;
+ size = i;
}
- else
- dfacomp(pattern, size, &dfa, 1);
- kwsmusts();
+ dfacomp (pattern, size, &dfa, 1);
+ kwsmusts ();
}
static void
-Ecompile (char *pattern, size_t size)
+Ecompile (char const *pattern, size_t size)
{
const char *err;
+ const char *sep;
+ size_t total = size;
+ char const *motif = pattern;
- if (strcmp(matcher, "awk") == 0)
+ if (strcmp (matcher, "awk") == 0)
{
- re_set_syntax(RE_SYNTAX_AWK);
- dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
+ re_set_syntax (RE_SYNTAX_AWK);
+ dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
}
else
{
@@ -192,8 +268,38 @@ Ecompile (char *pattern, size_t size)
dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
}
- if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
- fatal(err, 0);
+ /* The GNU regex compiler must be given each pattern separately in order to
+ detect errors such as "[\nallo\n]\n": the patterns here are "[", "allo"
+ and "]", and GNU regex should raise a syntax error for them. The same
+ holds for back-references, which must be local to each pattern. */
+ do
+ {
+ size_t len;
+ sep = memchr (motif, '\n', total);
+ if (sep)
+ {
+ len = sep - motif;
+ sep++;
+ total -= (len + 1);
+ }
+ else
+ {
+ len = total;
+ total = 0;
+ }
+
+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+ if (patterns == NULL)
+ error (2, errno, _("memory exhausted"));
+ patterns[pcount] = patterns0;
+
+ if ((err = re_compile_pattern (motif, len,
+ &(patterns[pcount].regexbuf))) != 0)
+ error (2, 0, err);
+ pcount++;
+
+ motif = sep;
+ } while (sep && total != 0);
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
@@ -202,186 +308,236 @@ Ecompile (char *pattern, size_t size)
if (match_words || match_lines)
{
/* In the whole-word case, we use the pattern:
- (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
+ (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
In the whole-line case, we use the pattern:
- ^(userpattern)$.
- BUG: Using [A-Za-z_] is locale-dependent!
- so will use the char class */
-
- char *n = malloc(size + 50);
- int i = 0;
-
- strcpy(n, "");
-
- if (match_lines)
- strcpy(n, "^(");
- if (match_words)
- strcpy(n, "(^|[^[:alnum:]_])(");
-
+ ^(userpattern)$. */
+
+ static char const line_beg[] = "^(";
+ static char const line_end[] = ")$";
+ static char const word_beg[] = "(^|[^[:alnum:]_])(";
+ static char const word_end[] = ")([^[:alnum:]_]|$)";
+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+ size_t i;
+ strcpy (n, match_lines ? line_beg : word_beg);
i = strlen(n);
- memcpy(n + i, pattern, size);
+ memcpy (n + i, pattern, size);
i += size;
-
- if (match_words)
- strcpy(n + i, ")([^[:alnum:]_]|$)");
- if (match_lines)
- strcpy(n + i, ")$");
-
- i += strlen(n + i);
- dfacomp(n, i, &dfa, 1);
+ strcpy (n + i, match_lines ? line_end : word_end);
+ i += strlen (n + i);
+ pattern = n;
+ size = i;
}
- else
- dfacomp(pattern, size, &dfa, 1);
- kwsmusts();
+ dfacomp (pattern, size, &dfa, 1);
+ kwsmusts ();
}
-static char *
-EGexecute (char *buf, size_t size, char **endp)
+static size_t
+EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
- register char *buflim, *beg, *end, save;
+ register char const *buflim, *beg, *end;
char eol = eolbyte;
int backref, start, len;
struct kwsmatch kwsm;
- static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
- Q@#%!# library interface in regex.c. */
+ size_t i;
+#ifdef MBS_SUPPORT
+ char *mb_properties = NULL;
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && kwset)
+ mb_properties = check_multibyte_string(buf, size);
+#endif /* MBS_SUPPORT */
buflim = buf + size;
- for (beg = end = buf; end < buflim; beg = end + 1)
+ for (beg = end = buf; end < buflim; beg = end)
{
- if (kwset)
+ if (!exact)
{
- /* Find a possible match using the KWset matcher. */
- beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
- if (!beg)
- goto failure;
- /* Narrow down to the line containing the candidate, and
- run it through DFA. */
- end = memchr(beg, eol, buflim - beg);
- if (!end)
- end = buflim;
- while (beg > buf && beg[-1] != eol)
- --beg;
- save = *end;
- if (kwsm.index < lastexact)
- goto success;
- if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
+ if (kwset)
{
- *end = save;
- continue;
+ /* Find a possible match using the KWset matcher. */
+ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ if (offset == (size_t) -1)
+ {
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free(mb_properties);
+#endif
+ return (size_t)-1;
+ }
+ beg += offset;
+ /* Narrow down to the line containing the candidate, and
+ run it through DFA. */
+ end = memchr(beg, eol, buflim - beg);
+ end++;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
+ continue;
+#endif
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+ if (kwsm.index < kwset_exact_matches)
+ goto success;
+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+ }
+ else
+ {
+ /* No good fixed strings; start with DFA. */
+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ if (offset == (size_t) -1)
+ break;
+ /* Narrow down to the line we've found. */
+ beg += offset;
+ end = memchr (beg, eol, buflim - beg);
+ end++;
+ while (beg > buf && beg[-1] != eol)
+ --beg;
}
- *end = save;
- /* Successful, no backreferences encountered. */
- if (!backref)
- goto success;
- }
- else
- {
- /* No good fixed strings; start with DFA. */
- save = *buflim;
- beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
- *buflim = save;
- if (!beg)
- goto failure;
- /* Narrow down to the line we've found. */
- end = memchr(beg, eol, buflim - beg);
- if (!end)
- end = buflim;
- while (beg > buf && beg[-1] != eol)
- --beg;
/* Successful, no backreferences encountered! */
if (!backref)
goto success;
}
+ else
+ end = beg + size;
+
/* If we've made it to this point, this means DFA has seen
a probable match, and we need to run it through Regex. */
- regexbuf.not_eol = 0;
- if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
+ for (i = 0; i < pcount; i++)
{
- len = regs.end[0] - start;
- if ((!match_lines && !match_words)
- || (match_lines && len == end - beg))
- goto success;
- /* If -w, check if the match aligns with word boundaries.
- We do this iteratively because:
- (a) the line may contain more than one occurence of the pattern, and
- (b) Several alternatives in the pattern might be valid at a given
- point, and we may need to consider a shorter one to find a word
- boundary. */
- if (match_words)
- while (start >= 0)
- {
- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
- && (len == end - beg
- || !WCHAR ((unsigned char) beg[start + len])))
- goto success;
- if (len > 0)
- {
- /* Try a shorter length anchored at the same place. */
- --len;
- regexbuf.not_eol = 1;
- len = re_match(&regexbuf, beg, start + len, start, &regs);
- }
- if (len <= 0)
+ patterns[i].regexbuf.not_eol = 0;
+ if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
+ end - beg - 1, 0,
+ end - beg - 1, &(patterns[i].regs))))
+ {
+ len = patterns[i].regs.end[0] - start;
+ if (exact)
+ {
+ *match_size = len;
+ return start;
+ }
+ if ((!match_lines && !match_words)
+ || (match_lines && len == end - beg - 1))
+ goto success;
+ /* If -w, check if the match aligns with word boundaries.
+ We do this iteratively because:
+ (a) the line may contain more than one occurence of the
+ pattern, and
+ (b) Several alternatives in the pattern might be valid at a
+ given point, and we may need to consider a shorter one to
+ find a word boundary. */
+ if (match_words)
+ while (start >= 0)
{
- /* Try looking further on. */
- if (start == end - beg)
- break;
- ++start;
- regexbuf.not_eol = 0;
- start = re_search(&regexbuf, beg, end - beg,
- start, end - beg - start, &regs);
- len = regs.end[0] - start;
+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+ && (len == end - beg - 1
+ || !WCHAR ((unsigned char) beg[start + len])))
+ goto success;
+ if (len > 0)
+ {
+ /* Try a shorter length anchored at the same place. */
+ --len;
+ patterns[i].regexbuf.not_eol = 1;
+ len = re_match (&(patterns[i].regexbuf), beg,
+ start + len, start,
+ &(patterns[i].regs));
+ }
+ if (len <= 0)
+ {
+ /* Try looking further on. */
+ if (start == end - beg - 1)
+ break;
+ ++start;
+ patterns[i].regexbuf.not_eol = 0;
+ start = re_search (&(patterns[i].regexbuf), beg,
+ end - beg - 1,
+ start, end - beg - 1 - start,
+ &(patterns[i].regs));
+ len = patterns[i].regs.end[0] - start;
+ }
}
- }
- }
- }
-
- failure:
- return 0;
+ }
+ } /* for Regex patterns. */
+ } /* for (beg = end ..) */
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ return (size_t) -1;
success:
- *endp = end < buflim ? end + 1 : end;
- return beg;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ *match_size = end - beg;
+ return beg - buf;
}
+/* Compile the SIZE bytes at PATTERN as a newline-separated list of fixed
+   strings, adding each one to KWSET.  A kwset error message is reported
+   through error with status 2; it is passed as an argument rather than as
+   the format string so that a '%' in it cannot be misinterpreted.  */
static void
-Fcompile (char *pattern, size_t size)
+Fcompile (char const *pattern, size_t size)
{
- char *beg, *lim, *err;
+ char const *beg, *lim, *err;
- kwsinit();
+ kwsinit ();
beg = pattern;
do
{
for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
;
- if ((err = kwsincr(kwset, beg, lim - beg)) != 0)
- fatal(err, 0);
+ if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
+ error (2, 0, "%s", err);
if (lim < pattern + size)
++lim;
beg = lim;
}
while (beg < pattern + size);
- if ((err = kwsprep(kwset)) != 0)
- fatal(err, 0);
+ if ((err = kwsprep (kwset)) != 0)
+ error (2, 0, "%s", err);
}
-static char *
-Fexecute (char *buf, size_t size, char **endp)
+static size_t
+Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
- register char *beg, *try, *end;
+ register char const *beg, *try, *end;
register size_t len;
char eol = eolbyte;
struct kwsmatch kwsmatch;
+#ifdef MBS_SUPPORT
+ char *mb_properties;
+ if (MB_CUR_MAX > 1)
+ mb_properties = check_multibyte_string (buf, size);
+#endif /* MBS_SUPPORT */
for (beg = buf; beg <= buf + size; ++beg)
{
- if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
- return 0;
+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+ if (offset == (size_t) -1)
+ {
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free(mb_properties);
+#endif /* MBS_SUPPORT */
+ return offset;
+ }
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+ continue; /* It is a part of multibyte character. */
+#endif /* MBS_SUPPORT */
+ beg += offset;
len = kwsmatch.size[0];
+ if (exact)
+ {
+ *match_size = len;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ return beg - buf;
+ }
if (match_lines)
{
if (beg > buf && beg[-1] != eol)
@@ -391,13 +547,22 @@ Fexecute (char *buf, size_t size, char **endp)
goto success;
}
else if (match_words)
- for (try = beg; len && try;)
+ for (try = beg; len; )
{
if (try > buf && WCHAR((unsigned char) try[-1]))
break;
if (try + len < buf + size && WCHAR((unsigned char) try[len]))
{
- try = kwsexec(kwset, beg, --len, &kwsmatch);
+ offset = kwsexec (kwset, beg, --len, &kwsmatch);
+ if (offset == (size_t) -1)
+ {
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ return offset;
+ }
+ try = beg + offset;
len = kwsmatch.size[0];
}
else
@@ -407,15 +572,153 @@ Fexecute (char *buf, size_t size, char **endp)
goto success;
}
- return 0;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ return -1;
success:
- if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
- ++end;
- else
- end = buf + size;
- *endp = end;
- while (beg > buf && beg[-1] != '\n')
+ end = memchr (beg + len, eol, (buf + size) - (beg + len));
+ end++;
+ while (buf < beg && beg[-1] != eol)
--beg;
- return beg;
+ *match_size = end - beg;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
+#endif /* MBS_SUPPORT */
+ return beg - buf;
+}
+
+#if HAVE_LIBPCRE
+/* Compiled internal form of a Perl regular expression.  */
+static pcre *cre;
+
+/* Additional information about the pattern.  */
+static pcre_extra *extra;
+#endif
+
+/* Compile the SIZE-byte Perl regular expression PATTERN into CRE and EXTRA
+   for later use by Pexecute.  With match_lines set the pattern is wrapped
+   as ^(...)$, otherwise with match_words set as \b(...)\b.  Failures are
+   reported through error with status 2.  When built without libpcre this
+   only reports that -P is unsupported.  */
+static void
+Pcompile (char const *pattern, size_t size)
+{
+#if !HAVE_LIBPCRE
+  error (2, 0, _("The -P option is not supported"));
+#else
+  int e;
+  char const *ep;
+  /* Worst case: a 3-byte opener, 4 bytes per pattern byte (every byte a
+     NUL), a 3-byte closer and the terminating NUL.  */
+  char *re = xmalloc (4 * size + 7);
+  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
+  char const *patlim = pattern + size;
+  char *n = re;
+  char const *p;
+  char const *pnul;
+
+  /* FIXME: Remove this restriction.  */
+  if (eolbyte != '\n')
+    error (2, 0, _("The -P and -z options cannot be combined"));
+
+  /* Choose the opener.  match_lines takes precedence over match_words,
+     as in Gcompile and Ecompile, so that opener and closer always pair
+     up; two independent tests used to yield "\b(" ... ")$" when both
+     flags were set.  */
+  *n = '\0';
+  if (match_lines)
+    strcpy (n, "^(");
+  else if (match_words)
+    strcpy (n, "\\b(");
+  n += strlen (n);
+
+  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
+     replace each NUL byte in the pattern with the four characters
+     "\000", removing a preceding backslash if there are an odd
+     number of backslashes before the NUL.
+
+     FIXME: This method does not work with some multibyte character
+     encodings, notably Shift-JIS, where a multibyte character can end
+     in a backslash byte.  */
+  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+    {
+      memcpy (n, p, pnul - p);
+      n += pnul - p;
+      /* Walk back over the run of backslashes just before the NUL.  */
+      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
+        continue;
+      /* An odd run means the last backslash escaped the NUL: drop it.  */
+      n -= (pnul - p) & 1;
+      strcpy (n, "\\000");
+      n += 4;
+    }
+
+  memcpy (n, p, patlim - p);
+  n += patlim - p;
+  *n = '\0';
+  /* Closer matching the opener chosen above.  */
+  if (match_lines)
+    strcpy (n, ")$");
+  else if (match_words)
+    strcpy (n, ")\\b");
+
+  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
+  if (!cre)
+    error (2, 0, "%s", ep);  /* EP is a message, not a format.  */
+
+  extra = pcre_study (cre, 0, &ep);
+  if (ep)
+    error (2, 0, "%s", ep);
+
+  free (re);
+#endif
+}
+
+/* Search the SIZE bytes at BUF with the pattern compiled by Pcompile.
+   Return the offset of the result within BUF and store its length in
+   *MATCH_SIZE, or return (size_t) -1 if nothing matches.  If EXACT is
+   zero the result is widened to whole lines: back to the start of the
+   line holding the match and forward through the next EOLBYTE (or to the
+   end of BUF if there is none).  Otherwise it is the match itself.  */
+static size_t
+Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
+{
+#if !HAVE_LIBPCRE
+  abort ();
+  return -1;
+#else
+  /* This array must have at least two elements; everything after that
+     is just for performance improvement in pcre_exec.  */
+  int sub[300];
+
+  int e = pcre_exec (cre, extra, buf, size, 0, 0,
+                     sub, sizeof sub / sizeof *sub);
+
+  /* Only negative values are errors.  Zero means the pattern matched but
+     SUB was too small to record every captured substring; SUB[0] and
+     SUB[1] still delimit the whole match, which is all that is used
+     below, so it must not be treated as a failure.  */
+  if (e < 0)
+    {
+      switch (e)
+        {
+        case PCRE_ERROR_NOMATCH:
+          return -1;
+
+        case PCRE_ERROR_NOMEMORY:
+          /* error is expected not to return for a nonzero status.  */
+          error (2, 0, _("Memory exhausted"));
+
+        default:
+          abort ();
+        }
+    }
+  else
+    {
+      /* Narrow down to the line we've found.  */
+      char const *beg = buf + sub[0];
+      char const *end = buf + sub[1];
+      char const *buflim = buf + size;
+      char eol = eolbyte;
+      if (!exact)
+        {
+          char const *nl = memchr (end, eol, buflim - end);
+          /* Stop at the buffer limit rather than incrementing a null
+             pointer when no terminator follows the match.  */
+          end = nl ? nl + 1 : buflim;
+          while (buf < beg && beg[-1] != eol)
+            --beg;
+        }
+
+      *match_size = end - beg;
+      return beg - buf;
+    }
+#endif
}
+
+struct matcher const matchers[] = { /* presumably { name, compile function, execute function } -- confirm against struct matcher */
+ { "default", Gcompile, EGexecute }, /* same pair as "grep" */
+ { "grep", Gcompile, EGexecute },
+ { "egrep", Ecompile, EGexecute }, /* shares EGexecute with "grep" */
+ { "awk", Ecompile, EGexecute }, /* Ecompile tests for the name "awk" to select RE_SYNTAX_AWK */
+ { "fgrep", Fcompile, Fexecute },
+ { "perl", Pcompile, Pexecute }, /* Pcompile reports an error when built without libpcre */
+ { "", 0, 0 }, /* empty name and null functions; presumably the end-of-table marker */
+};
diff --git a/gnu/usr.bin/grep/system.h b/gnu/usr.bin/grep/system.h
index a6654b694c8f..99906d8aaf70 100644
--- a/gnu/usr.bin/grep/system.h
+++ b/gnu/usr.bin/grep/system.h
@@ -53,18 +53,16 @@ extern char *sys_errlist[];
#endif
/* Some operating systems treat text and binary files differently. */
-#if O_BINARY
+#ifdef __BEOS__
+# undef O_BINARY /* BeOS 5 has O_BINARY and O_TEXT, but they have no effect. */
+#endif
+#ifdef HAVE_DOS_FILE_CONTENTS
# include <io.h>
# ifdef HAVE_SETMODE
# define SET_BINARY(fd) setmode (fd, O_BINARY)
# else
# define SET_BINARY(fd) _setmode (fd, O_BINARY)
# endif
-#else
-# ifndef O_BINARY
-# define O_BINARY 0
-# define SET_BINARY(fd) (void)0
-# endif
#endif
#ifdef HAVE_DOS_FILE_NAMES
@@ -80,14 +78,15 @@ extern char *sys_errlist[];
# define FILESYSTEM_PREFIX_LEN(f) 0
#endif
-/* This assumes _WIN32, like DJGPP, has D_OK. Does it? In what header? */
-#ifdef D_OK
+int isdir PARAMS ((char const *));
+
+#ifdef HAVE_DIR_EACCES_BUG
# ifdef EISDIR
# define is_EISDIR(e, f) \
((e) == EISDIR \
- || ((e) == EACCES && access (f, D_OK) == 0 && ((e) = EISDIR, 1)))
+ || ((e) == EACCES && isdir (f) && ((e) = EISDIR, 1)))
# else
-# define is_EISDIR(e, f) ((e) == EACCES && access (f, D_OK) == 0)
+# define is_EISDIR(e, f) ((e) == EACCES && isdir (f))
# endif
#endif
diff --git a/gnu/usr.bin/grep/tests/backref.sh b/gnu/usr.bin/grep/tests/backref.sh
new file mode 100755
index 000000000000..83f6dcc9408a
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/backref.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Test that backrefs are local to regex.
+#
+# Each -e pattern must be compiled on its own: a back-reference may only
+# refer to a group of its own pattern, and a pattern that is invalid by
+# itself must be rejected.  Exits 1 if any check fails, 0 otherwise.
+
+: ${srcdir=.}
+
+failures=0
+
+# checking for a palindrome
+echo "radar" | ${GREP} -e '\(.\)\(.\).\2\1' > /dev/null 2>&1
+if test $? -ne 0 ; then
+ echo "backref: palindrome, test \#1 failed"
+ failures=1
+fi
+
+# hit hard with the `Bond' tests
+echo "civic" | ${GREP} -E -e '^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\9\8\7\6\5\4\3\2\1$' > /dev/null 2>&1
+if test $? -ne 0 ; then
+ echo "backref: Bond, test \#2 failed"
+ failures=1
+fi
+
+# back-references are local to a pattern: 'b\1' has no group, so grep
+# must fail with status 2
+echo "123" | ${GREP} -e 'a\(.\)' -e 'b\1' > /dev/null 2>&1
+if test $? -ne 2 ; then
+ echo "backref: Backref not local, test \#3 failed"
+ failures=1
+fi
+
+# each pattern is compiled separately: '[' alone is invalid, so grep
+# must fail with status 2
+echo "123" | ${GREP} -e '[' -e ']' > /dev/null 2>&1
+if test $? -ne 2 ; then
+ echo "backref: Compiled not local, test \#4 failed"
+ failures=1
+fi
+
+exit $failures
diff --git a/gnu/usr.bin/grep/tests/bre.awk b/gnu/usr.bin/grep/tests/bre.awk
index 9c9fef8275a2..16a58147a39b 100644
--- a/gnu/usr.bin/grep/tests/bre.awk
+++ b/gnu/usr.bin/grep/tests/bre.awk
@@ -8,7 +8,8 @@ BEGIN {
$0 ~ /^#/ { next; }
NF == 3 {
- printf ("status=`echo '%s' | { ${GREP} -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+# printf ("status=`echo '%s' | { ${GREP} -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+ printf ("status=`echo '%s' | { ${GREP} -e '%s' > /dev/null 2>&1; echo $? ; }`\n",$3, $2);
printf ("if test $status -ne %s ; then\n", $1);
printf ("\techo Spencer bre test \\#%d failed\n", ++n);
printf ("\tfailures=1\n");
diff --git a/gnu/usr.bin/grep/tests/bre.sh b/gnu/usr.bin/grep/tests/bre.sh
index eecdd3ee2273..e7ccf0a037cc 100755
--- a/gnu/usr.bin/grep/tests/bre.sh
+++ b/gnu/usr.bin/grep/tests/bre.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Regression test for GNU grep.
: ${srcdir=.}
diff --git a/gnu/usr.bin/grep/tests/bre.tests b/gnu/usr.bin/grep/tests/bre.tests
index 1ed159dad7a1..33d1689b85ae 100644
--- a/gnu/usr.bin/grep/tests/bre.tests
+++ b/gnu/usr.bin/grep/tests/bre.tests
@@ -17,7 +17,7 @@
2@\(\{1\}a\)@BADRPT@TO CORRECT
0@^*@*
2@^\{1\}@BADRPT@TO CORRECT
-0@\{@{
+0@{@{
1@a\(b*\)c\1d@abbcbd
1@a\(b*\)c\1d@abbcbbbd
1@^\(.\)\1@abc
diff --git a/gnu/usr.bin/grep/tests/empty.sh b/gnu/usr.bin/grep/tests/empty.sh
index 8c751966f5f1..d4225fecd02d 100755
--- a/gnu/usr.bin/grep/tests/empty.sh
+++ b/gnu/usr.bin/grep/tests/empty.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# test that the empty file means no pattern
# and an empty pattern means match all.
@@ -6,25 +6,28 @@
failures=0
-# should return 0 found a match
-echo "abcd" | ${GREP} -E -e '' > /dev/null 2>&1
-if test $? -ne 0 ; then
- echo "Status: Wrong status code, test \#1 failed"
- failures=1
-fi
+for options in '-E' '-E -w' '-F -x' '-G -w -x'; do
-# should return 1 found no match
-echo "abcd" | ${GREP} -E -f /dev/null > /dev/null 2>&1
-if test $? -ne 1 ; then
- echo "Status: Wrong status code, test \#2 failed"
- failures=1
-fi
+ # should return 0 found a match
+ echo "" | ${GREP} $options -e '' > /dev/null 2>&1
+ if test $? -ne 0 ; then
+ echo "Status: Wrong status code, test \#1 failed ($options)"
+ failures=1
+ fi
-# should return 0 found a match
-echo "abcd" | ${GREP} -E -f /dev/null -e "abc" > /dev/null 2>&1
-if test $? -ne 0 ; then
- echo "Status: Wrong status code, test \#3 failed"
- failures=1
-fi
+ # should return 1 found no match
+ echo "abcd" | ${GREP} $options -f /dev/null > /dev/null 2>&1
+ if test $? -ne 1 ; then
+ echo "Status: Wrong status code, test \#2 failed ($options)"
+ failures=1
+ fi
+
+ # should return 0 found a match
+ echo "abcd" | ${GREP} $options -f /dev/null -e "abcd" > /dev/null 2>&1
+ if test $? -ne 0 ; then
+ echo "Status: Wrong status code, test \#3 failed ($options)"
+ failures=1
+ fi
+done
exit $failures
diff --git a/gnu/usr.bin/grep/tests/ere.awk b/gnu/usr.bin/grep/tests/ere.awk
index 8f6a5b59fb18..234d219e8488 100644
--- a/gnu/usr.bin/grep/tests/ere.awk
+++ b/gnu/usr.bin/grep/tests/ere.awk
@@ -8,7 +8,8 @@ BEGIN {
$0 ~ /^#/ { next; }
NF == 3 {
- printf ("status=`echo '%s' | { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+# printf ("status=`echo '%s' | { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+ printf ("status=`echo '%s' | { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; }`\n",$3, $2);
printf ("if test $status -ne %s ; then\n", $1);
printf ("\techo Spencer ere test \\#%d failed\n", ++n);
printf ("\tfailures=1\n");
diff --git a/gnu/usr.bin/grep/tests/ere.sh b/gnu/usr.bin/grep/tests/ere.sh
index 5bf169528362..d57e2fae5ab1 100755
--- a/gnu/usr.bin/grep/tests/ere.sh
+++ b/gnu/usr.bin/grep/tests/ere.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Regression test for GNU grep.
: ${srcdir=.}
diff --git a/gnu/usr.bin/grep/tests/file.sh b/gnu/usr.bin/grep/tests/file.sh
new file mode 100755
index 000000000000..3db95bed6827
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/file.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Test for POSIX.2 options for grep
+#
+# grep -E -f pattern_file file
+# grep -F -f pattern_file file
+# grep -G -f pattern_file file
+#
+
+: ${srcdir=.}
+
+failures=0
+
+cat <<EOF >patfile
+radar
+MILES
+GNU
+EOF
+
+# match
+echo "miles" | ${GREP} -i -E -f patfile > /dev/null 2>&1
+if test $? -ne 0 ; then
+ echo "File_pattern: Wrong status code, test \#1 failed"
+ failures=1
+fi
+
+# match
+echo "GNU" | ${GREP} -G -f patfile > /dev/null 2>&1
+if test $? -ne 0 ; then
+ echo "File_pattern: Wrong status code, test \#2 failed"
+ failures=1
+fi
+
+# checking for no match
+echo "ridar" | ${GREP} -F -f patfile > /dev/null 2>&1
+if test $? -ne 1 ; then
+ echo "File_pattern: Wrong status code, test \#3 failed"
+ failures=1
+fi
+
+cat <<EOF >patfile
+
+EOF
+# pattern file holding one empty pattern: matches every line
+echo "abbcd" | ${GREP} -F -f patfile > /dev/null 2>&1
+if test $? -ne 0 ; then
+ echo "File_pattern: Wrong status code, test \#4 failed"
+ failures=1
+fi
+
+cp /dev/null patfile
+
+# empty pattern file (no patterns at all): no match
+echo "abbcd" | ${GREP} -F -f patfile > /dev/null 2>&1
+if test $? -ne 1 ; then
+ echo "File_pattern: Wrong status code, test \#5 failed"
+ failures=1
+fi
+
+exit $failures
diff --git a/gnu/usr.bin/grep/tests/formatbre.awk b/gnu/usr.bin/grep/tests/formatbre.awk
new file mode 100644
index 000000000000..68a9c6266030
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/formatbre.awk
@@ -0,0 +1,55 @@
+#
+# Basic Regular Expression
+
+# skip comments
+$0 ~ /^#/ { next; }
+
+# skip those options specific to regexec/regcomp
+$2 ~ /[msnr$#p^]/ { next; }
+
+# skip empty lines
+$0 ~ /^$/ { next; }
+
+# debug
+#{ printf ("<%s> <%s> <%s> <%s>\n", $1, $2, $3, $4); }
+
+# subreg expression
+NF >= 5 { next; }
+
+# errors
+NF == 3 {
+# gsub (/@/, ",");
+# it means empty lines
+ gsub (/\"\"/, "");
+# escapes
+ gsub (/\\\'/, "\\\'\'");
+# error in regex
+ if (index ($2, "C") != 0)
+ {
+ if (index ($2, "b") != 0)
+ printf ("2@%s@%s\n", $1, $3);
+ }
+# error: no match
+ else
+ {
+ if (index ($2, "b") != 0)
+ printf ("1@%s@%s\n", $1, $3);
+ }
+ next;
+}
+
+# ok
+NF == 4 {
+# skip those magic cookies; can't rely on echo to generate them
+ if (match($3, /[NSTZ]/))
+ next;
+
+# gsub (/@/, ",");
+# it means empty lines
+ gsub (/\"\"/, "");
+# escape escapes
+ gsub (/\\\'/, "\\\'\'");
+
+ if (index ($2, "b") != 0)
+ printf ("0@%s@%s\n", $1, $3);
+}
diff --git a/gnu/usr.bin/grep/tests/formatere.awk b/gnu/usr.bin/grep/tests/formatere.awk
new file mode 100644
index 000000000000..ba60439a1894
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/formatere.awk
@@ -0,0 +1,60 @@
+#
+# Extended Regular Expression
+
+# skip comments
+$0 ~ /^#/ { next; }
+
+# skip specifics to regcomp/regexec
+$2 ~ /[msnr$#p^]/ { next; }
+
+# jump empty lines
+$0 ~ /^$/ { next; }
+
+# subreg skip
+NF >= 5 { next; }
+
+# debug
+#{ printf ("<%s> <%s> <%s> <%s>\n", $1, $2, $3, $4); }
+
+# errors
+NF == 3 {
+# nuke any remaining '@'
+# gsub (/@/, ",");
+# it means empty lines
+ gsub (/\"\"/, "");
+# escapes
+ gsub (/\\\'/, "\\\'\'");
+# error in regex
+ if (index ($2, "C") != 0)
+ {
+ if (index ($2, "b") == 0)
+ printf ("2@%s@%s\n", $1, $3);
+ }
+# error not matching
+ else
+ {
+ if (index ($2, "b") == 0)
+ printf ("1@%s@%s\n", $1, $3);
+ }
+ next;
+}
+
+# ok
+NF == 4 {
+# skip those magic cookies; can't rely on echo to generate them
+ if (match($3, /[NSTZ]/))
+ next;
+
+# nuke any remaining '@'
+# gsub (/@/, ",");
+# it means empty lines
+ gsub (/\"\"/, "");
+# escape escapes
+ gsub (/\\\'/, "\\\'\'");
+
+ if (index ($2, "b") == 0)
+ {
+ printf ("0@%s@%s\n", $1, $3);
+ }
+ next;
+}
diff --git a/gnu/usr.bin/grep/tests/khadafy.sh b/gnu/usr.bin/grep/tests/khadafy.sh
index 141b3d8161c6..e3d2cde4c082 100755
--- a/gnu/usr.bin/grep/tests/khadafy.sh
+++ b/gnu/usr.bin/grep/tests/khadafy.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Regression test for GNU grep.
: ${srcdir=.}
diff --git a/gnu/usr.bin/grep/tests/options.sh b/gnu/usr.bin/grep/tests/options.sh
index 3415018561aa..0f1852971d82 100755
--- a/gnu/usr.bin/grep/tests/options.sh
+++ b/gnu/usr.bin/grep/tests/options.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Test for POSIX.2 options for grep
#
# grep [ -E| -F][ -c| -l| -q ][-insvx] -e pattern_list
diff --git a/gnu/usr.bin/grep/tests/spencer1.awk b/gnu/usr.bin/grep/tests/spencer1.awk
index 70c6118e5564..ba7aeb66af79 100644
--- a/gnu/usr.bin/grep/tests/spencer1.awk
+++ b/gnu/usr.bin/grep/tests/spencer1.awk
@@ -4,7 +4,8 @@ BEGIN {
}
$0 !~ /^#/ && NF = 3 {
- printf ("status=`echo '%s'| { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+# printf ("status=`echo '%s'| { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2);
+ printf ("status=`echo '%s'| { ${GREP} -E -e '%s' >/dev/null 2>&1 ; echo $?; }`\n",$3, $2);
printf ("if test $status -ne %s ; then\n", $1);
printf ("\techo Spencer test \\#%d failed\n", ++n);
printf ("\tfailures=1\n");
diff --git a/gnu/usr.bin/grep/tests/spencer1.sh b/gnu/usr.bin/grep/tests/spencer1.sh
index f09b3fa974b6..2391761ab642 100755
--- a/gnu/usr.bin/grep/tests/spencer1.sh
+++ b/gnu/usr.bin/grep/tests/spencer1.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Regression test for GNU grep.
: ${srcdir=.}
diff --git a/gnu/usr.bin/grep/tests/spencer2.sh b/gnu/usr.bin/grep/tests/spencer2.sh
new file mode 100755
index 000000000000..a26869c5121e
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/spencer2.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Regression test for GNU grep.
+
+: ${srcdir=.}
+
+failures=0
+
+# . . . and the following by Henry Spencer.
+
+${AWK-awk} -f $srcdir/scriptgen.awk $srcdir/spencer2.tests > tmp2.script
+
+sh tmp2.script && exit $failures
+exit 1
diff --git a/gnu/usr.bin/grep/tests/spencer2.tests b/gnu/usr.bin/grep/tests/spencer2.tests
new file mode 100644
index 000000000000..47b61914c6e5
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/spencer2.tests
@@ -0,0 +1,317 @@
+0@a@a
+0@abc@abc
+0@abc|de@abc
+0@a|b|c@abc
+0@a(b)c@abc
+1@a\(b\)c@abc
+2@a(@EPAREN
+2@a(@a(
+0@a\(@a(
+1@a\(@EPAREN
+1@a\(b@EPAREN
+2@a(b@EPAREN
+2@a(b@a(b
+2@a)@a)
+2@)@)
+2@a)@a)
+1@a\)@EPAREN
+1@\)@EPAREN
+0@a()b@ab
+1@a\(\)b@ab
+0@^abc$@abc
+1@a^b@a^b
+1@a^b@a^b
+1@a$b@a$b
+1@a$b@a$b
+0@^@abc
+0@$@abc
+1@^$@""
+1@$^@""
+1@\($\)\(^\)@""
+0@^^@""
+0@$$@""
+1@b$@abNc
+1@b$@abNc
+1@^b$@aNbNc
+1@^b$@aNbNc
+1@^$@aNNb
+1@^$@abc
+1@^$@abcN
+1@$^@aNNb
+1@\($\)\(^\)@aNNb
+0@^^@aNNb
+0@$$@aNNb
+0@^a@a
+0@a$@a
+0@^a@aNb
+1@^b@aNb
+0@a$@bNa
+1@b$@bNa
+0@a*(^b$)c*@b
+1@a*\(^b$\)c*@b
+0@|@EMPTY
+0@|@|
+0@*@BADRPT
+0@*@*
+0@+@BADRPT
+0@?@BADRPT
+1@""@EMPTY
+0@()@abc
+1@\(\)@abc
+0@a||b@EMPTY
+0@|ab@EMPTY
+0@ab|@EMPTY
+1@(|a)b@EMPTY
+1@(a|)b@EMPTY
+1@(*a)@BADRPT
+1@(+a)@BADRPT
+1@(?a)@BADRPT
+1@({1}a)@BADRPT
+1@\(\{1\}a\)@BADRPT
+1@(a|*b)@BADRPT
+1@(a|+b)@BADRPT
+1@(a|?b)@BADRPT
+1@(a|{1}b)@BADRPT
+0@^*@BADRPT
+0@^*@*
+0@^+@BADRPT
+0@^?@BADRPT
+0@^{1}@BADRPT
+1@^\{1\}@BADRPT
+0@a.c@abc
+0@a[bc]d@abd
+0@a\*c@a*c
+1@ac@abc
+1@a\bc@ac
+1@\{@BADRPT
+0@a\[b@a[b
+2@a[b@EBRACK
+0@a$@a
+1@a$@a$
+1@a\$@a
+0@a\$@a$
+1@a\$@a
+1@a\$@a\$
+2@a\(b\)\2c@ESUBREG
+2@a\(b\1\)c@ESUBREG
+2@a\(b*\)c\1d@abbcbd
+2@a\(b*\)c\1d@abbcbbbd
+2@^\(.\)\1@abc
+2@a\(\([bc]\)\2\)*d@abbccd
+2@a\(\([bc]\)\2\)*d@abbcbd
+2@a\(\(b\)*\2\)*d@abbbd
+2@\(a\)\1bcd@aabcd
+2@\(a\)\1bc*d@aabcd
+2@\(a\)\1bc*d@aabd
+2@\(a\)\1bc*d@aabcccd
+2@\(a\)\1bc*[ce]d@aabcccd
+2@^\(a\)\1b\(c\)*cd$@aabcccd
+0@ab*c@abc
+0@ab+c@abc
+0@ab?c@abc
+1@a\(*\)b@a*b
+1@a\(**\)b@ab
+1@a\(***\)b@BADRPT
+0@*a@*a
+0@**a@a
+1@***a@BADRPT
+2@{@{
+2@{abc@{abc
+2@{1@BADRPT
+0@{1}@BADRPT
+2@a{b@a{b
+0@a{1}b@ab
+1@a\{1\}b@ab
+0@a{1,}b@ab
+1@a\{1,\}b@ab
+0@a{1,2}b@aab
+1@a\{1,2\}b@aab
+2@a{1@EBRACE
+1@a\{1@EBRACE
+2@a{1a@EBRACE
+1@a\{1a@EBRACE
+2@a{1a}@BADBR
+1@a\{1a\}@BADBR
+0@a{,2}@a{,2}
+1@a\{,2\}@BADBR
+0@a{,}@a{,}
+1@a\{,\}@BADBR
+2@a{1,x}@BADBR
+1@a\{1,x\}@BADBR
+2@a{1,x@EBRACE
+1@a\{1,x@EBRACE
+1@a{300}@BADBR
+1@a\{300\}@BADBR
+1@a{1,0}@BADBR
+1@a\{1,0\}@BADBR
+0@ab{0,0}c@abcac
+1@ab\{0,0\}c@abcac
+0@ab{0,1}c@abcac
+1@ab\{0,1\}c@abcac
+0@ab{0,3}c@abbcac
+1@ab\{0,3\}c@abbcac
+0@ab{1,1}c@acabc
+1@ab\{1,1\}c@acabc
+0@ab{1,3}c@acabc
+1@ab\{1,3\}c@acabc
+0@ab{2,2}c@abcabbc
+1@ab\{2,2\}c@abcabbc
+0@ab{2,4}c@abcabbc
+1@ab\{2,4\}c@abcabbc
+0@a**@BADRPT
+1@a++@BADRPT
+0@a??@BADRPT
+0@a*+@BADRPT
+0@a*?@BADRPT
+0@a+*@BADRPT
+0@a+?@BADRPT
+0@a?*@BADRPT
+0@a?+@BADRPT
+1@a{1}{1}@BADRPT
+0@a*{1}@BADRPT
+1@a+{1}@BADRPT
+0@a?{1}@BADRPT
+0@a{1}*@BADRPT
+1@a{1}+@BADRPT
+0@a{1}?@BADRPT
+2@a*{b}@a{b}
+1@a\{1\}\{1\}@BADRPT
+1@a*\{1\}@BADRPT
+1@a\{1\}*@BADRPT
+0@a[b]c@abc
+0@a[ab]c@abc
+0@a[^ab]c@adc
+0@a[]b]c@a]c
+0@a[[b]c@a[c
+0@a[-b]c@a-c
+0@a[^]b]c@adc
+0@a[^-b]c@adc
+0@a[b-]c@a-c
+2@a[b@EBRACK
+2@a[]@EBRACK
+0@a[1-3]c@a2c
+1@a[3-1]c@ERANGE
+1@a[1-3-5]c@ERANGE
+1@a[[.-.]--]c@a-c
+2@a[1-@ERANGE
+2@a[[.@EBRACK
+2@a[[.x@EBRACK
+2@a[[.x.@EBRACK
+1@a[[.x.]@EBRACK
+1@a[[.x.]]@ax
+1@a[[.x,.]]@ECOLLATE
+1@a[[.one.]]b@a1b
+1@a[[.notdef.]]b@ECOLLATE
+1@a[[.].]]b@a]b
+0@a[[:alpha:]]c@abc
+2@a[[:notdef:]]c@ECTYPE
+2@a[[:@EBRACK
+2@a[[:alpha@EBRACK
+2@a[[:alpha:]@EBRACK
+2@a[[:alpha,:]@ECTYPE
+2@a[[:]:]]b@ECTYPE
+2@a[[:-:]]b@ECTYPE
+2@a[[:alph:]]@ECTYPE
+2@a[[:alphabet:]]@ECTYPE
+1@[[:blank:]]+@aSSTb
+1@[[:cntrl:]]+@aNTb
+0@[[:digit:]]+@a019b
+0@[[:graph:]]+@Sa%bS
+0@[[:lower:]]+@AabC
+0@[[:print:]]+@NaSbN
+0@[[:punct:]]+@S%-&T
+1@[[:space:]]+@aSNTb
+0@[[:upper:]]+@aBCd
+0@[[:xdigit:]]+@p0f3Cq
+1@a[[=b=]]c@abc
+2@a[[=@EBRACK
+2@a[[=b@EBRACK
+2@a[[=b=@EBRACK
+1@a[[=b=]@EBRACK
+1@a[[=b,=]]@ECOLLATE
+1@a[[=one=]]b@a1b
+0@a(((b)))c@abc
+0@a(b|(c))d@abd
+0@a(b*|c)d@abbd
+0@a[ab]{20}@aaaaabaaaabaaaabaaaab
+0@a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]@aaaaabaaaabaaaabaaaab
+0@a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)@aaaaabaaaabaaaabaaaabweeknights
+0@12345678901234567890123456789@a12345678901234567890123456789b
+0@123456789012345678901234567890@a123456789012345678901234567890b
+0@1234567890123456789012345678901@a1234567890123456789012345678901b
+0@12345678901234567890123456789012@a12345678901234567890123456789012b
+0@123456789012345678901234567890123@a123456789012345678901234567890123b
+0@1234567890123456789012345678901234567890123456789012345678901234567890@a1234567890123456789012345678901234567890123456789012345678901234567890b
+0@[ab][cd][ef][gh][ij][kl][mn]@xacegikmoq
+0@[ab][cd][ef][gh][ij][kl][mn][op]@xacegikmoq
+0@[ab][cd][ef][gh][ij][kl][mn][op][qr]@xacegikmoqy
+0@[ab][cd][ef][gh][ij][kl][mn][op][q]@xacegikmoqy
+0@abc@xabcy
+2@a\(b\)?c\1d@acd
+1@aBc@Abc
+1@a[Bc]*d@abBCcd
+1@0[[:upper:]]1@0a1
+1@0[[:lower:]]1@0A1
+1@a[^b]c@abc
+0@a[^b]c@aBc
+0@a[^b]c@adc
+0@[a]b[c]@abc
+0@[a]b[a]@aba
+0@[abc]b[abc]@abc
+0@[abc]b[abd]@abd
+0@a(b?c)+d@accd
+0@(wee|week)(knights|night)@weeknights
+0@(we|wee|week|frob)(knights|night|day)@weeknights
+0@a[bc]d@xyzaaabcaababdacd
+0@a[ab]c@aaabc
+0@abc@abc
+0@a*@b
+0@/\*.*\*/@/*x*/
+0@/\*.*\*/@/*x*/y/*z*/
+0@/\*([^*]|\*[^/])*\*/@/*x*/
+0@/\*([^*]|\*[^/])*\*/@/*x*/y/*z*/
+0@/\*([^*]|\*[^/])*\*/@/*x**/y/*z*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x*/y/*z*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x**/y/*z*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x****/y/*z*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x**x*/y/*z*/
+0@/\*([^*]|\*+[^*/])*\*+/@/*x***x/y/*z*/
+0@[abc]@a(b)c
+0@[abc]@a(d)c
+0@[abc]@a(bc)d
+0@[abc]@a(dc)d
+0@.@a()c
+0@b.*c@b(bc)c
+0@b.*@b(bc)c
+0@.*c@b(bc)c
+0@abc@abc
+0@abc@xabcy
+1@abc@xyz
+0@a*b@aba*b
+0@a*b@ab
+1@""@EMPTY
+1@aZb@a
+1@aZb@a
+0@aZb@(aZb)
+0@aZ*b@(ab)
+0@a.b@(aZb)
+0@a.*@(aZb)c
+2@[[:<:]]a@a
+2@[[:<:]]a@ba
+2@[[:<:]]a@-a
+2@a[[:>:]]@a
+2@a[[:>:]]@ab
+2@a[[:>:]]@a-
+2@[[:<:]]a.c[[:>:]]@axcd-dayc-dazce-abc
+2@[[:<:]]a.c[[:>:]]@axcd-dayc-dazce-abc-q
+2@[[:<:]]a.c[[:>:]]@axc-dayc-dazce-abc
+2@[[:<:]]b.c[[:>:]]@a_bxc-byc_d-bzc-q
+2@[[:<:]].x..[[:>:]]@y_xa_-_xb_y-_xc_-axdc
+2@[[:<:]]a_b[[:>:]]@x_a_b
+0@(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])@A1
+0@abcdefghijklmnop@abcdefghijklmnop
+0@abcdefghijklmnopqrstuv@abcdefghijklmnopqrstuv
+0@CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a@CC11
+0@a?b@ab
+1@-\{0,1\}[0-9]*$@-5
diff --git a/gnu/usr.bin/grep/tests/status.sh b/gnu/usr.bin/grep/tests/status.sh
index 50fc9126b557..94f59294a905 100755
--- a/gnu/usr.bin/grep/tests/status.sh
+++ b/gnu/usr.bin/grep/tests/status.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
# Test for status code for GNU grep.
# status code
# 0 match found
@@ -24,15 +24,29 @@ if test $? -ne 1 ; then
fi
# the filename MMMMMMMM.MMM should not exist hopefully
-# should return 2 file not found
-if test -b MMMMMMMM.MMM; then
+if test -r MMMMMMMM.MMM; then
echo "Please remove MMMMMMMM.MMM to run check"
else
- ${GREP} -E -e 'abc' MMMMMMMM.MMM> /dev/null 2>&1
+ # should return 2 file not found
+ ${GREP} -E -e 'abc' MMMMMMMM.MMM > /dev/null 2>&1
if test $? -ne 2 ; then
echo "Status: Wrong status code, test \#3 failed"
failures=1
fi
+
+ # should return 2 file not found
+ ${GREP} -E -s -e 'abc' MMMMMMMM.MMM > /dev/null 2>&1
+ if test $? -ne 2 ; then
+ echo "Status: Wrong status code, test \#4 failed"
+ failures=1
+ fi
+
+ # should return 0 found a match
+ echo "abcd" | ${GREP} -E -q -s 'abc' MMMMMMMM.MMM - > /dev/null 2>&1
+ if test $? -ne 0 ; then
+ echo "Status: Wrong status code, test \#5 failed"
+ failures=1
+ fi
fi
exit $failures
diff --git a/gnu/usr.bin/grep/tests/tests b/gnu/usr.bin/grep/tests/tests
new file mode 100644
index 000000000000..8ed938d2d76a
--- /dev/null
+++ b/gnu/usr.bin/grep/tests/tests
@@ -0,0 +1,475 @@
+# regular expression test set
+# Lines are at least three fields, separated by one or more tabs. "" stands
+# for an empty field. First field is an RE. Second field is flags. If
+# C flag given, regcomp() is expected to fail, and the third field is the
+# error name (minus the leading REG_).
+#
+# Otherwise it is expected to succeed, and the third field is the string to
+# try matching it against. If there is no fourth field, the match is
+# expected to fail. If there is a fourth field, it is the substring that
+# the RE is expected to match. If there is a fifth field, it is a comma-
+# separated list of what the subexpressions should match, with - indicating
+# no match for that one. In both the fourth and fifth fields, a (sub)field
+# starting with @ indicates that the (sub)expression is expected to match
+# a null string followed by the stuff after the @; this provides a way to
+# test where null strings match. The character `N' in REs and strings
+# is newline, `S' is space, `T' is tab, `Z' is NUL.
+#
+# The full list of flags:
+# - placeholder, does nothing
+# b RE is a BRE, not an ERE
+# & try it as both an ERE and a BRE
+# C regcomp() error expected, third field is error name
+# i REG_ICASE
+# m ("mundane") REG_NOSPEC
+# s REG_NOSUB (not really testable)
+# n REG_NEWLINE
+# ^ REG_NOTBOL
+# $ REG_NOTEOL
+# # REG_STARTEND (see below)
+# p REG_PEND
+#
+# For REG_STARTEND, the start/end offsets are those of the substring
+# enclosed in ().
+
+# basics
+a & a a
+abc & abc abc
+abc|de - abc abc
+a|b|c - abc a
+
+# parentheses and perversions thereof
+a(b)c - abc abc
+a\(b\)c b abc abc
+a( C EPAREN
+a( b a( a(
+a\( - a( a(
+a\( bC EPAREN
+a\(b bC EPAREN
+a(b C EPAREN
+a(b b a(b a(b
+# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
+a) - a) a)
+) - ) )
+# end gagging (in a just world, those *should* give EPAREN)
+a) b a) a)
+a\) bC EPAREN
+\) bC EPAREN
+a()b - ab ab
+a\(\)b b ab ab
+
+# anchoring and REG_NEWLINE
+^abc$ & abc abc
+a^b - a^b
+a^b b a^b a^b
+a$b - a$b
+a$b b a$b a$b
+^ & abc @abc
+$ & abc @
+^$ & "" @
+$^ - "" @
+\($\)\(^\) b "" @
+# stop retching, those are legitimate (although disgusting)
+^^ - "" @
+$$ - "" @
+##b$ & abNc
+##b$ &n abNc b
+##^b$ & aNbNc
+##^b$ &n aNbNc b
+##^$ &n aNNb @Nb
+^$ n abc
+##^$ n abcN @
+##$^ n aNNb @Nb
+##\($\)\(^\) bn aNNb @Nb
+##^^ n^ aNNb @Nb
+##$$ n aNNb @NN
+^a ^ a
+a$ $ a
+##^a ^n aNb
+##^b ^n aNb b
+##a$ $n bNa
+##b$ $n bNa b
+a*(^b$)c* - b b
+a*\(^b$\)c* b b b
+
+# certain syntax errors and non-errors
+| C EMPTY
+| b | |
+* C BADRPT
+* b * *
++ C BADRPT
+? C BADRPT
+"" &C EMPTY
+() - abc @abc
+\(\) b abc @abc
+a||b C EMPTY
+|ab C EMPTY
+ab| C EMPTY
+(|a)b C EMPTY
+(a|)b C EMPTY
+(*a) C BADRPT
+(+a) C BADRPT
+(?a) C BADRPT
+({1}a) C BADRPT
+\(\{1\}a\) bC BADRPT
+(a|*b) C BADRPT
+(a|+b) C BADRPT
+(a|?b) C BADRPT
+(a|{1}b) C BADRPT
+^* C BADRPT
+^* b * *
+^+ C BADRPT
+^? C BADRPT
+^{1} C BADRPT
+^\{1\} bC BADRPT
+
+# metacharacters, backslashes
+a.c & abc abc
+a[bc]d & abd abd
+a\*c & a*c a*c
+a\\b & a\b a\b
+a\\\*b & a\*b a\*b
+a\bc & abc abc
+a\ &C EESCAPE
+a\\bc & a\bc a\bc
+\{ bC BADRPT
+a\[b & a[b a[b
+a[b &C EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$ & a a
+a$ & a$
+a\$ & a
+a\$ & a$ a$
+a\\$ & a
+a\\$ & a$
+a\\$ & a\$
+a\\$ & a\ a\
+
+# back references, ugh
+##a\(b\)\2c bC ESUBREG
+##a\(b\1\)c bC ESUBREG
+a\(b*\)c\1d b abbcbbd abbcbbd bb
+a\(b*\)c\1d b abbcbd
+a\(b*\)c\1d b abbcbbbd
+^\(.\)\1 b abc
+a\([bc]\)\1d b abcdabbd abbd b
+a\(\([bc]\)\2\)*d b abbccd abbccd
+a\(\([bc]\)\2\)*d b abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d b abbbd abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1 b ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd b aabcd aabcd
+\(a\)\1bc*d b aabcd aabcd
+\(a\)\1bc*d b aabd aabd
+\(a\)\1bc*d b aabcccd aabcccd
+\(a\)\1bc*[ce]d b aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
+
+# ordinary repetitions
+ab*c & abc abc
+ab+c - abc abc
+ab?c - abc abc
+a\(*\)b b a*b a*b
+a\(**\)b b ab ab
+a\(***\)b bC BADRPT
+*a b *a *a
+**a b a a
+***a bC BADRPT
+
+# the dreaded bounded repetitions
+{ & { {
+{abc & {abc {abc
+{1 C BADRPT
+{1} C BADRPT
+a{b & a{b a{b
+a{1}b - ab ab
+a\{1\}b b ab ab
+a{1,}b - ab ab
+a\{1,\}b b ab ab
+a{1,2}b - aab aab
+a\{1,2\}b b aab aab
+a{1 C EBRACE
+a\{1 bC EBRACE
+a{1a C EBRACE
+a\{1a bC EBRACE
+a{1a} C BADBR
+a\{1a\} bC BADBR
+a{,2} - a{,2} a{,2}
+a\{,2\} bC BADBR
+a{,} - a{,} a{,}
+a\{,\} bC BADBR
+a{1,x} C BADBR
+a\{1,x\} bC BADBR
+a{1,x C EBRACE
+a\{1,x bC EBRACE
+a{300} C BADBR
+a\{300\} bC BADBR
+a{1,0} C BADBR
+a\{1,0\} bC BADBR
+ab{0,0}c - abcac ac
+ab\{0,0\}c b abcac ac
+ab{0,1}c - abcac abc
+ab\{0,1\}c b abcac abc
+ab{0,3}c - abbcac abbc
+ab\{0,3\}c b abbcac abbc
+ab{1,1}c - acabc abc
+ab\{1,1\}c b acabc abc
+ab{1,3}c - acabc abc
+ab\{1,3\}c b acabc abc
+ab{2,2}c - abcabbc abbc
+ab\{2,2\}c b abcabbc abbc
+ab{2,4}c - abcabbc abbc
+ab\{2,4\}c b abcabbc abbc
+((a{1,10}){1,10}){1,10} - a a a,a
+
+# multiple repetitions
+a** &C BADRPT
+a++ C BADRPT
+a?? C BADRPT
+a*+ C BADRPT
+a*? C BADRPT
+a+* C BADRPT
+a+? C BADRPT
+a?* C BADRPT
+a?+ C BADRPT
+a{1}{1} C BADRPT
+a*{1} C BADRPT
+a+{1} C BADRPT
+a?{1} C BADRPT
+a{1}* C BADRPT
+a{1}+ C BADRPT
+a{1}? C BADRPT
+a*{b} - a{b} a{b}
+a\{1\}\{1\} bC BADRPT
+a*\{1\} bC BADRPT
+a\{1\}* bC BADRPT
+
+# brackets, and numerous perversions thereof
+a[b]c & abc abc
+a[ab]c & abc abc
+a[^ab]c & adc adc
+a[]b]c & a]c a]c
+a[[b]c & a[c a[c
+a[-b]c & a-c a-c
+a[^]b]c & adc adc
+a[^-b]c & adc adc
+a[b-]c & a-c a-c
+a[b &C EBRACK
+a[] &C EBRACK
+a[1-3]c & a2c a2c
+a[3-1]c &C ERANGE
+a[1-3-5]c &C ERANGE
+a[[.-.]--]c & a-c a-c
+a[1- &C ERANGE
+a[[. &C EBRACK
+a[[.x &C EBRACK
+a[[.x. &C EBRACK
+a[[.x.] &C EBRACK
+a[[.x.]] & ax ax
+a[[.x,.]] &C ECOLLATE
+a[[.one.]]b & a1b a1b
+a[[.notdef.]]b &C ECOLLATE
+a[[.].]]b & a]b a]b
+a[[:alpha:]]c & abc abc
+a[[:notdef:]]c &C ECTYPE
+a[[: &C EBRACK
+a[[:alpha &C EBRACK
+a[[:alpha:] &C EBRACK
+a[[:alpha,:] &C ECTYPE
+a[[:]:]]b &C ECTYPE
+a[[:-:]]b &C ECTYPE
+a[[:alph:]] &C ECTYPE
+a[[:alphabet:]] &C ECTYPE
+##[[:alnum:]]+ - -%@a0X- a0X
+##[[:alpha:]]+ - -%@aX0- aX
+[[:blank:]]+ - aSSTb SST
+##[[:cntrl:]]+ - aNTb NT
+[[:digit:]]+ - a019b 019
+##[[:graph:]]+ - Sa%bS a%b
+[[:lower:]]+ - AabC ab
+##[[:print:]]+ - NaSbN aSb
+##[[:punct:]]+ - S%-&T %-&
+[[:space:]]+ - aSNTb SNT
+[[:upper:]]+ - aBCd BC
+[[:xdigit:]]+ - p0f3Cq 0f3C
+a[[=b=]]c & abc abc
+a[[= &C EBRACK
+a[[=b &C EBRACK
+a[[=b= &C EBRACK
+a[[=b=] &C EBRACK
+a[[=b,=]] &C ECOLLATE
+a[[=one=]]b & a1b a1b
+
+# complexities
+a(((b)))c - abc abc
+a(b|(c))d - abd abd
+a(b*|c)d - abbd abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
+1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
+12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
+123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
+[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
+
+# subtleties of matching
+abc & xabcy abc
+a\(b\)?c\1d b acd
+aBc i Abc Abc
+a[Bc]*d i abBCcd abBCcd
+0[[:upper:]]1 &i 0a1 0a1
+0[[:lower:]]1 &i 0A1 0A1
+a[^b]c &i abc
+a[^b]c &i aBc
+a[^b]c &i adc adc
+[a]b[c] - abc abc
+[a]b[a] - aba aba
+[abc]b[abc] - abc abc
+[abc]b[abd] - abd abd
+a(b?c)+d - accd accd
+(wee|week)(knights|night) - weeknights weeknights
+(we|wee|week|frob)(knights|night|day) - weeknights weeknights
+a[bc]d - xyzaaabcaababdacd abd
+a[ab]c - aaabc abc
+abc s abc abc
+a* & b @b
+
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/ - /*x*/ /*x*/
+# but...
+/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
+/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
+/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
+
+# subexpressions
+a(b)(c)d - abcd abcd b,c
+a(((b)))c - abc abc b,b,b
+a(b|(c))d - abd abd b,-
+a(b*|c|e)d - abbd abbd bb
+a(b*|c|e)d - acd acd c
+a(b*|c|e)d - ad ad @d
+a(b?)c - abc abc b
+a(b?)c - ac ac @c
+a(b+)c - abc abc b
+a(b+)c - abbbc abbbc bbb
+a(b*)c - ac ac @c
+(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
+# the regression tester only asks for 9 subexpressions
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
+a([bc]?)c - abc abc b
+a([bc]?)c - ac ac @c
+a([bc]+)c - abc abc b
+a([bc]+)c - abcc abcc bc
+a([bc]+)bc - abcbc abcbc bc
+a(bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abbb abbb bb
+a(bbb+|bb+|b)bb - abbb abbb b
+(.*).* - abcdef abcdef abcdef
+##(a*)* - bc @b @b
+
+# do we get the right subexpression when it is used more than once?
+a(b|c)*d - ad ad -
+a(b|c)*d - abcd abcd c
+a(b|c)+d - abd abd b
+a(b|c)+d - abcd abcd c
+a(b|c?)+d - ad ad @d
+a(b|c?)+d - abcd abcd @d
+a(b|c){0,0}d - ad ad -
+a(b|c){0,1}d - ad ad -
+a(b|c){0,1}d - abd abd b
+a(b|c){0,2}d - ad ad -
+a(b|c){0,2}d - abcd abcd c
+a(b|c){0,}d - ad ad -
+a(b|c){0,}d - abcd abcd c
+a(b|c){1,1}d - abd abd b
+a(b|c){1,1}d - acd acd c
+a(b|c){1,2}d - abd abd b
+a(b|c){1,2}d - abcd abcd c
+a(b|c){1,}d - abd abd b
+a(b|c){1,}d - abcd abcd c
+a(b|c){2,2}d - acbd acbd b
+a(b|c){2,2}d - abcd abcd c
+a(b|c){2,4}d - abcd abcd c
+a(b|c){2,4}d - abcbd abcbd b
+a(b|c){2,4}d - abcbcd abcbcd c
+a(b|c){2,}d - abcd abcd c
+a(b|c){2,}d - abcbd abcbd b
+##a(b+|((c)*))+d - abd abd @d,@d,-
+##a(b+|((c)*))+d - abcd abcd @d,@d,-
+
+# check out the STARTEND option
+[abc] &# a(b)c b
+[abc] &# a(d)c
+[abc] &# a(bc)d b
+[abc] &# a(dc)d c
+. &# a()c
+b.*c &# b(bc)c bc
+b.* &# b(bc)c bc
+.*c &# b(bc)c bc
+
+# plain strings, with the NOSPEC flag
+abc m abc abc
+abc m xabcy abc
+abc m xyz
+a*b m aba*b a*b
+a*b m ab
+"" mC EMPTY
+
+# cases involving NULs
+aZb & a a
+aZb &p a
+#aZb &p# (aZb) aZb
+aZ*b &p# (ab) ab
+#a.b &# (aZb) aZb
+#a.* &# (aZb)c aZb
+
+# word boundaries (ick)
+[[:<:]]a & a a
+[[:<:]]a & ba
+[[:<:]]a & -a a
+a[[:>:]] & a a
+a[[:>:]] & ab
+a[[:>:]] & a- a
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
+[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
+[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
+[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
+[[:<:]]a_b[[:>:]] & x_a_b
+
+# past problems, and suspected problems
+(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
+abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
+abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
+(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
+CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
+Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
+a?b - ab ab
+-\{0,1\}[0-9]*$ b -5 -5
diff --git a/gnu/usr.bin/grep/tests/warning.sh b/gnu/usr.bin/grep/tests/warning.sh
index d2dc6d5a5ebb..bfca533a9675 100755
--- a/gnu/usr.bin/grep/tests/warning.sh
+++ b/gnu/usr.bin/grep/tests/warning.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
#
# Tell them not to be alarmed.
diff --git a/gnu/usr.bin/grep/xalloc.h b/gnu/usr.bin/grep/xalloc.h
new file mode 100644
index 000000000000..098a6c2e0730
--- /dev/null
+++ b/gnu/usr.bin/grep/xalloc.h
@@ -0,0 +1,87 @@
+/* xalloc.h -- malloc with out-of-memory checking
+ Copyright (C) 1990-1998, 1999, 2000 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef XALLOC_H_
+# define XALLOC_H_
+
+# ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+# ifndef __attribute__
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__
+# define __attribute__(x)
+# endif
+# endif
+
+# ifndef ATTRIBUTE_NORETURN
+# define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
+# endif
+
+/* Exit value when the requested amount of memory is not available.
+ It is initialized to EXIT_FAILURE, but the caller may set it to
+ some other value. */
+extern int xalloc_exit_failure;
+
+/* If this pointer is non-zero, run the specified function upon each
+ allocation failure. It is initialized to zero. */
+extern void (*xalloc_fail_func) PARAMS ((void));
+
+/* If XALLOC_FAIL_FUNC is undefined or a function that returns, this
+ message is output. It is translated via gettext.
+ Its value is "memory exhausted". */
+extern char const xalloc_msg_memory_exhausted[];
+
+/* This function is always triggered when memory is exhausted. It is
+ in charge of honoring the three previous items. This is the
+ function to call when one wants the program to die because of a
+ memory allocation failure. */
+extern void xalloc_die PARAMS ((void)) ATTRIBUTE_NORETURN;
+
+void *xmalloc PARAMS ((size_t n));
+void *xcalloc PARAMS ((size_t n, size_t s));
+void *xrealloc PARAMS ((void *p, size_t n));
+char *xstrdup PARAMS ((const char *str));
+
+# define XMALLOC(Type, N_items) ((Type *) xmalloc (sizeof (Type) * (N_items)))
+# define XCALLOC(Type, N_items) ((Type *) xcalloc (sizeof (Type), (N_items)))
+# define XREALLOC(Ptr, Type, N_items) \
+ ((Type *) xrealloc ((void *) (Ptr), sizeof (Type) * (N_items)))
+
+/* Declare and alloc memory for VAR of type TYPE. */
+# define NEW(Type, Var) Type *(Var) = XMALLOC (Type, 1)
+
+/* Free VAR only if non NULL. */
+# define XFREE(Var) \
+ do { \
+ if (Var) \
+ free (Var); \
+ } while (0)
+
+/* Return a pointer to a malloc'ed copy of the array SRC of NUM elements. */
+# define CCLONE(Src, Num) \
+ (memcpy (xmalloc (sizeof (*(Src)) * (Num)), (Src), sizeof (*(Src)) * (Num)))
+
+/* Return a malloc'ed copy of SRC. */
+# define CLONE(Src) CCLONE (Src, 1)
+
+
+#endif /* !XALLOC_H_ */
diff --git a/gnu/usr.bin/grep/xmalloc.c b/gnu/usr.bin/grep/xmalloc.c
new file mode 100644
index 000000000000..2f103d604917
--- /dev/null
+++ b/gnu/usr.bin/grep/xmalloc.c
@@ -0,0 +1,116 @@
+/* xmalloc.c -- malloc with out of memory checking
+ Copyright (C) 1990-1999, 2000 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+
+#if STDC_HEADERS
+# include <stdlib.h>
+#else
+void *calloc ();
+void *malloc ();
+void *realloc ();
+void free ();
+#endif
+
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(Text) gettext (Text)
+#else
+# define textdomain(Domain)
+# define _(Text) Text
+#endif
+#define N_(Text) Text
+
+#include "error.h"
+#include "xalloc.h"
+
+#ifndef EXIT_FAILURE
+# define EXIT_FAILURE 1
+#endif
+
+#ifndef HAVE_DONE_WORKING_MALLOC_CHECK
+"you must run the autoconf test for a properly working malloc -- see malloc.m4"
+#endif
+
+#ifndef HAVE_DONE_WORKING_REALLOC_CHECK
+"you must run the autoconf test for a properly working realloc --see realloc.m4"
+#endif
+
+/* Exit value when the requested amount of memory is not available.
+ The caller may set it to some other value. */
+int xalloc_exit_failure = EXIT_FAILURE;
+
+/* If non NULL, call this function when memory is exhausted. */
+void (*xalloc_fail_func) PARAMS ((void)) = 0;
+
+/* If XALLOC_FAIL_FUNC is NULL, or does return, display this message
+ before exiting when memory is exhausted. Goes through gettext. */
+char const xalloc_msg_memory_exhausted[] = N_("memory exhausted");
+
+void
+xalloc_die (void)
+{
+ if (xalloc_fail_func)
+ (*xalloc_fail_func) ();
+ error (xalloc_exit_failure, 0, "%s", _(xalloc_msg_memory_exhausted));
+ /* The `noreturn' attribute cannot be given to error, since it may return
+ if its first argument is 0. To help compilers understand that
+ xalloc_die does terminate, call exit. */
+ exit (EXIT_FAILURE);
+}
+
+/* Allocate N bytes with malloc; call xalloc_die on a null result. */
+
+void *
+xmalloc (size_t n)
+{
+ void *p;
+
+ p = malloc (n);
+ if (p == 0)
+ xalloc_die ();
+ return p;
+}
+
+/* Resize the block P to N bytes with realloc; call xalloc_die if
+ realloc returns a null pointer. */
+
+void *
+xrealloc (void *p, size_t n)
+{
+ p = realloc (p, n);
+ if (p == 0)
+ xalloc_die ();
+ return p;
+}
+
+/* calloc N elements of S bytes each; call xalloc_die on a null result. */
+
+void *
+xcalloc (size_t n, size_t s)
+{
+ void *p;
+
+ p = calloc (n, s);
+ if (p == 0)
+ xalloc_die ();
+ return p;
+}
diff --git a/gnu/usr.bin/grep/xstrtol.c b/gnu/usr.bin/grep/xstrtol.c
new file mode 100644
index 000000000000..07023d9ffdb7
--- /dev/null
+++ b/gnu/usr.bin/grep/xstrtol.c
@@ -0,0 +1,282 @@
+/* A more useful interface to strtol.
+ Copyright (C) 1995, 1996, 1998-2000 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Jim Meyering. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* This file is written against three macros: __strtol is the underlying
+   conversion function, __strtol_t is the integer type it returns, and
+   __xstrtol is the name of the function defined below.  A file that
+   includes this one (xstrtoumax.c does so) may define all three first;
+   otherwise they default to building xstrtol on top of strtol.  */
+#ifndef __strtol
+# define __strtol strtol
+# define __strtol_t long int
+# define __xstrtol xstrtol
+#endif
+
+/* Some pre-ANSI implementations (e.g. SunOS 4)
+ need stderr defined if assertion checking is enabled. */
+#include <stdio.h>
+
+#if STDC_HEADERS
+# include <stdlib.h>
+#endif
+
+#if HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+# ifndef strchr
+# define strchr index
+# endif
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#ifndef CHAR_BIT
+# define CHAR_BIT 8
+#endif
+
+/* Nonzero if the arithmetic type T is signed, i.e. if (T) -1 is not
+   greater than (T) 0.
+   The extra casts work around common compiler bugs. */
+#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
+/* Smallest value of type T: (T) 0 for an unsigned type, otherwise
+   all-ones shifted left so that only the top bit remains set.
+   NOTE(review): the signed branch presumably assumes a two's complement
+   representation without padding bits -- confirm for new targets.
+   The outer cast is needed to work around a bug in Cray C 5.0.3.0.
+ It is necessary at least when t == time_t. */
+#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
+ ? ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1) : (t) 0))
+/* Largest value of type T, computed as all-ones minus TYPE_MINIMUM (T).  */
+#define TYPE_MAXIMUM(t) (~ (t) 0 - TYPE_MINIMUM (t))
+
+/* IN_CTYPE_DOMAIN (C) is nonzero if C may be handed to the <ctype.h>
+   classification functions.  It is the constant 1 when STDC_HEADERS is
+   defined, or when neither an isascii macro nor HAVE_ISASCII is
+   available; otherwise it defers to isascii.  */
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+# define IN_CTYPE_DOMAIN(c) 1
+#else
+# define IN_CTYPE_DOMAIN(c) isascii(c)
+#endif
+
+/* isspace guarded by IN_CTYPE_DOMAIN.  C is expanded twice when
+   IN_CTYPE_DOMAIN is isascii, so it must not have side effects.  */
+#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
+
+#include "xstrtol.h"
+
+/* Declare the conversion functions, without prototypes, unless the
+   name is already a macro.  Presumably this is for systems whose
+   headers do not declare them -- NOTE(review): confirm before
+   removing.  */
+#ifndef strtol
+long int strtol ();
+#endif
+
+#ifndef strtoul
+unsigned long int strtoul ();
+#endif
+
+/* This declaration needs uintmax_t to be visible; xstrtol.h includes
+   <inttypes.h> for it only when HAVE_INTTYPES_H is set.  */
+#ifndef strtoumax
+uintmax_t strtoumax ();
+#endif
+
+/* Multiply *X by SCALE_FACTOR in place.  Return 0 on success.  Return 1
+   and leave *X unchanged if the product is not representable in
+   __strtol_t.  SCALE_FACTOR must be positive.
+
+   The range is tested before multiplying, because overflowing a signed
+   __strtol_t is undefined behavior and a check made on the product
+   afterwards cannot be relied on.  */
+
+static int
+bkm_scale (__strtol_t *x, int scale_factor)
+{
+  /* Negative values only exist for signed types; the && keeps the
+     lower-bound test out of the way for unsigned ones.  */
+  if (TYPE_SIGNED (__strtol_t)
+      && *x < TYPE_MINIMUM (__strtol_t) / scale_factor)
+    return 1;
+  if (TYPE_MAXIMUM (__strtol_t) / scale_factor < *x)
+    return 1;
+
+  *x *= scale_factor;
+  return 0;
+}
+
+/* Multiply *X by BASE, POWER times, using bkm_scale for each step.
+   Return 1 as soon as a step reports overflow, 0 if every step
+   succeeds.  */
+
+static int
+bkm_scale_by_power (__strtol_t *x, int base, int power)
+{
+  int remaining;
+
+  for (remaining = power; remaining != 0; remaining--)
+    {
+      if (bkm_scale (x, base) != 0)
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Convert the number at the start of S with __strtol and store the
+   result in *VAL.
+
+   STRTOL_BASE is handed to __strtol unchanged and must lie in [0, 36].
+   If PTR is non-null, *PTR is set to the first character that was not
+   consumed; otherwise a local pointer is used and discarded.
+
+   VALID_SUFFIXES controls what may follow the number:
+     - NULL: whatever follows is not examined.
+     - otherwise, a character directly after the number must occur in
+       VALID_SUFFIXES and be one of the multipliers below:
+         b = 512, B = 1024, c = 1, w = 2,
+         k, M (or m), G, T, P, E, Z, Y = powers 1 .. 8 of 1024.
+       If VALID_SUFFIXES contains '0', a second suffix character is
+       recognized: 'B' keeps powers of 1024, 'D' selects powers of
+       1000 instead.
+     Only the suffix character(s) are consumed; anything after them is
+     left for the caller to inspect through *PTR.
+
+   Return values:
+     LONGINT_OK                   success, *VAL holds the scaled value.
+     LONGINT_INVALID              no number could be converted, or a
+                                  minus sign was given for an unsigned
+                                  __strtol_t.  *VAL is not modified.
+     LONGINT_INVALID_SUFFIX_CHAR  the character after the number is not
+                                  an accepted suffix.  *VAL holds the
+                                  unscaled number.
+     LONGINT_OVERFLOW             the number, or the number times its
+                                  multiplier, does not fit.  *VAL is
+                                  not modified.  */
+
+strtol_error
+__xstrtol (const char *s, char **ptr, int strtol_base,
+           __strtol_t *val, const char *valid_suffixes)
+{
+  char *t_ptr;
+  char **p;
+  __strtol_t tmp;
+
+  assert (0 <= strtol_base && strtol_base <= 36);
+
+  p = (ptr ? ptr : &t_ptr);
+
+  /* The unsigned conversion functions accept a leading '-' and negate
+     the result; reject it here instead.  */
+  if (! TYPE_SIGNED (__strtol_t))
+    {
+      const char *q = s;
+      while (ISSPACE ((unsigned char) *q))
+        ++q;
+      if (*q == '-')
+        return LONGINT_INVALID;
+    }
+
+  errno = 0;
+  tmp = __strtol (s, p, strtol_base);
+
+  /* Test for "nothing converted" before looking at errno: some
+     implementations set errno (EINVAL) in that case, and it must not
+     be reported as an overflow.  */
+  if (*p == s)
+    return LONGINT_INVALID;
+  /* Only ERANGE means the value was out of range.  */
+  if (errno != 0)
+    return errno == ERANGE ? LONGINT_OVERFLOW : LONGINT_INVALID;
+
+  /* Let valid_suffixes == NULL mean `allow any suffix'.  */
+  /* FIXME: update all callers except the ones that allow suffixes
+     after the number, changing last parameter NULL to `""'.  */
+  if (!valid_suffixes)
+    {
+      *val = tmp;
+      return LONGINT_OK;
+    }
+
+  if (**p != '\0')
+    {
+      int base = 1024;
+      int suffixes = 1;
+      int overflow;
+
+      if (!strchr (valid_suffixes, **p))
+        {
+          *val = tmp;
+          return LONGINT_INVALID_SUFFIX_CHAR;
+        }
+
+      if (strchr (valid_suffixes, '0'))
+        {
+          /* The ``valid suffix'' '0' is a special flag meaning that
+             an optional second suffix is allowed, which can change
+             the base, e.g. "100MD" for 100 megabytes decimal.
+             Reading p[0][1] is safe: **p is not the terminator, so
+             the next byte is at worst the terminator itself.  */
+
+          switch (p[0][1])
+            {
+            case 'B':
+              suffixes++;
+              break;
+
+            case 'D':
+              base = 1000;
+              suffixes++;
+              break;
+            }
+        }
+
+      switch (**p)
+        {
+        case 'b':
+          overflow = bkm_scale (&tmp, 512);
+          break;
+
+        case 'B':
+          overflow = bkm_scale (&tmp, 1024);
+          break;
+
+        case 'c':
+          overflow = 0;
+          break;
+
+        case 'E': /* Exa */
+          overflow = bkm_scale_by_power (&tmp, base, 6);
+          break;
+
+        case 'G': /* Giga */
+          overflow = bkm_scale_by_power (&tmp, base, 3);
+          break;
+
+        case 'k': /* kilo */
+          overflow = bkm_scale_by_power (&tmp, base, 1);
+          break;
+
+        case 'M': /* Mega */
+        case 'm': /* 'm' is undocumented; for backward compatibility only */
+          overflow = bkm_scale_by_power (&tmp, base, 2);
+          break;
+
+        case 'P': /* Peta */
+          overflow = bkm_scale_by_power (&tmp, base, 5);
+          break;
+
+        case 'T': /* Tera */
+          overflow = bkm_scale_by_power (&tmp, base, 4);
+          break;
+
+        case 'w':
+          overflow = bkm_scale (&tmp, 2);
+          break;
+
+        case 'Y': /* Yotta */
+          overflow = bkm_scale_by_power (&tmp, base, 8);
+          break;
+
+        case 'Z': /* Zetta */
+          overflow = bkm_scale_by_power (&tmp, base, 7);
+          break;
+
+        default:
+          /* Listed in VALID_SUFFIXES but not a known multiplier.  */
+          *val = tmp;
+          return LONGINT_INVALID_SUFFIX_CHAR;
+        }
+
+      if (overflow)
+        return LONGINT_OVERFLOW;
+
+      /* Step past the suffix character(s) only on success.  */
+      (*p) += suffixes;
+    }
+
+  *val = tmp;
+  return LONGINT_OK;
+}
+
+#ifdef TESTING_XSTRTO
+
+# include <stdio.h>
+# include "error.h"
+
+char *program_name;
+
+/* Test driver, built only when TESTING_XSTRTO is defined.  Converts
+   each command-line argument with __xstrtol, accepting the suffixes
+   "bckmw", and prints the argument, the converted value and the
+   unconsumed remainder.  A conversion failure is reported through
+   STRTOL_FATAL_ERROR.  */
+
+int
+main (int argc, char** argv)
+{
+  strtol_error s_err;
+  int i;
+
+  program_name = argv[0];
+  for (i=1; i<argc; i++)
+    {
+      char *p;
+      __strtol_t val;
+
+      s_err = __xstrtol (argv[i], &p, 0, &val, "bckmw");
+      if (s_err == LONGINT_OK)
+        {
+          /* __strtol_t is long int or uintmax_t depending on how this
+             file is built; convert explicitly so that the argument
+             matches the %lu conversion.  */
+          printf ("%s->%lu (%s)\n", argv[i], (unsigned long int) val, p);
+        }
+      else
+        {
+          STRTOL_FATAL_ERROR (argv[i], "arg", s_err);
+        }
+    }
+  exit (0);
+}
+
+#endif /* TESTING_XSTRTO */
diff --git a/gnu/usr.bin/grep/xstrtol.h b/gnu/usr.bin/grep/xstrtol.h
new file mode 100644
index 000000000000..7a9a024457f5
--- /dev/null
+++ b/gnu/usr.bin/grep/xstrtol.h
@@ -0,0 +1,64 @@
+#ifndef XSTRTOL_H_
+# define XSTRTOL_H_ 1
+
+# if HAVE_INTTYPES_H
+# include <inttypes.h> /* for uintmax_t */
+# endif
+
+# ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+/* Result codes of the xstrto* functions.  As reported by the
+   _STRTOL_ERROR macro in this header:
+     LONGINT_OK                   conversion succeeded;
+     LONGINT_INVALID              the argument is invalid;
+     LONGINT_INVALID_SUFFIX_CHAR  an invalid character follows the number;
+     LONGINT_OVERFLOW             the value is too large.
+   The enum is skipped when _STRTOL_ERROR is already defined as a macro
+   at this point.  */
+# ifndef _STRTOL_ERROR
+enum strtol_error
+ {
+ LONGINT_OK, LONGINT_INVALID, LONGINT_INVALID_SUFFIX_CHAR, LONGINT_OVERFLOW
+ };
+typedef enum strtol_error strtol_error;
+# endif
+
+/* Declare one member of the xstrto* family: a function called NAME
+   returning strtol_error and storing its result through a pointer to
+   TYPE.  PARAMS drops the parameter list for compilers without
+   prototype support.  The macro expansion ends in a semicolon, so the
+   uses below do not add one.  */
+# define _DECLARE_XSTRTOL(name, type) \
+ strtol_error \
+ name PARAMS ((const char *s, char **ptr, int base, \
+ type *val, const char *valid_suffixes));
+_DECLARE_XSTRTOL (xstrtol, long int)
+_DECLARE_XSTRTOL (xstrtoul, unsigned long int)
+_DECLARE_XSTRTOL (xstrtoumax, uintmax_t)
+
+/* Report the xstrto* failure ERR for the argument text STR through
+   error, passing EXIT_CODE as error's status argument.
+   ARGUMENT_TYPE_STRING describes what STR was supposed to be and is
+   inserted into the message.  ERR must not be LONGINT_OK: that case
+   calls abort.  The expansion uses abort and error, so the including
+   file has to provide declarations for both.  */
+# define _STRTOL_ERROR(Exit_code, Str, Argument_type_string, Err) \
+ do \
+ { \
+ switch ((Err)) \
+ { \
+ case LONGINT_OK: \
+ abort (); \
+ \
+ case LONGINT_INVALID: \
+ error ((Exit_code), 0, "invalid %s `%s'", \
+ (Argument_type_string), (Str)); \
+ break; \
+ \
+ case LONGINT_INVALID_SUFFIX_CHAR: \
+ error ((Exit_code), 0, "invalid character following %s `%s'", \
+ (Argument_type_string), (Str)); \
+ break; \
+ \
+ case LONGINT_OVERFLOW: \
+ error ((Exit_code), 0, "%s `%s' too large", \
+ (Argument_type_string), (Str)); \
+ break; \
+ } \
+ } \
+ while (0)
+
+/* Report ERR with a status argument of 2.  NOTE(review): error is
+   expected to exit with that status rather than return -- confirm
+   against error.c.  */
+# define STRTOL_FATAL_ERROR(Str, Argument_type_string, Err) \
+ _STRTOL_ERROR (2, Str, Argument_type_string, Err)
+
+/* Report ERR with a status argument of 0.  NOTE(review): error is
+   expected to print the message and return in that case -- confirm
+   against error.c.  */
+# define STRTOL_FAIL_WARN(Str, Argument_type_string, Err) \
+ _STRTOL_ERROR (0, Str, Argument_type_string, Err)
+
+#endif /* not XSTRTOL_H_ */
diff --git a/gnu/usr.bin/grep/xstrtoumax.c b/gnu/usr.bin/grep/xstrtoumax.c
new file mode 100644
index 000000000000..04d7cf98ae4c
--- /dev/null
+++ b/gnu/usr.bin/grep/xstrtoumax.c
@@ -0,0 +1,31 @@
+/* xstrtoumax.c -- A more useful interface to strtoumax.
+ Copyright 1999 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Paul Eggert. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+
+/* Build xstrtoumax from the generic code in xstrtol.c: that file is
+   written in terms of __strtol (the conversion function), __strtol_t
+   (its result type) and __xstrtol (the name of the function it
+   defines), and only supplies defaults when __strtol is undefined.  */
+#define __strtol strtoumax
+#define __strtol_t uintmax_t
+#define __xstrtol xstrtoumax
+#include "xstrtol.c"