summary | refs | log | tree | commit | diff | stats
path: root/abs/core-testing/grep
diff options
context:
space:
mode:
Diffstat (limited to 'abs/core-testing/grep')
-rw-r--r-- abs/core-testing/grep/01-fgrep.patch | 145
-rw-r--r-- abs/core-testing/grep/02-bracket.patch | 11
-rw-r--r-- abs/core-testing/grep/03-i18n.patch | 303
-rw-r--r-- abs/core-testing/grep/04-oi.patch | 48
-rw-r--r-- abs/core-testing/grep/05-manpage.patch | 19
-rw-r--r-- abs/core-testing/grep/06-color.patch | 10
-rw-r--r-- abs/core-testing/grep/07-icolor.patch | 36
-rw-r--r-- abs/core-testing/grep/08-skip.patch | 42
-rw-r--r-- abs/core-testing/grep/09-egf-speedup.patch | 823
-rw-r--r-- abs/core-testing/grep/10-dfa-optional.patch | 67
-rw-r--r-- abs/core-testing/grep/11-tests.patch | 138
-rw-r--r-- abs/core-testing/grep/12-w.patch | 121
-rw-r--r-- abs/core-testing/grep/13-P.patch | 14
-rw-r--r-- abs/core-testing/grep/14-mem-exhausted.patch | 15
-rw-r--r-- abs/core-testing/grep/15-empty-pattern.patch | 36
-rw-r--r-- abs/core-testing/grep/64-egf-speedup.patch | 791
-rw-r--r-- abs/core-testing/grep/PKGBUILD | 50
17 files changed, 2669 insertions, 0 deletions
diff --git a/abs/core-testing/grep/01-fgrep.patch b/abs/core-testing/grep/01-fgrep.patch
new file mode 100644
index 0000000..c7f8f96
--- /dev/null
+++ b/abs/core-testing/grep/01-fgrep.patch
@@ -0,0 +1,145 @@
+--- grep-2.5.1/src/search.c.fgrep 2001-04-19 04:42:14.000000000 +0100
++++ grep-2.5.1/src/search.c 2004-02-26 13:09:32.000000000 +0000
+@@ -360,13 +360,7 @@
+ /* Find a possible match using the KWset matcher. */
+ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ if (offset == (size_t) -1)
+- {
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- free(mb_properties);
+-#endif
+- return (size_t)-1;
+- }
++ goto failure;
+ beg += offset;
+ /* Narrow down to the line containing the candidate, and
+ run it through DFA. */
+@@ -379,7 +373,7 @@
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+ if (kwsm.index < kwset_exact_matches)
+- goto success;
++ goto success_in_beg_and_end;
+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+ }
+@@ -398,7 +392,7 @@
+ }
+ /* Successful, no backreferences encountered! */
+ if (!backref)
+- goto success;
++ goto success_in_beg_and_end;
+ }
+ else
+ end = beg + size;
+@@ -413,14 +407,11 @@
+ end - beg - 1, &(patterns[i].regs))))
+ {
+ len = patterns[i].regs.end[0] - start;
+- if (exact)
+- {
+- *match_size = len;
+- return start;
+- }
++ if (exact && !match_words)
++ goto success_in_start_and_len;
+ if ((!match_lines && !match_words)
+ || (match_lines && len == end - beg - 1))
+- goto success;
++ goto success_in_beg_and_end;
+ /* If -w, check if the match aligns with word boundaries.
+ We do this iteratively because:
+ (a) the line may contain more than one occurence of the
+@@ -434,7 +425,7 @@
+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+ && (len == end - beg - 1
+ || !WCHAR ((unsigned char) beg[start + len])))
+- goto success;
++ goto success_in_beg_and_end;
+ if (len > 0)
+ {
+ /* Try a shorter length anchored at the same place. */
+@@ -461,19 +452,26 @@
+ }
+ } /* for Regex patterns. */
+ } /* for (beg = end ..) */
++
++ failure:
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties)
+ free (mb_properties);
+ #endif /* MBS_SUPPORT */
+ return (size_t) -1;
+
+- success:
++ success_in_beg_and_end:
++ len = end - beg;
++ start = beg - buf;
++ /* FALLTHROUGH */
++
++ success_in_start_and_len:
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties)
+ free (mb_properties);
+ #endif /* MBS_SUPPORT */
+- *match_size = end - beg;
+- return beg - buf;
++ *match_size = len;
++ return start;
+ }
+
+ static void
+@@ -516,28 +514,15 @@
+ {
+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+ if (offset == (size_t) -1)
+- {
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- free(mb_properties);
+-#endif /* MBS_SUPPORT */
+- return offset;
+- }
++ goto failure;
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+ continue; /* It is a part of multibyte character. */
+ #endif /* MBS_SUPPORT */
+ beg += offset;
+ len = kwsmatch.size[0];
+- if (exact)
+- {
+- *match_size = len;
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- free (mb_properties);
+-#endif /* MBS_SUPPORT */
+- return beg - buf;
+- }
++ if (exact && !match_words)
++ goto success_in_beg_and_len;
+ if (match_lines)
+ {
+ if (beg > buf && beg[-1] != eol)
+@@ -551,6 +536,7 @@
+ goto success;
+ }
+
++ failure:
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
+@@ -583,7 +569,11 @@
+ end++;
+ while (buf < beg && beg[-1] != eol)
+ --beg;
+- *match_size = end - beg;
++ len = end - beg;
++ /* FALLTHROUGH */
++
++ success_in_beg_and_len:
++ *match_size = len;
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+ free (mb_properties);
diff --git a/abs/core-testing/grep/02-bracket.patch b/abs/core-testing/grep/02-bracket.patch
new file mode 100644
index 0000000..f99571c
--- /dev/null
+++ b/abs/core-testing/grep/02-bracket.patch
@@ -0,0 +1,11 @@
+--- grep-2.5.1/src/dfa.c.bracket 2003-10-30 16:21:14.000000000 +0000
++++ grep-2.5.1/src/dfa.c 2003-10-30 16:22:38.000000000 +0000
+@@ -586,7 +586,7 @@
+ work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
+ }
+ }
+- wc = -1;
++ wc1 = wc = -1;
+ }
+ else
+ /* We treat '[' as a normal character here. */
diff --git a/abs/core-testing/grep/03-i18n.patch b/abs/core-testing/grep/03-i18n.patch
new file mode 100644
index 0000000..8dc3dfe
--- /dev/null
+++ b/abs/core-testing/grep/03-i18n.patch
@@ -0,0 +1,303 @@
+--- grep-2.5.1/src/dfa.c 2004-02-26 13:09:54.000000000 +0000
++++ grep-2.5.1/src/dfa.c 2004-05-18 16:43:31.189200479 +0100
+@@ -414,7 +414,7 @@
+
+ /* This function fetch a wide character, and update cur_mb_len,
+ used only if the current locale is a multibyte environment. */
+-static wchar_t
++static wint_t
+ fetch_wc (char const *eoferr)
+ {
+ wchar_t wc;
+@@ -423,7 +423,7 @@
+ if (eoferr != 0)
+ dfaerror (eoferr);
+ else
+- return -1;
++ return WEOF;
+ }
+
+ cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
+@@ -459,7 +459,7 @@
+ static void
+ parse_bracket_exp_mb ()
+ {
+- wchar_t wc, wc1, wc2;
++ wint_t wc, wc1, wc2;
+
+ /* Work area to build a mb_char_classes. */
+ struct mb_char_classes *work_mbc;
+@@ -496,7 +496,7 @@
+ work_mbc->invert = 0;
+ do
+ {
+- wc1 = -1; /* mark wc1 is not initialized". */
++ wc1 = WEOF; /* mark wc1 is not initialized". */
+
+ /* Note that if we're looking at some other [:...:] construct,
+ we just treat it as a bunch of ordinary characters. We can do
+@@ -586,7 +586,7 @@
+ work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
+ }
+ }
+- wc1 = wc = -1;
++ wc1 = wc = WEOF;
+ }
+ else
+ /* We treat '[' as a normal character here. */
+@@ -600,7 +600,7 @@
+ wc = fetch_wc(("Unbalanced ["));
+ }
+
+- if (wc1 == -1)
++ if (wc1 == WEOF)
+ wc1 = fetch_wc(_("Unbalanced ["));
+
+ if (wc1 == L'-')
+@@ -630,17 +630,17 @@
+ }
+ REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+ range_sts_al, work_mbc->nranges + 1);
+- work_mbc->range_sts[work_mbc->nranges] = wc;
++ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
+ REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+ range_ends_al, work_mbc->nranges + 1);
+- work_mbc->range_ends[work_mbc->nranges++] = wc2;
++ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
+ }
+- else if (wc != -1)
++ else if (wc != WEOF)
+ /* build normal characters. */
+ {
+ REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
+ work_mbc->nchars + 1);
+- work_mbc->chars[work_mbc->nchars++] = wc;
++ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
+ }
+ }
+ while ((wc = wc1) != L']');
+@@ -2552,6 +2552,8 @@
+ }
+
+ /* match with a character? */
++ if (case_fold)
++ wc = towlower (wc);
+ for (i = 0; i<work_mbc->nchars; i++)
+ {
+ if (wc == work_mbc->chars[i])
+--- grep-2.5.1/src/grep.c.i18n 2002-03-26 15:54:12.000000000 +0000
++++ grep-2.5.1/src/grep.c 2004-02-26 13:09:54.000000000 +0000
+@@ -30,6 +30,12 @@
+ # include <sys/time.h>
+ # include <sys/resource.h>
+ #endif
++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
++/* We can handle multibyte string. */
++# define MBS_SUPPORT
++# include <wchar.h>
++# include <wctype.h>
++#endif
+ #include <stdio.h>
+ #include "system.h"
+ #include "getopt.h"
+@@ -1697,6 +1703,37 @@
+ if (!install_matcher (matcher) && !install_matcher ("default"))
+ abort ();
+
++#ifdef MBS_SUPPORT
++ if (MB_CUR_MAX != 1 && match_icase)
++ {
++ wchar_t wc;
++ mbstate_t cur_state, prev_state;
++ int i, len = strlen(keys);
++
++ memset(&cur_state, 0, sizeof(mbstate_t));
++ for (i = 0; i <= len ;)
++ {
++ size_t mbclen;
++ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
++ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
++ {
++ /* An invalid sequence, or a truncated multibyte character.
++ We treat it as a singlebyte character. */
++ mbclen = 1;
++ }
++ else
++ {
++ if (iswupper((wint_t)wc))
++ {
++ wc = towlower((wint_t)wc);
++ wcrtomb(keys + i, wc, &cur_state);
++ }
++ }
++ i += mbclen;
++ }
++ }
++#endif /* MBS_SUPPORT */
++
+ (*compile)(keys, keycc);
+
+ if ((argc - optind > 1 && !no_filenames) || with_filenames)
+--- grep-2.5.1/src/search.c.i18n 2004-02-26 13:09:54.000000000 +0000
++++ grep-2.5.1/src/search.c 2004-02-26 13:17:12.000000000 +0000
+@@ -149,15 +149,16 @@
+ static char*
+ check_multibyte_string(char const *buf, size_t size)
+ {
+- char *mb_properties = malloc(size);
++ char *mb_properties = xmalloc(size);
+ mbstate_t cur_state;
++ wchar_t wc;
+ int i;
+ memset(&cur_state, 0, sizeof(mbstate_t));
+ memset(mb_properties, 0, sizeof(char)*size);
+ for (i = 0; i < size ;)
+ {
+ size_t mbclen;
+- mbclen = mbrlen(buf + i, size - i, &cur_state);
++ mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
+
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ {
+@@ -165,6 +166,14 @@
+ We treat it as a singlebyte character. */
+ mbclen = 1;
+ }
++ else if (match_icase)
++ {
++ if (iswupper((wint_t)wc))
++ {
++ wc = towlower((wint_t)wc);
++ wcrtomb(buf + i, wc, &cur_state);
++ }
++ }
+ mb_properties[i] = mbclen;
+ i += mbclen;
+ }
+@@ -233,7 +242,7 @@
+ static char const line_end[] = "\\)$";
+ static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+ static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+ size_t i;
+ strcpy (n, match_lines ? line_beg : word_beg);
+ i = strlen (n);
+@@ -316,7 +325,7 @@
+ static char const line_end[] = ")$";
+ static char const word_beg[] = "(^|[^[:alnum:]_])(";
+ static char const word_end[] = ")([^[:alnum:]_]|$)";
+- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
++ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
+ size_t i;
+ strcpy (n, match_lines ? line_beg : word_beg);
+ i = strlen(n);
+@@ -339,14 +348,20 @@
+ char eol = eolbyte;
+ int backref, start, len;
+ struct kwsmatch kwsm;
+- size_t i;
++ size_t i, ret_val;
+ #ifdef MBS_SUPPORT
+ char *mb_properties = NULL;
+-#endif /* MBS_SUPPORT */
+-
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && kwset)
+- mb_properties = check_multibyte_string(buf, size);
++ if (MB_CUR_MAX > 1)
++ {
++ if (match_icase)
++ {
++ char *case_buf = xmalloc(size);
++ memcpy(case_buf, buf, size);
++ buf = case_buf;
++ }
++ if (kwset)
++ mb_properties = check_multibyte_string(buf, size);
++ }
+ #endif /* MBS_SUPPORT */
+
+ buflim = buf + size;
+@@ -455,8 +470,13 @@
+
+ failure:
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties)
+- free (mb_properties);
++ if (MB_CUR_MAX > 1)
++ {
++ if (mb_properties)
++ free (mb_properties);
++ if (match_icase)
++ free ((char *) buf);
++ }
+ #endif /* MBS_SUPPORT */
+ return (size_t) -1;
+
+@@ -467,8 +487,13 @@
+
+ success_in_start_and_len:
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties)
+- free (mb_properties);
++ if (MB_CUR_MAX > 1)
++ {
++ if (mb_properties)
++ free (mb_properties);
++ if (match_icase)
++ free ((char *) buf);
++ }
+ #endif /* MBS_SUPPORT */
+ *match_size = len;
+ return start;
+@@ -504,10 +529,19 @@
+ register size_t len;
+ char eol = eolbyte;
+ struct kwsmatch kwsmatch;
++ size_t ret_val;
+ #ifdef MBS_SUPPORT
+- char *mb_properties;
++ char *mb_properties = NULL;
+ if (MB_CUR_MAX > 1)
+- mb_properties = check_multibyte_string (buf, size);
++ {
++ if (match_icase)
++ {
++ char *case_buf = xmalloc(size);
++ memcpy(case_buf, buf, size);
++ buf = case_buf;
++ }
++ mb_properties = check_multibyte_string(buf, size);
++ }
+ #endif /* MBS_SUPPORT */
+
+ for (beg = buf; beg <= buf + size; ++beg)
+@@ -565,7 +599,12 @@
+ failure:
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+- free (mb_properties);
++ {
++ if (match_icase)
++ free((char *) buf);
++ if (mb_properties)
++ free(mb_properties);
++ }
+ #endif /* MBS_SUPPORT */
+ return -1;
+
+@@ -581,7 +620,12 @@
+ *match_size = len;
+ #ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1)
+- free (mb_properties);
++ {
++ if (mb_properties)
++ free (mb_properties);
++ if (match_icase)
++ free ((char *) buf);
++ }
+ #endif /* MBS_SUPPORT */
+ return beg - buf;
+ }
diff --git a/abs/core-testing/grep/04-oi.patch b/abs/core-testing/grep/04-oi.patch
new file mode 100644
index 0000000..eb997ad
--- /dev/null
+++ b/abs/core-testing/grep/04-oi.patch
@@ -0,0 +1,48 @@
+--- grep-2.5.1/lib/posix/regex.h.oi 2004-01-05 12:09:12.984391131 +0000
++++ grep-2.5.1/lib/posix/regex.h 2004-01-05 12:09:24.717990622 +0000
+@@ -109,6 +109,10 @@
+ If not set, \{, \}, {, and } are literals. */
+ #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
++/* If this bit is set, then ignore case when matching.
++ If not set, then case is significant. */
++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
++
+ /* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+ #define RE_LIMITED_OPS (RE_INTERVALS << 1)
+--- grep-2.5.1/src/search.c.oi 2004-01-05 12:07:00.550199415 +0000
++++ grep-2.5.1/src/search.c 2004-01-05 12:07:00.566197505 +0000
+@@ -31,7 +31,7 @@
+
+ #include "system.h"
+ #include "grep.h"
+-#include "regex.h"
++#include <regex.h>
+ #include "dfa.h"
+ #include "kwset.h"
+ #include "error.h"
+@@ -190,7 +190,7 @@
+ size_t total = size;
+ char const *motif = pattern;
+
+- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
++ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+
+ /* For GNU regex compiler we have to pass the patterns separately to detect
+@@ -268,12 +268,12 @@
+
+ if (strcmp (matcher, "awk") == 0)
+ {
+- re_set_syntax (RE_SYNTAX_AWK);
++ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
+ dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
+ }
+ else
+ {
+- re_set_syntax (RE_SYNTAX_POSIX_EGREP);
++ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
+ dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
+ }
+
diff --git a/abs/core-testing/grep/05-manpage.patch b/abs/core-testing/grep/05-manpage.patch
new file mode 100644
index 0000000..284f0c4
--- /dev/null
+++ b/abs/core-testing/grep/05-manpage.patch
@@ -0,0 +1,19 @@
+--- grep-2.5.1/doc/grep.1.manpage 2002-01-22 13:20:04.000000000 +0000
++++ grep-2.5.1/doc/grep.1 2003-10-08 09:37:32.000000000 +0100
+@@ -191,6 +191,7 @@
+ .I PATTERN
+ as a list of fixed strings, separated by newlines,
+ any of which is to be matched.
++.TP
+ .BR \-P ", " \-\^\-perl-regexp
+ Interpret
+ .I PATTERN
+@@ -302,7 +303,7 @@
+ This is especially useful for tools like zgrep, e.g.
+ .B "gzip -cd foo.gz |grep --label=foo something"
+ .TP
+-.BR \-\^\-line-buffering
++.BR \-\^\-line-buffered
+ Use line buffering, it can be a performance penality.
+ .TP
+ .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
diff --git a/abs/core-testing/grep/06-color.patch b/abs/core-testing/grep/06-color.patch
new file mode 100644
index 0000000..f54c258
--- /dev/null
+++ b/abs/core-testing/grep/06-color.patch
@@ -0,0 +1,10 @@
+--- grep-2.5.1/src/grep.c.color 2004-11-16 16:46:22.845505847 +0000
++++ grep-2.5.1/src/grep.c 2004-11-16 16:46:27.961530537 +0000
+@@ -607,6 +607,7 @@
+ fputs ("\33[00m", stdout);
+ beg = b + match_size;
+ }
++ fputs ("\33[K", stdout);
+ }
+ fwrite (beg, 1, lim - beg, stdout);
+ if (ferror (stdout))
diff --git a/abs/core-testing/grep/07-icolor.patch b/abs/core-testing/grep/07-icolor.patch
new file mode 100644
index 0000000..14b2617
--- /dev/null
+++ b/abs/core-testing/grep/07-icolor.patch
@@ -0,0 +1,36 @@
+--- grep-2.5.1a/src/grep.c.icolor 2005-01-07 12:05:20.877785250 +0000
++++ grep-2.5.1a/src/grep.c 2005-01-07 12:05:44.690194388 +0000
+@@ -564,33 +564,6 @@
+ {
+ size_t match_size;
+ size_t match_offset;
+- if(match_icase)
+- {
+- /* Yuck, this is tricky */
+- char *buf = (char*) xmalloc (lim - beg);
+- char *ibeg = buf;
+- char *ilim = ibeg + (lim - beg);
+- int i;
+- for (i = 0; i < lim - beg; i++)
+- ibeg[i] = tolower (beg[i]);
+- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
+- != (size_t) -1)
+- {
+- char const *b = beg + match_offset;
+- if (b == lim)
+- break;
+- fwrite (beg, sizeof (char), match_offset, stdout);
+- printf ("\33[%sm", grep_color);
+- fwrite (b, sizeof (char), match_size, stdout);
+- fputs ("\33[00m", stdout);
+- beg = b + match_size;
+- ibeg = ibeg + match_offset + match_size;
+- }
+- fwrite (beg, 1, lim - beg, stdout);
+- free (buf);
+- lastout = lim;
+- return;
+- }
+ while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+ != (size_t) -1)
+ {
diff --git a/abs/core-testing/grep/08-skip.patch b/abs/core-testing/grep/08-skip.patch
new file mode 100644
index 0000000..fb6645f
--- /dev/null
+++ b/abs/core-testing/grep/08-skip.patch
@@ -0,0 +1,42 @@
+--- grep-2.5.1a/src/grep.c.skip 2006-05-31 09:26:58.000000000 +0100
++++ grep-2.5.1a/src/grep.c 2006-05-31 09:28:24.000000000 +0100
+@@ -261,19 +261,6 @@
+ bufbeg[-1] = eolbyte;
+ bufdesc = fd;
+
+- if (fstat (fd, &stats->stat) != 0)
+- {
+- error (0, errno, "fstat");
+- return 0;
+- }
+- if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
+- return 0;
+-#ifndef DJGPP
+- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode)))
+-#else
+- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode)))
+-#endif
+- return 0;
+ if (S_ISREG (stats->stat.st_mode))
+ {
+ if (file)
+@@ -875,6 +862,19 @@
+ }
+ else
+ {
++ if (stat (file, &stats->stat) != 0)
++ {
++ suppressible_error (file, errno);
++ return 1;
++ }
++ if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
++ return 1;
++#ifndef DJGPP
++ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode)))
++#else
++ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode)))
++#endif
++ return 1;
+ while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
+ continue;
+
diff --git a/abs/core-testing/grep/09-egf-speedup.patch b/abs/core-testing/grep/09-egf-speedup.patch
new file mode 100644
index 0000000..08e92c7
--- /dev/null
+++ b/abs/core-testing/grep/09-egf-speedup.patch
@@ -0,0 +1,823 @@
+--- grep-2.5.1/src/search.c 2004-12-31 15:28:35.720391036 +0000
++++ grep-2.5.1a/src/search.c 2005-01-07 14:53:10.308860193 +0000
+@@ -18,9 +18,13 @@
+
+ /* Written August 1992 by Mike Haertel. */
+
++#ifndef _GNU_SOURCE
++# define _GNU_SOURCE 1
++#endif
+ #ifdef HAVE_CONFIG_H
+ # include <config.h>
+ #endif
++#include <assert.h>
+ #include <sys/types.h>
+ #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+ /* We can handle multibyte string. */
+@@ -39,6 +43,9 @@
+ #ifdef HAVE_LIBPCRE
+ # include <pcre.h>
+ #endif
++#ifdef HAVE_LANGINFO_CODESET
++# include <langinfo.h>
++#endif
+
+ #define NCHAR (UCHAR_MAX + 1)
+
+@@ -70,9 +77,10 @@
+ call the regexp matcher at all. */
+ static int kwset_exact_matches;
+
+-#if defined(MBS_SUPPORT)
+-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+-#endif
++/* UTF-8 encoding allows some optimizations that we can't otherwise
++ assume in a multibyte encoding. */
++static int using_utf8;
++
+ static void kwsinit PARAMS ((void));
+ static void kwsmusts PARAMS ((void));
+ static void Gcompile PARAMS ((char const *, size_t));
+@@ -84,6 +92,15 @@
+ static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+
+ void
++check_utf8 (void)
++{
++#ifdef HAVE_LANGINFO_CODESET
++ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
++ using_utf8 = 1;
++#endif
++}
++
++void
+ dfaerror (char const *mesg)
+ {
+ error (2, 0, mesg);
+@@ -141,47 +158,6 @@
+ }
+ }
+
+-#ifdef MBS_SUPPORT
+-/* This function allocate the array which correspond to "buf".
+- Then this check multibyte string and mark on the positions which
+- are not singlebyte character nor the first byte of a multibyte
+- character. Caller must free the array. */
+-static char*
+-check_multibyte_string(char const *buf, size_t size)
+-{
+- char *mb_properties = xmalloc(size);
+- mbstate_t cur_state;
+- wchar_t wc;
+- int i;
+- memset(&cur_state, 0, sizeof(mbstate_t));
+- memset(mb_properties, 0, sizeof(char)*size);
+- for (i = 0; i < size ;)
+- {
+- size_t mbclen;
+- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
+-
+- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+- {
+- /* An invalid sequence, or a truncated multibyte character.
+- We treat it as a singlebyte character. */
+- mbclen = 1;
+- }
+- else if (match_icase)
+- {
+- if (iswupper((wint_t)wc))
+- {
+- wc = towlower((wint_t)wc);
+- wcrtomb(buf + i, wc, &cur_state);
+- }
+- }
+- mb_properties[i] = mbclen;
+- i += mbclen;
+- }
+-
+- return mb_properties;
+-}
+-#endif
+-
+ static void
+ Gcompile (char const *pattern, size_t size)
+ {
+@@ -190,6 +166,7 @@
+ size_t total = size;
+ char const *motif = pattern;
+
++ check_utf8 ();
+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+
+@@ -266,6 +243,7 @@
+ size_t total = size;
+ char const *motif = pattern;
+
++ check_utf8 ();
+ if (strcmp (matcher, "awk") == 0)
+ {
+ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
+@@ -350,18 +328,9 @@
+ struct kwsmatch kwsm;
+ size_t i, ret_val;
+ #ifdef MBS_SUPPORT
+- char *mb_properties = NULL;
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- {
+- char *case_buf = xmalloc(size);
+- memcpy(case_buf, buf, size);
+- buf = case_buf;
+- }
+- if (kwset)
+- mb_properties = check_multibyte_string(buf, size);
+- }
++ int mb_cur_max = MB_CUR_MAX;
++ mbstate_t mbs;
++ memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+
+ buflim = buf + size;
+@@ -373,21 +342,63 @@
+ if (kwset)
+ {
+ /* Find a possible match using the KWset matcher. */
+- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
++#ifdef MBS_SUPPORT
++ size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++ size_t offset;
++#ifdef MBS_SUPPORT
++ /* kwsexec doesn't work with match_icase and multibyte input. */
++ if (match_icase && mb_cur_max > 1)
++ /* Avoid kwset */
++ offset = 0;
++ else
++#endif /* MBS_SUPPORT */
++ offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ if (offset == (size_t) -1)
+ goto failure;
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Incomplete character: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
+ beg += offset;
+ /* Narrow down to the line containing the candidate, and
+ run it through DFA. */
+ end = memchr(beg, eol, buflim - beg);
+ end++;
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
++ if (mb_cur_max > 1 && bytes_left)
+ continue;
+-#endif
++#endif /* MBS_SUPPORT */
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+- if (kwsm.index < kwset_exact_matches)
++ if (
++#ifdef MBS_SUPPORT
++ !(match_icase && mb_cur_max > 1) &&
++#endif /* MBS_SUPPORT */
++ (kwsm.index < kwset_exact_matches))
+ goto success_in_beg_and_end;
+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+@@ -395,13 +406,47 @@
+ else
+ {
+ /* No good fixed strings; start with DFA. */
++#ifdef MBS_SUPPORT
++ size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ if (offset == (size_t) -1)
+ break;
+ /* Narrow down to the line we've found. */
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Incomplete character: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
+ beg += offset;
+ end = memchr (beg, eol, buflim - beg);
+ end++;
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && bytes_left)
++ continue;
++#endif /* MBS_SUPPORT */
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+ }
+@@ -469,15 +514,6 @@
+ } /* for (beg = end ..) */
+
+ failure:
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- {
+- if (mb_properties)
+- free (mb_properties);
+- if (match_icase)
+- free ((char *) buf);
+- }
+-#endif /* MBS_SUPPORT */
+ return (size_t) -1;
+
+ success_in_beg_and_end:
+@@ -486,24 +522,144 @@
+ /* FALLTHROUGH */
+
+ success_in_start_and_len:
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- {
+- if (mb_properties)
+- free (mb_properties);
+- if (match_icase)
+- free ((char *) buf);
+- }
+-#endif /* MBS_SUPPORT */
+ *match_size = len;
+ return start;
+ }
+
++#ifdef MBS_SUPPORT
++static int f_i_multibyte; /* whether we're using the new -Fi MB method */
++static struct
++{
++ wchar_t **patterns;
++ size_t count, maxlen;
++ unsigned char *match;
++} Fimb;
++#endif
++
+ static void
+ Fcompile (char const *pattern, size_t size)
+ {
++ int mb_cur_max = MB_CUR_MAX;
+ char const *beg, *lim, *err;
+
++ check_utf8 ();
++#ifdef MBS_SUPPORT
++ /* Support -F -i for UTF-8 input. */
++ if (match_icase && mb_cur_max > 1)
++ {
++ mbstate_t mbs;
++ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
++ const char *patternend = pattern;
++ size_t wcsize;
++ kwset_t fimb_kwset = NULL;
++ char *starts = NULL;
++ wchar_t *wcbeg, *wclim;
++ size_t allocated = 0;
++
++ memset (&mbs, '\0', sizeof (mbs));
++# ifdef __GNU_LIBRARY__
++ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
++ if (patternend != pattern + size)
++ wcsize = (size_t) -1;
++# else
++ {
++ char *patterncopy = xmalloc (size + 1);
++
++ memcpy (patterncopy, pattern, size);
++ patterncopy[size] = '\0';
++ patternend = patterncopy;
++ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
++ if (patternend != patterncopy + size)
++ wcsize = (size_t) -1;
++ free (patterncopy);
++ }
++# endif
++ if (wcsize + 2 <= 2)
++ {
++fimb_fail:
++ free (wcpattern);
++ free (starts);
++ if (fimb_kwset)
++ kwsfree (fimb_kwset);
++ free (Fimb.patterns);
++ Fimb.patterns = NULL;
++ }
++ else
++ {
++ if (!(fimb_kwset = kwsalloc (NULL)))
++ error (2, 0, _("memory exhausted"));
++
++ starts = xmalloc (mb_cur_max * 3);
++ wcbeg = wcpattern;
++ do
++ {
++ int i;
++ size_t wclen;
++
++ if (Fimb.count >= allocated)
++ {
++ if (allocated == 0)
++ allocated = 128;
++ else
++ allocated *= 2;
++ Fimb.patterns = xrealloc (Fimb.patterns,
++ sizeof (wchar_t *) * allocated);
++ }
++ Fimb.patterns[Fimb.count++] = wcbeg;
++ for (wclim = wcbeg;
++ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
++ *wclim = towlower (*wclim);
++ *wclim = L'\0';
++ wclen = wclim - wcbeg;
++ if (wclen > Fimb.maxlen)
++ Fimb.maxlen = wclen;
++ if (wclen > 3)
++ wclen = 3;
++ if (wclen == 0)
++ {
++ if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
++ error (2, 0, err);
++ }
++ else
++ for (i = 0; i < (1 << wclen); i++)
++ {
++ char *p = starts;
++ int j, k;
++
++ for (j = 0; j < wclen; ++j)
++ {
++ wchar_t wc = wcbeg[j];
++ if (i & (1 << j))
++ {
++ wc = towupper (wc);
++ if (wc == wcbeg[j])
++ continue;
++ }
++ k = wctomb (p, wc);
++ if (k <= 0)
++ goto fimb_fail;
++ p += k;
++ }
++ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
++ error (2, 0, err);
++ }
++ if (wclim < wcpattern + wcsize)
++ ++wclim;
++ wcbeg = wclim;
++ }
++ while (wcbeg < wcpattern + wcsize);
++ f_i_multibyte = 1;
++ kwset = fimb_kwset;
++ free (starts);
++ Fimb.match = xmalloc (Fimb.count);
++ if ((err = kwsprep (kwset)) != 0)
++ error (2, 0, err);
++ return;
++ }
++ }
++#endif /* MBS_SUPPORT */
++
++
+ kwsinit ();
+ beg = pattern;
+ do
+@@ -522,6 +678,76 @@
+ error (2, 0, err);
+ }
+
++#ifdef MBS_SUPPORT
++static int
++Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
++{
++ size_t len, letter, i;
++ int ret = -1;
++ mbstate_t mbs;
++ wchar_t wc;
++ int patterns_left;
++
++ assert (match_icase && f_i_multibyte == 1);
++ assert (MB_CUR_MAX > 1);
++
++ memset (&mbs, '\0', sizeof (mbs));
++ memset (Fimb.match, '\1', Fimb.count);
++ letter = len = 0;
++ patterns_left = 1;
++ while (patterns_left && len <= size)
++ {
++ size_t c;
++
++ patterns_left = 0;
++ if (len < size)
++ {
++ c = mbrtowc (&wc, buf + len, size - len, &mbs);
++ if (c + 2 <= 2)
++ return ret;
++
++ wc = towlower (wc);
++ }
++ else
++ {
++ c = 1;
++ wc = L'\0';
++ }
++
++ for (i = 0; i < Fimb.count; i++)
++ {
++ if (Fimb.match[i])
++ {
++ if (Fimb.patterns[i][letter] == L'\0')
++ {
++ /* Found a match. */
++ *plen = len;
++ if (!exact && !match_words)
++ return 0;
++ else
++ {
++ /* For -w or exact look for longest match. */
++ ret = 0;
++ Fimb.match[i] = '\0';
++ continue;
++ }
++ }
++
++ if (Fimb.patterns[i][letter] == wc)
++ patterns_left = 1;
++ else
++ Fimb.match[i] = '\0';
++ }
++ }
++
++ len += c;
++ letter++;
++ }
++
++ return ret;
++}
++#endif /* MBS_SUPPORT */
++
+ static size_t
+ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+ {
+@@ -531,80 +757,258 @@
+ struct kwsmatch kwsmatch;
+ size_t ret_val;
+ #ifdef MBS_SUPPORT
+- char *mb_properties = NULL;
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- {
+- char *case_buf = xmalloc(size);
+- memcpy(case_buf, buf, size);
+- buf = case_buf;
+- }
+- mb_properties = check_multibyte_string(buf, size);
+- }
++ int mb_cur_max = MB_CUR_MAX;
++ mbstate_t mbs;
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ const char *last_char = NULL;
+ #endif /* MBS_SUPPORT */
+
+ for (beg = buf; beg <= buf + size; ++beg)
+ {
+- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++ size_t offset;
++ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
++
+ if (offset == (size_t) -1)
+ goto failure;
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+- continue; /* It is a part of multibyte character. */
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ size_t bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++ last_char = beg;
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Incomplete character: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character: no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++
++ if (bytes_left)
++ continue;
++ }
++ else
+ #endif /* MBS_SUPPORT */
+ beg += offset;
++#ifdef MBS_SUPPORT
++ /* For f_i_multibyte, the string at beg now matches first 3 chars of
++ one of the search strings (less if there are shorter search strings).
++ See if this is a real match. */
++ if (f_i_multibyte
++ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
++ goto next_char;
++#endif /* MBS_SUPPORT */
+ len = kwsmatch.size[0];
+ if (exact && !match_words)
+ goto success_in_beg_and_len;
+ if (match_lines)
+ {
+ if (beg > buf && beg[-1] != eol)
+- continue;
++ goto next_char;
+ if (beg + len < buf + size && beg[len] != eol)
+- continue;
++ goto next_char;
+ goto success;
+ }
+ else if (match_words)
+- for (try = beg; len; )
+- {
+- if (try > buf && WCHAR((unsigned char) try[-1]))
+- break;
+- if (try + len < buf + size && WCHAR((unsigned char) try[len]))
+- {
+- offset = kwsexec (kwset, beg, --len, &kwsmatch);
+- if (offset == (size_t) -1)
+- {
++ {
++ while (len)
++ {
++ int word_match = 0;
++ if (beg > buf)
++ {
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- free (mb_properties);
++ if (mb_cur_max > 1)
++ {
++ const char *s;
++ int mr;
++ wchar_t pwc;
++
++ if (using_utf8)
++ {
++ s = beg - 1;
++ while (s > buf
++ && (unsigned char) *s >= 0x80
++ && (unsigned char) *s <= 0xbf)
++ --s;
++ }
++ else
++ s = last_char;
++ mr = mbtowc (&pwc, s, beg - s);
++ if (mr <= 0)
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ else if ((iswalnum (pwc) || pwc == L'_')
++ && mr == (int) (beg - s))
++ goto next_char;
++ }
++ else
+ #endif /* MBS_SUPPORT */
+- return offset;
+- }
+- try = beg + offset;
+- len = kwsmatch.size[0];
+- }
+- else
+- goto success;
+- }
++ if (WCHAR ((unsigned char) beg[-1]))
++ goto next_char;
++ }
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1)
++ {
++ wchar_t nwc;
++ int mr;
++
++ mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
++ if (mr <= 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ word_match = 1;
++ }
++ else if (!iswalnum (nwc) && nwc != L'_')
++ word_match = 1;
++ }
++ else
++#endif /* MBS_SUPPORT */
++ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
++ word_match = 1;
++ if (word_match)
++ {
++ if (!exact)
++ /* Returns the whole line now we know there's a word match. */
++ goto success;
++ else
++ /* Returns just this word match. */
++ goto success_in_beg_and_len;
++ }
++ if (len > 0)
++ {
++ /* Try a shorter length anchored at the same place. */
++ --len;
++ offset = kwsexec (kwset, beg, len, &kwsmatch);
++
++ if (offset == -1)
++ goto next_char; /* Try a different anchor. */
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ size_t bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++ last_char = beg;
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Invalid sequence or NUL byte: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ {
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++ }
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++
++ if (bytes_left)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ goto next_char; /* Try a different anchor. */
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
++ beg += offset;
++#ifdef MBS_SUPPORT
++ /* The string at beg now matches first 3 chars of one of
++ the search strings (less if there are shorter search
++ strings). See if this is a real match. */
++ if (f_i_multibyte
++ && Fimbexec (beg, len - offset, &kwsmatch.size[0],
++ exact))
++ goto next_char;
++#endif /* MBS_SUPPORT */
++ len = kwsmatch.size[0];
++ }
++ }
++ }
+ else
+ goto success;
++next_char:;
++#ifdef MBS_SUPPORT
++ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
++ by ++beg above. */
++ if (mb_cur_max > 1)
++ {
++ if (using_utf8)
++ {
++ unsigned char c = *beg;
++ if (c >= 0xc2)
++ {
++ if (c < 0xe0)
++ ++beg;
++ else if (c < 0xf0)
++ beg += 2;
++ else if (c < 0xf8)
++ beg += 3;
++ else if (c < 0xfc)
++ beg += 4;
++ else if (c < 0xfe)
++ beg += 5;
++ }
++ }
++ else
++ {
++ size_t l = mbrlen (beg, buf + size - beg, &mbs);
++
++ last_char = beg;
++ if (l + 2 >= 2)
++ beg += l - 1;
++ else
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ }
++ }
++#endif /* MBS_SUPPORT */
+ }
+
+ failure:
++ return -1;
++
++ success:
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
++ if (mb_cur_max > 1 && !using_utf8)
+ {
+- if (match_icase)
+- free((char *) buf);
+- if (mb_properties)
+- free(mb_properties);
++ end = beg + len;
++ while (end < buf + size)
++ {
++ size_t mlen = mbrlen (end, buf + size - end, &mbs);
++ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ mlen = 1;
++ }
++ if (mlen == 1 && *end == eol)
++ break;
++
++ end += mlen;
++ }
+ }
++ else
+ #endif /* MBS_SUPPORT */
+- return -1;
+-
+- success:
+ end = memchr (beg + len, eol, (buf + size) - (beg + len));
++
+ end++;
+ while (buf < beg && beg[-1] != eol)
+ --beg;
+@@ -613,15 +1017,6 @@
+
+ success_in_beg_and_len:
+ *match_size = len;
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- {
+- if (mb_properties)
+- free (mb_properties);
+- if (match_icase)
+- free ((char *) buf);
+- }
+-#endif /* MBS_SUPPORT */
+ return beg - buf;
+ }
+
diff --git a/abs/core-testing/grep/10-dfa-optional.patch b/abs/core-testing/grep/10-dfa-optional.patch
new file mode 100644
index 0000000..784eba9
--- /dev/null
+++ b/abs/core-testing/grep/10-dfa-optional.patch
@@ -0,0 +1,67 @@
+--- grep-2.5.1a/src/search.c.dfa-optional 2005-01-07 14:58:45.714869815 +0000
++++ grep-2.5.1a/src/search.c 2005-01-07 14:58:45.725867716 +0000
+@@ -327,12 +327,34 @@
+ int backref, start, len;
+ struct kwsmatch kwsm;
+ size_t i, ret_val;
++ static int use_dfa;
++ static int use_dfa_checked = 0;
+ #ifdef MBS_SUPPORT
+ int mb_cur_max = MB_CUR_MAX;
+ mbstate_t mbs;
+ memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+
++ if (!use_dfa_checked)
++ {
++ char *grep_use_dfa = getenv ("GREP_USE_DFA");
++ if (!grep_use_dfa)
++ {
++#ifdef MBS_SUPPORT
++ /* Turn off DFA when processing multibyte input. */
++ use_dfa = (MB_CUR_MAX == 1);
++#else
++ use_dfa = 1;
++#endif /* MBS_SUPPORT */
++ }
++ else
++ {
++ use_dfa = atoi (grep_use_dfa);
++ }
++
++ use_dfa_checked = 1;
++ }
++
+ buflim = buf + size;
+
+ for (beg = end = buf; end < buflim; beg = end)
+@@ -400,7 +422,8 @@
+ #endif /* MBS_SUPPORT */
+ (kwsm.index < kwset_exact_matches))
+ goto success_in_beg_and_end;
+- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
++ if (use_dfa &&
++ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+ }
+ else
+@@ -409,7 +432,9 @@
+ #ifdef MBS_SUPPORT
+ size_t bytes_left = 0;
+ #endif /* MBS_SUPPORT */
+- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
++ size_t offset = 0;
++ if (use_dfa)
++ offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ if (offset == (size_t) -1)
+ break;
+ /* Narrow down to the line we've found. */
+@@ -451,7 +476,7 @@
+ --beg;
+ }
+ /* Successful, no backreferences encountered! */
+- if (!backref)
++ if (use_dfa && !backref)
+ goto success_in_beg_and_end;
+ }
+ else
diff --git a/abs/core-testing/grep/11-tests.patch b/abs/core-testing/grep/11-tests.patch
new file mode 100644
index 0000000..2934a21
--- /dev/null
+++ b/abs/core-testing/grep/11-tests.patch
@@ -0,0 +1,138 @@
+--- grep-2.5.1/tests/Makefile.am.jj 2001-03-07 05:11:27.000000000 +0100
++++ grep-2.5.1/tests/Makefile.am 2004-12-31 11:42:41.595492300 +0100
+@@ -3,7 +3,8 @@
+ AWK=@AWK@
+
+ TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
+- status.sh empty.sh options.sh backref.sh file.sh
++ status.sh empty.sh options.sh backref.sh file.sh \
++ fmbtest.sh
+ EXTRA_DIST = $(TESTS) \
+ khadafy.lines khadafy.regexp \
+ spencer1.awk spencer1.tests \
+--- grep-2.5.1/tests/fmbtest.sh 2004-12-31 13:30:23.942871250 +0100
++++ grep-2.5.1/tests/fmbtest.sh 2004-12-31 14:09:13.219463855 +0100
+@@ -0,0 +1,111 @@
++#!/bin/sh
++# Checks grep -F/-G/-E on UTF-8 Czech text; the exit status is non-zero if any check fails.
++: ${srcdir=.}
++
++# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
++LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
++ || exit 77
++
++failures=0
++# Fixture: every input line carries a two-digit tag that the checks below extract with sed.
++cat > csinput <<EOF
++01 Žluťoučká číše
++ČíŠE 02
++03 Z číší Čiší cosi
++04 Čí
++Še 05
++06 ČČČČČČČíšČÍŠčíš
++07 ČČČ ČČČČíšČÍŠčíšEEEE
++čAs 08
++09Čapka
++10ČaSy se měnÍ
++ČÍšE11
++Čas12
++𝇕ČÍšE𝇓13
++ŽČÍšE𝇓14
++𝇕ČÍšEŽ15
++ŽČÍšEŽ16
++ČÍšE𝇓17
++ČÍšEŽ18
++19𝇕ČÍše
++20ŽČÍše
++EOF
++cat > cspatfile <<EOF
++ČÍšE
++Čas
++EOF
++# Checks 1-4 run once per matcher option: -F, -G and -E.
++for mode in F G E; do
++# 1: case-sensitive match of the patterns read from cspatfile.
++test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
++ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test1" != "11 12 13 14 15 16 17 18"; then
++ echo "Test #1 ${mode} failed: $test1"
++ failures=1
++fi
++# 2: the same patterns with -i (case-insensitive).
++test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
++ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++ echo "Test #2 ${mode} failed: $test2"
++ failures=1
++fi
++# 3: as check 2, but the patterns are given with -e instead of -f.
++test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ČÍšE' -e 'Čas' csinput \
++ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++ echo "Test #3 ${mode} failed: $test3"
++ failures=1
++fi
++# 4: -i combined with -w (whole-word matches only).
++test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
++ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test4" != "01 02 08 13 17 19"; then
++ echo "Test #4 ${mode} failed: $test4"
++ failures=1
++fi
++
++done
++
++# Test that -F --color=always prefers longer matches.
++test5="`echo 'Cosi tu ČišÍ...' \
++ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'čiš' -e 'čiší'`"
++if echo "$test5" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
++ :
++else
++ echo "Test #5 F failed: $test5"
++ failures=1
++fi
++
++for mode in G E; do
++
++# Test that -{G,E} --color=always prefers earlier pattern matches.
++test6="`echo 'Cosi tu ČišÍ...' \
++ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiš' -e 'čiší'`"
++if echo "$test6" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČiš.*\[.*m\(.\[K\)\?Í\.\.\.'; then
++ :
++else
++ echo "Test #6 ${mode} failed: $test6"
++ failures=1
++fi
++
++# Test that -{G,E} --color=always prefers earlier pattern matches (longer pattern listed first).
++test7="`echo 'Cosi tu ČišÍ...' \
++ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiší' -e 'čiš'`"
++if echo "$test7" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
++ :
++else
++ echo "Test #7 ${mode} failed: $test7"
++ failures=1
++fi
++# 8: as check 2, but with regex patterns ('.' and a bracket range) instead of literals.
++test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput \
++ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
++if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
++ echo "Test #8 ${mode} failed: $test8"
++ failures=1
++fi
++
++done
++
++exit $failures
+--- grep-2.5.1/tests/Makefile.in.jj 2004-12-31 11:42:53.000000000 +0100
++++ grep-2.5.1/tests/Makefile.in 2004-12-31 11:43:36.871514505 +0100
+@@ -97,7 +97,8 @@ install_sh = @install_sh@
+ AWK = @AWK@
+
+ TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
+- status.sh empty.sh options.sh backref.sh file.sh
++ status.sh empty.sh options.sh backref.sh file.sh \
++ fmbtest.sh
+
+ EXTRA_DIST = $(TESTS) \
+ khadafy.lines khadafy.regexp \
diff --git a/abs/core-testing/grep/12-w.patch b/abs/core-testing/grep/12-w.patch
new file mode 100644
index 0000000..79ae2ae
--- /dev/null
+++ b/abs/core-testing/grep/12-w.patch
@@ -0,0 +1,121 @@
+--- grep-2.5.1a/src/search.c.w 2006-02-20 14:27:27.000000000 +0000
++++ grep-2.5.1a/src/search.c 2006-02-20 14:32:07.000000000 +0000
+@@ -507,10 +507,114 @@
+ if (match_words)
+ while (start >= 0)
+ {
+- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+- && (len == end - beg - 1
+- || !WCHAR ((unsigned char) beg[start + len])))
+- goto success_in_beg_and_end;
++ int lword_match = 0;
++ if (start == 0)
++ lword_match = 1;
++ else
++ {
++ assert (start > 0);
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1)
++ {
++ const char *s;
++ size_t mr;
++ wchar_t pwc;
++
++ /* Locate the start of the multibyte character
++ before the match position (== beg + start). */
++ if (using_utf8)
++ {
++ /* UTF-8 is a special case: scan backwards
++ until we find a 7-bit character or a
++ lead byte. */
++ s = beg + start - 1;
++ while (s > buf
++ && (unsigned char) *s >= 0x80
++ && (unsigned char) *s <= 0xbf)
++ --s;
++ }
++ else
++ {
++ /* Scan forwards to find the start of the
++ last complete character before the
++ match position. */
++ size_t bytes_left = start - 1;
++ s = beg;
++ while (bytes_left > 0)
++ {
++ mr = mbrlen (s, bytes_left, &mbs);
++ if (mr == (size_t) -1 || mr == 0)
++ {
++ memset (&mbs, '\0', sizeof (mbs));
++ s++;
++ bytes_left--;
++ continue;
++ }
++ if (mr == (size_t) -2)
++ {
++ memset (&mbs, '\0', sizeof (mbs));
++ break;
++ }
++ s += mr;
++ bytes_left -= mr;
++ }
++ }
++ mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
++ if (mr == (size_t) -2 || mr == (size_t) -1 ||
++ mr == 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ lword_match = 1;
++ }
++ else if (!(iswalnum (pwc) || pwc == L'_')
++ && mr == beg + start - s)
++ lword_match = 1;
++ }
++ else
++#endif /* MBS_SUPPORT */
++ if (!WCHAR ((unsigned char) beg[start - 1]))
++ lword_match = 1;
++ }
++
++ if (lword_match)
++ {
++ int rword_match = 0;
++ if (start + len == end - beg - 1)
++ rword_match = 1;
++ else
++ {
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1)
++ {
++ wchar_t nwc;
++ int mr;
++
++ mr = mbtowc (&nwc, beg + start + len,
++ end - beg - start - len - 1);
++ if (mr <= 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ rword_match = 1;
++ }
++ else if (!iswalnum (nwc) && nwc != L'_')
++ rword_match = 1;
++ }
++ else
++#endif /* MBS_SUPPORT */
++ if (!WCHAR ((unsigned char) beg[start + len]))
++ rword_match = 1;
++ }
++
++ if (rword_match)
++ {
++ if (!exact)
++ /* Returns the whole line. */
++ goto success_in_beg_and_end;
++ else
++ /* Returns just this word match. */
++ goto success_in_start_and_len;
++ }
++ }
+ if (len > 0)
+ {
+ /* Try a shorter length anchored at the same place. */
diff --git a/abs/core-testing/grep/13-P.patch b/abs/core-testing/grep/13-P.patch
new file mode 100644
index 0000000..9dca4ad
--- /dev/null
+++ b/abs/core-testing/grep/13-P.patch
@@ -0,0 +1,14 @@
+--- grep-2.5.1a/src/search.c.P 2006-02-03 14:08:00.000000000 +0000
++++ grep-2.5.1a/src/search.c 2006-02-03 14:11:20.000000000 +0000
+@@ -1234,8 +1234,9 @@
+ char eol = eolbyte;
+ if (!exact)
+ {
+- end = memchr (end, eol, buflim - end);
+- end++;
++ while (end < buflim)
++ if (*end++ == eol)
++ break;
+ while (buf < beg && beg[-1] != eol)
+ --beg;
+ }
diff --git a/abs/core-testing/grep/14-mem-exhausted.patch b/abs/core-testing/grep/14-mem-exhausted.patch
new file mode 100644
index 0000000..d6a996d
--- /dev/null
+++ b/abs/core-testing/grep/14-mem-exhausted.patch
@@ -0,0 +1,15 @@
+--- grep-2.5.1a/src/grep.c.mem-exhausted 2006-11-22 14:49:35.000000000 +0000
++++ grep-2.5.1a/src/grep.c 2006-11-22 14:53:12.000000000 +0000
+@@ -299,6 +299,12 @@
+ int cc = 1;
+ char *readbuf;
+ size_t readsize;
++ const size_t max_save = 200 * 1024 * 1024;
++
++ /* Limit the amount of saved data to 200Mb so we don't fail on
++ * large files. */
++ if (save > max_save)
++ save = max_save;
+
+ /* Offset from start of buffer to start of old stuff
+ that we want to save. */
diff --git a/abs/core-testing/grep/15-empty-pattern.patch b/abs/core-testing/grep/15-empty-pattern.patch
new file mode 100644
index 0000000..acb702a
--- /dev/null
+++ b/abs/core-testing/grep/15-empty-pattern.patch
@@ -0,0 +1,36 @@
+--- grep-2.5.1a/src/grep.c.empty-pattern 2006-11-22 19:05:43.000000000 +0000
++++ grep-2.5.1a/src/grep.c 2006-11-22 19:22:04.000000000 +0000
+@@ -1667,9 +1667,6 @@
+ out_invert ^= 1;
+ match_lines = match_words = 0;
+ }
+- else
+- /* Strip trailing newline. */
+- --keycc;
+ }
+ else
+ if (optind < argc)
+--- grep-2.5.1a/src/search.c.empty-pattern 2006-11-22 19:21:11.000000000 +0000
++++ grep-2.5.1a/src/search.c 2006-11-22 19:35:06.000000000 +0000
+@@ -204,6 +204,10 @@
+ motif = sep;
+ } while (sep && total != 0);
+
++ /* Strip trailing newline. */
++ if (size && pattern[size - 1] == '\n')
++ size--;
++
+ /* In the match_words and match_lines cases, we use a different pattern
+ for the DFA matcher that will quickly throw out cases that won't work.
+ Then if DFA succeeds we do some hairy stuff using the regex matcher
+@@ -288,6 +292,10 @@
+ motif = sep;
+ } while (sep && total != 0);
+
++ /* Strip trailing newline. */
++ if (size && pattern[size - 1] == '\n')
++ size--;
++
+ /* In the match_words and match_lines cases, we use a different pattern
+ for the DFA matcher that will quickly throw out cases that won't work.
+ Then if DFA succeeds we do some hairy stuff using the regex matcher
diff --git a/abs/core-testing/grep/64-egf-speedup.patch b/abs/core-testing/grep/64-egf-speedup.patch
new file mode 100644
index 0000000..a1fa024
--- /dev/null
+++ b/abs/core-testing/grep/64-egf-speedup.patch
@@ -0,0 +1,791 @@
+--- a/src/search.c.orig
++++ b/src/search.c
+@@ -18,10 +18,15 @@
+
+ /* Written August 1992 by Mike Haertel. */
+
++#ifndef _GNU_SOURCE
++# define _GNU_SOURCE 1
++#endif
+ #ifdef HAVE_CONFIG_H
+ # include <config.h>
+ #endif
+
++#include <assert.h>
++
+ #include <sys/types.h>
+
+ #include "mbsupport.h"
+@@ -43,6 +48,9 @@
+ #ifdef HAVE_LIBPCRE
+ # include <pcre.h>
+ #endif
++#ifdef HAVE_LANGINFO_CODESET
++# include <langinfo.h>
++#endif
+
+ #define NCHAR (UCHAR_MAX + 1)
+
+@@ -68,6 +76,19 @@
+ error (2, 0, _("memory exhausted"));
+ }
+
++/* Nonzero once check_utf8 () has seen a UTF-8 locale codeset; UTF-8 allows
++   optimizations that cannot be assumed for multibyte encodings in general.  */
++static int using_utf8;
++/* Set using_utf8 if nl_langinfo (CODESET) names "UTF-8"; it is never reset.  */
++void
++check_utf8 (void)
++{
++#ifdef HAVE_LANGINFO_CODESET
++  const char *codeset = nl_langinfo (CODESET);
++  if (!strcmp (codeset, "UTF-8")) using_utf8 = 1;
++#endif
++}
++
+ #ifndef FGREP_PROGRAM
+ /* DFA compiled regexp. */
+ static struct dfa dfa;
+@@ -134,49 +155,6 @@
+ }
+ #endif /* !FGREP_PROGRAM */
+
+-#ifdef MBS_SUPPORT
+-/* This function allocate the array which correspond to "buf".
+- Then this check multibyte string and mark on the positions which
+- are not single byte character nor the first byte of a multibyte
+- character. Caller must free the array. */
+-static char*
+-check_multibyte_string(char const *buf, size_t size)
+-{
+- char *mb_properties = xmalloc(size);
+- mbstate_t cur_state;
+- wchar_t wc;
+- int i;
+-
+- memset(&cur_state, 0, sizeof(mbstate_t));
+- memset(mb_properties, 0, sizeof(char)*size);
+-
+- for (i = 0; i < size ;)
+- {
+- size_t mbclen;
+- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
+-
+- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+- {
+- /* An invalid sequence, or a truncated multibyte character.
+- We treat it as a single byte character. */
+- mbclen = 1;
+- }
+- else if (match_icase)
+- {
+- if (iswupper((wint_t)wc))
+- {
+- wc = towlower((wint_t)wc);
+- wcrtomb(buf + i, wc, &cur_state);
+- }
+- }
+- mb_properties[i] = mbclen;
+- i += mbclen;
+- }
+-
+- return mb_properties;
+-}
+-#endif /* MBS_SUPPORT */
+-
+ #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM)
+ #ifdef EGREP_PROGRAM
+ COMPILE_FCT(Ecompile)
+@@ -193,6 +171,7 @@
+ size_t total = size;
+ char const *motif = pattern;
+
++ check_utf8 ();
+ #if 0
+ if (match_icase)
+ syntax_bits |= RE_ICASE;
+@@ -303,20 +282,9 @@ hunk6
+ struct kwsmatch kwsm;
+ size_t i, ret_val;
+ #ifdef MBS_SUPPORT
+- char *mb_properties = NULL;
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- {
+- char *case_buf = xmalloc(size);
+- memcpy(case_buf, buf, size);
+- if (start_ptr)
+- start_ptr = case_buf + (start_ptr - buf);
+- buf = case_buf;
+- }
+- if (kwset)
+- mb_properties = check_multibyte_string(buf, size);
+- }
++ int mb_cur_max = MB_CUR_MAX;
++ mbstate_t mbs;
++ memset (&mbs, '\0', sizeof (mbstate_t));
+ #endif /* MBS_SUPPORT */
+
+ buflim = buf + size;
+@@ -329,21 +282,63 @@ hunk6
+ if (kwset)
+ {
+ /* Find a possible match using the KWset matcher. */
+- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
++#ifdef MBS_SUPPORT
++ size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
++ size_t offset;
++#ifdef MBS_SUPPORT
++ /* kwsexec doesn't work with match_icase and multibyte input. */
++ if (match_icase && mb_cur_max > 1)
++ /* Avoid kwset */
++ offset = 0;
++ else
++#endif /* MBS_SUPPORT */
++ offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+ if (offset == (size_t) -1)
+- goto failure;
++ return (size_t)-1;
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Invalid sequence or NUL byte: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
+ beg += offset;
+ /* Narrow down to the line containing the candidate, and
+ run it through DFA. */
+ end = memchr(beg, eol, buflim - beg);
+ end++;
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
++ if (mb_cur_max > 1 && bytes_left)
+ continue;
+ #endif
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+- if (kwsm.index < kwset_exact_matches)
++ if (
++#ifdef MBS_SUPPORT
++ !(match_icase && mb_cur_max > 1) &&
++#endif /* MBS_SUPPORT */
++ (kwsm.index < kwset_exact_matches))
+ goto success;
+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+ continue;
+@@ -351,13 +363,47 @@
+ else
+ {
+ /* No good fixed strings; start with DFA. */
++#ifdef MBS_SUPPORT
++ size_t bytes_left = 0;
++#endif /* MBS_SUPPORT */
+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+ if (offset == (size_t) -1)
+ break;
+ /* Narrow down to the line we've found. */
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Invalid sequence or NUL byte: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
+ beg += offset;
+ end = memchr (beg, eol, buflim - beg);
+ end++;
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && bytes_left)
++ continue;
++#endif /* MBS_SUPPORT */
+ while (beg > buf && beg[-1] != eol)
+ --beg;
+ }
+@@ -475,24 +521,144 @@
+ *match_size = len;
+ ret_val = beg - buf;
+ out:
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- free((char*)buf);
+- if (mb_properties)
+- free(mb_properties);
+- }
+-#endif /* MBS_SUPPORT */
+ return ret_val;
+ }
+ #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */
+
++#ifdef MBS_SUPPORT
++static int f_i_multibyte; /* whether we're using the new -Fi MB method */
++static struct
++{
++ wchar_t **patterns;
++ size_t count, maxlen;
++ unsigned char *match;
++} Fimb;
++#endif
++
+ #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM)
+ COMPILE_FCT(Fcompile)
+ {
++ int mb_cur_max = MB_CUR_MAX;
+ char const *beg, *lim, *err;
+
++ check_utf8 ();
++#ifdef MBS_SUPPORT
++ /* Support -F -i for UTF-8 input. */
++ if (match_icase && mb_cur_max > 1)
++ {
++ mbstate_t mbs;
++ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
++ const char *patternend = pattern;
++ size_t wcsize;
++ kwset_t fimb_kwset = NULL;
++ char *starts = NULL;
++ wchar_t *wcbeg, *wclim;
++ size_t allocated = 0;
++
++ memset (&mbs, '\0', sizeof (mbs));
++# ifdef __GNU_LIBRARY__
++ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
++ if (patternend != pattern + size)
++ wcsize = (size_t) -1;
++# else
++ {
++ char *patterncopy = xmalloc (size + 1);
++
++ memcpy (patterncopy, pattern, size);
++ patterncopy[size] = '\0';
++ patternend = patterncopy;
++ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
++ if (patternend != patterncopy + size)
++ wcsize = (size_t) -1;
++ free (patterncopy);
++ }
++# endif
++ if (wcsize + 2 <= 2)
++ {
++fimb_fail:
++ free (wcpattern);
++ free (starts);
++ if (fimb_kwset)
++ kwsfree (fimb_kwset);
++ free (Fimb.patterns);
++ Fimb.patterns = NULL;
++ }
++ else
++ {
++ if (!(fimb_kwset = kwsalloc (NULL)))
++ error (2, 0, _("memory exhausted"));
++
++ starts = xmalloc (mb_cur_max * 3);
++ wcbeg = wcpattern;
++ do
++ {
++ int i;
++ size_t wclen;
++
++ if (Fimb.count >= allocated)
++ {
++ if (allocated == 0)
++ allocated = 128;
++ else
++ allocated *= 2;
++ Fimb.patterns = xrealloc (Fimb.patterns,
++ sizeof (wchar_t *) * allocated);
++ }
++ Fimb.patterns[Fimb.count++] = wcbeg;
++ for (wclim = wcbeg;
++ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
++ *wclim = towlower (*wclim);
++ *wclim = L'\0';
++ wclen = wclim - wcbeg;
++ if (wclen > Fimb.maxlen)
++ Fimb.maxlen = wclen;
++ if (wclen > 3)
++ wclen = 3;
++ if (wclen == 0)
++ {
++ if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
++ error (2, 0, err);
++ }
++ else
++ for (i = 0; i < (1 << wclen); i++)
++ {
++ char *p = starts;
++ int j, k;
++
++ for (j = 0; j < wclen; ++j)
++ {
++ wchar_t wc = wcbeg[j];
++ if (i & (1 << j))
++ {
++ wc = towupper (wc);
++ if (wc == wcbeg[j])
++ continue;
++ }
++ k = wctomb (p, wc);
++ if (k <= 0)
++ goto fimb_fail;
++ p += k;
++ }
++ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
++ error (2, 0, err);
++ }
++ if (wclim < wcpattern + wcsize)
++ ++wclim;
++ wcbeg = wclim;
++ }
++ while (wcbeg < wcpattern + wcsize);
++ f_i_multibyte = 1;
++ kwset = fimb_kwset;
++ free (starts);
++ Fimb.match = xmalloc (Fimb.count);
++ if ((err = kwsprep (kwset)) != 0)
++ error (2, 0, err);
++ return;
++ }
++ }
++#endif /* MBS_SUPPORT */
++
++
+ kwsinit ();
+ beg = pattern;
+ do
+@@ -511,6 +677,76 @@
+ error (2, 0, err);
+ }
+
++#ifdef MBS_SUPPORT
++static int
++Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
++{
++ size_t len, letter, i; /* byte offset in BUF, character index, pattern index */
++ int ret = -1; /* stays -1 until some pattern has matched in full */
++ mbstate_t mbs;
++ wchar_t wc;
++ int patterns_left; /* nonzero while a live pattern agreed with the current character */
++ /* Match the lowercased Fimb.patterns, case-insensitively, against the start of BUF. */
++ assert (match_icase && f_i_multibyte == 1);
++ assert (MB_CUR_MAX > 1);
++ /* Returns 0 with the matched byte length stored in *PLEN, or -1 if nothing matched. */
++ memset (&mbs, '\0', sizeof (mbs));
++ memset (Fimb.match, '\1', Fimb.count); /* every pattern starts out live */
++ letter = len = 0;
++ patterns_left = 1;
++ while (patterns_left && len <= size)
++ {
++ size_t c; /* bytes consumed by the current character */
++ /* Decode and lowercase the next character; once BUF is exhausted use L'\0'. */
++ patterns_left = 0;
++ if (len < size)
++ {
++ c = mbrtowc (&wc, buf + len, size - len, &mbs);
++ if (c + 2 <= 2) /* true when c is 0, (size_t) -1 or (size_t) -2 */
++ return ret;
++
++ wc = towlower (wc);
++ }
++ else
++ {
++ c = 1;
++ wc = L'\0';
++ }
++ /* Retire patterns that end or disagree here; keep going while any still agree. */
++ for (i = 0; i < Fimb.count; i++)
++ {
++ if (Fimb.match[i])
++ {
++ if (Fimb.patterns[i][letter] == L'\0')
++ {
++ /* Found a match: pattern i ended after LEN bytes of BUF. */
++ *plen = len;
++ if (!exact && !match_words)
++ return 0;
++ else
++ {
++ /* For -w or exact look for longest match, so a later, longer
++ pattern may still overwrite *plen. */
++ ret = 0;
++ Fimb.match[i] = '\0';
++ continue;
++ }
++ }
++
++ if (Fimb.patterns[i][letter] == wc)
++ patterns_left = 1;
++ else
++ Fimb.match[i] = '\0';
++ }
++ }
++
++ len += c;
++ letter++;
++ }
++
++ return ret;
++}
++#endif /* MBS_SUPPORT */
++
+ EXECUTE_FCT(Fexecute)
+ {
+ register char const *beg, *try, *end;
+@@ -519,69 +755,256 @@
+ struct kwsmatch kwsmatch;
+ size_t ret_val;
+ #ifdef MBS_SUPPORT
+- char *mb_properties = NULL;
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- {
+- char *case_buf = xmalloc(size);
+- memcpy(case_buf, buf, size);
+- if (start_ptr)
+- start_ptr = case_buf + (start_ptr - buf);
+- buf = case_buf;
+- }
+- mb_properties = check_multibyte_string(buf, size);
+- }
++ int mb_cur_max = MB_CUR_MAX;
++ mbstate_t mbs;
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ const char *last_char = NULL;
+ #endif /* MBS_SUPPORT */
+
+ for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
+ {
+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+ if (offset == (size_t) -1)
+- goto failure;
++ return offset;
+ #ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+- continue; /* It is a part of multibyte character. */
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ size_t bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++ last_char = beg;
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Invalid sequence or NUL byte: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ /* Offset points inside multibyte character: no good. */
++ break;
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++
++ if (bytes_left)
++ continue;
++ }
++ else
+ #endif /* MBS_SUPPORT */
+ beg += offset;
++#ifdef MBS_SUPPORT
++      /* For f_i_multibyte, the string at beg now matches first 3 chars of
++         one of the search strings (less if there are shorter search strings).
++         See if this is a real match; a non-NULL start_ptr is the "exact" request.  */
++      if (f_i_multibyte
++          && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr != NULL))
++        goto next_char;
++#endif /* MBS_SUPPORT */
+ len = kwsmatch.size[0];
+ if (start_ptr && !match_words)
+ goto success_in_beg_and_len;
+ if (match_lines)
+ {
+ if (beg > buf && beg[-1] != eol)
+- continue;
++ goto next_char;
+ if (beg + len < buf + size && beg[len] != eol)
+- continue;
++ goto next_char;
+ goto success;
+ }
+ else if (match_words)
+- for (try = beg; len; )
+- {
+- if (try > buf && WCHAR((unsigned char) try[-1]))
+- break;
+- if (try + len < buf + size && WCHAR((unsigned char) try[len]))
+- {
+- offset = kwsexec (kwset, beg, --len, &kwsmatch);
+- if (offset == (size_t) -1)
+- break;
+- try = beg + offset;
+- len = kwsmatch.size[0];
+- }
+- else if (!start_ptr)
+- goto success;
+- else
+- goto success_in_beg_and_len;
+- } /* for (try) */
+- else
+- goto success;
+- } /* for (beg in buf) */
++ {
++ while (len)
++ {
++ int word_match = 0;
++ if (beg > buf)
++ {
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1)
++ {
++ const char *s;
++ int mr;
++ wchar_t pwc;
++
++ if (using_utf8)
++ {
++ s = beg - 1;
++ while (s > buf
++ && (unsigned char) *s >= 0x80
++ && (unsigned char) *s <= 0xbf)
++ --s;
++ }
++ else
++ s = last_char;
++ mr = mbtowc (&pwc, s, beg - s);
++ if (mr <= 0)
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ else if ((iswalnum (pwc) || pwc == L'_')
++ && mr == (int) (beg - s))
++ goto next_char;
++ }
++ else
++#endif /* MBS_SUPPORT */
++ if (WCHAR ((unsigned char) beg[-1]))
++ goto next_char;
++ }
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1)
++ {
++ wchar_t nwc;
++ int mr;
+
+- failure:
+- ret_val = -1;
+- goto out;
++ mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
++ if (mr <= 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ word_match = 1;
++ }
++ else if (!iswalnum (nwc) && nwc != L'_')
++ word_match = 1;
++ }
++ else
++#endif /* MBS_SUPPORT */
++ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
++ word_match = 1;
++ if (word_match)
++ {
++ if (start_ptr == NULL)
++ /* Returns the whole line now we know there's a word match. */
++ goto success;
++ else {
++ /* Returns just this word match. */
++ *match_size = len;
++ return beg - buf;
++ }
++ }
++ if (len > 0)
++ {
++ /* Try a shorter length anchored at the same place. */
++ --len;
++ offset = kwsexec (kwset, beg, len, &kwsmatch);
++
++ if (offset == -1)
++ goto next_char; /* Try a different anchor. */
++#ifdef MBS_SUPPORT
++
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ size_t bytes_left = offset;
++ while (bytes_left)
++ {
++ size_t mlen = mbrlen (beg, bytes_left, &mbs);
++
++ last_char = beg;
++ if (mlen == (size_t) -1 || mlen == 0)
++ {
++ /* Invalid sequence or NUL byte: treat as single-byte. */
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ beg++;
++ bytes_left--;
++ continue;
++ }
++
++ if (mlen == (size_t) -2)
++ {
++ /* Offset points inside multibyte character:
++ * no good. */
++ break;
++ }
++
++ beg += mlen;
++ bytes_left -= mlen;
++ }
++
++ if (bytes_left)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ goto next_char; /* Try a different anchor. */
++ }
++ }
++ else
++#endif /* MBS_SUPPORT */
++ beg += offset;
++#ifdef MBS_SUPPORT
++                  /* The string at beg now matches first 3 chars of one of
++                     the search strings (less if there are shorter search
++                     strings).  See if this is a real match; exact means start_ptr is set.  */
++                  if (f_i_multibyte
++                      && Fimbexec (beg, len - offset, &kwsmatch.size[0],
++                                   start_ptr != NULL))
++                    goto next_char;
++#endif /* MBS_SUPPORT */
++ len = kwsmatch.size[0];
++ }
++ }
++ }
++ else
++ goto success;
++next_char:;
++#ifdef MBS_SUPPORT
++ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
++ by ++beg above. */
++ if (mb_cur_max > 1)
++ {
++ if (using_utf8)
++ {
++ unsigned char c = *beg;
++ if (c >= 0xc2)
++ {
++ if (c < 0xe0)
++ ++beg;
++ else if (c < 0xf0)
++ beg += 2;
++ else if (c < 0xf8)
++ beg += 3;
++ else if (c < 0xfc)
++ beg += 4;
++ else if (c < 0xfe)
++ beg += 5;
++ }
++ }
++ else
++ {
++ size_t l = mbrlen (beg, buf + size - beg, &mbs);
++
++ last_char = beg;
++ if (l + 2 >= 2)
++ beg += l - 1;
++ else
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ }
++ }
++#endif /* MBS_SUPPORT */
++ }
++
++ return -1;
+
+ success:
++#ifdef MBS_SUPPORT
++ if (mb_cur_max > 1 && !using_utf8)
++ {
++ end = beg + len;
++ while (end < buf + size)
++ {
++ size_t mlen = mbrlen (end, buf + size - end, &mbs);
++ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
++ {
++ memset (&mbs, '\0', sizeof (mbstate_t));
++ mlen = 1;
++ }
++ if (mlen == 1 && *end == eol)
++ break;
++
++ end += mlen;
++ }
++ }
++ else
++ #endif /* MBS_SUPPORT */
+ end = memchr (beg + len, eol, (buf + size) - (beg + len));
+ end++;
+ while (buf < beg && beg[-1] != eol)
+@@ -591,15 +1016,6 @@
+ *match_size = len;
+ ret_val = beg - buf;
+ out:
+-#ifdef MBS_SUPPORT
+- if (MB_CUR_MAX > 1)
+- {
+- if (match_icase)
+- free((char*)buf);
+- if (mb_properties)
+- free(mb_properties);
+- }
+-#endif /* MBS_SUPPORT */
+ return ret_val;
+ }
+ #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */
diff --git a/abs/core-testing/grep/PKGBUILD b/abs/core-testing/grep/PKGBUILD
new file mode 100644
index 0000000..168d2d3
--- /dev/null
+++ b/abs/core-testing/grep/PKGBUILD
@@ -0,0 +1,50 @@
+# $Id: PKGBUILD 356 2008-04-18 22:56:27Z aaron $
+# Maintainer: judd <jvinet@zeroflux.org>
+pkgname=grep
+pkgver=2.5.3
+pkgrel=12
+pkgdesc="A string search utility"
+arch=('i686' 'x86_64')
+license=('GPL')
+url="http://www.gnu.org/software/grep/grep.html"
+groups=('base')
+depends=('glibc' 'pcre')
+makedepends=('texinfo>=4.8a')
+source=(ftp://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.gz
+ # patches from fedora cvs; entries commented out below are not part of source=()
+ #01-fgrep.patch
+ #02-bracket.patch
+ #03-i18n.patch
+ #04-oi.patch
+ #05-manpage.patch
+ #06-color.patch
+ #07-icolor.patch
+ #08-skip.patch
+ #09-egf-speedup.patch
+ #10-dfa-optional.patch
+ #11-tests.patch
+ #12-w.patch
+ #13-P.patch
+ 14-mem-exhausted.patch
+ 15-empty-pattern.patch
+ 64-egf-speedup.patch
+)
+md5sums=('4f371f25f413f700fb1984b878421f9d'
+ 'bc937da562d468f32c1fef2894610283'
+ 'f421415b679ebcc9152797caaa0b1d51'
+ 'efbe9d49d71a74092db6b86224b09fdd') # one checksum per active source entry, in order
+
+
+
+build() {  # apply the shipped patches, build grep, and stage it with the binaries in /bin
+  cd $startdir/src/$pkgname-$pkgver
+  for i in ../*.patch; do
+    patch -Np1 -i "$i" || return 1  # the glob already yields ../name.patch; stop on a failed patch
+  done
+  ./configure --prefix=/usr
+  make || return 1
+  make DESTDIR=$startdir/pkg install || return 1
+  mkdir $startdir/pkg/bin  # the *grep binaries are moved from /usr/bin to /bin
+  mv $startdir/pkg/usr/bin/*grep $startdir/pkg/bin/
+  rmdir $startdir/pkg/usr/bin
+}