diff options
Diffstat (limited to 'abs/core-testing/grep/64-egf-speedup.patch')
-rw-r--r-- | abs/core-testing/grep/64-egf-speedup.patch | 791 |
1 files changed, 0 insertions, 791 deletions
diff --git a/abs/core-testing/grep/64-egf-speedup.patch b/abs/core-testing/grep/64-egf-speedup.patch deleted file mode 100644 index a1fa024..0000000 --- a/abs/core-testing/grep/64-egf-speedup.patch +++ /dev/null @@ -1,791 +0,0 @@ ---- a/src/search.c.orig -+++ b/src/search.c -@@ -18,10 +18,15 @@ - - /* Written August 1992 by Mike Haertel. */ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include <config.h> - #endif - -+#include <assert.h> -+ - #include <sys/types.h> - - #include "mbsupport.h" -@@ -43,6 +48,9 @@ - #ifdef HAVE_LIBPCRE - # include <pcre.h> - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include <langinfo.h> -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -68,6 +76,19 @@ - error (2, 0, _("memory exhausted")); - } - -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ -+void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ - #ifndef FGREP_PROGRAM - /* DFA compiled regexp. */ - static struct dfa dfa; -@@ -134,49 +155,6 @@ - } - #endif /* !FGREP_PROGRAM */ - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not single byte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = xmalloc(size); -- mbstate_t cur_state; -- wchar_t wc; -- int i; -- -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a single byte character. */ -- mbclen = 1; -- } -- else if (match_icase) -- { -- if (iswupper((wint_t)wc)) -- { -- wc = towlower((wint_t)wc); -- wcrtomb(buf + i, wc, &cur_state); -- } -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif /* MBS_SUPPORT */ -- - #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) - #ifdef EGREP_PROGRAM - COMPILE_FCT(Ecompile) -@@ -193,6 +171,7 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - #if 0 - if (match_icase) - syntax_bits |= RE_ICASE; -@@ -303,20 +282,9 @@ hunk6 - struct kwsmatch kwsm; - size_t i, ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- if (start_ptr) -- start_ptr = case_buf + (start_ptr - buf); -- buf = case_buf; -- } -- if (kwset) -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - - buflim = buf + size; -@@ -329,21 +282,63 @@ hunk6 - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. */ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) -- goto failure; -+ return (size_t)-1; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; - #endif - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) - goto success; - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; -@@ -351,13 +363,47 @@ - else - { - /* No good fixed strings; start with DFA. */ -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } -@@ -475,24 +521,144 @@ - *match_size = len; - ret_val = beg - buf; - out: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- free((char*)buf); -- if (mb_properties) -- free(mb_properties); -- } --#endif /* MBS_SUPPORT */ - return ret_val; - } - #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) - COMPILE_FCT(Fcompile) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. */ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) -+ Fimb.maxlen = wclen; -+ if (wclen > 3) -+ wclen = 3; -+ if (wclen == 0) -+ { -+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) -+ error (2, 0, err); -+ } -+ else -+ for (i = 0; i < (1 << wclen); i++) -+ { -+ char *p = starts; -+ int j, k; -+ -+ for (j = 0; j < wclen; ++j) -+ { -+ wchar_t wc = wcbeg[j]; -+ if (i & (1 << j)) -+ { -+ wc = towupper (wc); -+ if (wc == wcbeg[j]) -+ continue; -+ } -+ k = wctomb (p, wc); -+ if (k <= 0) -+ goto fimb_fail; -+ p += k; -+ } -+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) -+ error (2, 0, err); -+ } -+ if (wclim < wcpattern + wcsize) -+ ++wclim; -+ wcbeg = wclim; -+ } -+ while (wcbeg < wcpattern + wcsize); -+ f_i_multibyte = 1; -+ kwset = fimb_kwset; -+ free (starts); -+ Fimb.match = xmalloc (Fimb.count); -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ return; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ -+ - kwsinit (); - beg = pattern; - do -@@ -511,6 +677,76 @@ - error (2, 0, err); - } - -+#ifdef MBS_SUPPORT -+static int -+Fimbexec (const char *buf, size_t size, size_t *plen, int exact) -+{ -+ size_t len, letter, i; -+ int ret = -1; -+ mbstate_t mbs; -+ wchar_t wc; -+ int patterns_left; -+ -+ assert (match_icase && f_i_multibyte == 1); -+ assert (MB_CUR_MAX > 1); -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+ memset (Fimb.match, '\1', Fimb.count); -+ letter = len = 0; -+ patterns_left = 1; -+ while (patterns_left && len <= size) -+ { -+ size_t c; -+ -+ patterns_left = 0; -+ if (len < size) -+ { -+ c = mbrtowc (&wc, buf + len, size - len, &mbs); -+ if (c + 2 <= 2) -+ return ret; -+ -+ wc = towlower (wc); -+ } -+ else -+ { -+ c = 1; -+ wc = L'\0'; -+ } -+ -+ for (i = 0; i < Fimb.count; i++) -+ { -+ if (Fimb.match[i]) -+ { -+ if (Fimb.patterns[i][letter] == L'\0') -+ { -+ /* Found a match. */ -+ *plen = len; -+ if (!exact && !match_words) -+ return 0; -+ else -+ { -+ /* For -w or exact look for longest match. */ -+ ret = 0; -+ Fimb.match[i] = '\0'; -+ continue; -+ } -+ } -+ -+ if (Fimb.patterns[i][letter] == wc) -+ patterns_left = 1; -+ else -+ Fimb.match[i] = '\0'; -+ } -+ } -+ -+ len += c; -+ letter++; -+ } -+ -+ return ret; -+} -+#endif /* MBS_SUPPORT */ -+ - EXECUTE_FCT(Fexecute) - { - register char const *beg, *try, *end; -@@ -519,69 +755,256 @@ - struct kwsmatch kwsmatch; - size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- if (start_ptr) -- start_ptr = case_buf + (start_ptr - buf); -- buf = case_buf; -- } -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - - for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) - { - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); - if (offset == (size_t) -1) -- goto failure; -+ return offset; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -- continue; /* It is a part of multibyte character. */ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ continue; -+ } -+ else - #endif /* MBS_SUPPORT */ - beg += offset; -+#ifdef MBS_SUPPORT -+ /* For f_i_multibyte, the string at beg now matches first 3 chars of -+ one of the search strings (less if there are shorter search strings). -+ See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ - len = kwsmatch.size[0]; - if (start_ptr && !match_words) - goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -- continue; -+ goto next_char; - if (beg + len < buf + size && beg[len] != eol) -- continue; -+ goto next_char; - goto success; - } - else if (match_words) -- for (try = beg; len; ) -- { -- if (try > buf && WCHAR((unsigned char) try[-1])) -- break; -- if (try + len < buf + size && WCHAR((unsigned char) try[len])) -- { -- offset = kwsexec (kwset, beg, --len, &kwsmatch); -- if (offset == (size_t) -1) -- break; -- try = beg + offset; -- len = kwsmatch.size[0]; -- } -- else if (!start_ptr) -- goto success; -- else -- goto success_in_beg_and_len; -- } /* for (try) */ -- else -- goto success; -- } /* for (beg in buf) */ -+ { -+ while (len) -+ { -+ int word_match = 0; -+ if (beg > buf) -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg - s); -+ if (mr <= 0) -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ else if ((iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg - s)) -+ goto next_char; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (WCHAR ((unsigned char) beg[-1])) -+ goto next_char; -+ } -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; - -- failure: -- ret_val = -1; -- goto out; -+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ word_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ word_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) -+ word_match = 1; -+ if (word_match) -+ { -+ if (start_ptr == NULL) -+ /* Returns the whole line now we know there's a word match. */ -+ goto success; -+ else { -+ /* Returns just this word match. */ -+ *match_size = len; -+ return beg - buf; -+ } -+ } -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ offset = kwsexec (kwset, beg, len, &kwsmatch); -+ -+ if (offset == -1) -+ goto next_char; /* Try a different anchor. */ -+#ifdef MBS_SUPPORT -+ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ { -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ } -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ goto next_char; /* Try a different anchor. */ -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+#ifdef MBS_SUPPORT -+ /* The string at beg now matches first 3 chars of one of -+ the search strings (less if there are shorter search -+ strings). See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, len - offset, &kwsmatch.size[0], -+ start_ptr == NULL)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ -+ len = kwsmatch.size[0]; -+ } -+ } -+ } -+ else -+ goto success; -+next_char:; -+#ifdef MBS_SUPPORT -+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled -+ by ++beg above. */ -+ if (mb_cur_max > 1) -+ { -+ if (using_utf8) -+ { -+ unsigned char c = *beg; -+ if (c >= 0xc2) -+ { -+ if (c < 0xe0) -+ ++beg; -+ else if (c < 0xf0) -+ beg += 2; -+ else if (c < 0xf8) -+ beg += 3; -+ else if (c < 0xfc) -+ beg += 4; -+ else if (c < 0xfe) -+ beg += 5; -+ } -+ } -+ else -+ { -+ size_t l = mbrlen (beg, buf + size - beg, &mbs); -+ -+ last_char = beg; -+ if (l + 2 >= 2) -+ beg += l - 1; -+ else -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ } -+ -+ return -1; - - success: -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ end = beg + len; -+ while (end < buf + size) -+ { -+ size_t mlen = mbrlen (end, buf + size - end, &mbs); -+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ mlen = 1; -+ } -+ if (mlen == 1 && *end == eol) -+ break; -+ -+ end += mlen; -+ } -+ } -+ else -+ #endif /* MBS_SUPPORT */ - end = memchr (beg + len, eol, (buf + size) - (beg + len)); - end++; - while (buf < beg && beg[-1] != eol) -@@ -591,15 +1016,6 @@ - *match_size = len; - ret_val = beg - buf; - out: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- free((char*)buf); -- if (mb_properties) -- free(mb_properties); -- } --#endif /* MBS_SUPPORT */ - return ret_val; - } - #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ |