diff options
Diffstat (limited to 'abs/core/grep/09-egf-speedup.patch')
-rw-r--r-- | abs/core/grep/09-egf-speedup.patch | 823 |
1 files changed, 0 insertions, 823 deletions
diff --git a/abs/core/grep/09-egf-speedup.patch b/abs/core/grep/09-egf-speedup.patch deleted file mode 100644 index 08e92c7..0000000 --- a/abs/core/grep/09-egf-speedup.patch +++ /dev/null @@ -1,823 +0,0 @@ ---- grep-2.5.1/src/search.c 2004-12-31 15:28:35.720391036 +0000 -+++ grep-2.5.1a/src/search.c 2005-01-07 14:53:10.308860193 +0000 -@@ -18,9 +18,13 @@ - - /* Written August 1992 by Mike Haertel. */ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include <config.h> - #endif -+#include <assert.h> - #include <sys/types.h> - #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC - /* We can handle multibyte string. */ -@@ -39,6 +43,9 @@ - #ifdef HAVE_LIBPCRE - # include <pcre.h> - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include <langinfo.h> -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -70,9 +77,10 @@ - call the regexp matcher at all. */ - static int kwset_exact_matches; - --#if defined(MBS_SUPPORT) --static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); --#endif -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ - static void kwsinit PARAMS ((void)); - static void kwsmusts PARAMS ((void)); - static void Gcompile PARAMS ((char const *, size_t)); -@@ -84,6 +92,15 @@ - static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); - - void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ -+void - dfaerror (char const *mesg) - { - error (2, 0, mesg); -@@ -141,47 +158,6 @@ - } - } - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not singlebyte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = xmalloc(size); -- mbstate_t cur_state; -- wchar_t wc; -- int i; -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a singlebyte character. */ -- mbclen = 1; -- } -- else if (match_icase) -- { -- if (iswupper((wint_t)wc)) -- { -- wc = towlower((wint_t)wc); -- wcrtomb(buf + i, wc, &cur_state); -- } -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif -- - static void - Gcompile (char const *pattern, size_t size) - { -@@ -190,6 +166,7 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - -@@ -266,6 +243,7 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - if (strcmp (matcher, "awk") == 0) - { - re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); -@@ -350,18 +328,9 @@ - struct kwsmatch kwsm; - size_t i, ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- buf = case_buf; -- } -- if (kwset) -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - - buflim = buf + size; -@@ -373,21 +342,63 @@ - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. */ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) - goto failure; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; --#endif -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) - goto success_in_beg_and_end; - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; -@@ -395,13 +406,47 @@ - else - { - /* No good fixed strings; start with DFA. */ -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } -@@ -469,15 +514,6 @@ - } /* for (beg = end ..) */ - - failure: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - return (size_t) -1; - - success_in_beg_and_end: -@@ -486,24 +522,144 @@ - /* FALLTHROUGH */ - - success_in_start_and_len: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - *match_size = len; - return start; - } - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - static void - Fcompile (char const *pattern, size_t size) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. */ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) -+ Fimb.maxlen = wclen; -+ if (wclen > 3) -+ wclen = 3; -+ if (wclen == 0) -+ { -+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) -+ error (2, 0, err); -+ } -+ else -+ for (i = 0; i < (1 << wclen); i++) -+ { -+ char *p = starts; -+ int j, k; -+ -+ for (j = 0; j < wclen; ++j) -+ { -+ wchar_t wc = wcbeg[j]; -+ if (i & (1 << j)) -+ { -+ wc = towupper (wc); -+ if (wc == wcbeg[j]) -+ continue; -+ } -+ k = wctomb (p, wc); -+ if (k <= 0) -+ goto fimb_fail; -+ p += k; -+ } -+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) -+ error (2, 0, err); -+ } -+ if (wclim < wcpattern + wcsize) -+ ++wclim; -+ wcbeg = wclim; -+ } -+ while (wcbeg < wcpattern + wcsize); -+ f_i_multibyte = 1; -+ kwset = fimb_kwset; -+ free (starts); -+ Fimb.match = xmalloc (Fimb.count); -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ return; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ -+ - kwsinit (); - beg = pattern; - do -@@ -522,6 +678,76 @@ - error (2, 0, err); - } - -+#ifdef MBS_SUPPORT -+static int -+Fimbexec (const char *buf, size_t size, size_t *plen, int exact) -+{ -+ size_t len, letter, i; -+ int ret = -1; -+ mbstate_t mbs; -+ wchar_t wc; -+ int patterns_left; -+ -+ assert (match_icase && f_i_multibyte == 1); -+ assert (MB_CUR_MAX > 1); -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+ memset (Fimb.match, '\1', Fimb.count); -+ letter = len = 0; -+ patterns_left = 1; -+ while (patterns_left && len <= size) -+ { -+ size_t c; -+ -+ patterns_left = 0; -+ if (len < size) -+ { -+ c = mbrtowc (&wc, buf + len, size - len, &mbs); -+ if (c + 2 <= 2) -+ return ret; -+ -+ wc = towlower (wc); -+ } -+ else -+ { -+ c = 1; -+ wc = L'\0'; -+ } -+ -+ for (i = 0; i < Fimb.count; i++) -+ { -+ if (Fimb.match[i]) -+ { -+ if (Fimb.patterns[i][letter] == L'\0') -+ { -+ /* Found a match. */ -+ *plen = len; -+ if (!exact && !match_words) -+ return 0; -+ else -+ { -+ /* For -w or exact look for longest match. */ -+ ret = 0; -+ Fimb.match[i] = '\0'; -+ continue; -+ } -+ } -+ -+ if (Fimb.patterns[i][letter] == wc) -+ patterns_left = 1; -+ else -+ Fimb.match[i] = '\0'; -+ } -+ } -+ -+ len += c; -+ letter++; -+ } -+ -+ return ret; -+} -+#endif /* MBS_SUPPORT */ -+ - static size_t - Fexecute (char const *buf, size_t size, size_t *match_size, int exact) - { -@@ -531,80 +757,258 @@ - struct kwsmatch kwsmatch; - size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- buf = case_buf; -- } -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - - for (beg = buf; beg <= buf + size; ++beg) - { -- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ size_t offset; -+ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ - if (offset == (size_t) -1) - goto failure; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -- continue; /* It is a part of multibyte character. */ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ continue; -+ } -+ else - #endif /* MBS_SUPPORT */ - beg += offset; -+#ifdef MBS_SUPPORT -+ /* For f_i_multibyte, the string at beg now matches first 3 chars of -+ one of the search strings (less if there are shorter search strings). -+ See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ - len = kwsmatch.size[0]; - if (exact && !match_words) - goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -- continue; -+ goto next_char; - if (beg + len < buf + size && beg[len] != eol) -- continue; -+ goto next_char; - goto success; - } - else if (match_words) -- for (try = beg; len; ) -- { -- if (try > buf && WCHAR((unsigned char) try[-1])) -- break; -- if (try + len < buf + size && WCHAR((unsigned char) try[len])) -- { -- offset = kwsexec (kwset, beg, --len, &kwsmatch); -- if (offset == (size_t) -1) -- { -+ { -+ while (len) -+ { -+ int word_match = 0; -+ if (beg > buf) -+ { - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg - s); -+ if (mr <= 0) -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ else if ((iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg - s)) -+ goto next_char; -+ } -+ else - #endif /* MBS_SUPPORT */ -- return offset; -- } -- try = beg + offset; -- len = kwsmatch.size[0]; -- } -- else -- goto success; -- } -+ if (WCHAR ((unsigned char) beg[-1])) -+ goto next_char; -+ } -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ word_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ word_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) -+ word_match = 1; -+ if (word_match) -+ { -+ if (!exact) -+ /* Returns the whole line now we know there's a word match. */ -+ goto success; -+ else -+ /* Returns just this word match. */ -+ goto success_in_beg_and_len; -+ } -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ offset = kwsexec (kwset, beg, len, &kwsmatch); -+ -+ if (offset == -1) -+ goto next_char; /* Try a different anchor. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ { -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ } -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ goto next_char; /* Try a different anchor. */ -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+#ifdef MBS_SUPPORT -+ /* The string at beg now matches first 3 chars of one of -+ the search strings (less if there are shorter search -+ strings). See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, len - offset, &kwsmatch.size[0], -+ exact)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ -+ len = kwsmatch.size[0]; -+ } -+ } -+ } - else - goto success; -+next_char:; -+#ifdef MBS_SUPPORT -+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled -+ by ++beg above. */ -+ if (mb_cur_max > 1) -+ { -+ if (using_utf8) -+ { -+ unsigned char c = *beg; -+ if (c >= 0xc2) -+ { -+ if (c < 0xe0) -+ ++beg; -+ else if (c < 0xf0) -+ beg += 2; -+ else if (c < 0xf8) -+ beg += 3; -+ else if (c < 0xfc) -+ beg += 4; -+ else if (c < 0xfe) -+ beg += 5; -+ } -+ } -+ else -+ { -+ size_t l = mbrlen (beg, buf + size - beg, &mbs); -+ -+ last_char = beg; -+ if (l + 2 >= 2) -+ beg += l - 1; -+ else -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ } -+ } -+#endif /* MBS_SUPPORT */ - } - - failure: -+ return -1; -+ -+ success: - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -+ if (mb_cur_max > 1 && !using_utf8) - { -- if (match_icase) -- free((char *) buf); -- if (mb_properties) -- free(mb_properties); -+ end = beg + len; -+ while (end < buf + size) -+ { -+ size_t mlen = mbrlen (end, buf + size - end, &mbs); -+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ mlen = 1; -+ } -+ if (mlen == 1 && *end == eol) -+ break; -+ -+ end += mlen; -+ } - } -+ else - #endif /* MBS_SUPPORT */ -- return -1; -- -- success: - end = memchr (beg + len, eol, (buf + size) - (beg + len)); -+ - end++; - while (buf < beg && beg[-1] != eol) - --beg; -@@ -613,15 +1017,6 @@ - - success_in_beg_and_len: - *match_size = len; --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (mb_properties) -- free (mb_properties); -- if (match_icase) -- free ((char *) buf); -- } --#endif /* MBS_SUPPORT */ - return beg - buf; - } - |