1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
--- grep-2.5.1a/src/search.c.w 2006-02-20 14:27:27.000000000 +0000
+++ grep-2.5.1a/src/search.c 2006-02-20 14:32:07.000000000 +0000
@@ -507,10 +507,114 @@
if (match_words)
while (start >= 0)
{
- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
- && (len == end - beg - 1
- || !WCHAR ((unsigned char) beg[start + len])))
- goto success_in_beg_and_end;
+ int lword_match = 0;
+ if (start == 0)
+ lword_match = 1;
+ else
+ {
+ assert (start > 0);
+#ifdef MBS_SUPPORT
+ if (mb_cur_max > 1)
+ {
+ const char *s;
+ size_t mr;
+ wchar_t pwc;
+
+ /* Locate the start of the multibyte character
+ before the match position (== beg + start). */
+ if (using_utf8)
+ {
+ /* UTF-8 is a special case: scan backwards
+ until we find a 7-bit character or a
+ lead byte. */
+ s = beg + start - 1;
+ while (s > buf
+ && (unsigned char) *s >= 0x80
+ && (unsigned char) *s <= 0xbf)
+ --s;
+ }
+ else
+ {
+ /* Scan forwards to find the start of the
+ last complete character before the
+ match position. */
+ size_t bytes_left = start - 1;
+ s = beg;
+ while (bytes_left > 0)
+ {
+ mr = mbrlen (s, bytes_left, &mbs);
+ if (mr == (size_t) -1 || mr == 0)
+ {
+ memset (&mbs, '\0', sizeof (mbs));
+ s++;
+ bytes_left--;
+ continue;
+ }
+ if (mr == (size_t) -2)
+ {
+ memset (&mbs, '\0', sizeof (mbs));
+ break;
+ }
+ s += mr;
+ bytes_left -= mr;
+ }
+ }
+ mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
+ if (mr == (size_t) -2 || mr == (size_t) -1 ||
+ mr == 0)
+ {
+ memset (&mbs, '\0', sizeof (mbstate_t));
+ lword_match = 1;
+ }
+ else if (!(iswalnum (pwc) || pwc == L'_')
+ && mr == beg + start - s)
+ lword_match = 1;
+ }
+ else
+#endif /* MBS_SUPPORT */
+ if (!WCHAR ((unsigned char) beg[start - 1]))
+ lword_match = 1;
+ }
+
+ if (lword_match)
+ {
+ int rword_match = 0;
+ if (start + len == end - beg - 1)
+ rword_match = 1;
+ else
+ {
+#ifdef MBS_SUPPORT
+ if (mb_cur_max > 1)
+ {
+ wchar_t nwc;
+ int mr;
+
+ mr = mbtowc (&nwc, beg + start + len,
+ end - beg - start - len - 1);
+ if (mr <= 0)
+ {
+ memset (&mbs, '\0', sizeof (mbstate_t));
+ rword_match = 1;
+ }
+ else if (!iswalnum (nwc) && nwc != L'_')
+ rword_match = 1;
+ }
+ else
+#endif /* MBS_SUPPORT */
+ if (!WCHAR ((unsigned char) beg[start + len]))
+ rword_match = 1;
+ }
+
+ if (rword_match)
+ {
+ if (!exact)
+ /* Returns the whole line. */
+ goto success_in_beg_and_end;
+ else
+ /* Returns just this word match. */
+ goto success_in_start_and_len;
+ }
+ }
if (len > 0)
{
/* Try a shorter length anchored at the same place. */
|