1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
--- src/codecs/qutfcodec.cpp
+++ src/codecs/qutfcodec.cpp
@@ -154,6 +154,7 @@
class QUtf8Decoder : public QTextDecoder {
uint uc;
+ uint min_uc;
int need;
bool headerDone;
public:
@@ -167,8 +168,9 @@
result.setLength( len ); // worst case
QChar *qch = (QChar *)result.unicode();
uchar ch;
+ int error = -1;
for (int i=0; i<len; i++) {
- ch = *chars++;
+ ch = chars[i];
if (need) {
if ( (ch&0xc0) == 0x80 ) {
uc = (uc << 6) | (ch & 0x3f);
@@ -182,6 +184,8 @@
*qch++ = QChar(high);
*qch++ = QChar(low);
headerDone = TRUE;
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
+ *qch++ = QChar::replacement;
} else {
if (headerDone || QChar(uc) != QChar::byteOrderMark)
*qch++ = uc;
@@ -190,6 +194,7 @@
}
} else {
// error
+ i = error;
*qch++ = QChar::replacement;
need = 0;
}
@@ -200,12 +205,21 @@
} else if ((ch & 0xe0) == 0xc0) {
uc = ch & 0x1f;
need = 1;
+ error = i;
+ min_uc = 0x80;
} else if ((ch & 0xf0) == 0xe0) {
uc = ch & 0x0f;
need = 2;
+ error = i;
+ min_uc = 0x800;
} else if ((ch&0xf8) == 0xf0) {
uc = ch & 0x07;
need = 3;
+ error = i;
+ min_uc = 0x10000;
+ } else {
+ // error
+ *qch++ = QChar::replacement;
}
}
}
--- src/tools/qstring.cpp
+++ src/tools/qstring.cpp
@@ -5805,6 +5805,7 @@
result.setLength( len ); // worst case
QChar *qch = (QChar *)result.unicode();
uint uc = 0;
+ uint min_uc = 0;
int need = 0;
int error = -1;
uchar ch;
@@ -5822,6 +5823,12 @@
unsigned short low = uc%0x400 + 0xdc00;
*qch++ = QChar(high);
*qch++ = QChar(low);
+ } else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
+ // overlong sequence, UTF-16 surrogate, or uc >= 0xfffe (byte-swapped BOM and above)
+ i = error;
+ qch = addOne(qch, result);
+ *qch++ = QChar(0xdbff);
+ *qch++ = QChar(0xde00+((uchar)utf8[i]));
} else {
*qch++ = uc;
}
@@ -5844,14 +5851,17 @@
uc = ch & 0x1f;
need = 1;
error = i;
+ min_uc = 0x80;
} else if ((ch & 0xf0) == 0xe0) {
uc = ch & 0x0f;
need = 2;
error = i;
+ min_uc = 0x800;
} else if ((ch&0xf8) == 0xf0) {
uc = ch & 0x07;
need = 3;
error = i;
+ min_uc = 0x10000;
} else {
// Error
qch = addOne(qch, result);
|