abs/extra/icu/icu.8198.revert.icu5431.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

Index: icu/trunk/source/layout/IndicReordering.cpp
===================================================================
--- icu/trunk/source/layout/IndicReordering.cpp	(revision 25772)
+++ icu/trunk/source/layout/IndicReordering.cpp	(revision 26090)
@@ -126,4 +126,8 @@
     FeatureMask fSMFeatures;
 
+    LEUnicode   fPreBaseConsonant;
+    LEUnicode   fPreBaseVirama;
+    le_int32    fPBCIndex;
+    FeatureMask fPBCFeatures;
 
     void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass)
@@ -172,5 +176,6 @@
           fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
           fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
-          fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
+          fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
+          fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
     {
         // nothing else to do...
@@ -191,4 +196,6 @@
         fVMabove = fVMpost  = 0;
         fSMabove = fSMbelow = 0;
+
+        fPreBaseConsonant = fPreBaseVirama = 0;
     }
 
@@ -386,4 +393,12 @@
     }
 
+    void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features)
+    {
+        fPBCIndex = index;
+        fPreBaseConsonant = PBConsonant;
+        fPreBaseVirama = PBVirama;
+        fPBCFeatures = features;
+    }
+
     void noteBaseConsonant()
     {
@@ -465,4 +480,20 @@
     }
     
+    void writePreBaseConsonant()
+    {
+        // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam.  However,
+        // it seems that almost none of the fonts for Malayalam are set up to handle this.
+        // So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
+
+        if (fPreBaseConsonant == 0x0d31) { // RRA
+            fPreBaseConsonant = 0x0d30; // RA
+        }
+        
+        if (fPreBaseConsonant != 0) {
+            writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures);
+            writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures);
+        }
+    }
+
     le_int32 getOutputIndex()
     {
@@ -723,4 +754,5 @@
             }
 
+            
             IndicClassTable::CharClass charClass = CC_RESERVED;
             IndicClassTable::CharClass nextClass = CC_RESERVED;
@@ -730,7 +762,9 @@
             le_bool  seenVattu = FALSE;
             le_bool  seenBelowBaseForm = FALSE;
+            le_bool  seenPreBaseForm = FALSE;
             le_bool  hasNukta = FALSE;
             le_bool  hasBelowBaseForm = FALSE;
             le_bool  hasPostBaseForm = FALSE;
+            le_bool  hasPreBaseForm = FALSE;
 
             if (postBase < markStart && classTable->isNukta(chars[postBase])) {
@@ -746,12 +780,20 @@
                 hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
                 hasPostBaseForm  = IndicClassTable::hasPostBaseForm(charClass)  && !hasNukta;
+                hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta;
 
                 if (IndicClassTable::isConsonant(charClass)) {
                     if (postBaseLimit == 0 || seenVattu ||
                         (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
-                        !(hasBelowBaseForm || hasPostBaseForm)) {
+                        !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) {
                         break;
                     }
 
+                    // Note any pre-base consonants
+                    if ( baseConsonant == lastConsonant && lastConsonant > 0 && 
+                         hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) {
+                        output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2);
+                        seenPreBaseForm = TRUE;
+   
+                    }
                     // consonants with nuktas are never vattus
                     seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
@@ -786,10 +828,12 @@
 
             // write any pre-base consonants
+            output.writePreBaseConsonant();
+
             le_bool supressVattu = TRUE;
 
             for (i = baseLimit; i < baseConsonant; i += 1) {
                 LEUnicode ch = chars[i];
-                // Don't put 'blwf' on first consonant.
-                FeatureMask features = (i == baseLimit? tagArray2 : tagArray1);
+                // Don't put 'pstf' or 'blwf' on anything before the base consonant.
+                FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask );
 
                 charClass = classTable->getCharClass(ch);
@@ -842,5 +886,5 @@
 
             // write below-base consonants
-            if (baseConsonant != lastConsonant) {
+            if (baseConsonant != lastConsonant && !seenPreBaseForm) {
                 for (i = bcSpan + 1; i < postBase; i += 1) {
                     output.writeChar(chars[i], i, tagArray1);
@@ -872,5 +916,5 @@
             // write post-base consonants
             // FIXME: does this put the right tags on post-base consonants?
-            if (baseConsonant != lastConsonant) {
+            if (baseConsonant != lastConsonant && !seenPreBaseForm) {
                 if (postBase <= lastConsonant) {
                     for (i = postBase; i <= lastConsonant; i += 1) {