abs/core/coreutils/coreutils-7.1-sort-endoffields.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

diff -urNp coreutils-7.1-orig/src/sort.c coreutils-7.1/src/sort.c
--- coreutils-7.1-orig/src/sort.c	2009-02-25 16:15:52.000000000 +0100
+++ coreutils-7.1/src/sort.c	2009-02-25 16:20:35.000000000 +0100
@@ -1598,6 +1598,9 @@ limfield_uni (const struct line *line, c
   size_t eword = key->eword, echar = key->echar;
   size_t remaining_bytes;
 
+  if (echar == 0)
+    eword++; /* skip all of end field. */
+
   /* Move PTR past EWORD fields or to one past the last byte on LINE,
      whichever comes first.  If there are more than EWORD fields, leave
      PTR pointing at the beginning of the field having zero-based index,
@@ -1673,19 +1676,22 @@ limfield_uni (const struct line *line, c
     }
 #endif
 
-  /* If we're ignoring leading blanks when computing the End
-     of the field, don't start counting bytes until after skipping
-     past any leading blanks. */
-  if (key->skipeblanks)
-    while (ptr < lim && blanks[to_uchar (*ptr)])
-      ++ptr;
 
-  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
-  remaining_bytes = lim - ptr;
-  if (echar < remaining_bytes)
-    ptr += echar;
-  else
-    ptr = lim;
+  if (echar != 0) /* We need to skip over a portion of the end field.  */
+    {
+      if (key->skipeblanks) /* blanks not counted in echar.  */
+        {
+          while (ptr < lim && blanks[to_uchar (*ptr)])
+            ++ptr;
+        }
+
+      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
+      remaining_bytes = lim - ptr;
+      if (echar < remaining_bytes)
+        ptr += echar;
+      else
+        ptr = lim;
+    }
 
   return ptr;
 }
@@ -3736,12 +3742,9 @@ main (int argc, char **argv)
 		  badfieldspec (optarg, N_("field number is zero"));
 		}
 	      if (*s == '.')
-		s = parse_field_count (s + 1, &key->echar,
-				       N_("invalid number after `.'"));
-	      else
 		{
-		  /* `-k 2,3' is equivalent to `+1 -3'.  */
-		  key->eword++;
+                 s = parse_field_count (s + 1, &key->echar,
+                                        N_("invalid number after `.'"));
 		}
 	      s = set_ordering (s, key, bl_end);
 	    }
diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort
--- coreutils-7.1-orig/tests/misc/sort	2009-01-27 22:11:25.000000000 +0100
+++ coreutils-7.1/tests/misc/sort	2009-02-25 16:21:48.000000000 +0100
@@ -24,6 +24,10 @@ my $prog = 'sort';
 # Turn off localization of executable's output.
 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 
+my $mb_locale = $ENV{LOCALE_FR_UTF8};
+! defined $mb_locale || $mb_locale eq 'none'
+ and $mb_locale = 'C';
+
 # Since each test is run with a file name and with redirected stdin,
 # the name in the diagnostic is either the file name or "-".
 # Normalize each diagnostic to use '-'.
@@ -110,6 +114,8 @@ my @Tests =
 ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
 ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
 ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
+["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
+
 #
 # report an error for `.' without following char spec
 ["08a", '-k 2.,3', {EXIT=>2},
@@ -210,6 +216,15 @@ my @Tests =
 # key start and key end.
 ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
 
+# When ignoring leading blanks for end position, ensure blanks from
+# next field are not included in the sort. I.E. order should not change here.
+["18f", '-k1,1b', {IN=>"a  y\na z\n"}, {OUT=>"a  y\na z\n"}],
+
+# When ignoring leading blanks for start position, ensure blanks from
+# next field are not included in the sort. I.E. order should not change here.
+# This was noticed as an issue on fedora 8 (only in multibyte locales).
+["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
 # This looks odd, but works properly -- 2nd keyspec is never
 # used because all lines are different.
 ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],