diff options
author | James Meyer <james.meyer@operamail.com> | 2010-09-10 01:46:40 (GMT) |
---|---|---|
committer | James Meyer <james.meyer@operamail.com> | 2010-09-10 01:46:40 (GMT) |
commit | 5c35c43b8a5d3e1912e4fcb44cc1e210b20322ca (patch) | |
tree | d7bc342b2b23a2b9cac188db883e86c43634256f /abs/core-testing/coreutils | |
parent | 690fa2bf80dba5e7576234094575bef0475e1f74 (diff) | |
download | linhes_pkgbuild-5c35c43b8a5d3e1912e4fcb44cc1e210b20322ca.zip linhes_pkgbuild-5c35c43b8a5d3e1912e4fcb44cc1e210b20322ca.tar.gz linhes_pkgbuild-5c35c43b8a5d3e1912e4fcb44cc1e210b20322ca.tar.bz2 |
core-utils: update to work with new glibc
Diffstat (limited to 'abs/core-testing/coreutils')
-rw-r--r-- | abs/core-testing/coreutils/PKGBUILD | 90 | ||||
-rw-r--r-- | abs/core-testing/coreutils/__changelog | 2 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-6.10-configuration.patch | 88 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-7.1-cp-recursiveinfloop.patch | 154 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-7.1-sort-endoffields.patch | 102 | ||||
l--------- | abs/core-testing/coreutils/coreutils-8.5-2-i686.pkg.tar.gz | 1 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-i18n.patch | 4065 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-pam.patch | 241 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils-uname.patch | 30 | ||||
-rw-r--r-- | abs/core-testing/coreutils/coreutils.install | 4 | ||||
-rw-r--r-- | abs/core-testing/coreutils/futimes.patch | 47 |
11 files changed, 183 insertions, 4641 deletions
diff --git a/abs/core-testing/coreutils/PKGBUILD b/abs/core-testing/coreutils/PKGBUILD index b8418e9..2c6fab9 100644 --- a/abs/core-testing/coreutils/PKGBUILD +++ b/abs/core-testing/coreutils/PKGBUILD @@ -1,73 +1,67 @@ -# $Id: PKGBUILD 29340 2009-03-08 00:18:55Z andyrtr $ -# Maintainer: Andreas Radke <andyrtr@archlinux.org> +# $Id: PKGBUILD 82551 2010-06-13 12:07:11Z allan $ +# Maintainer: Allan McRae <allan@archlinux.org> # Contributor: judd <jvinet@zeroflux.org> pkgname=coreutils -pkgver=7.1 -pkgrel=5 +pkgver=8.5 +pkgrel=2 pkgdesc="The basic file, shell and text manipulation utilities of the GNU operating system" -arch=(i686 x86_64) +arch=('i686' 'x86_64') license=('GPL3') url="http://www.gnu.org/software/coreutils" groups=('base') -depends=('glibc>=2.9-4' 'shadow>=4.1.2.1-2' 'pam>=1.0.3' 'acl>=2.2.47-1' 'gmp>=4.2.4') -provides=('mktemp') -conflicts=('mktemp') -replaces=('sh-utils' 'fileutils' 'textutils' 'mktemp') +depends=('glibc' 'shadow' 'pam' 'acl' 'gmp>=5.0' 'libcap') +replaces=('mktemp') backup=('etc/pam.d/su') install=${pkgname}.install -options=('!emptydirs' '!makeflags') -source=(ftp://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.gz - coreutils-i18n.patch +options=('!emptydirs') +source=(ftp://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.xz coreutils-uname.patch coreutils-pam.patch - coreutils-6.10-configuration.patch - coreutils-7.1-sort-endoffields.patch - coreutils-7.1-cp-recursiveinfloop.patch - su) + su.pam) +md5sums=('55170ed640e300f5b81640c6f4641513' + 'c4fcca138b6abf6d443d48a6f0cd8833' + 'aad79a2aa6d566c375d7bdd1b0767278' + 'fa85e5cce5d723275b14365ba71a8aad') build() { cd ${srcdir}/${pkgname}-${pkgver} - # added pam patch and i18n patch from fedora cvs -# patch -Np1 -i ../coreutils-pam.patch || return 1 - patch -Np1 -i ../coreutils-i18n.patch || return 1 - patch -Np1 -i ../coreutils-6.10-configuration.patch || return 1 + # added su wheel group pam patch (from fedora cvs) + patch -Np1 -i ${srcdir}/coreutils-pam.patch - # from gentoo portage - patch -Np1 -i ../coreutils-uname.patch || return 1 + # linux specific uname improvement (from gentoo portage) + patch -Np1 -i ${srcdir}/coreutils-uname.patch - # bugfix patches from fedora - patch -Np1 -i ../coreutils-7.1-sort-endoffields.patch || return 1 - patch -Np1 -i ../coreutils-7.1-cp-recursiveinfloop.patch || return 1 - - # only needed if new autoconf 2.62 is used - sed -i 's/1.10a/1.10.2/' configure.ac || return 1 # aclocal fix - sed -i 's/dist-xz/dist-lzma/' configure.ac || return 1 autoreconf -v - ./configure --prefix=/usr \ - --enable-install-program=su \ - --enable-pam ac_cv_func_openat=no || return 1 - make || return 1 - make DESTDIR=${pkgdir} install || return 1 + --enable-install-program=su \ + --enable-no-install-program=groups,hostname,kill,uptime \ + --enable-pam ac_cv_func_openat=no + make + make check +} - rm -f ${pkgdir}/usr/bin/hostname ${pkgdir}/usr/share/man/man1/hostname.1 || return 1 - rm -f ${pkgdir}/usr/bin/uptime ${pkgdir}/usr/share/man/man1/uptime.1 || return 1 - rm -f ${pkgdir}/usr/bin/groups ${pkgdir}/usr/share/man/man1/groups.1 || return 1 - rm -f ${pkgdir}/usr/bin/kill ${pkgdir}/usr/share/man/man1/kill.1|| return 1 +package() { + cd ${srcdir}/${pkgname}-${pkgver} + make DESTDIR=${pkgdir} install + cd ${pkgdir}/usr/bin - mkdir -p ${pkgdir}/bin ${pkgdir}/sbin ${pkgdir}/usr/sbin - mv su date echo false pwd stty true uname cat tr cut readlink ../../bin - mv dd cp df du ln ls mv rm dir sync vdir chgrp chmod chown ../../bin - mv mkdir mknod rmdir shred touch mkfifo dircolors install sleep ../../bin - mv chroot ../sbin - ln -sf test [ - ln -sf /bin/sleep ${pkgdir}/usr/bin/sleep - install -D -m644 $startdir/src/su ${pkgdir}/etc/pam.d/su - + install -dm755 ${pkgdir}/{bin,usr/sbin} + + # binaries required by FHS + _fhs="cat chgrp chmod chown cp date dd df echo false ln ls \ + mkdir mknod mv pwd rm rmdir stty su sync true uname" + mv ${_fhs} ${pkgdir}/bin ls -lha ${pkgdir}/bin/su chmod -v 4555 ${pkgdir}/bin/su - rm -f ${pkgdir}/usr/share/info/dir + # binaries required by various Arch scripts + _bin="cut dir dircolors du install mkfifo readlink shred \ + sleep touch tr vdir" + mv ${_bin} ${pkgdir}/bin + ln -sf /bin/sleep ${pkgdir}/usr/bin/sleep + + mv chroot ${pkgdir}/usr/sbin + install -Dm644 ${srcdir}/su.pam ${pkgdir}/etc/pam.d/su } diff --git a/abs/core-testing/coreutils/__changelog b/abs/core-testing/coreutils/__changelog deleted file mode 100644 index f24cf44..0000000 --- a/abs/core-testing/coreutils/__changelog +++ /dev/null @@ -1,2 +0,0 @@ -removed PAM patch, as it causes su to go bonkers and log you out. - diff --git a/abs/core-testing/coreutils/coreutils-6.10-configuration.patch b/abs/core-testing/coreutils/coreutils-6.10-configuration.patch deleted file mode 100644 index b9bddda..0000000 --- a/abs/core-testing/coreutils/coreutils-6.10-configuration.patch +++ /dev/null @@ -1,88 +0,0 @@ -diff -urN coreutils-6.11-orig/tests/mkdir/selinux coreutils-6.11/tests/mkdir/selinux ---- coreutils-6.11-orig/tests/mkdir/selinux 2008-04-19 23:34:23.000000000 +0200 -+++ coreutils-6.11/tests/mkdir/selinux 2008-04-22 13:23:50.000000000 +0200 -@@ -38,6 +28,7 @@ - # successfully, in spite of the invalid context string. - - . $srcdir/test-lib.sh -+require_selinux_ - - c=invalid-selinux-context - msg="failed to set default file creation context to \`$c':" -diff -urNp coreutils-6.11-orig/tests/test-lib.sh coreutils-6.11/tests/test-lib.sh ---- coreutils-6.11-orig/tests/test-lib.sh 2008-04-19 23:34:23.000000000 +0200 -+++ coreutils-6.11/tests/test-lib.sh 2008-04-24 14:18:59.000000000 +0200 -@@ -97,8 +97,8 @@ skip_if_() - - require_selinux_() - { -- case `ls -Zd .` in -- '? .'|'unlabeled .') -+ case `ls --scontext -d . | cut -f1 -d" "` in -+ '?'|'unlabeled') - skip_test_ "this system (or maybe just" \ - "the current file system) lacks SELinux support" - ;; -diff -urNp coreutils-7.1-orig/gnulib-tests/test-getaddrinfo.c coreutils-7.1/gnulib-tests/test-getaddrinfo.c ---- coreutils-7.1-orig/gnulib-tests/test-getaddrinfo.c 2009-01-27 21:33:19.000000000 +0100 -+++ coreutils-7.1/gnulib-tests/test-getaddrinfo.c 2009-02-25 13:52:59.000000000 +0100 -@@ -36,6 +36,8 @@ - # define dbgprintf if (0) printf - #endif - -+static int skip = 0; -+ - /* BeOS does not have AF_UNSPEC. */ - #ifndef AF_UNSPEC - # define AF_UNSPEC 0 -@@ -52,6 +54,9 @@ int simple (char *host, char *service) - struct addrinfo *ai0, *ai; - int res; - -+ if (skip) -+ return 0; -+ - dbgprintf ("Finding %s service %s...\n", host, service); - - /* This initializes "hints" but does not use it. Is there a reason -@@ -72,8 +77,12 @@ int simple (char *host, char *service) - in-law's farm. */ - if (res == EAI_AGAIN) - { -- fprintf (stderr, "skipping getaddrinfo test: no network?\n"); -- return 77; -+ if (!skip) -+ { -+ skip++; -+ fprintf (stderr, "skipping getaddrinfo test: no network?\n"); -+ return 77; -+ } - } - /* IRIX reports EAI_NONAME for "https". Don't fail the test - merely because of this. */ -diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c ---- coreutils-7.1-orig/src/ls.c 2009-02-25 13:23:59.000000000 +0100 -+++ coreutils-7.1/src/ls.c 2009-02-25 13:25:20.000000000 +0100 -@@ -38,10 +38,6 @@ - #include <config.h> - #include <sys/types.h> - --#ifdef HAVE_CAP --# include <sys/capability.h> --#endif -- - #if HAVE_TERMIOS_H - # include <termios.h> - #endif -@@ -84,6 +80,10 @@ - #include "system.h" - #include <fnmatch.h> - -+#ifdef HAVE_CAP -+# include <sys/capability.h> -+#endif -+ - #include "acl.h" - #include "argmatch.h" - #include "dev-ino.h" - diff --git a/abs/core-testing/coreutils/coreutils-7.1-cp-recursiveinfloop.patch b/abs/core-testing/coreutils/coreutils-7.1-cp-recursiveinfloop.patch deleted file mode 100644 index 963af0b..0000000 --- a/abs/core-testing/coreutils/coreutils-7.1-cp-recursiveinfloop.patch +++ /dev/null @@ -1,154 +0,0 @@ -diff -urNp coreutils-7.1-orig/src/copy.c coreutils-7.1/src/copy.c ---- coreutils-7.1-orig/src/copy.c 2009-02-27 12:07:29.000000000 +0100 -+++ coreutils-7.1/src/copy.c 2009-02-27 12:14:29.000000000 +0100 -@@ -104,6 +104,7 @@ static bool copy_internal (char const *s - struct dir_list *ancestors, - const struct cp_options *x, - bool command_line_arg, -+ bool *first_dir_created_per_command_line_arg, - bool *copy_into_self, - bool *rename_succeeded); - static bool owner_failure_ok (struct cp_options const *x); -@@ -201,13 +202,16 @@ copy_attr_by_name (char const *src_path, - DST_NAME_IN is a directory that was created previously in the - recursion. SRC_SB and ANCESTORS describe SRC_NAME_IN. - Set *COPY_INTO_SELF if SRC_NAME_IN is a parent of -+ FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG FIXME - (or the same as) DST_NAME_IN; otherwise, clear it. - Return true if successful. */ - - static bool - copy_dir (char const *src_name_in, char const *dst_name_in, bool new_dst, - const struct stat *src_sb, struct dir_list *ancestors, -- const struct cp_options *x, bool *copy_into_self) -+ const struct cp_options *x, -+ bool *first_dir_created_per_command_line_arg, -+ bool *copy_into_self) - { - char *name_space; - char *namep; -@@ -237,12 +241,20 @@ copy_dir (char const *src_name_in, char - - ok &= copy_internal (src_name, dst_name, new_dst, src_sb->st_dev, - ancestors, &non_command_line_options, false, -+ first_dir_created_per_command_line_arg, - &local_copy_into_self, NULL); - *copy_into_self |= local_copy_into_self; - - free (dst_name); - free (src_name); - -+ /* If we're copying into self, there's no point in continuing, -+ and in fact, that would even infloop, now that we record only -+ the first created directory per command line argument. */ -+ if (local_copy_into_self) -+ break; -+ -+ - namep += strlen (namep) + 1; - } - free (name_space); -@@ -1125,6 +1137,7 @@ restore_default_fscreatecon_or_die (void - not known. ANCESTORS points to a linked, null terminated list of - devices and inodes of parent directories of SRC_NAME. COMMAND_LINE_ARG - is true iff SRC_NAME was specified on the command line. -+ FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG is both input and output. - Set *COPY_INTO_SELF if SRC_NAME is a parent of (or the - same as) DST_NAME; otherwise, clear it. - Return true if successful. */ -@@ -1135,6 +1148,7 @@ copy_internal (char const *src_name, cha - struct dir_list *ancestors, - const struct cp_options *x, - bool command_line_arg, -+ bool *first_dir_created_per_command_line_arg, - bool *copy_into_self, - bool *rename_succeeded) - { -@@ -1815,11 +1829,15 @@ copy_internal (char const *src_name, cha - } - } - -- /* Insert the created directory's inode and device -- numbers into the search structure, so that we can -- avoid copying it again. */ -- if (!x->hard_link) -- remember_copied (dst_name, dst_sb.st_ino, dst_sb.st_dev); -+ /* Record the created directory's inode and device numbers into -+ the search structure, so that we can avoid copying it again. -+ Do this only for the first directory that is created for each -+ source command line argument. */ -+ if (!*first_dir_created_per_command_line_arg) -+ { -+ remember_copied (dst_name, dst_sb.st_ino, dst_sb.st_dev); -+ *first_dir_created_per_command_line_arg = true; -+ } - - if (x->verbose) - emit_verbose (src_name, dst_name, NULL); -@@ -1840,6 +1858,7 @@ copy_internal (char const *src_name, cha - in a source directory would cause the containing destination - directory not to have owner/perms set properly. */ - delayed_ok = copy_dir (src_name, dst_name, new_dst, &src_sb, dir, x, -+ first_dir_created_per_command_line_arg, - copy_into_self); - } - } -@@ -2187,8 +2206,11 @@ copy (char const *src_name, char const * - top_level_src_name = src_name; - top_level_dst_name = dst_name; - -+ bool first_dir_created_per_command_line_arg = false; - return copy_internal (src_name, dst_name, nonexistent_dst, 0, NULL, -- options, true, copy_into_self, rename_succeeded); -+ options, true, -+ &first_dir_created_per_command_line_arg, -+ copy_into_self, rename_succeeded); - } - - /* Set *X to the default options for a value of type struct cp_options. */ -diff -urNp coreutils-7.1-orig/tests/cp/into-self coreutils-7.1/tests/cp/into-self ---- coreutils-7.1-orig/tests/cp/into-self 2008-09-18 09:06:57.000000000 +0200 -+++ coreutils-7.1/tests/cp/into-self 2009-02-27 12:16:21.000000000 +0100 -@@ -1,7 +1,7 @@ - #!/bin/sh - # Confirm that copying a directory into itself gets a proper diagnostic. - --# Copyright (C) 2001, 2002, 2004, 2006-2008 Free Software Foundation, Inc. -+# Copyright (C) 2001, 2002, 2004, 2006-2009 Free Software Foundation, Inc. - - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -28,15 +28,32 @@ fi - - . $srcdir/test-lib.sh - --mkdir dir || framework_failure -+mkdir a dir || framework_failure - - fail=0 - - # This command should exit nonzero. - cp -R dir dir 2> out && fail=1 -+echo 1 >> out -+ -+# This should, too. However, with coreutils-7.1 it would infloop. -+cp -rl dir dir 2>> out && fail=1 -+echo 2 >> out -+ -+cp -rl a dir dir 2>> out && fail=1 -+echo 3 >> out -+cp -rl a dir dir 2>> out && fail=1 -+echo 4 >> out - - cat > exp <<\EOF - cp: cannot copy a directory, `dir', into itself, `dir/dir' -+1 -+cp: cannot copy a directory, `dir', into itself, `dir/dir' -+2 -+cp: cannot copy a directory, `dir', into itself, `dir/dir' -+3 -+cp: cannot copy a directory, `dir', into itself, `dir/dir' -+4 - EOF - #' - diff --git a/abs/core-testing/coreutils/coreutils-7.1-sort-endoffields.patch b/abs/core-testing/coreutils/coreutils-7.1-sort-endoffields.patch deleted file mode 100644 index 45d1e28..0000000 --- a/abs/core-testing/coreutils/coreutils-7.1-sort-endoffields.patch +++ /dev/null @@ -1,102 +0,0 @@ -diff -urNp coreutils-7.1-orig/src/sort.c coreutils-7.1/src/sort.c ---- coreutils-7.1-orig/src/sort.c 2009-02-25 16:15:52.000000000 +0100 -+++ coreutils-7.1/src/sort.c 2009-02-25 16:20:35.000000000 +0100 -@@ -1598,6 +1598,9 @@ limfield_uni (const struct line *line, c - size_t eword = key->eword, echar = key->echar; - size_t remaining_bytes; - -+ if (echar == 0) -+ eword++; /* skip all of end field. */ -+ - /* Move PTR past EWORD fields or to one past the last byte on LINE, - whichever comes first. If there are more than EWORD fields, leave - PTR pointing at the beginning of the field having zero-based index, -@@ -1673,19 +1676,22 @@ limfield_uni (const struct line *line, c - } - #endif - -- /* If we're ignoring leading blanks when computing the End -- of the field, don't start counting bytes until after skipping -- past any leading blanks. */ -- if (key->skipeblanks) -- while (ptr < lim && blanks[to_uchar (*ptr)]) -- ++ptr; - -- /* Advance PTR by ECHAR (if possible), but no further than LIM. */ -- remaining_bytes = lim - ptr; -- if (echar < remaining_bytes) -- ptr += echar; -- else -- ptr = lim; -+ if (echar != 0) /* We need to skip over a portion of the end field. */ -+ { -+ if (key->skipeblanks) /* blanks not counted in echar. */ -+ { -+ while (ptr < lim && blanks[to_uchar (*ptr)]) -+ ++ptr; -+ } -+ -+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ -+ remaining_bytes = lim - ptr; -+ if (echar < remaining_bytes) -+ ptr += echar; -+ else -+ ptr = lim; -+ } - - return ptr; - } -@@ -3736,12 +3742,9 @@ main (int argc, char **argv) - badfieldspec (optarg, N_("field number is zero")); - } - if (*s == '.') -- s = parse_field_count (s + 1, &key->echar, -- N_("invalid number after `.'")); -- else - { -- /* `-k 2,3' is equivalent to `+1 -3'. */ -- key->eword++; -+ s = parse_field_count (s + 1, &key->echar, -+ N_("invalid number after `.'")); - } - s = set_ordering (s, key, bl_end); - } -diff -urNp coreutils-7.1-orig/tests/misc/sort coreutils-7.1/tests/misc/sort ---- coreutils-7.1-orig/tests/misc/sort 2009-01-27 22:11:25.000000000 +0100 -+++ coreutils-7.1/tests/misc/sort 2009-02-25 16:21:48.000000000 +0100 -@@ -24,6 +24,10 @@ my $prog = 'sort'; - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - # Since each test is run with a file name and with redirected stdin, - # the name in the diagnostic is either the file name or "-". - # Normalize each diagnostic to use '-'. -@@ -110,6 +114,8 @@ my @Tests = - ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], - ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], - ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}], -+["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}], -+ - # - # report an error for `.' without following char spec - ["08a", '-k 2.,3', {EXIT=>2}, -@@ -210,6 +216,15 @@ my @Tests = - # key start and key end. - ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}], - -+# When ignoring leading blanks for end position, ensure blanks from -+# next field are not included in the sort. I.E. order should not change here. -+["18f", '-k1,1b', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}], -+ -+# When ignoring leading blanks for start position, ensure blanks from -+# next field are not included in the sort. I.E. order should not change here. -+# This was noticed as an issue on fedora 8 (only in multibyte locales). -+["18g", '-k1b,1', {IN=>"a y\na z\n"}, {OUT=>"a y\na z\n"}, -+ {ENV => "LC_ALL=$mb_locale"}], - # This looks odd, but works properly -- 2nd keyspec is never - # used because all lines are different. - ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}], diff --git a/abs/core-testing/coreutils/coreutils-8.5-2-i686.pkg.tar.gz b/abs/core-testing/coreutils/coreutils-8.5-2-i686.pkg.tar.gz new file mode 120000 index 0000000..3eef1a4 --- /dev/null +++ b/abs/core-testing/coreutils/coreutils-8.5-2-i686.pkg.tar.gz @@ -0,0 +1 @@ +/data/pkg_repo/packages/coreutils-8.5-2-i686.pkg.tar.gz
\ No newline at end of file diff --git a/abs/core-testing/coreutils/coreutils-i18n.patch b/abs/core-testing/coreutils/coreutils-i18n.patch deleted file mode 100644 index 626bac0..0000000 --- a/abs/core-testing/coreutils/coreutils-i18n.patch +++ /dev/null @@ -1,4065 +0,0 @@ -diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut ---- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200 -+++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200 -@@ -26,7 +26,7 @@ - my $prog = 'cut'; - my $try = "Try \`$prog --help' for more information.\n"; - my $from_1 = "$prog: fields and positions are numbered from 1\n$try"; --my $inval = "$prog: invalid byte or field list\n$try"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; - my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; - - my @Tests = -@@ -140,8 +140,8 @@ - ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}], - - # None of the following invalid ranges provoked an error up to coreutils-6.9. -- ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, -- {ERR=>"$prog: invalid decreasing range\n$try"}], -+ ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, -+ {ERR=>"$prog: invalid byte, character or field list\n$try"}], - ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], - ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], - ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,58 @@ -+#! /bin/sh -+case $# in -+ 0) xx='../src/sort';; -+ *) xx="$1";; -+esac -+test "$VERBOSE" && echo=echo || echo=: -+$echo testing program: $xx -+errors=0 -+test "$srcdir" || srcdir=. -+test "$VERBOSE" && $xx --version 2> /dev/null -+ -+export LC_ALL=en_US.UTF-8 -+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77 -+errors=0 -+ -+$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O -+code=$? -+if test $code != 0; then -+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2 -+ errors=`expr $errors + 1` -+else -+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1 -+ case $? in -+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;; -+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2 -+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null -+ errors=`expr $errors + 1`;; -+ 2) $echo "Test mb1 may have failed." 1>&2 -+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2 -+ errors=`expr $errors + 1`;; -+ esac -+fi -+ -+$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O -+code=$? -+if test $code != 0; then -+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2 -+ errors=`expr $errors + 1` -+else -+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1 -+ case $? in -+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;; -+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2 -+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null -+ errors=`expr $errors + 1`;; -+ 2) $echo "Test mb2 may have failed." 1>&2 -+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2 -+ errors=`expr $errors + 1`;; -+ esac -+fi -+ -+if test $errors = 0; then -+ $echo Passed all 113 tests. 1>&2 -+else -+ $echo Failed $errors tests. 1>&2 -+fi -+test $errors = 0 || errors=1 -+exit $errors ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Apple@AA10@@20 -+Banana@AA5@@30 -+Citrus@AA20@@5 -+Cherry@AA30@@10 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Citrus@AA20@@5 -+Cherry@AA30@@10 -+Apple@AA10@@20 -+Banana@AA5@@30 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Apple@10 -+Banana@5 -+Citrus@20 -+Cherry@30 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Banana@5 -+Apple@10 -+Citrus@20 -+Cherry@30 -diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am ---- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200 -+++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200 -@@ -192,6 +192,7 @@ - misc/sort \ - misc/sort-compress \ - misc/sort-files0-from \ -+ misc/sort-mb-tests \ - misc/sort-merge \ - misc/sort-rand \ - misc/sort-version \ -@@ -391,6 +392,10 @@ - $(root_tests) - - pr_data = \ -+ misc/mb1.X \ -+ misc/mb1.I \ -+ misc/mb2.X \ -+ misc/mb2.I \ - pr/0F \ - pr/0FF \ - pr/0FFnt \ ---- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100 -+++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000 -@@ -22,6 +22,11 @@ - - # include <stdio.h> - -+/* Get mbstate_t. */ -+# if HAVE_WCHAR_H -+# include <wchar.h> -+# endif -+ - /* A `struct linebuffer' holds a line of text. */ - - struct linebuffer -@@ -29,6 +34,9 @@ - size_t size; /* Allocated. */ - size_t length; /* Used. */ - char *buffer; -+# if HAVE_WCHAR_H -+ mbstate_t state; -+# endif - }; - - /* Initialize linebuffer LINEBUFFER for use. */ ---- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000 -@@ -38,11 +38,28 @@ - #include <stdio.h> - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "expand" - -@@ -183,6 +200,7 @@ - stops = num_start + len - 1; - } - } -+ - else - { - error (0, 0, _("tab size contains invalid character(s): %s"), -@@ -365,6 +383,142 @@ - } - } - -+#if HAVE_MBRTOWC -+static void -+expand_multibyte (void) -+{ -+ FILE *fp; /* Input strem. */ -+ mbstate_t i_state; /* Current shift state of the input stream. */ -+ mbstate_t i_state_bak; /* Back up the I_STATE. */ -+ mbstate_t o_state; /* Current shift state of the output stream. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ wchar_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character -+ which shows as same character as WC. */ -+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */ -+ int column = 0; /* Column on screen of the next char. */ -+ int next_tab_column; /* Column the next tab stop is on. */ -+ int convert = 1; /* If nonzero, perform translations. */ -+ -+ fp = next_file ((FILE *) NULL); -+ if (fp == NULL) -+ return; -+ -+ memset (&o_state, '\0', sizeof(mbstate_t)); -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ -+ for (;;) -+ { -+ /* Refill the buffer BUF. */ -+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); -+ bufpos = buf; -+ } -+ -+ /* No character is left in BUF. */ -+ if (buflen < 1) -+ { -+ fp = next_file (fp); -+ -+ if (fp == NULL) -+ break; /* No more files. */ -+ else -+ { -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ continue; -+ } -+ } -+ -+ /* Get a wide character. */ -+ i_state_bak = i_state; -+ mblength = mbrtowc (&wc, bufpos, buflen, &i_state); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: /* illegal byte sequence. */ -+ case (size_t)-2: -+ mblength = 1; -+ i_state = i_state_bak; -+ if (convert) -+ { -+ ++column; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ putchar (*bufpos); -+ break; -+ -+ case 0: /* null. */ -+ mblength = 1; -+ if (convert && convert_entire_line == 0) -+ convert = 0; -+ putchar ('\0'); -+ break; -+ -+ default: -+ if (wc == L'\n') /* LF. */ -+ { -+ tab_index = 0; -+ column = 0; -+ convert = 1; -+ putchar ('\n'); -+ } -+ else if (wc == L'\t' && convert) /* Tab. */ -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let tab_index == first_free_tab; -+ stop when it is 1 less. */ -+ while (tab_index < first_free_tab - 1 -+ && column >= tab_list[tab_index]) -+ tab_index++; -+ next_tab_column = tab_list[tab_index]; -+ if (tab_index < first_free_tab - 1) -+ tab_index++; -+ if (column >= next_tab_column) -+ next_tab_column = column + 1; -+ } -+ else -+ next_tab_column = column + tab_size - column % tab_size; -+ -+ while (column < next_tab_column) -+ { -+ putchar (' '); -+ ++column; -+ } -+ } -+ else /* Others. */ -+ { -+ if (convert) -+ { -+ if (wc == L'\b') -+ { -+ if (column > 0) -+ --column; -+ } -+ else -+ { -+ int width; /* The width of WC. */ -+ -+ width = wcwidth (wc); -+ column += (width > 0) ? width : 0; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ } -+ fwrite (bufpos, sizeof(char), mblength, stdout); -+ } -+ } -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - int - main (int argc, char **argv) - { -@@ -429,7 +583,12 @@ - - file_list = (optind < argc ? &argv[optind] : stdin_argv); - -- expand (); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ expand_multibyte (); -+ else -+#endif -+ expand (); - - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); ---- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,16 +23,30 @@ - #include <sys/types.h> - #include <getopt.h> - -+/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ -+/* Get iswblank(), towupper. */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "linebuffer.h" --#include "memcasecmp.h" - #include "quote.h" - #include "stdio--.h" - #include "xmemcoll.h" - #include "xstrtol.h" - #include "argmatch.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "join" - -@@ -104,10 +118,12 @@ - /* Last element in `outlist', where a new element can be added. */ - static struct outlist *outlist_end = &outlist_head; - --/* Tab character separating fields. If negative, fields are separated -- by any nonempty string of blanks, otherwise by exactly one -- tab character whose value (when cast to unsigned char) equals TAB. */ --static int tab = -1; -+/* Tab character separating fields. If NULL, fields are separated -+ by any nonempty string of blanks. */ -+static char *tab = NULL; -+ -+/* The number of bytes used for tab. */ -+static size_t tablen = 0; - - /* If nonzero, check that the input is correctly ordered. */ - static enum -@@ -199,10 +217,11 @@ - if (ptr == lim) - return; - -- if (0 <= tab) -+ if (tab != NULL) - { -+ unsigned char t = tab[0]; - char *sep; -- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) -+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) - extract_field (line, ptr, sep - ptr); - } - else -@@ -229,6 +248,148 @@ - extract_field (line, ptr, lim - ptr); - } - -+#if HAVE_MBRTOWC -+static void -+xfields_multibyte (struct line *line) -+{ -+ char *ptr = line->buf.buffer; -+ char const *lim = ptr + line->buf.length - 1; -+ wchar_t wc = 0; -+ size_t mblength = 1; -+ mbstate_t state, state_bak; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ -+ if (ptr == lim) -+ return; -+ -+ if (tab != NULL) -+ { -+ unsigned char t = tab[0]; -+ char *sep = ptr; -+ for (; ptr < lim; ptr = sep + mblength) -+ { -+ sep = ptr; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (mblength == tablen && !memcmp (sep, tab, mblength)) -+ break; -+ else -+ { -+ sep += mblength; -+ continue; -+ } -+ } -+ -+ if (sep == lim) -+ break; -+ -+ extract_field (line, ptr, sep - ptr); -+ } -+ } -+ else -+ { -+ /* Skip leading blanks before the first field. */ -+ while(ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank(wc)) -+ break; -+ ptr += mblength; -+ } -+ -+ do -+ { -+ char *sep; -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ sep = ptr + mblength; -+ while (sep != lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (iswblank (wc)) -+ break; -+ -+ sep += mblength; -+ } -+ -+ extract_field (line, ptr, sep - ptr); -+ if (sep == lim) -+ return; -+ -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ ptr = sep + mblength; -+ while (ptr != lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank (wc)) -+ break; -+ -+ ptr += mblength; -+ } -+ } -+ while (ptr != lim); -+ } -+ -+ extract_field (line, ptr, lim - ptr); -+} -+#endif -+ - static void - freeline (struct line *line) - { -@@ -377,11 +601,18 @@ - - /* Print the join of LINE1 and LINE2. */ - -+#define PUT_TAB_CHAR \ -+ do \ -+ { \ -+ (tab != NULL) ? \ -+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ -+ } \ -+ while (0) -+ - static void - prjoin (struct line const *line1, struct line const *line2) - { - const struct outlist *outlist; -- char output_separator = tab < 0 ? ' ' : tab; - - outlist = outlist_head.next; - if (outlist) -@@ -397,12 +628,12 @@ - if (o->file == 0) - { - if (line1 == &uni_blank) -- { -+ { - line = line2; - field = join_field_2; - } - else -- { -+ { - line = line1; - field = join_field_1; - } -@@ -416,7 +647,7 @@ - o = o->next; - if (o == NULL) - break; -- putchar (output_separator); -+ PUT_TAB_CHAR; - } - putchar ('\n'); - } -@@ -434,23 +665,23 @@ - prfield (join_field_1, line1); - for (i = 0; i < join_field_1 && i < line1->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line1); - } - for (i = join_field_1 + 1; i < line1->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line1); - } - - for (i = 0; i < join_field_2 && i < line2->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line2); - } - for (i = join_field_2 + 1; i < line2->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line2); - } - putchar ('\n'); -@@ -859,20 +1090,41 @@ - - case 't': - { -- unsigned char newtab = optarg[0]; -- if (! newtab) -+ char *newtab; -+ size_t newtablen; -+ if (! optarg[0]) - error (EXIT_FAILURE, 0, _("empty tab")); -- if (optarg[1]) -+ newtab = xstrdup (optarg); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ newtablen = mbrtowc (NULL, newtab, -+ strnlen (newtab, MB_LEN_MAX), -+ &state); -+ if (newtablen == (size_t) 0 -+ || newtablen == (size_t) -1 -+ || newtablen == (size_t) -2) -+ newtablen = 1; -+ } -+ else -+#endif -+ newtablen = 1; -+ -+ if (newtablen == 1 && newtab[1]) -+ { -+ if (STREQ (newtab, "\\0")) -+ newtab[0] = '\0'; -+ } -+ if (tab != NULL && strcmp (tab, newtab)) - { -- if (STREQ (optarg, "\\0")) -- newtab = '\0'; -- else -- error (EXIT_FAILURE, 0, _("multi-character tab %s"), -- quote (optarg)); -+ free (newtab); -+ error (EXIT_FAILURE, 0, _("incompatible tabs")); - } -- if (0 <= tab && tab != newtab) -- error (EXIT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; -+ tablen = newtablen; - } - break; - -diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c ---- coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 -+++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 -@@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct - size_t jf_1, size_t jf_2) - { - /* Start of field to compare in each file. */ -- char *beg1; -- char *beg2; -- -- size_t len1; -- size_t len2; /* Length of fields to compare. */ -+ char *beg[2]; -+ char *copy[2]; -+ size_t len[2]; /* Length of fields to compare. */ - int diff; -+ int i, j; - - if (jf_1 < line1->nfields) - { -- beg1 = line1->fields[jf_1].beg; -- len1 = line1->fields[jf_1].len; -+ beg[0] = line1->fields[jf_1].beg; -+ len[0] = line1->fields[jf_1].len; - } - else - { -- beg1 = NULL; -- len1 = 0; -+ beg[0] = NULL; -+ len[0] = 0; - } - - if (jf_2 < line2->nfields) - { -- beg2 = line2->fields[jf_2].beg; -- len2 = line2->fields[jf_2].len; -+ beg[1] = line2->fields[jf_2].beg; -+ len[1] = line2->fields[jf_2].len; - } - else - { -- beg2 = NULL; -- len2 = 0; -+ beg[1] = NULL; -+ len[1] = 0; - } - -- if (len1 == 0) -- return len2 == 0 ? 0 : -1; -- if (len2 == 0) -+ if (len[0] == 0) -+ return len[1] == 0 ? 0 : -1; -+ if (len[1] == 0) - return 1; - - if (ignore_case) - { -- /* FIXME: ignore_case does not work with NLS (in particular, -- with multibyte chars). */ -- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state, state_bak; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]);) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); -+ -+ switch (mblength) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ state = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ uwc = towupper (wc); -+ -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ wcrtomb (copy[i] + j, uwc, &state_wc); -+ } -+ else -+ memcpy (copy[i] + j, beg[i] + j, mblength); -+ } -+ j += mblength; -+ } -+ copy[i][j] = '\0'; -+ } -+ } -+ else -+#endif -+ { -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]); j++) -+ copy[i][j] = toupper (beg[i][j]); -+ -+ copy[i][j] = '\0'; -+ } -+ } - } - else - { -- if (hard_LC_COLLATE) -- return xmemcoll (beg1, len1, beg2, len2); -- diff = memcmp (beg1, beg2, MIN (len1, len2)); -+ copy[0] = (unsigned char *) beg[0]; -+ copy[1] = (unsigned char *) beg[1]; - } - -+ if (hard_LC_COLLATE) -+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); -+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); -+ -+ - if (diff) - return diff; -- return len1 < len2 ? -1 : len1 != len2; -+ return len[0] - len[1]; - } - - /* Check that successive input lines PREV and CURRENT from input file ---- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,6 +23,16 @@ - #include <getopt.h> - #include <sys/types.h> - -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ -+/* Get isw* functions. */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+ - #include "system.h" - #include "argmatch.h" - #include "linebuffer.h" -@@ -32,7 +42,19 @@ - #include "quote.h" - #include "xmemcoll.h" - #include "xstrtol.h" --#include "memcasecmp.h" -+#include "xmemcoll.h" -+ -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "uniq" -@@ -109,6 +131,10 @@ - /* Select whether/how to delimit groups of duplicate lines. */ - static enum delimit_method delimit_groups; - -+/* Function pointers. */ -+static char * -+(*find_field) (struct linebuffer *line); -+ - static struct option const longopts[] = - { - {"count", no_argument, NULL, 'c'}, -@@ -198,7 +224,7 @@ - return a pointer to the beginning of the line's field to be compared. */ - - static char * --find_field (struct linebuffer const *line) -+find_field_uni (struct linebuffer *line) - { - size_t count; - char const *lp = line->buffer; -@@ -219,6 +245,83 @@ - return line->buffer + i; - } - -+#if HAVE_MBRTOWC -+ -+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ CONVFAIL = 0; \ -+ state_bak = *STATEP; \ -+ \ -+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ *STATEP = state_bak; \ -+ CONVFAIL++; \ -+ /* Fall through */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ -+static char * -+find_field_multi (struct linebuffer *line) -+{ -+ size_t count; -+ char *lp = line->buffer; -+ size_t size = line->length - 1; -+ size_t pos; -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t *statep; -+ int convfail; -+ -+ pos = 0; -+ statep = &(line->state); -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_fields && pos < size; count++) -+ { -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (convfail || !iswblank (wc)) -+ { -+ pos += mblength; -+ break; -+ } -+ pos += mblength; -+ } -+ -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (!convfail && iswblank (wc)) -+ break; -+ -+ pos += mblength; -+ } -+ } -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_chars && pos < size; count++) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ pos += mblength; -+ } -+ -+ return lp + pos; -+} -+#endif -+ - /* Return false if two strings OLD and NEW match, true if not. - OLD and NEW point not to the beginnings of the lines - but rather to the beginnings of the fields to compare. -@@ -227,6 +330,8 @@ - static bool - different (char *old, char *new, size_t oldlen, size_t newlen) - { -+ char *copy_old, *copy_new; -+ - if (check_chars < oldlen) - oldlen = check_chars; - if (check_chars < newlen) -@@ -234,14 +339,92 @@ - - if (ignore_case) - { -- /* FIXME: This should invoke strcoll somehow. */ -- return oldlen != newlen || memcasecmp (old, new, oldlen); -+ size_t i; -+ -+ copy_old = alloca (oldlen + 1); -+ copy_new = alloca (oldlen + 1); -+ -+ for (i = 0; i < oldlen; i++) -+ { -+ copy_old[i] = toupper (old[i]); -+ copy_new[i] = toupper (new[i]); -+ } - } -- else if (hard_LC_COLLATE) -- return xmemcoll (old, oldlen, new, newlen) != 0; - else -- return oldlen != newlen || memcmp (old, new, oldlen); -+ { -+ copy_old = (char *)old; -+ copy_new = (char *)new; -+ } -+ -+ return xmemcoll (copy_old, oldlen, copy_new, newlen); -+} -+ -+#if HAVE_MBRTOWC -+static int -+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) -+{ -+ size_t i, j, chars; -+ const char *str[2]; -+ char *copy[2]; -+ size_t len[2]; -+ mbstate_t state[2]; -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state_bak; -+ -+ str[0] = old; -+ str[1] = new; -+ len[0] = oldlen; -+ len[1] = newlen; -+ state[0] = oldstate; -+ state[1] = newstate; -+ -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) -+ { -+ state_bak = state[i]; -+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: -+ case (size_t)-2: -+ state[i] = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ if (ignore_case) -+ { -+ uwc = towupper (wc); -+ -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ wcrtomb (copy[i] + j, uwc, &state_wc); -+ } -+ else -+ memcpy (copy[i] + j, str[i] + j, mblength); -+ } -+ else -+ memcpy (copy[i] + j, str[i] + j, mblength); -+ } -+ j += mblength; -+ } -+ copy[i][j] = '\0'; -+ len[i] = j; -+ } -+ -+ return xmemcoll (copy[0], len[0], copy[1], len[1]); - } -+#endif - - /* Output the line in linebuffer LINE to standard output - provided that the switches say it should be output. -@@ -295,15 +478,43 @@ - { - char *prevfield IF_LINT (= NULL); - size_t prevlen IF_LINT (= 0); -+#if HAVE_MBRTOWC -+ mbstate_t prevstate; -+ -+ memset (&prevstate, '\0', sizeof (mbstate_t)); -+#endif - - while (!feof (stdin)) - { - char *thisfield; - size_t thislen; -+#if HAVE_MBRTOWC -+ mbstate_t thisstate; -+#endif -+ - if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) - break; - thisfield = find_field (thisline); - thislen = thisline->length - 1 - (thisfield - thisline->buffer); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ thisstate = thisline->state; -+ -+ if (prevline->length == 0 || different_multi -+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) -+ { -+ fwrite (thisline->buffer, sizeof (char), -+ thisline->length, stdout); -+ -+ SWAP_LINES (prevline, thisline); -+ prevfield = thisfield; -+ prevlen = thislen; -+ prevstate = thisstate; -+ } -+ } -+ else -+#endif - if (prevline->length == 0 - || different (thisfield, prevfield, thislen, prevlen)) - { -@@ -322,17 +533,26 @@ - size_t prevlen; - uintmax_t match_count = 0; - bool first_delimiter = true; -+#if HAVE_MBRTOWC -+ mbstate_t prevstate; -+#endif - - if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) - goto closefiles; - prevfield = find_field (prevline); - prevlen = prevline->length - 1 - (prevfield - prevline->buffer); -+#if HAVE_MBRTOWC -+ prevstate = prevline->state; -+#endif - - while (!feof (stdin)) - { - bool match; - char *thisfield; - size_t thislen; -+#if HAVE_MBRTOWC -+ mbstate_t thisstate; -+#endif - if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) - { - if (ferror (stdin)) -@@ -341,6 +561,15 @@ - } - thisfield = find_field (thisline); - thislen = thisline->length - 1 - (thisfield - thisline->buffer); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ thisstate = thisline->state; -+ match = !different_multi (thisfield, prevfield, -+ thislen, prevlen, thisstate, prevstate); -+ } -+ else -+#endif - match = !different (thisfield, prevfield, thislen, prevlen); - match_count += match; - -@@ -373,6 +602,9 @@ - SWAP_LINES (prevline, thisline); - prevfield = thisfield; - prevlen = thislen; -+#if HAVE_MBRTOWC -+ prevstate = thisstate; -+#endif - if (!match) - match_count = 0; - } -@@ -417,6 +649,19 @@ - - atexit (close_stdout); - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ find_field = find_field_multi; -+ } -+ else -+#endif -+ { -+ find_field = find_field_uni; -+ } -+ -+ -+ - skip_chars = 0; - skip_fields = 0; - check_chars = SIZE_MAX; ---- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000 -+++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,11 +23,33 @@ - #include <getopt.h> - #include <sys/types.h> - -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ -+/* Get iswprint(), iswblank(), wcwidth(). */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "quote.h" - #include "xstrtol.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - #define TAB_WIDTH 8 - - /* The official name of this program (e.g., no `g' prefix). */ -@@ -35,20 +57,41 @@ - - #define AUTHORS proper_name ("David MacKenzie") - -+#define FATAL_ERROR(Message) \ -+ do \ -+ { \ -+ error (0, 0, (Message)); \ -+ usage (2); \ -+ } \ -+ while (0) -+ -+enum operating_mode -+{ -+ /* Fold texts by columns that are at the given positions. */ -+ column_mode, -+ -+ /* Fold texts by bytes that are at the given positions. */ -+ byte_mode, -+ -+ /* Fold texts by characters that are at the given positions. */ -+ character_mode, -+}; -+ -+/* The argument shows current mode. (Default: column_mode) */ -+static enum operating_mode operating_mode; -+ - /* If nonzero, try to break on whitespace. */ - static bool break_spaces; - --/* If nonzero, count bytes, not column positions. */ --static bool count_bytes; -- - /* If nonzero, at least one of the files we read was standard input. */ - static bool have_read_stdin; - --static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; -+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; - - static struct option const longopts[] = - { - {"bytes", no_argument, NULL, 'b'}, -+ {"characters", no_argument, NULL, 'c'}, - {"spaces", no_argument, NULL, 's'}, - {"width", required_argument, NULL, 'w'}, - {GETOPT_HELP_OPTION_DECL}, -@@ -81,6 +124,7 @@ - "), stdout); - fputs (_("\ - -b, --bytes count bytes rather than columns\n\ -+ -c, --characters count characters rather than columns\n\ - -s, --spaces break at spaces\n\ - -w, --width=WIDTH use WIDTH columns instead of 80\n\ - "), stdout); -@@ -98,7 +142,7 @@ - static size_t - adjust_column (size_t column, char c) - { -- if (!count_bytes) -+ if (operating_mode != byte_mode) - { - if (c == '\b') - { -@@ -121,30 +165,14 @@ - to stdout, with maximum line length WIDTH. - Return true if successful. */ - --static bool --fold_file (char const *filename, size_t width) -+static void -+fold_text (FILE *istream, size_t width, int *saved_errno) - { -- FILE *istream; - int c; - size_t column = 0; /* Screen column where next char will go. */ - size_t offset_out = 0; /* Index in `line_out' for next char. */ - static char *line_out = NULL; - static size_t allocated_out = 0; -- int saved_errno; -- -- if (STREQ (filename, "-")) -- { -- istream = stdin; -- have_read_stdin = true; -- } -- else -- istream = fopen (filename, "r"); -- -- if (istream == NULL) -- { -- error (0, errno, "%s", filename); -- return false; -- } - - while ((c = getc (istream)) != EOF) - { -@@ -172,6 +200,15 @@ - bool found_blank = false; - size_t logical_end = offset_out; - -+ /* If LINE_OUT has no wide character, -+ put a new wide character in LINE_OUT -+ if column is bigger than width. */ -+ if (offset_out == 0) -+ { -+ line_out[offset_out++] = c; -+ continue; -+ } -+ - /* Look for the last blank. */ - while (logical_end) - { -@@ -218,11 +255,225 @@ - line_out[offset_out++] = c; - } - -- saved_errno = errno; -+ *saved_errno = errno; -+ -+ if (offset_out) -+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); -+ -+ free(line_out); -+} -+ -+#if HAVE_MBRTOWC -+static void -+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) -+{ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ char *bufpos; /* Next read position of BUF. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state, state_bak; /* State of the stream. */ -+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ char *line_out = NULL; -+ size_t offset_out = 0; /* Index in `line_out' for next char. */ -+ size_t allocated_out = 0; -+ -+ int increment; -+ size_t column = 0; -+ -+ size_t last_blank_pos; -+ size_t last_blank_column; -+ int is_blank_seen; -+ int last_blank_increment; -+ int is_bs_following_last_blank; -+ size_t bs_following_last_blank_num; -+ int is_cr_after_last_blank; -+ -+#define CLEAR_FLAGS \ -+ do \ -+ { \ -+ last_blank_pos = 0; \ -+ last_blank_column = 0; \ -+ is_blank_seen = 0; \ -+ is_bs_following_last_blank = 0; \ -+ bs_following_last_blank_num = 0; \ -+ is_cr_after_last_blank = 0; \ -+ } \ -+ while (0) -+ -+#define START_NEW_LINE \ -+ do \ -+ { \ -+ putchar ('\n'); \ -+ column = 0; \ -+ offset_out = 0; \ -+ CLEAR_FLAGS; \ -+ } \ -+ while (0) -+ -+ CLEAR_FLAGS; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ for (;; bufpos += mblength, buflen -= mblength) -+ { -+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); -+ bufpos = buf; -+ } -+ -+ if (buflen < 1) -+ break; -+ -+ /* Get a wide character. */ -+ convfail = 0; -+ state_bak = state; -+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: -+ case (size_t)-2: -+ convfail++; -+ state = state_bak; -+ /* Fall through. */ -+ -+ case 0: -+ mblength = 1; -+ break; -+ } -+ -+rescan: -+ if (operating_mode == byte_mode) /* byte mode */ -+ increment = mblength; -+ else if (operating_mode == character_mode) /* character mode */ -+ increment = 1; -+ else /* column mode */ -+ { -+ if (convfail) -+ increment = 1; -+ else -+ { -+ switch (wc) -+ { -+ case L'\n': -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ continue; -+ -+ case L'\b': -+ increment = (column > 0) ? -1 : 0; -+ break; -+ -+ case L'\r': -+ increment = -1 * column; -+ break; -+ -+ case L'\t': -+ increment = 8 - column % 8; -+ break; -+ -+ default: -+ increment = wcwidth (wc); -+ increment = (increment < 0) ? 0 : increment; -+ } -+ } -+ } -+ -+ if (column + increment > width && break_spaces && last_blank_pos) -+ { -+ fwrite (line_out, sizeof(char), last_blank_pos, stdout); -+ putchar ('\n'); -+ -+ offset_out = offset_out - last_blank_pos; -+ column = column - last_blank_column + ((is_cr_after_last_blank) -+ ? last_blank_increment : bs_following_last_blank_num); -+ memmove (line_out, line_out + last_blank_pos, offset_out); -+ CLEAR_FLAGS; -+ goto rescan; -+ } -+ -+ if (column + increment > width && column != 0) -+ { -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ goto rescan; -+ } -+ -+ if (allocated_out < offset_out + mblength) -+ { -+ allocated_out += 1024; -+ line_out = xrealloc (line_out, allocated_out); -+ } -+ -+ memcpy (line_out + offset_out, bufpos, mblength); -+ offset_out += mblength; -+ column += increment; -+ -+ if (is_blank_seen && !convfail && wc == L'\r') -+ is_cr_after_last_blank = 1; -+ -+ if (is_bs_following_last_blank && !convfail && wc == L'\b') -+ ++bs_following_last_blank_num; -+ else -+ is_bs_following_last_blank = 0; -+ -+ if (break_spaces && !convfail && iswblank (wc)) -+ { -+ last_blank_pos = offset_out; -+ last_blank_column = column; -+ is_blank_seen = 1; -+ last_blank_increment = increment; -+ is_bs_following_last_blank = 1; -+ bs_following_last_blank_num = 0; -+ is_cr_after_last_blank = 0; -+ } -+ } -+ -+ *saved_errno = errno; - - if (offset_out) - fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); - -+ free(line_out); -+} -+#endif -+ -+/* Fold file FILENAME, or standard input if FILENAME is "-", -+ to stdout, with maximum line length WIDTH. -+ Return 0 if successful, 1 if an error occurs. */ -+ -+static bool -+fold_file (char *filename, size_t width) -+{ -+ FILE *istream; -+ int saved_errno; -+ -+ if (STREQ (filename, "-")) -+ { -+ istream = stdin; -+ have_read_stdin = 1; -+ } -+ else -+ istream = fopen (filename, "r"); -+ -+ if (istream == NULL) -+ { -+ error (0, errno, "%s", filename); -+ return 1; -+ } -+ -+ /* Define how ISTREAM is being folded. */ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ fold_multibyte_text (istream, width, &saved_errno); -+ else -+#endif -+ fold_text (istream, width, &saved_errno); -+ - if (ferror (istream)) - { - error (0, saved_errno, "%s", filename); -@@ -255,7 +506,8 @@ - - atexit (close_stdout); - -- break_spaces = count_bytes = have_read_stdin = false; -+ operating_mode = column_mode; -+ break_spaces = have_read_stdin = false; - - while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) - { -@@ -264,7 +516,15 @@ - switch (optc) - { - case 'b': /* Count bytes rather than columns. */ -- count_bytes = true; -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = byte_mode; -+ break; -+ -+ case 'c': -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = character_mode; - break; - - case 's': /* Break at word boundaries. */ ---- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000 -+++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000 -@@ -23,10 +23,19 @@ - - #include <config.h> - -+#include <assert.h> - #include <getopt.h> - #include <sys/types.h> - #include <sys/wait.h> - #include <signal.h> -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+/* Get isw* functions. */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+ - #include "system.h" - #include "argmatch.h" - #include "error.h" -@@ -116,14 +125,38 @@ - /* Thousands separator; if -1, then there isn't one. */ - static int thousands_sep; - -+static int force_general_numcompare = 0; -+ - /* Nonzero if the corresponding locales are hard. */ - static bool hard_LC_COLLATE; --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - static bool hard_LC_TIME; - #endif - - #define NONZERO(x) ((x) != 0) - -+/* get a multibyte character's byte length. */ -+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ -+ do \ -+ { \ -+ wchar_t wc; \ -+ mbstate_t state_bak; \ -+ \ -+ state_bak = STATE; \ -+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-1: \ -+ case (size_t)-2: \ -+ STATE = state_bak; \ -+ /* Fall through. */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ - /* The kind of blanks for '-b' to skip in various options. */ - enum blanktype { bl_start, bl_end, bl_both }; - -@@ -261,13 +294,11 @@ - they were read if all keys compare equal. */ - static bool stable; - --/* If TAB has this value, blanks separate fields. */ --enum { TAB_DEFAULT = CHAR_MAX + 1 }; -- --/* Tab character separating fields. If TAB_DEFAULT, then fields are -+/* Tab character separating fields. If tab_length is 0, then fields are - separated by the empty string between a non-blank character and a blank - character. */ --static int tab = TAB_DEFAULT; -+static char tab[MB_LEN_MAX + 1]; -+static size_t tab_length = 0; - - /* Flag to remove consecutive duplicate lines from the output. - Only the last of a sequence of equal lines will be output. */ -@@ -639,6 +670,44 @@ - update_proc (pid); - } - -+/* Function pointers. */ -+static void -+(*inittables) (void); -+static char * -+(*begfield) (const struct line*, const struct keyfield *); -+static char * -+(*limfield) (const struct line*, const struct keyfield *); -+static int -+(*getmonth) (char const *, size_t); -+static int -+(*keycompare) (const struct line *, const struct line *); -+static int -+(*numcompare) (const char *, const char *); -+ -+/* Test for white space multibyte character. -+ Set LENGTH the byte length of investigated multibyte character. */ -+#if HAVE_MBRTOWC -+static int -+ismbblank (const char *str, size_t len, size_t *length) -+{ -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ mblength = mbrtowc (&wc, str, len, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ *length = 1; -+ return 0; -+ } -+ -+ *length = (mblength < 1) ? 1 : mblength; -+ return iswblank (wc); -+} -+#endif -+ - /* Clean up any remaining temporary files. */ - - static void -@@ -978,7 +1047,7 @@ - free (node); - } - --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - - static int - struct_month_cmp (const void *m1, const void *m2) -@@ -993,7 +1062,7 @@ - /* Initialize the character class tables. */ - - static void --inittables (void) -+inittables_uni (void) - { - size_t i; - -@@ -1005,7 +1074,7 @@ - fold_toupper[i] = toupper (i); - } - --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - /* If we're not in the "C" locale, read different names for months. */ - if (hard_LC_TIME) - { -@@ -1031,6 +1100,64 @@ - xstrtol_fatal (e, oi, c, long_options, s); - } - -+#if HAVE_MBRTOWC -+static void -+inittables_mb (void) -+{ -+ int i, j, k, l; -+ char *name, *s; -+ size_t s_len, mblength; -+ char mbc[MB_LEN_MAX]; -+ wchar_t wc, pwc; -+ mbstate_t state_mb, state_wc; -+ -+ for (i = 0; i < MONTHS_PER_YEAR; i++) -+ { -+ s = (char *) nl_langinfo (ABMON_1 + i); -+ s_len = strlen (s); -+ monthtab[i].name = name = (char *) xmalloc (s_len + 1); -+ monthtab[i].val = i + 1; -+ -+ memset (&state_mb, '\0', sizeof (mbstate_t)); -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ -+ for (j = 0; j < s_len;) -+ { -+ if (!ismbblank (s + j, s_len - j, &mblength)) -+ break; -+ j += mblength; -+ } -+ -+ for (k = 0; j < s_len;) -+ { -+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); -+ assert (mblength != (size_t)-1 && mblength != (size_t)-2); -+ if (mblength == 0) -+ break; -+ -+ pwc = towupper (wc); -+ if (pwc == wc) -+ { -+ memcpy (mbc, s + j, mblength); -+ j += mblength; -+ } -+ else -+ { -+ j += mblength; -+ mblength = wcrtomb (mbc, pwc, &state_wc); -+ assert (mblength != (size_t)0 && mblength != (size_t)-1); -+ } -+ -+ for (l = 0; l < mblength; l++) -+ name[k++] = mbc[l]; -+ } -+ name[k] = '\0'; -+ } -+ qsort ((void *) monthtab, MONTHS_PER_YEAR, -+ sizeof (struct month), struct_month_cmp); -+} -+#endif -+ - /* Specify the amount of main memory to use when sorting. */ - static void - specify_sort_size (int oi, char c, char const *s) -@@ -1241,7 +1368,7 @@ - by KEY in LINE. */ - - static char * --begfield (const struct line *line, const struct keyfield *key) -+begfield_uni (const struct line *line, const struct keyfield *key) - { - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t sword = key->sword; -@@ -1251,10 +1378,10 @@ - /* The leading field separator itself is included in a field when -t - is absent. */ - -- if (tab != TAB_DEFAULT) -+ if (tab_length) - while (ptr < lim && sword--) - { -- while (ptr < lim && *ptr != tab) -+ while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim) - ++ptr; -@@ -1282,11 +1409,70 @@ - return ptr; - } - -+#if HAVE_MBRTOWC -+static char * -+begfield_mb (const struct line *line, const struct keyfield *key) -+{ -+ int i; -+ char *ptr = line->text, *lim = ptr + line->length - 1; -+ size_t sword = key->sword; -+ size_t schar = key->schar; -+ size_t mblength; -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ if (tab_length) -+ while (ptr < lim && sword--) -+ { -+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ } -+ else -+ while (ptr < lim && sword--) -+ { -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ } -+ -+ if (key->skipsblanks) -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ -+ for (i = 0; i < schar; i++) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ -+ if (ptr + mblength > lim) -+ break; -+ else -+ ptr += mblength; -+ } -+ -+ return ptr; -+} -+#endif -+ - /* Return the limit of (a pointer to the first character after) the field - in LINE specified by KEY. */ - - static char * --limfield (const struct line *line, const struct keyfield *key) -+limfield_uni (const struct line *line, const struct keyfield *key) - { - char *ptr = line->text, *lim = ptr + line->length - 1; - size_t eword = key->eword, echar = key->echar; -@@ -1299,10 +1485,10 @@ - `beginning' is the first character following the delimiting TAB. - Otherwise, leave PTR pointing at the first `blank' character after - the preceding field. */ -- if (tab != TAB_DEFAULT) -+ if (tab_length) - while (ptr < lim && eword--) - { -- while (ptr < lim && *ptr != tab) -+ while (ptr < lim && *ptr != tab[0]) - ++ptr; - if (ptr < lim && (eword | echar)) - ++ptr; -@@ -1348,10 +1534,10 @@ - */ - - /* Make LIM point to the end of (one byte past) the current field. */ -- if (tab != TAB_DEFAULT) -+ if (tab_length) - { - char *newlim; -- newlim = memchr (ptr, tab, lim - ptr); -+ newlim = memchr (ptr, tab[0], lim - ptr); - if (newlim) - lim = newlim; - } -@@ -1384,6 +1570,113 @@ - return ptr; - } - -+#if HAVE_MBRTOWC -+static char * -+limfield_mb (const struct line *line, const struct keyfield *key) -+{ -+ char *ptr = line->text, *lim = ptr + line->length - 1; -+ size_t eword = key->eword, echar = key->echar; -+ int i; -+ size_t mblength; -+ mbstate_t state; -+ -+ if (echar == 0) -+ eword++; /* skip all of end field. */ -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ if (tab_length) -+ while (ptr < lim && eword--) -+ { -+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ if (ptr < lim && (eword | echar)) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ } -+ else -+ while (ptr < lim && eword--) -+ { -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ } -+ -+ -+# ifdef POSIX_UNSPECIFIED -+ /* Make LIM point to the end of (one byte past) the current field. */ -+ if (tab_length) -+ { -+ char *newlim, *p; -+ -+ newlim = NULL; -+ for (p = ptr; p < lim;) -+ { -+ if (memcmp (p, tab, tab_length) == 0) -+ { -+ newlim = p; -+ break; -+ } -+ -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ p += mblength; -+ } -+ } -+ else -+ { -+ char *newlim; -+ newlim = ptr; -+ -+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) -+ newlim += mblength; -+ if (ptr < lim) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ ptr += mblength; -+ } -+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) -+ newlim += mblength; -+ lim = newlim; -+ } -+# endif -+ -+ if (echar != 0) -+ { -+ /* If we're skipping leading blanks, don't start counting characters -+ * until after skipping past any leading blanks. */ -+ if (key->skipsblanks) -+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) -+ ptr += mblength; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ -+ for (i = 0; i < echar; i++) -+ { -+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); -+ -+ if (ptr + mblength > lim) -+ break; -+ else -+ ptr += mblength; -+ } -+ } -+ -+ return ptr; -+} -+#endif -+ - /* Fill BUF reading from FP, moving buf->left bytes from the end - of buf->buf to the beginning first. If EOF is reached and the - file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1466,8 +1753,24 @@ - else - { - if (key->skipsblanks) -- while (blanks[to_uchar (*line_start)]) -- line_start++; -+ { -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ mbstate_t state; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ while (line_start < line->keylim && -+ ismbblank (line_start, -+ line->keylim - line_start, -+ &mblength)) -+ line_start += mblength; -+ } -+ else -+#endif -+ while (blanks[to_uchar (*line_start)]) -+ line_start++; -+ } - line->keybeg = line_start; - } - } -@@ -1500,7 +1803,7 @@ - hideously fast. */ - - static int --numcompare (const char *a, const char *b) -+numcompare_uni (const char *a, const char *b) - { - while (blanks[to_uchar (*a)]) - a++; -@@ -1510,6 +1813,25 @@ - return strnumcmp (a, b, decimal_point, thousands_sep); - } - -+#if HAVE_MBRTOWC -+static int -+numcompare_mb (const char *a, const char *b) -+{ -+ size_t mblength, len; -+ len = strlen (a); /* okay for UTF-8 */ -+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) -+ { -+ a += mblength; -+ len -= mblength; -+ } -+ len = strlen (b); /* okay for UTF-8 */ -+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) -+ b += mblength; -+ -+ return strnumcmp (a, b, decimal_point, thousands_sep); -+} -+#endif /* HAV_EMBRTOWC */ -+ - static int - general_numcompare (const char *sa, const char *sb) - { -@@ -1543,7 +1865,7 @@ - Return 0 if the name in S is not recognized. */ - - static int --getmonth (char const *month, size_t len) -+getmonth_uni (char const *month, size_t len) - { - size_t lo = 0; - size_t hi = MONTHS_PER_YEAR; -@@ -1698,11 +2020,79 @@ - return diff; - } - -+#if HAVE_MBRTOWC -+static int -+getmonth_mb (const char *s, size_t len) -+{ -+ char *month; -+ register size_t i; -+ register int lo = 0, hi = MONTHS_PER_YEAR, result; -+ char *tmp; -+ size_t wclength, mblength; -+ const char **pp; -+ const wchar_t **wpp; -+ wchar_t *month_wcs; -+ mbstate_t state; -+ -+ while (len > 0 && ismbblank (s, len, &mblength)) -+ { -+ s += mblength; -+ len -= mblength; -+ } -+ -+ if (len == 0) -+ return 0; -+ -+ month = (char *) alloca (len + 1); -+ -+ tmp = (char *) alloca (len + 1); -+ memcpy (tmp, s, len); -+ tmp[len] = '\0'; -+ pp = (const char **)&tmp; -+ month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t)); -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state); -+ assert (wclength != (size_t)-1 && *pp == NULL); -+ -+ for (i = 0; i < wclength; i++) -+ { -+ month_wcs[i] = towupper(month_wcs[i]); -+ if (iswblank (month_wcs[i])) -+ { -+ month_wcs[i] = L'\0'; -+ break; -+ } -+ } -+ -+ wpp = (const wchar_t **)&month_wcs; -+ -+ mblength = wcsrtombs (month, wpp, len + 1, &state); -+ assert (mblength != (-1) && *wpp == NULL); -+ -+ do -+ { -+ int ix = (lo + hi) / 2; -+ -+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) -+ hi = ix; -+ else -+ lo = ix; -+ } -+ while (hi - lo > 1); -+ -+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) -+ ? monthtab[lo].val : 0); -+ -+ return result; -+} -+#endif -+ - /* Compare two lines A and B trying every key in sequence until there - are no more keys or a difference is found. */ - - static int --keycompare (const struct line *a, const struct line *b) -+keycompare_uni (const struct line *a, const struct line *b) - { - struct keyfield const *key = keylist; - -@@ -1875,6 +2265,179 @@ - return key->reverse ? -diff : diff; - } - -+#if HAVE_MBRTOWC -+static int -+keycompare_mb (const struct line *a, const struct line *b) -+{ -+ struct keyfield *key = keylist; -+ -+ /* For the first iteration only, the key positions have been -+ precomputed for us. */ -+ char *texta = a->keybeg; -+ char *textb = b->keybeg; -+ char *lima = a->keylim; -+ char *limb = b->keylim; -+ -+ size_t mblength_a, mblength_b; -+ wchar_t wc_a, wc_b; -+ mbstate_t state_a, state_b; -+ -+ int diff; -+ -+ memset (&state_a, '\0', sizeof(mbstate_t)); -+ memset (&state_b, '\0', sizeof(mbstate_t)); -+ -+ for (;;) -+ { -+ unsigned char *translate = (unsigned char *) key->translate; -+ bool const *ignore = key->ignore; -+ -+ /* Find the lengths. */ -+ size_t lena = lima <= texta ? 0 : lima - texta; -+ size_t lenb = limb <= textb ? 0 : limb - textb; -+ -+ /* Actually compare the fields. */ -+ if (key->random) -+ diff = compare_random (texta, lena, textb, lenb); -+ else if (key->numeric | key->general_numeric) -+ { -+ char savea = *lima, saveb = *limb; -+ -+ *lima = *limb = '\0'; -+ if (force_general_numcompare) -+ diff = general_numcompare (texta, textb); -+ else -+ diff = ((key->numeric ? numcompare : general_numcompare) -+ (texta, textb)); -+ *lima = savea, *limb = saveb; -+ } -+ else if (key->month) -+ diff = getmonth (texta, lena) - getmonth (textb, lenb); -+ else -+ { -+ if (ignore || translate) -+ { -+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1); -+ char *copy_b = copy_a + lena + 1; -+ size_t new_len_a, new_len_b; -+ size_t i, j; -+ -+ /* Ignore and/or translate chars before comparing. */ -+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ -+ do \ -+ { \ -+ wchar_t uwc; \ -+ char mbc[MB_LEN_MAX]; \ -+ mbstate_t state_wc; \ -+ \ -+ for (NEW_LEN = i = 0; i < LEN;) \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ -+ \ -+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ -+ || MBLENGTH == 0) \ -+ { \ -+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ -+ STATE = state_bak; \ -+ if (!ignore) \ -+ COPY[NEW_LEN++] = TEXT[i++]; \ -+ continue; \ -+ } \ -+ \ -+ if (ignore) \ -+ { \ -+ if ((ignore == nonprinting && !iswprint (WC)) \ -+ || (ignore == nondictionary \ -+ && !iswalnum (WC) && !iswblank (WC))) \ -+ { \ -+ i += MBLENGTH; \ -+ continue; \ -+ } \ -+ } \ -+ \ -+ if (translate) \ -+ { \ -+ \ -+ uwc = towupper(WC); \ -+ if (WC == uwc) \ -+ { \ -+ memcpy (mbc, TEXT + i, MBLENGTH); \ -+ i += MBLENGTH; \ -+ } \ -+ else \ -+ { \ -+ i += MBLENGTH; \ -+ WC = uwc; \ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); \ -+ \ -+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ -+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ -+ } \ -+ \ -+ for (j = 0; j < MBLENGTH; j++) \ -+ COPY[NEW_LEN++] = mbc[j]; \ -+ } \ -+ else \ -+ for (j = 0; j < MBLENGTH; j++) \ -+ COPY[NEW_LEN++] = TEXT[i++]; \ -+ } \ -+ COPY[NEW_LEN] = '\0'; \ -+ } \ -+ while (0) -+ IGNORE_CHARS (new_len_a, lena, texta, copy_a, -+ wc_a, mblength_a, state_a); -+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, -+ wc_b, mblength_b, state_b); -+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); -+ } -+ else if (lena == 0) -+ diff = - NONZERO (lenb); -+ else if (lenb == 0) -+ goto greater; -+ else -+ diff = xmemcoll (texta, lena, textb, lenb); -+ } -+ -+ if (diff) -+ goto not_equal; -+ -+ key = key->next; -+ if (! key) -+ break; -+ -+ /* Find the beginning and limit of the next field. */ -+ if (key->eword != -1) -+ lima = limfield (a, key), limb = limfield (b, key); -+ else -+ lima = a->text + a->length - 1, limb = b->text + b->length - 1; -+ -+ if (key->sword != -1) -+ texta = begfield (a, key), textb = begfield (b, key); -+ else -+ { -+ texta = a->text, textb = b->text; -+ if (key->skipsblanks) -+ { -+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) -+ texta += mblength_a; -+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) -+ textb += mblength_b; -+ } -+ } -+ } -+ -+ return 0; -+ -+greater: -+ diff = 1; -+not_equal: -+ return key->reverse ? -diff : diff; -+} -+#endif -+ - /* Compare two lines A and B, returning negative, zero, or positive - depending on whether A compares less than, equal to, or greater than B. */ - -@@ -2744,7 +3305,7 @@ - initialize_exit_failure (SORT_FAILURE); - - hard_LC_COLLATE = hard_locale (LC_COLLATE); --#if HAVE_NL_LANGINFO -+#if HAVE_LANGINFO_CODESET - hard_LC_TIME = hard_locale (LC_TIME); - #endif - -@@ -2765,6 +3326,27 @@ - thousands_sep = -1; - } - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ inittables = inittables_mb; -+ begfield = begfield_mb; -+ limfield = limfield_mb; -+ getmonth = getmonth_mb; -+ keycompare = keycompare_mb; -+ numcompare = numcompare_mb; -+ } -+ else -+#endif -+ { -+ inittables = inittables_uni; -+ begfield = begfield_uni; -+ limfield = limfield_uni; -+ getmonth = getmonth_uni; -+ keycompare = keycompare_uni; -+ numcompare = numcompare_uni; -+ } -+ - have_read_stdin = false; - inittables (); - -@@ -3015,13 +3597,35 @@ - - case 't': - { -- char newtab = optarg[0]; -- if (! newtab) -+ char newtab[MB_LEN_MAX + 1]; -+ size_t newtab_length = 1; -+ strncpy (newtab, optarg, MB_LEN_MAX); -+ if (! newtab[0]) - error (SORT_FAILURE, 0, _("empty tab")); -- if (optarg[1]) -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ wchar_t wc; -+ mbstate_t state; -+ size_t i; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, -+ MB_LEN_MAX), -+ &state); -+ switch (newtab_length) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ case 0: -+ newtab_length = 1; -+ } -+ } -+#endif -+ if (newtab_length == 1 && optarg[1]) - { - if (STREQ (optarg, "\\0")) -- newtab = '\0'; -+ newtab[0] = '\0'; - else - { - /* Provoke with `sort -txx'. Complain about -@@ -3032,9 +3636,12 @@ - quote (optarg)); - } - } -- if (tab != TAB_DEFAULT && tab != newtab) -+ if (tab_length -+ && (tab_length != newtab_length -+ || memcmp (tab, newtab, tab_length) != 0)) - error (SORT_FAILURE, 0, _("incompatible tabs")); -- tab = newtab; -+ memcpy (tab, newtab, newtab_length); -+ tab_length = newtab_length; - } - break; - ---- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000 -@@ -39,11 +39,28 @@ - #include <stdio.h> - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ - #include "system.h" - #include "error.h" - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "unexpand" - -@@ -110,6 +127,208 @@ - {NULL, 0, NULL, 0} - }; - -+static FILE *next_file (FILE *fp); -+ -+#if HAVE_MBRTOWC -+static void -+unexpand_multibyte (void) -+{ -+ FILE *fp; /* Input stream. */ -+ mbstate_t i_state; /* Current shift state of the input stream. */ -+ mbstate_t i_state_bak; /* Back up the I_STATE. */ -+ mbstate_t o_state; /* Current shift state of the output stream. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character -+ which shows as same character as WC. */ -+ -+ /* Index in `tab_list' of next tabstop: */ -+ int tab_index = 0; /* For calculating width of pending tabs. */ -+ int print_tab_index = 0; /* For printing as many tabs as possible. */ -+ unsigned int column = 0; /* Column on screen of next char. */ -+ int next_tab_column; /* Column the next tab stop is on. */ -+ int convert = 1; /* If nonzero, perform translations. */ -+ unsigned int pending = 0; /* Pending columns of blanks. */ -+ -+ fp = next_file ((FILE *) NULL); -+ if (fp == NULL) -+ return; -+ -+ memset (&o_state, '\0', sizeof(mbstate_t)); -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ -+ for (;;) -+ { -+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); -+ bufpos = buf; -+ } -+ -+ /* Get a wide character. */ -+ if (buflen < 1) -+ { -+ mblength = 1; -+ wc = WEOF; -+ } -+ else -+ { -+ i_state_bak = i_state; -+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state); -+ } -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ i_state = i_state_bak; -+ wc = L'\0'; -+ } -+ -+ if (wc == L' ' && convert && column < INT_MAX) -+ { -+ ++pending; -+ ++column; -+ } -+ else if (wc == L'\t' && convert) -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let tab_index == first_free_tab; -+ stop when it is 1 less. */ -+ while (tab_index < first_free_tab - 1 -+ && column >= tab_list[tab_index]) -+ tab_index++; -+ next_tab_column = tab_list[tab_index]; -+ if (tab_index < first_free_tab - 1) -+ tab_index++; -+ if (column >= next_tab_column) -+ { -+ convert = 0; /* Ran out of tab stops. */ -+ goto flush_pend_mb; -+ } -+ } -+ else -+ { -+ next_tab_column = column + tab_size - column % tab_size; -+ } -+ pending += next_tab_column - column; -+ column = next_tab_column; -+ } -+ else -+ { -+flush_pend_mb: -+ /* Flush pending spaces. Print as many tabs as possible, -+ then print the rest as spaces. */ -+ if (pending == 1) -+ { -+ putchar (' '); -+ pending = 0; -+ } -+ column -= pending; -+ while (pending > 0) -+ { -+ if (tab_size == 0) -+ { -+ /* Do not let print_tab_index == first_free_tab; -+ stop when it is 1 less. */ -+ while (print_tab_index < first_free_tab - 1 -+ && column >= tab_list[print_tab_index]) -+ print_tab_index++; -+ next_tab_column = tab_list[print_tab_index]; -+ if (print_tab_index < first_free_tab - 1) -+ print_tab_index++; -+ } -+ else -+ { -+ next_tab_column = -+ column + tab_size - column % tab_size; -+ } -+ if (next_tab_column - column <= pending) -+ { -+ putchar ('\t'); -+ pending -= next_tab_column - column; -+ column = next_tab_column; -+ } -+ else -+ { -+ --print_tab_index; -+ column += pending; -+ while (pending != 0) -+ { -+ putchar (' '); -+ pending--; -+ } -+ } -+ } -+ -+ if (wc == WEOF) -+ { -+ fp = next_file (fp); -+ if (fp == NULL) -+ break; /* No more files. */ -+ else -+ { -+ memset (&i_state, '\0', sizeof(mbstate_t)); -+ continue; -+ } -+ } -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ if (convert) -+ { -+ ++column; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ mblength = 1; -+ putchar (buf[0]); -+ } -+ else if (mblength == 0) -+ { -+ if (convert && convert_entire_line == 0) -+ convert = 0; -+ mblength = 1; -+ putchar ('\0'); -+ } -+ else -+ { -+ if (convert) -+ { -+ if (wc == L'\b') -+ { -+ if (column > 0) -+ --column; -+ } -+ else -+ { -+ int width; /* The width of WC. */ -+ -+ width = wcwidth (wc); -+ column += (width > 0) ? width : 0; -+ if (convert_entire_line == 0) -+ convert = 0; -+ } -+ } -+ -+ if (wc == L'\n') -+ { -+ tab_index = print_tab_index = 0; -+ column = pending = 0; -+ convert = 1; -+ } -+ fwrite (bufpos, sizeof(char), mblength, stdout); -+ } -+ } -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ -+ - void - usage (int status) - { -@@ -531,7 +750,12 @@ - - file_list = (optind < argc ? &argv[optind] : stdin_argv); - -- unexpand (); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ unexpand_multibyte (); -+ else -+#endif -+ unexpand (); - - if (have_read_stdin && fclose (stdin) != 0) - error (EXIT_FAILURE, errno, "-"); ---- coreutils-6.8+/src/pr.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/pr.c 2007-03-01 15:08:24.000000000 +0000 -@@ -313,6 +313,32 @@ - - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get MB_LEN_MAX. */ -+#include <limits.h> -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Get MB_CUR_MAX. */ -+#include <stdlib.h> -+ -+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif -+ -+/* Get iswprint(). -- for wcwidth(). */ -+#if HAVE_WCTYPE_H -+# include <wctype.h> -+#endif -+#if !defined iswprint && !HAVE_ISWPRINT -+# define iswprint(wc) 1 -+#endif -+ - #include "system.h" - #include "error.h" - #include "mbswidth.h" -@@ -324,6 +350,18 @@ - #include "strftime.h" - #include "xstrtol.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ -+#ifndef HAVE_DECL_WCWIDTH -+"this configure-time declaration test was not run" -+#endif -+#if !HAVE_DECL_WCWIDTH -+extern int wcwidth (); -+#endif -+ - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "pr" - -@@ -416,7 +454,20 @@ - - #define NULLCOL (COLUMN *)0 - --static int char_to_clump (char c); -+/* Funtion pointers to switch functions for single byte locale or for -+ multibyte locale. If multibyte functions do not exist in your sysytem, -+ these pointers always point the function for single byte locale. */ -+static void (*print_char) (char c); -+static int (*char_to_clump) (char c); -+ -+/* Functions for single byte locale. */ -+static void print_char_single (char c); -+static int char_to_clump_single (char c); -+ -+/* Functions for multibyte locale. */ -+static void print_char_multi (char c); -+static int char_to_clump_multi (char c); -+ - static bool read_line (COLUMN *p); - static bool print_page (void); - static bool print_stored (COLUMN *p); -@@ -426,6 +477,7 @@ - static void pad_across_to (int position); - static void add_line_number (COLUMN *p); - static void getoptarg (char *arg, char switch_char, char *character, -+ int *character_length, int *character_width, - int *number); - void usage (int status); - static void print_files (int number_of_files, char **av); -@@ -440,7 +492,6 @@ - static void pad_down (int lines); - static void read_rest_of_line (COLUMN *p); - static void skip_read (COLUMN *p, int column_number); --static void print_char (char c); - static void cleanup (void); - static void print_sep_string (void); - static void separator_string (const char *optarg_S); -@@ -455,7 +506,7 @@ - we store the leftmost columns contiguously in buff. - To print a line from buff, get the index of the first character - from line_vector[i], and print up to line_vector[i + 1]. */ --static char *buff; -+static unsigned char *buff; - - /* Index of the position in buff where the next character - will be stored. */ -@@ -559,7 +610,7 @@ - static bool untabify_input = false; - - /* (-e) The input tab character. */ --static char input_tab_char = '\t'; -+static char input_tab_char[MB_LEN_MAX] = "\t"; - - /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... - where the leftmost column is 1. */ -@@ -569,7 +620,10 @@ - static bool tabify_output = false; - - /* (-i) The output tab character. */ --static char output_tab_char = '\t'; -+static char output_tab_char[MB_LEN_MAX] = "\t"; -+ -+/* (-i) The byte length of output tab character. */ -+static int output_tab_char_length = 1; - - /* (-i) The width of the output tab. */ - static int chars_per_output_tab = 8; -@@ -643,7 +697,13 @@ - static bool numbered_lines = false; - - /* (-n) Character which follows each line number. */ --static char number_separator = '\t'; -+static char number_separator[MB_LEN_MAX] = "\t"; -+ -+/* (-n) The byte length of the character which follows each line number. */ -+static int number_separator_length = 1; -+ -+/* (-n) The character width of the character which follows each line number. */ -+static int number_separator_width = 0; - - /* (-n) line counting starts with 1st line of input file (not with 1st - line of 1st page printed). */ -@@ -696,6 +756,7 @@ - -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ - static char *col_sep_string = (char *) ""; - static int col_sep_length = 0; -+static int col_sep_width = 0; - static char *column_separator = (char *) " "; - static char *line_separator = (char *) "\t"; - -@@ -852,6 +913,13 @@ - col_sep_length = (int) strlen (optarg_S); - col_sep_string = xmalloc (col_sep_length + 1); - strcpy (col_sep_string, optarg_S); -+ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ col_sep_width = mbswidth (col_sep_string, 0); -+ else -+#endif -+ col_sep_width = col_sep_length; - } - - int -@@ -877,6 +945,21 @@ - - atexit (close_stdout); - -+/* Define which functions are used, the ones for single byte locale or the ones -+ for multibyte locale. */ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ print_char = print_char_multi; -+ char_to_clump = char_to_clump_multi; -+ } -+ else -+#endif -+ { -+ print_char = print_char_single; -+ char_to_clump = char_to_clump_single; -+ } -+ - n_files = 0; - file_names = (argc > 1 - ? xmalloc ((argc - 1) * sizeof (char *)) -@@ -949,8 +1032,12 @@ - break; - case 'e': - if (optarg) -- getoptarg (optarg, 'e', &input_tab_char, -- &chars_per_input_tab); -+ { -+ int dummy_length, dummy_width; -+ -+ getoptarg (optarg, 'e', input_tab_char, &dummy_length, -+ &dummy_width, &chars_per_input_tab); -+ } - /* Could check tab width > 0. */ - untabify_input = true; - break; -@@ -963,8 +1050,12 @@ - break; - case 'i': - if (optarg) -- getoptarg (optarg, 'i', &output_tab_char, -- &chars_per_output_tab); -+ { -+ int dummy_width; -+ -+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, -+ &dummy_width, &chars_per_output_tab); -+ } - /* Could check tab width > 0. */ - tabify_output = true; - break; -@@ -991,8 +1082,8 @@ - case 'n': - numbered_lines = true; - if (optarg) -- getoptarg (optarg, 'n', &number_separator, -- &chars_per_number); -+ getoptarg (optarg, 'n', number_separator, &number_separator_length, -+ &number_separator_width, &chars_per_number); - break; - case 'N': - skip_count = false; -@@ -1031,7 +1122,7 @@ - old_s = false; - /* Reset an additional input of -s, -S dominates -s */ - col_sep_string = bad_cast (""); -- col_sep_length = 0; -+ col_sep_length = col_sep_width = 0; - use_col_separator = true; - if (optarg) - separator_string (optarg); -@@ -1188,10 +1279,45 @@ - a number. */ - - static void --getoptarg (char *arg, char switch_char, char *character, int *number) -+getoptarg (char *arg, char switch_char, char *character, int *character_length, -+ int *character_width, int *number) - { - if (!ISDIGIT (*arg)) -- *character = *arg++; -+ { -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) /* for multibyte locale. */ -+ { -+ wchar_t wc; -+ size_t mblength; -+ int width; -+ mbstate_t state = {'\0'}; -+ -+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ *character_length = 1; -+ *character_width = 1; -+ } -+ else -+ { -+ *character_length = (mblength < 1) ? 1 : mblength; -+ width = wcwidth (wc); -+ *character_width = (width < 0) ? 0 : width; -+ } -+ -+ strncpy (character, arg, *character_length); -+ arg += *character_length; -+ } -+ else /* for single byte locale. */ -+#endif -+ { -+ *character = *arg++; -+ *character_length = 1; -+ *character_width = 1; -+ } -+ } -+ - if (*arg) - { - long int tmp_long; -@@ -1256,7 +1382,7 @@ - else - col_sep_string = column_separator; - -- col_sep_length = 1; -+ col_sep_length = col_sep_width = 1; - use_col_separator = true; - } - /* It's rather pointless to define a TAB separator with column -@@ -1288,11 +1414,11 @@ - TAB_WIDTH (chars_per_input_tab, chars_per_number); */ - - /* Estimate chars_per_text without any margin and keep it constant. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - number_width = chars_per_number + - TAB_WIDTH (chars_per_default_tab, chars_per_number); - else -- number_width = chars_per_number + 1; -+ number_width = chars_per_number + number_separator_width; - - /* The number is part of the column width unless we are - printing files in parallel. */ -@@ -1307,7 +1433,7 @@ - } - - chars_per_column = (chars_per_line - chars_used_by_number - -- (columns - 1) * col_sep_length) / columns; -+ (columns - 1) * col_sep_width) / columns; - - if (chars_per_column < 1) - error (EXIT_FAILURE, 0, _("page width too narrow")); -@@ -1432,7 +1558,7 @@ - - /* Enlarge p->start_position of first column to use the same form of - padding_not_printed with all columns. */ -- h = h + col_sep_length; -+ h = h + col_sep_width; - - /* This loop takes care of all but the rightmost column. */ - -@@ -1466,7 +1592,7 @@ - } - else - { -- h = h_next + col_sep_length; -+ h = h_next + col_sep_width; - h_next = h + chars_per_column; - } - } -@@ -1756,9 +1882,9 @@ - align_column (COLUMN *p) - { - padding_not_printed = p->start_position; -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2029,13 +2155,13 @@ - /* May be too generous. */ - buff = X2REALLOC (buff, &buff_allocated); - } -- buff[buff_current++] = c; -+ buff[buff_current++] = (unsigned char) c; - } - - static void - add_line_number (COLUMN *p) - { -- int i; -+ int i, j; - char *s; - int left_cut; - -@@ -2058,22 +2184,24 @@ - /* Tabification is assumed for multiple columns, also for n-separators, - but `default n-separator = TAB' hasn't been given priority over - equal column_width also specified by POSIX. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - { - i = number_width - chars_per_number; - while (i-- > 0) - (p->char_func) (' '); - } - else -- (p->char_func) (number_separator); -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); - } - else - /* To comply with POSIX, we avoid any expansion of default TAB - separator with a single column output. No column_width requirement - has to be considered. */ - { -- (p->char_func) (number_separator); -- if (number_separator == '\t') -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); -+ if (number_separator[0] == '\t') - output_position = POS_AFTER_TAB (chars_per_output_tab, - output_position); - } -@@ -2234,7 +2362,7 @@ - while (goal - h_old > 1 - && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) - { -- putchar (output_tab_char); -+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); - h_old = h_new; - } - while (++h_old <= goal) -@@ -2254,6 +2382,7 @@ - { - char *s; - int l = col_sep_length; -+ int not_space_flag; - - s = col_sep_string; - -@@ -2267,6 +2396,7 @@ - { - for (; separators_not_printed > 0; --separators_not_printed) - { -+ not_space_flag = 0; - while (l-- > 0) - { - /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2280,12 +2410,15 @@ - } - else - { -+ not_space_flag = 1; - if (spaces_not_printed > 0) - print_white_space (); - putchar (*s++); -- ++output_position; - } - } -+ if (not_space_flag) -+ output_position += col_sep_width; -+ - /* sep_string ends with some spaces */ - if (spaces_not_printed > 0) - print_white_space (); -@@ -2313,7 +2446,7 @@ - required number of tabs and spaces. */ - - static void --print_char (char c) -+print_char_single (char c) - { - if (tabify_output) - { -@@ -2337,6 +2470,74 @@ - putchar (c); - } - -+#ifdef HAVE_MBRTOWC -+static void -+print_char_multi (char c) -+{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int width; -+ -+ if (tabify_output) -+ { -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ while (mbc_pos > 0) -+ { -+ switch (mblength) -+ { -+ case (size_t)-2: -+ state = state_bak; -+ return; -+ -+ case (size_t)-1: -+ state = state_bak; -+ ++output_position; -+ putchar (mbc[0]); -+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); -+ --mbc_pos; -+ break; -+ -+ case 0: -+ mblength = 1; -+ -+ default: -+ if (wc == L' ') -+ { -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ --mbc_pos; -+ ++spaces_not_printed; -+ return; -+ } -+ else if (spaces_not_printed > 0) -+ print_white_space (); -+ -+ /* Nonprintables are assumed to have width 0, except L'\b'. */ -+ if ((width = wcwidth (wc)) < 1) -+ { -+ if (wc == L'\b') -+ --output_position; -+ } -+ else -+ output_position += width; -+ -+ fwrite (mbc, sizeof(char), mblength, stdout); -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; -+ } -+ } -+ return; -+ } -+ putchar (c); -+} -+#endif -+ - /* Skip to page PAGE before printing. - PAGE may be larger than total number of pages. */ - -@@ -2517,9 +2718,9 @@ - align_empty_cols = false; - } - -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2620,9 +2821,9 @@ - } - } - -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2635,8 +2836,8 @@ - if (spaces_not_printed == 0) - { - output_position = p->start_position + end_vector[line]; -- if (p->start_position - col_sep_length == chars_per_margin) -- output_position -= col_sep_length; -+ if (p->start_position - col_sep_width == chars_per_margin) -+ output_position -= col_sep_width; - } - - return true; -@@ -2655,7 +2856,7 @@ - number of characters is 1.) */ - - static int --char_to_clump (char c) -+char_to_clump_single (char c) - { - unsigned char uc = c; - char *s = clump_buff; -@@ -2665,10 +2866,10 @@ - int chars; - int chars_per_c = 8; - -- if (c == input_tab_char) -+ if (c == input_tab_char[0]) - chars_per_c = chars_per_input_tab; - -- if (c == input_tab_char || c == '\t') -+ if (c == input_tab_char[0] || c == '\t') - { - width = TAB_WIDTH (chars_per_c, input_position); - -@@ -2739,6 +2940,154 @@ - return chars; - } - -+#ifdef HAVE_MBRTOWC -+static int -+char_to_clump_multi (char c) -+{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int wc_width; -+ register char *s = clump_buff; -+ register int i, j; -+ char esc_buff[4]; -+ int width; -+ int chars; -+ int chars_per_c = 8; -+ -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ width = 0; -+ chars = 0; -+ while (mbc_pos > 0) -+ { -+ switch (mblength) -+ { -+ case (size_t)-2: -+ state = state_bak; -+ return 0; -+ -+ case (size_t)-1: -+ state = state_bak; -+ mblength = 1; -+ -+ if (use_esc_sequence || use_cntrl_prefix) -+ { -+ width = +4; -+ chars = +4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", mbc[0]); -+ for (i = 0; i <= 2; ++i) -+ *s++ = (int) esc_buff[i]; -+ } -+ else -+ { -+ width += 1; -+ chars += 1; -+ *s++ = mbc[0]; -+ } -+ break; -+ -+ case 0: -+ mblength = 1; -+ /* Fall through */ -+ -+ default: -+ if (memcmp (mbc, input_tab_char, mblength) == 0) -+ chars_per_c = chars_per_input_tab; -+ -+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') -+ { -+ int width_inc; -+ -+ width_inc = TAB_WIDTH (chars_per_c, input_position); -+ width += width_inc; -+ -+ if (untabify_input) -+ { -+ for (i = width_inc; i; --i) -+ *s++ = ' '; -+ chars += width_inc; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ chars += mblength; -+ } -+ } -+ else if ((wc_width = wcwidth (wc)) < 1) -+ { -+ if (use_esc_sequence) -+ { -+ for (i = 0; i < mblength; i++) -+ { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", c); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; -+ } -+ } -+ else if (use_cntrl_prefix) -+ { -+ if (wc < 0200) -+ { -+ width += 2; -+ chars += 2; -+ *s++ = '^'; -+ *s++ = wc ^ 0100; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) -+ { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", c); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; -+ } -+ } -+ } -+ else if (wc == L'\b') -+ { -+ width += -1; -+ chars += 1; -+ *s++ = c; -+ } -+ else -+ { -+ width += 0; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ } -+ } -+ else -+ { -+ width += wc_width; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ } -+ } -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; -+ } -+ -+ input_position += width; -+ return chars; -+} -+#endif -+ - /* We've just printed some files and need to clean up things before - looking for more options and printing the next batch of files. - ---- coreutils-6.8+/src/cut.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/cut.c 2007-03-01 15:08:24.000000000 +0000 -@@ -29,6 +29,11 @@ - #include <assert.h> - #include <getopt.h> - #include <sys/types.h> -+ -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include <wchar.h> -+#endif - #include "system.h" - - #include "error.h" -@@ -37,6 +42,18 @@ - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "cut" - -@@ -67,6 +84,52 @@ - } \ - while (0) - -+/* Refill the buffer BUF to get a multibyte character. */ -+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ -+ do \ -+ { \ -+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ -+ { \ -+ memmove (BUF, BUFPOS, BUFLEN); \ -+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ -+ BUFPOS = BUF; \ -+ } \ -+ } \ -+ while (0) -+ -+/* Get wide character on BUFPOS. BUFPOS is not included after that. -+ If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ -+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ if (BUFLEN < 1) \ -+ { \ -+ WC = WEOF; \ -+ break; \ -+ } \ -+ \ -+ /* Get a wide character. */ \ -+ CONVFAIL = 0; \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-1: \ -+ case (size_t)-2: \ -+ CONVFAIL++; \ -+ STATE = state_bak; \ -+ /* Fall througn. */ \ -+ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ break; \ -+ } \ -+ } \ -+ while (0) -+ - struct range_pair - { - size_t lo; -@@ -85,7 +148,7 @@ - /* The number of bytes allocated for FIELD_1_BUFFER. */ - static size_t field_1_bufsize; - --/* The largest field or byte index used as an endpoint of a closed -+/* The largest byte, character or field index used as an endpoint of a closed - or degenerate range specification; this doesn't include the starting - index of right-open-ended ranges. For example, with either range spec - `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ -@@ -97,10 +160,11 @@ - - /* This is a bit vector. - In byte mode, which bytes to output. -+ In character mode, which characters to output. - In field mode, which DELIM-separated fields to output. -- Both bytes and fields are numbered starting with 1, -+ Bytes, characters and fields are numbered starting with 1, - so the zeroth bit of this array is unused. -- A field or byte K has been selected if -+ A byte, character or field K has been selected if - (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) - || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ - static unsigned char *printable_field; -@@ -109,9 +173,12 @@ - { - undefined_mode, - -- /* Output characters that are in the given bytes. */ -+ /* Output bytes that are at the given positions. */ - byte_mode, - -+ /* Output characters that are at the given positions. */ -+ character_mode, -+ - /* Output the given delimeter-separated fields. */ - field_mode - }; -@@ -121,6 +188,13 @@ - - static enum operating_mode operating_mode; - -+/* If nonzero, when in byte mode, don't split multibyte characters. */ -+static int byte_mode_character_aware; -+ -+/* If nonzero, the function for single byte locale is work -+ if this program runs on multibyte locale. */ -+static int force_singlebyte_mode; -+ - /* If true do not output lines containing no delimeter characters. - Otherwise, all such lines are printed. This option is valid only - with field mode. */ -@@ -132,6 +206,9 @@ - - /* The delimeter character for field mode. */ - static unsigned char delim; -+#if HAVE_WCHAR_H -+static wchar_t wcdelim; -+#endif - - /* True if the --output-delimiter=STRING option was specified. */ - static bool output_delimiter_specified; -@@ -205,7 +282,7 @@ - -f, --fields=LIST select only these fields; also print any line\n\ - that contains no delimiter character, unless\n\ - the -s option is specified\n\ -- -n (ignored)\n\ -+ -n with -b: don't split multibyte characters\n\ - "), stdout); - fputs (_("\ - --complement complement the set of selected bytes, characters\n\ -@@ -362,7 +439,7 @@ - in_digits = false; - /* Starting a range. */ - if (dash_found) -- FATAL_ERROR (_("invalid byte or field list")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - dash_found = true; - fieldstr++; - -@@ -387,14 +464,16 @@ - if (!rhs_specified) - { - /* `n-'. From `initial' to end of line. */ -- eol_range_start = initial; -+ if (eol_range_start == 0 || -+ (eol_range_start != 0 && eol_range_start > initial)) -+ eol_range_start = initial; - field_found = true; - } - else - { - /* `m-n' or `-n' (1-n). */ - if (value < initial) -- FATAL_ERROR (_("invalid decreasing range")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - - /* Is there already a range going to end of line? */ - if (eol_range_start != 0) -@@ -467,6 +546,9 @@ - if (operating_mode == byte_mode) - error (0, 0, - _("byte offset %s is too large"), quote (bad_num)); -+ else if (operating_mode == character_mode) -+ error (0, 0, -+ _("character offset %s is too large"), quote (bad_num)); - else - error (0, 0, - _("field number %s is too large"), quote (bad_num)); -@@ -477,7 +559,7 @@ - fieldstr++; - } - else -- FATAL_ERROR (_("invalid byte or field list")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - } - - max_range_endpoint = 0; -@@ -570,6 +652,63 @@ - } - } - -+#if HAVE_MBRTOWC -+/* This function is in use for the following case. -+ -+ 1. Read from the stream STREAM, printing to standard output any selected -+ characters. -+ -+ 2. Read from stream STREAM, printing to standard output any selected bytes, -+ without splitting multibyte characters. */ -+ -+static void -+cut_characters_or_cut_bytes_no_split (FILE *stream) -+{ -+ int idx; /* number of bytes or characters in the line so far. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ idx = 0; -+ buflen = 0; -+ bufpos = buf; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ { -+ if (idx > 0) -+ putchar ('\n'); -+ break; -+ } -+ else if (wc == L'\n') -+ { -+ putchar ('\n'); -+ idx = 0; -+ } -+ else -+ { -+ idx += (operating_mode == byte_mode) ? mblength : 1; -+ if (print_kth (idx, NULL)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ } -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - /* Read from stream STREAM, printing to standard output any selected fields. */ - - static void -@@ -692,13 +831,192 @@ - } - } - -+#if HAVE_MBRTOWC -+static void -+cut_fields_mb (FILE *stream) -+{ -+ int c; -+ unsigned int field_idx; -+ int found_any_selected_field; -+ int buffer_first_field; -+ int empty_input; -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc = 0; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ found_any_selected_field = 0; -+ field_idx = 1; -+ bufpos = buf; -+ buflen = 0; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ c = getc (stream); -+ empty_input = (c == EOF); -+ if (c != EOF) -+ ungetc (c, stream); -+ else -+ wc = WEOF; -+ -+ /* To support the semantics of the -s flag, we may have to buffer -+ all of the first field to determine whether it is `delimited.' -+ But that is unnecessary if all non-delimited lines must be printed -+ and the first field has been selected, or if non-delimited lines -+ must be suppressed and the first field has *not* been selected. -+ That is because a non-delimited line has exactly one field. */ -+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); -+ -+ while (1) -+ { -+ if (field_idx == 1 && buffer_first_field) -+ { -+ int len = 0; -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ -+ field_1_buffer = xrealloc (field_1_buffer, len + mblength); -+ memcpy (field_1_buffer + len, bufpos, mblength); -+ len += mblength; -+ buflen -= mblength; -+ bufpos += mblength; -+ -+ if (!convfail && (wc == L'\n' || wc == wcdelim)) -+ break; -+ } -+ -+ if (wc == WEOF) -+ break; -+ -+ /* If the first field extends to the end of line (it is not -+ delimited) and we are printing all non-delimited lines, -+ print this one. */ -+ if (convfail || (!convfail && wc != wcdelim)) -+ { -+ if (suppress_non_delimited) -+ { -+ /* Empty. */ -+ } -+ else -+ { -+ fwrite (field_1_buffer, sizeof (char), len, stdout); -+ /* Make sure the output line is newline terminated. */ -+ if (convfail || (!convfail && wc != L'\n')) -+ putchar ('\n'); -+ } -+ continue; -+ } -+ -+ if (print_kth (1, NULL)) -+ { -+ /* Print the field, but not the trailing delimiter. */ -+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); -+ found_any_selected_field = 1; -+ } -+ ++field_idx; -+ } -+ -+ if (wc != WEOF) -+ { -+ if (print_kth (field_idx, NULL)) -+ { -+ if (found_any_selected_field) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ found_any_selected_field = 1; -+ } -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ else if (!convfail && (wc == wcdelim || wc == L'\n')) -+ { -+ buflen -= mblength; -+ bufpos += mblength; -+ break; -+ } -+ -+ if (print_kth (field_idx, NULL)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+ } -+ -+ if ((!convfail || wc == L'\n') && buflen < 1) -+ wc = WEOF; -+ -+ if (!convfail && wc == wcdelim) -+ ++field_idx; -+ else if (wc == WEOF || (!convfail && wc == L'\n')) -+ { -+ if (found_any_selected_field -+ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) -+ putchar ('\n'); -+ if (wc == WEOF) -+ break; -+ field_idx = 1; -+ found_any_selected_field = 0; -+ } -+ } -+} -+#endif -+ - static void - cut_stream (FILE *stream) - { -- if (operating_mode == byte_mode) -- cut_bytes (stream); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ switch (operating_mode) -+ { -+ case byte_mode: -+ if (byte_mode_character_aware) -+ cut_characters_or_cut_bytes_no_split (stream); -+ else -+ cut_bytes (stream); -+ break; -+ -+ case character_mode: -+ cut_characters_or_cut_bytes_no_split (stream); -+ break; -+ -+ case field_mode: -+ cut_fields_mb (stream); -+ break; -+ -+ default: -+ abort (); -+ } -+ } - else -- cut_fields (stream); -+#endif -+ { -+ if (operating_mode == field_mode) -+ cut_fields (stream); -+ else -+ cut_bytes (stream); -+ } - } - - /* Process file FILE to standard output. -@@ -748,6 +1066,8 @@ - bool ok; - bool delim_specified = false; - char *spec_list_string IF_LINT(= NULL); -+ char mbdelim[MB_LEN_MAX + 1]; -+ size_t delimlen = 0; - - initialize_main (&argc, &argv); - set_program_name (argv[0]); -@@ -770,7 +1090,6 @@ - switch (optc) - { - case 'b': -- case 'c': - /* Build the byte list. */ - if (operating_mode != undefined_mode) - FATAL_ERROR (_("only one type of list may be specified")); -@@ -778,6 +1097,14 @@ - spec_list_string = optarg; - break; - -+ case 'c': -+ /* Build the character list. */ -+ if (operating_mode != undefined_mode) -+ FATAL_ERROR (_("only one type of list may be specified")); -+ operating_mode = character_mode; -+ spec_list_string = optarg; -+ break; -+ - case 'f': - /* Build the field list. */ - if (operating_mode != undefined_mode) -@@ -789,10 +1116,35 @@ - case 'd': - /* New delimiter. */ - /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ -- if (optarg[0] != '\0' && optarg[1] != '\0') -- FATAL_ERROR (_("the delimiter must be a single character")); -- delim = optarg[0]; -- delim_specified = true; -+ { -+#if HAVE_MBRTOWC -+ if(MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); -+ -+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) -+ ++force_singlebyte_mode; -+ else -+ { -+ delimlen = (delimlen < 1) ? 1 : delimlen; -+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ memcpy (mbdelim, optarg, delimlen); -+ } -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ if (optarg[0] != '\0' && optarg[1] != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ delim = (unsigned char) optarg[0]; -+ } -+ delim_specified = true; -+ } - break; - - case OUTPUT_DELIMITER_OPTION: -@@ -805,6 +1157,7 @@ - break; - - case 'n': -+ byte_mode_character_aware = 1; - break; - - case 's': -@@ -827,7 +1180,7 @@ - if (operating_mode == undefined_mode) - FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); - -- if (delim != '\0' && operating_mode != field_mode) -+ if (delim_specified && operating_mode != field_mode) - FATAL_ERROR (_("an input delimiter may be specified only\ - when operating on fields")); - -@@ -854,15 +1207,34 @@ - } - - if (!delim_specified) -- delim = '\t'; -+ { -+ delim = '\t'; -+#ifdef HAVE_MBRTOWC -+ wcdelim = L'\t'; -+ mbdelim[0] = '\t'; -+ mbdelim[1] = '\0'; -+ delimlen = 1; -+#endif -+ } - - if (output_delimiter_string == NULL) - { -- static char dummy[2]; -- dummy[0] = delim; -- dummy[1] = '\0'; -- output_delimiter_string = dummy; -- output_delimiter_length = 1; -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ output_delimiter_string = xstrdup(mbdelim); -+ output_delimiter_length = delimlen; -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ static char dummy[2]; -+ dummy[0] = delim; -+ dummy[1] = '\0'; -+ output_delimiter_string = dummy; -+ output_delimiter_length = 1; -+ } - } - - if (optind == argc) -diff -urNp coreutils-6.12/src/join.c coreutils-6.12-orig/src/join.c ---- coreutils-6.12/src/join.c 2008-07-16 14:08:01.000000000 +0200 -+++ coreutils-6.12-orig/src/join.c 2008-07-16 14:07:02.000000000 +0200 -@@ -634,6 +634,11 @@ get_line (FILE *fp, struct line *line, i - return false; - } - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ xfields_multibyte (line); -+ else -+#endif - xfields (line); - - if (prevline[which - 1]) - diff --git a/abs/core-testing/coreutils/coreutils-pam.patch b/abs/core-testing/coreutils/coreutils-pam.patch index 7171522..e61908f 100644 --- a/abs/core-testing/coreutils/coreutils-pam.patch +++ b/abs/core-testing/coreutils/coreutils-pam.patch @@ -1,17 +1,96 @@ ---- coreutils-6.7/src/Makefile.am.pam 2006-11-24 21:28:10.000000000 +0000 -+++ coreutils-6.7/src/Makefile.am 2007-01-09 17:00:01.000000000 +0000 -@@ -103,7 +103,7 @@ - # If necessary, add -lm to resolve use of pow in lib/strtod.c. - uptime_LDADD = $(LDADD) $(POW_LIB) $(GETLOADAVG_LIBS) +diff -urNp coreutils-8.4-orig/configure.ac coreutils-8.4/configure.ac +--- coreutils-8.4-orig/configure.ac 2010-01-11 18:20:42.000000000 +0100 ++++ coreutils-8.4/configure.ac 2010-02-12 10:17:46.000000000 +0100 +@@ -126,6 +126,13 @@ if test "$gl_gcc_warnings" = yes; then + AC_SUBST([GNULIB_WARN_CFLAGS]) + fi --su_LDADD = $(LDADD) $(LIB_CRYPT) -+su_LDADD = $(LDADD) $(LIB_CRYPT) @LIB_PAM@ ++dnl Give the chance to enable PAM ++AC_ARG_ENABLE(pam, dnl ++[ --enable-pam Enable use of the PAM libraries], ++[AC_DEFINE(USE_PAM, 1, [Define if you want to use PAM]) ++LIB_PAM="-ldl -lpam -lpam_misc" ++AC_SUBST(LIB_PAM)]) ++ + AC_FUNC_FORK + + optional_bin_progs= +diff -urNp coreutils-8.4-orig/doc/coreutils.texi coreutils-8.4/doc/coreutils.texi +--- coreutils-8.4-orig/doc/coreutils.texi 2010-01-03 18:06:20.000000000 +0100 ++++ coreutils-8.4/doc/coreutils.texi 2010-02-12 10:17:46.000000000 +0100 +@@ -15081,8 +15081,11 @@ to certain shells, etc.). + @findex syslog + @command{su} can optionally be compiled to use @code{syslog} to report + failed, and optionally successful, @command{su} attempts. (If the system +-supports @code{syslog}.) However, GNU @command{su} does not check if the +-user is a member of the @code{wheel} group; see below. ++supports @code{syslog}.) ++ ++This version of @command{su} has support for using PAM for ++authentication. You can edit @file{/etc/pam.d/su} to customize its ++behaviour. + + The program accepts the following options. Also see @ref{Common options}. + +@@ -15124,6 +15127,8 @@ environment variables except @env{TERM}, + @env{PATH} to a compiled-in default value. Change to @var{user}'s home + directory. Prepend @samp{-} to the shell's name, intended to make it + read its login startup file(s). ++Additionaly @env{DISPLAY} and @env{XAUTHORITY} environment variables ++are preserved as well for PAM functionality. + + @item -m + @itemx -p +@@ -15163,33 +15168,6 @@ Exit status: + the exit status of the subshell otherwise + @end display + +-@cindex wheel group, not supported +-@cindex group wheel, not supported +-@cindex fascism +-@subsection Why GNU @command{su} does not support the @samp{wheel} group +- +-(This section is by Richard Stallman.) +- +-@cindex Twenex +-@cindex MIT AI lab +-Sometimes a few of the users try to hold total power over all the +-rest. For example, in 1984, a few users at the MIT AI lab decided to +-seize power by changing the operator password on the Twenex system and +-keeping it secret from everyone else. (I was able to thwart this coup +-and give power back to the users by patching the kernel, but I +-wouldn't know how to do that in Unix.) +- +-However, occasionally the rulers do tell someone. Under the usual +-@command{su} mechanism, once someone learns the root password who +-sympathizes with the ordinary users, he or she can tell the rest. The +-``wheel group'' feature would make this impossible, and thus cement the +-power of the rulers. +- +-I'm on the side of the masses, not that of the rulers. If you are +-used to supporting the bosses and sysadmins in whatever they do, you +-might find this idea strange at first. +- +- + @node timeout invocation + @section @command{timeout}: Run a command with a time limit + +diff -urNp coreutils-8.4-orig/src/Makefile.am coreutils-8.4/src/Makefile.am +--- coreutils-8.4-orig/src/Makefile.am 2010-01-03 18:06:20.000000000 +0100 ++++ coreutils-8.4/src/Makefile.am 2010-02-12 10:17:46.000000000 +0100 +@@ -361,7 +361,7 @@ factor_LDADD += $(LIB_GMP) + uptime_LDADD += $(GETLOADAVG_LIBS) + + # for crypt +-su_LDADD += $(LIB_CRYPT) ++su_LDADD += $(LIB_CRYPT) @LIB_PAM@ - dir_LDADD += $(LIB_ACL) - ls_LDADD += $(LIB_ACL) ---- coreutils-6.7/src/su.c.pam 2007-01-09 17:00:01.000000000 +0000 -+++ coreutils-6.7/src/su.c 2007-01-09 17:16:43.000000000 +0000 -@@ -38,6 +38,16 @@ + # for various ACL functions + copy_LDADD += $(LIB_ACL) +diff -urNp coreutils-8.4-orig/src/su.c coreutils-8.4/src/su.c +--- coreutils-8.4-orig/src/su.c 2010-02-12 10:15:15.000000000 +0100 ++++ coreutils-8.4/src/su.c 2010-02-12 10:24:29.000000000 +0100 +@@ -37,6 +37,16 @@ restricts who can su to UID 0 accounts. RMS considers that to be fascist. @@ -28,9 +107,9 @@ Compile-time options: -DSYSLOG_SUCCESS Log successful su's (by default, to root) with syslog. -DSYSLOG_FAILURE Log failed su's (by default, to root) with syslog. -@@ -59,6 +69,15 @@ - prototype (returning `int') in <unistd.h>. */ - #define getusershell _getusershell_sys_proto_ +@@ -53,6 +63,15 @@ + #include <pwd.h> + #include <grp.h> +#ifdef USE_PAM +# include <signal.h> @@ -44,18 +123,13 @@ #include "system.h" #include "getpass.h" -@@ -128,15 +147,22 @@ +@@ -120,10 +139,17 @@ /* The user to become if none is specified. */ #define DEFAULT_USER "root" +#ifndef USE_PAM char *crypt (char const *key, char const *salt); +#endif - char *getusershell (void); - void endusershell (void); - void setusershell (void); - - extern char **environ; -static void run_shell (char const *, char const *, char **, size_t) +static void run_shell (char const *, char const *, char **, size_t, @@ -68,7 +142,7 @@ /* If true, pass the `-f' option to the subshell. */ static bool fast_startup; -@@ -225,7 +251,26 @@ +@@ -209,7 +235,26 @@ log_su (struct passwd const *pw, bool su } #endif @@ -95,7 +169,7 @@ Return true if the user gives the correct password for entry PW, false if not. Return true without asking for a password if run by UID 0 or if PW has an empty password. */ -@@ -233,6 +278,44 @@ +@@ -217,6 +262,44 @@ log_su (struct passwd const *pw, bool su static bool correct_password (const struct passwd *pw) { @@ -140,7 +214,7 @@ char *unencrypted, *encrypted, *correct; #if HAVE_GETSPNAM && HAVE_STRUCT_SPWD_SP_PWDP /* Shadow passwd stuff for SVR3 and maybe other systems. */ -@@ -257,6 +340,7 @@ +@@ -241,6 +324,7 @@ correct_password (const struct passwd *p encrypted = crypt (unencrypted, correct); memset (unencrypted, 0, strlen (unencrypted)); return STREQ (encrypted, correct); @@ -148,26 +222,26 @@ } /* Update `environ' for the new shell based on PW, with SHELL being -@@ -270,12 +354,18 @@ +@@ -254,12 +338,18 @@ modify_environment (const struct passwd /* Leave TERM unchanged. Set HOME, SHELL, USER, LOGNAME, PATH. Unset all other environment variables. */ char const *term = getenv ("TERM"); + char const *display = getenv ("DISPLAY"); + char const *xauthority = getenv ("XAUTHORITY"); if (term) - term = xstrdup (term); + term = xstrdup (term); environ = xmalloc ((6 + !!term) * sizeof (char *)); environ[0] = NULL; if (term) - xsetenv ("TERM", term); + xsetenv ("TERM", term); + if (display) -+ xsetenv ("DISPLAY", display); ++ xsetenv ("DISPLAY", display); + if (xauthority) -+ xsetenv ("XAUTHORITY", xauthority); ++ xsetenv ("XAUTHORITY", xauthority); xsetenv ("HOME", pw->pw_dir); xsetenv ("SHELL", shell); xsetenv ("USER", pw->pw_name); -@@ -308,8 +398,13 @@ +@@ -292,8 +382,13 @@ change_identity (const struct passwd *pw { #ifdef HAVE_INITGROUPS errno = 0; @@ -177,13 +251,13 @@ + pam_close_session(pamh, 0); + pam_end(pamh, PAM_ABORT); +#endif - error (EXIT_FAILURE, errno, _("cannot set groups")); + error (EXIT_CANCELED, errno, _("cannot set groups")); + } endgrent (); #endif if (setgid (pw->pw_gid)) -@@ -318,6 +413,31 @@ - error (EXIT_FAILURE, errno, _("cannot set user id")); +@@ -302,6 +397,31 @@ change_identity (const struct passwd *pw + error (EXIT_CANCELED, errno, _("cannot set user id")); } +#ifdef USE_PAM @@ -214,12 +288,12 @@ /* Run SHELL, or DEFAULT_SHELL if SHELL is empty. If COMMAND is nonzero, pass it to the shell with the -c option. Pass ADDITIONAL_ARGS to the shell as more arguments; there -@@ -325,17 +445,49 @@ +@@ -309,17 +429,49 @@ change_identity (const struct passwd *pw static void run_shell (char const *shell, char const *command, char **additional_args, -- size_t n_additional_args) -+ size_t n_additional_args, const struct passwd *pw) +- size_t n_additional_args) ++ size_t n_additional_args, const struct passwd *pw) { size_t n_args = 1 + fast_startup + 2 * !!command + n_additional_args + 1; char const **args = xnmalloc (n_args, sizeof *args); @@ -265,7 +339,7 @@ shell_basename = last_component (shell); arg0 = xmalloc (strlen (shell_basename) + 2); arg0[0] = '-'; -@@ -360,6 +512,66 @@ +@@ -344,6 +496,67 @@ run_shell (char const *shell, char const error (0, errno, "%s", shell); exit (exit_status); } @@ -303,12 +377,12 @@ + + pid = waitpid(-1, &status, WUNTRACED); + -+ if (WIFSTOPPED(status)) { -+ kill(getpid(), SIGSTOP); ++ if (((pid_t)-1 != pid) && (0 != WIFSTOPPED (status))) { ++ kill(getpid(), WSTOPSIG(status)); + /* once we get here, we must have resumed */ + kill(pid, SIGCONT); + } -+ } while (WIFSTOPPED(status)); ++ } while (0 != WIFSTOPPED(status)); + } + + if (caught) { @@ -327,12 +401,13 @@ + fprintf(stderr, " ...killed.\n"); + exit(-1); + } -+ exit (WEXITSTATUS(status)); ++ exit ((0 != WIFEXITED (status)) ? WEXITSTATUS (status) ++ : WTERMSIG (status) + 128); +#endif /* USE_PAM */ } /* Return true if SHELL is a restricted shell (one not returned by -@@ -527,9 +739,9 @@ +@@ -511,9 +724,9 @@ main (int argc, char **argv) shell = xstrdup (shell ? shell : pw->pw_shell); modify_environment (pw, shell); @@ -342,82 +417,12 @@ - error (0, errno, _("warning: cannot change directory to %s"), pw->pw_dir); +#endif + /* error() flushes stderr, but does not check for write failure. + Normally, we would catch this via our atexit() hook of +@@ -523,5 +736,5 @@ main (int argc, char **argv) + if (ferror (stderr)) + exit (EXIT_CANCELED); + - run_shell (shell, command, argv + optind, MAX (0, argc - optind)); + run_shell (shell, command, argv + optind, MAX (0, argc - optind), pw); } ---- coreutils-6.7/doc/coreutils.texi.pam 2006-10-27 15:30:48.000000000 +0100 -+++ coreutils-6.7/doc/coreutils.texi 2007-01-09 17:00:01.000000000 +0000 -@@ -13395,8 +13395,11 @@ - @findex syslog - @command{su} can optionally be compiled to use @code{syslog} to report - failed, and optionally successful, @command{su} attempts. (If the system --supports @code{syslog}.) However, GNU @command{su} does not check if the --user is a member of the @code{wheel} group; see below. -+supports @code{syslog}.) -+ -+This version of @command{su} has support for using PAM for -+authentication. You can edit @file{/etc/pam.d/su} to customize its -+behaviour. - - The program accepts the following options. Also see @ref{Common options}. - -@@ -12815,6 +12815,8 @@ - @env{PATH} to a compiled-in default value. Change to @var{user}'s home - directory. Prepend @samp{-} to the shell's name, intended to make it - read its login startup file(s). -+Additionaly @env{DISPLAY} and @env{XAUTHORITY} environment variables -+are preserved as well for PAM functionality. - - @item -m - @itemx -p -@@ -13477,33 +13480,6 @@ - the exit status of the subshell otherwise - @end display - --@cindex wheel group, not supported --@cindex group wheel, not supported --@cindex fascism --@subsection Why GNU @command{su} does not support the @samp{wheel} group -- --(This section is by Richard Stallman.) -- --@cindex Twenex --@cindex MIT AI lab --Sometimes a few of the users try to hold total power over all the --rest. For example, in 1984, a few users at the MIT AI lab decided to --seize power by changing the operator password on the Twenex system and --keeping it secret from everyone else. (I was able to thwart this coup --and give power back to the users by patching the kernel, but I --wouldn't know how to do that in Unix.) -- --However, occasionally the rulers do tell someone. Under the usual --@command{su} mechanism, once someone learns the root password who --sympathizes with the ordinary users, he or she can tell the rest. The --``wheel group'' feature would make this impossible, and thus cement the --power of the rulers. -- --I'm on the side of the masses, not that of the rulers. If you are --used to supporting the bosses and sysadmins in whatever they do, you --might find this idea strange at first. -- -- - @node timeout invocation - @section @command{timeout}: Run a command with a time limit - ---- coreutils-6.7/configure.ac.pam 2006-12-07 21:30:24.000000000 +0000 -+++ coreutils-6.7/configure.ac 2007-01-09 17:18:04.000000000 +0000 -@@ -44,6 +44,13 @@ - gl_INIT - coreutils_MACROS - -+dnl Give the chance to enable PAM -+AC_ARG_ENABLE(pam, dnl -+[ --enable-pam Enable use of the PAM libraries], -+[AC_DEFINE(USE_PAM, 1, [Define if you want to use PAM]) -+LIB_PAM="-ldl -lpam -lpam_misc" -+AC_SUBST(LIB_PAM)]) -+ - AC_FUNC_FORK - - optional_bin_progs= - diff --git a/abs/core-testing/coreutils/coreutils-uname.patch b/abs/core-testing/coreutils/coreutils-uname.patch index 93cb134..b458abe 100644 --- a/abs/core-testing/coreutils/coreutils-uname.patch +++ b/abs/core-testing/coreutils/coreutils-uname.patch @@ -11,11 +11,11 @@ func in the linux/arch/<ARCH>/ source tree of the kernel. --- coreutils/src/uname.c +++ coreutils/src/uname.c -@@ -51,6 +51,11 @@ +@@ -50,6 +50,11 @@ # include <mach-o/arch.h> #endif -+#if defined (__linux__) ++#if defined(__linux__) +# define USE_PROCINFO +# define UNAME_HARDWARE_PLATFORM +#endif @@ -63,7 +63,7 @@ func in the linux/arch/<ARCH>/ source tree of the kernel. + } +} + -+static int __linux_procinfo (int x, char *fstr, size_t s) ++static int __linux_procinfo(int x, char *fstr, size_t s) +{ + FILE *fp; + @@ -148,26 +148,26 @@ func in the linux/arch/<ARCH>/ source tree of the kernel. -#if HAVE_SYSINFO && defined SI_ARCHITECTURE +#if ( HAVE_SYSINFO && defined SI_ARCHITECTURE ) || defined(USE_PROCINFO) { - static char processor[257]; + static char processor[257]; +#if defined(USE_PROCINFO) -+ if (0 <= __linux_procinfo (PROCINFO_PROCESSOR, processor, sizeof processor)) ++ if (0 <= __linux_procinfo (PROCINFO_PROCESSOR, processor, sizeof processor)) +#else - if (0 <= sysinfo (SI_ARCHITECTURE, processor, sizeof processor)) + if (0 <= sysinfo (SI_ARCHITECTURE, processor, sizeof processor)) +#endif - element = processor; + element = processor; } #endif @@ -306,9 +404,13 @@ main (int argc, char **argv) if (element == unknown) - { - static char hardware_platform[257]; + { + static char hardware_platform[257]; +#if defined(USE_PROCINFO) -+ if (0 <= __linux_procinfo (PROCINFO_HARDWARE_PLATFORM, hardware_platform, sizeof hardware_platform)) ++ if (0 <= __linux_procinfo (PROCINFO_HARDWARE_PLATFORM, hardware_platform, sizeof hardware_platform)) +#else - size_t s = sizeof hardware_platform; - static int mib[] = { CTL_HW, UNAME_HARDWARE_PLATFORM }; - if (sysctl (mib, 2, hardware_platform, &s, 0, 0) >= 0) + size_t s = sizeof hardware_platform; + static int mib[] = { CTL_HW, UNAME_HARDWARE_PLATFORM }; + if (sysctl (mib, 2, hardware_platform, &s, 0, 0) >= 0) +#endif - element = hardware_platform; - } + element = hardware_platform; + } #endif diff --git a/abs/core-testing/coreutils/coreutils.install b/abs/core-testing/coreutils/coreutils.install index 37ebe29..dff2dc5 100644 --- a/abs/core-testing/coreutils/coreutils.install +++ b/abs/core-testing/coreutils/coreutils.install @@ -4,7 +4,7 @@ filelist=(coreutils.info) post_install() { [ -x usr/bin/install-info ] || return 0 for file in ${filelist[@]}; do - usr/bin/install-info $infodir/$file $infodir/dir 2> /dev/null + usr/bin/install-info $infodir/$file.gz $infodir/dir 2> /dev/null done } @@ -15,7 +15,7 @@ post_upgrade() { pre_remove() { [ -x usr/bin/install-info ] || return 0 for file in ${filelist[@]}; do - usr/bin/install-info --delete $infodir/$file $infodir/dir 2> /dev/null + usr/bin/install-info --delete $infodir/$file.gz $infodir/dir 2> /dev/null done } diff --git a/abs/core-testing/coreutils/futimes.patch b/abs/core-testing/coreutils/futimes.patch deleted file mode 100644 index bb08384..0000000 --- a/abs/core-testing/coreutils/futimes.patch +++ /dev/null @@ -1,47 +0,0 @@ ---- coreutils-6.9/lib/utimens.h.futimens 2007-02-23 18:25:21.000000000 +0000 -+++ coreutils-6.9/lib/utimens.h 2007-06-13 11:40:37.000000000 +0100 -@@ -1,3 +1,3 @@ - #include <time.h> --int futimens (int, char const *, struct timespec const [2]); -+int gl_futimens (int, char const *, struct timespec const [2]); - int utimens (char const *, struct timespec const [2]); ---- coreutils-6.9/lib/utimens.c.futimens 2007-01-18 08:33:34.000000000 +0000 -+++ coreutils-6.9/lib/utimens.c 2007-06-13 11:40:37.000000000 +0100 -@@ -75,7 +75,7 @@ struct utimbuf - Return 0 on success, -1 (setting errno) on failure. */ - - int --futimens (int fd ATTRIBUTE_UNUSED, -+gl_futimens (int fd ATTRIBUTE_UNUSED, - char const *file, struct timespec const timespec[2]) - { - /* Some Linux-based NFS clients are buggy, and mishandle time stamps -@@ -185,5 +185,5 @@ futimens (int fd ATTRIBUTE_UNUSED, - int - utimens (char const *file, struct timespec const timespec[2]) - { -- return futimens (-1, file, timespec); -+ return gl_futimens (-1, file, timespec); - } ---- coreutils-6.9/src/copy.c.futimens 2007-06-13 11:56:44.000000000 +0100 -+++ coreutils-6.9/src/copy.c 2007-06-13 11:57:00.000000000 +0100 -@@ -547,7 +547,7 @@ copy_reg (char const *src_name, char con - timespec[0] = get_stat_atime (src_sb); - timespec[1] = get_stat_mtime (src_sb); - -- if (futimens (dest_desc, dst_name, timespec) != 0) -+ if (gl_futimens (dest_desc, dst_name, timespec) != 0) - { - error (0, errno, _("preserving times for %s"), quote (dst_name)); - if (x->require_preserve) ---- coreutils-6.9/src/touch.c.futimens 2007-06-13 11:58:00.000000000 +0100 -+++ coreutils-6.9/src/touch.c 2007-06-13 11:58:06.000000000 +0100 -@@ -182,7 +182,7 @@ touch (const char *file) - t = timespec; - } - -- ok = (futimens (fd, (fd == STDOUT_FILENO ? NULL : file), t) == 0); -+ ok = (gl_futimens (fd, (fd == STDOUT_FILENO ? NULL : file), t) == 0); - - if (fd == STDIN_FILENO) - { |