[git commit master] [un]expand: unicode support

Denys Vlasenko vda.linux at googlemail.com
Mon Jan 4 15:21:31 UTC 2010


commit: http://git.busybox.net/busybox/commit/?id=d2b1ba6fdee59645763e915ade3ec55e67d5839a
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
expand_main                                          633     663     +30

Signed-off-by: Tomas Heinrich <heinrich.tomas at gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 coreutils/expand.c       |   51 +++++++++++++++++++++++++++++----------------
 testsuite/expand.tests   |    6 +++++
 testsuite/unexpand.tests |    3 ++
 3 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/coreutils/expand.c b/coreutils/expand.c
index 7137c3b..e08eb1d 100644
--- a/coreutils/expand.c
+++ b/coreutils/expand.c
@@ -20,8 +20,8 @@
  *
  *  Caveat: this versions of expand and unexpand don't accept tab lists.
  */
-
 #include "libbb.h"
+#include "unicode.h"
 
 enum {
 	OPT_INITIAL     = 1 << 0,
@@ -30,35 +30,37 @@ enum {
 };
 
 #if ENABLE_EXPAND
-static void expand(FILE *file, int tab_size, unsigned opt)
+static void expand(FILE *file, unsigned tab_size, unsigned opt)
 {
 	char *line;
 
-	tab_size = -tab_size;
-
 	while ((line = xmalloc_fgets(file)) != NULL) {
-		int pos;
 		unsigned char c;
-		char *ptr = line;
+		char *ptr;
+		char *ptr_strbeg;
 
-		goto start;
+		ptr = ptr_strbeg = line;
 		while ((c = *ptr) != '\0') {
 			if ((opt & OPT_INITIAL) && !isblank(c)) {
-				fputs(ptr, stdout);
+				/* not space or tab */
 				break;
 			}
-			ptr++;
 			if (c == '\t') {
-				c = ' ';
-				while (++pos < 0)
-					bb_putchar(c);
-			}
-			bb_putchar(c);
-			if (++pos >= 0) {
- start:
-				pos = tab_size;
+				unsigned len;
+				*ptr = '\0';
+# if ENABLE_FEATURE_ASSUME_UNICODE
+				len = bb_mbstrlen(ptr_strbeg);
+# else
+				len = ptr - ptr_strbeg;
+# endif
+				len = tab_size - (len % tab_size);
+				/*while (ptr[1] == '\t') { ptr++; len += tab_size; } - can handle many tabs at once */
+				printf("%s%*s", ptr_strbeg, len, "");
+				ptr_strbeg = ptr + 1;
 			}
+			ptr++;
 		}
+		fputs(ptr_strbeg, stdout);
 		free(line);
 	}
 }
@@ -75,6 +77,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
 
 		while (*ptr) {
 			unsigned n;
+			unsigned len;
 
 			while (*ptr == ' ') {
 				column++;
@@ -97,8 +100,19 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
 			}
 			n = strcspn(ptr, "\t ");
 			printf("%*s%.*s", column, "", n, ptr);
+# if ENABLE_FEATURE_ASSUME_UNICODE
+			{
+				char c;
+				c = ptr[n];
+				ptr[n] = '\0';
+				len = bb_mbstrlen(ptr);
+				ptr[n] = c;
+			}
+# else
+			len = n;
+# endif
 			ptr += n;
-			column = (column + n) % tab_size;
+			column = (column + len) % tab_size;
 		}
 		free(line);
 	}
@@ -130,6 +144,7 @@ int expand_main(int argc UNUSED_PARAM, char **argv)
 		"all\0"              No_argument       "a"
 	;
 #endif
+	check_unicode_in_env();
 
 	if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) {
 		IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts);
diff --git a/testsuite/expand.tests b/testsuite/expand.tests
index 57498a2..9966314 100755
--- a/testsuite/expand.tests
+++ b/testsuite/expand.tests
@@ -12,4 +12,10 @@ testing "expand" \
 	"" \
 	"\t12345678\t12345678\n" \
 
+testing "expand with unicode characher 0x394" \
+	"expand" \
+	"Δ       12345ΔΔΔ        12345678\n" \
+	"" \
+	"Δ\t12345ΔΔΔ\t12345678\n" \
+
 exit $FAILCOUNT
diff --git a/testsuite/unexpand.tests b/testsuite/unexpand.tests
index 5c82a1e..5c2a29b 100755
--- a/testsuite/unexpand.tests
+++ b/testsuite/unexpand.tests
@@ -27,4 +27,7 @@ testing "unexpand case 6" "unexpand" \
 testing "unexpand case 7" "unexpand" \
 	"123\t 45678\n" "" "123 \t 45678\n" \
 
+testing "unexpand with unicode characher 0x394" "unexpand" \
+	"1ΔΔΔ5\t99999\n" "" "1ΔΔΔ5   99999\n" \
+
 exit $FAILCOUNT
-- 
1.6.3.3



More information about the busybox-cvs mailing list