[PATCH] have tar with GNU extensions use base-256 encoding for large fields

Ian Wienand ianw at vmware.com
Mon May 2 23:56:35 UTC 2011


Hi,

Currently, if busybox tar encounters a negative time_t on a file, it
just puts the sign-extended value into the tar file (see [1], where it
says "Portable file timestamps cannot be negative").  I think it's best
to keep the standard tar output POSIX-ish, so this change gives a
warning when a negative timestamp is seen and stores the timestamp as
zero.

However, when GNU extensions are turned on, it seems the best thing to
do is use base-256 encoding to represent the timestamp.  This also has
the advantage that we can easily use this encoding for the file size
too.  A base-256 encoded field is marked by setting the top bit of its
first byte; it has no trailing NUL, and the value itself is stored
in base-256, most significant byte first.

I've tested this with files that have negative timestamps.  Without
the GNU extensions, we get the aforementioned warning.

---
$ ~/programs/busybox-git/busybox tar cvf foobar-bb.tar ./foobar 
tar: Can not store negative time_t for ./foobar
./foobar
---

With GNU extensions turned on, hexdumping the header and comparing it
shows that it looks exactly the same as the one produced by GNU tar
itself.

Size stats -- first with GNU extensions enabled, then without:

function                                             old     new   delta
putValue                                               -     363    +363
putOctal                                              69     137     +68
writeTarHeader                                       822     871     +49
.rodata                                           139625  139662     +37
writeLongname                                        225     231      +6
chksum_and_xwrite                                    108     111      +3
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 5/0 up/down: 526/0)             Total: 526 bytes
   text	   data	    bss	    dec	    hex	filename
 749743	   2135	   9076	 760954	  b9c7a	busybox_old
 750269	   2135	   9076	 761480	  b9e88	busybox_unstripped

function                                             old     new   delta
putOctal                                              69     137     +68
writeTarHeader                                       860     911     +51
.rodata                                           139610  139647     +37
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 156/0)             Total: 156 bytes
   text	   data	    bss	    dec	    hex	filename
 749254	   2135	   9076	 760465	  b9a91	busybox_old
 749410	   2135	   9076	 760621	  b9b2d	busybox_unstripped

-i

[1] http://pubs.opengroup.org/onlinepubs/000095399/utilities/pax.html

Signed-off-by: Ian Wienand <ian at wienand.org>
---
 archival/Config.src |    5 +-
 archival/tar.c      |  109 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/archival/Config.src b/archival/Config.src
index 81788ec..63aa743 100644
--- a/archival/Config.src
+++ b/archival/Config.src
@@ -266,12 +266,13 @@ config FEATURE_TAR_OLDSUN_COMPATIBILITY
 	  tarballs still exist.
 
 config FEATURE_TAR_GNU_EXTENSIONS
-	bool "Support for GNU tar extensions (long filenames)"
+	bool "Support for GNU tar extensions (long filenames & large files)"
 	default y
 	depends on TAR || DPKG
 	help
 	  With this option busybox supports GNU long filenames and
-	  linknames.
+	  linknames, old timestamps and larger file sizes that can not
+	  be represented in POSIX format tar files.
 
 config FEATURE_TAR_LONG_OPTIONS
 	bool "Enable long options"
diff --git a/archival/tar.c b/archival/tar.c
index 01b83d5..973b8d8 100644
--- a/archival/tar.c
+++ b/archival/tar.c
@@ -158,15 +158,34 @@ static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf
 	return hlInfo;
 }
 
+#define LG_8   3
+/*
+ * if we have DIGITS, where each stores BITS_PER_DIGIT, what's the
+ * biggest number we can store?
+ */
+#define MAX_VAL_WITH_DIGITS(digits, bits_per_digit)			\
+	((digits) * (bits_per_digit) < sizeof (uint64_t) * CHAR_BIT	\
+	 ? ((uint64_t) 1 << ((digits) * (bits_per_digit))) - 1 :	\
+	 (uint64_t) -1)
+
+/* -1 here is for trailing NULL */
+#define MAX_OCTAL_SIZE(len) MAX_VAL_WITH_DIGITS (len - 1, LG_8)
+
 /* Put an octal string into the specified buffer.
  * The number is zero padded and possibly null terminated.
  * Stores low-order bits only if whole value does not fit. */
-static void putOctal(char *cp, int len, off_t value)
+static int putOctal(char *cp, int len, off_t value, int negative)
 {
 	char tempBuffer[sizeof(off_t)*3 + 1];
 	char *tempString = tempBuffer;
 	int width;
 
+	if (negative || value > MAX_OCTAL_SIZE(len)) {
+		/* zero for sanity */
+		memset(cp, 0, len);
+		return -1;
+	}
+
 	width = sprintf(tempBuffer, "%0*"OFF_FMT"o", len, value);
 	tempString += (width - len);
 
@@ -178,8 +197,58 @@ static void putOctal(char *cp, int len, off_t value)
 
 	/* Copy the string to the field */
 	memcpy(cp, tempString, len);
+	return 0;
 }
-#define PUT_OCTAL(a, b) putOctal((a), sizeof(a), (b))
+
+#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
+/* GNU tar allows some fields to be bigger by using base-256 encoding */
+
+#define LG_256 8
+/* -1 here is for top-byte header, see below */
+#define MAX_B256_SIZE(len)  MAX_VAL_WITH_DIGITS (len - 1, LG_256)
+
+static void toBase256(int negative, off_t value,
+		      char *cp, int len) {
+	off_t v = value;
+	off_t propogated_sign_bits =
+		((off_t) - negative << (CHAR_BIT * sizeof(v) - LG_256));
+	size_t i = len;
+
+	/*
+	 * top byte is a marker; first bit is always set to indicate
+	 * this is base-256; positive values are \200, negative \377
+	 */
+	cp[0] = negative ? -1 : 1 << (LG_256 - 1);
+
+	do {
+		cp[--i] = v & ((1 << LG_256) - 1);
+		v = propogated_sign_bits | (v >> LG_256);
+	}
+	while (i);
+}
+
+static int putValue(char *cp, int len, off_t value, int negative)
+{
+	/* put as octal by default, but that can't represent negative values */
+	if (!negative && value < MAX_OCTAL_SIZE(len)) {
+		return putOctal(cp, len, value, negative);
+	}
+
+	/* otherwise, see if it fits in base-256 encoding */
+	if ((negative ? -1 - value : value) <= MAX_B256_SIZE(len)) {
+		toBase256(negative, value, cp, len);
+		return 0;
+	}
+	/* now what?  zero out for sanity */
+	memset(cp, 0, len);
+
+	return -1;
+}
+
+#define STORE_VALUE(a, b) putValue((a), sizeof(a), (b), ((b) < 0 ? 1 : 0))
+#else
+#define STORE_VALUE(a, b) putOctal((a), sizeof(a), (b), ((b) < 0 ? 1 : 0))
+#endif
 
 static void chksum_and_xwrite(int fd, struct tar_header_t* hp)
 {
@@ -201,7 +270,7 @@ static void chksum_and_xwrite(int fd, struct tar_header_t* hp)
 	chksum = 0;
 	size = sizeof(*hp);
 	do { chksum += *cp++; } while (--size);
-	putOctal(hp->chksum, sizeof(hp->chksum)-1, chksum);
+	putOctal(hp->chksum, sizeof(hp->chksum)-1, chksum, 0);
 
 	/* Now write the header out to disk */
 	xwrite(fd, hp, sizeof(*hp));
@@ -233,7 +302,7 @@ static void writeLongname(int fd, int type, const char *name, int dir)
 	memset(&header, 0, sizeof(header));
 	strcpy(header.name, "././@LongLink");
 	memcpy(header.mode, prefilled.mode, sizeof(prefilled));
-	PUT_OCTAL(header.size, size);
+	STORE_VALUE(header.size, size);
 	header.typeflag = type;
 	chksum_and_xwrite(fd, &header);
 
@@ -260,11 +329,15 @@ static int writeTarHeader(struct TarBallInfo *tbInfo,
 	strncpy(header.name, header_name, sizeof(header.name));
 
 	/* POSIX says to mask mode with 07777. */
-	PUT_OCTAL(header.mode, statbuf->st_mode & 07777);
-	PUT_OCTAL(header.uid, statbuf->st_uid);
-	PUT_OCTAL(header.gid, statbuf->st_gid);
+	STORE_VALUE(header.mode, statbuf->st_mode & 07777);
+	STORE_VALUE(header.uid, statbuf->st_uid);
+	STORE_VALUE(header.gid, statbuf->st_gid);
 	memset(header.size, '0', sizeof(header.size)-1); /* Regular file size is handled later */
-	PUT_OCTAL(header.mtime, statbuf->st_mtime);
+	/* This may be < 0 with signed time_t -- without GNU
+	 * extensions we can't store it.  The field will be zeroed */
+	if (STORE_VALUE(header.mtime, statbuf->st_mtime) < 0) {
+		bb_error_msg("Can not store negative time_t for %s", fileName);
+	}
 
 	/* Enter the user and group names */
 	safe_strncpy(header.uname, get_cached_username(statbuf->st_uid), sizeof(header.uname));
@@ -307,24 +380,24 @@ static int writeTarHeader(struct TarBallInfo *tbInfo,
 			header.name[strlen(header.name)] = '/';
 	} else if (S_ISCHR(statbuf->st_mode)) {
 		header.typeflag = CHRTYPE;
-		PUT_OCTAL(header.devmajor, major(statbuf->st_rdev));
-		PUT_OCTAL(header.devminor, minor(statbuf->st_rdev));
+		STORE_VALUE(header.devmajor, major(statbuf->st_rdev));
+		STORE_VALUE(header.devminor, minor(statbuf->st_rdev));
 	} else if (S_ISBLK(statbuf->st_mode)) {
 		header.typeflag = BLKTYPE;
-		PUT_OCTAL(header.devmajor, major(statbuf->st_rdev));
-		PUT_OCTAL(header.devminor, minor(statbuf->st_rdev));
+		STORE_VALUE(header.devmajor, major(statbuf->st_rdev));
+		STORE_VALUE(header.devminor, minor(statbuf->st_rdev));
 	} else if (S_ISFIFO(statbuf->st_mode)) {
 		header.typeflag = FIFOTYPE;
 	} else if (S_ISREG(statbuf->st_mode)) {
-		if (sizeof(statbuf->st_size) > 4
-		 && statbuf->st_size > (off_t)0777777777777LL
-		) {
+		/* POSIX has limited encodings due to the octal
+		 * format.  With GNU extensions we will store this in
+		 * base-256, which can hold much larger file types. */
+		if (STORE_VALUE(header.size, statbuf->st_size) < 0) {
 			bb_error_msg_and_die("can't store file '%s' "
-				"of size %"OFF_FMT"u, aborting",
-				fileName, statbuf->st_size);
+					     "of size %"OFF_FMT"u, aborting",
+					     fileName, statbuf->st_size);
 		}
 		header.typeflag = REGTYPE;
-		PUT_OCTAL(header.size, statbuf->st_size);
 	} else {
 		bb_error_msg("%s: unknown file type", fileName);
 		return FALSE;
-- 
1.7.4.4



More information about the busybox mailing list