[PATCH] have tar with GNU extensions use base-256 encoding for large fields
Ian Wienand
ianw at vmware.com
Mon May 2 23:56:35 UTC 2011
Hi,
Currently if busybox tar encounters a negative time_t on a file, it
just puts the sign-extended value into the tar file (see [1] where it
says "Portable file timestamps cannot be negative"). I think it's best
to leave the standard tar POSIX-ish; so this change gives a warning
when a negative timestamp is seen, and leaves the timestamp as zero.
However, when GNU extensions are turned on, it seems the best thing to
do is use base-256 encoding to represent the timestamp. This also has
the advantage that we can easily use this encoding for the file size
too. Base-256 encoded fields are marked by setting the top bit of their
first byte; they have no trailing NUL, and the remaining bytes hold the
actual value encoded in base-256.
I've tested this with files that have negative timestamps. Without
GNU extensions, we get the aforementioned warning.
---
$ ~/programs/busybox-git/busybox tar cvf foobar-bb.tar ./foobar
tar: Can not store negative time_t for ./foobar
./foobar
---
With GNU extensions turned on, hexdumping the header and comparing it
shows that it looks exactly the same as one produced by GNU tar itself.
Size stats -- first with GNU extensions enabled, then without:
function old new delta
putValue - 363 +363
putOctal 69 137 +68
writeTarHeader 822 871 +49
.rodata 139625 139662 +37
writeLongname 225 231 +6
chksum_and_xwrite 108 111 +3
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 5/0 up/down: 526/0) Total: 526 bytes
text data bss dec hex filename
749743 2135 9076 760954 b9c7a busybox_old
750269 2135 9076 761480 b9e88 busybox_unstripped
function old new delta
putOctal 69 137 +68
writeTarHeader 860 911 +51
.rodata 139610 139647 +37
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 156/0) Total: 156 bytes
text data bss dec hex filename
749254 2135 9076 760465 b9a91 busybox_old
749410 2135 9076 760621 b9b2d busybox_unstripped
-i
[1] http://pubs.opengroup.org/onlinepubs/000095399/utilities/pax.html
Signed-off-by: Ian Wienand <ian at wienand.org>
---
archival/Config.src | 5 +-
archival/tar.c | 109 ++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 94 insertions(+), 20 deletions(-)
diff --git a/archival/Config.src b/archival/Config.src
index 81788ec..63aa743 100644
--- a/archival/Config.src
+++ b/archival/Config.src
@@ -266,12 +266,13 @@ config FEATURE_TAR_OLDSUN_COMPATIBILITY
tarballs still exist.
config FEATURE_TAR_GNU_EXTENSIONS
- bool "Support for GNU tar extensions (long filenames)"
+ bool "Support for GNU tar extensions (long filenames & large files)"
default y
depends on TAR || DPKG
help
With this option busybox supports GNU long filenames and
- linknames.
+ linknames, old timestamps and larger file sizes that can not
+ be represented in POSIX format tar files.
config FEATURE_TAR_LONG_OPTIONS
bool "Enable long options"
diff --git a/archival/tar.c b/archival/tar.c
index 01b83d5..973b8d8 100644
--- a/archival/tar.c
+++ b/archival/tar.c
@@ -158,15 +158,34 @@ static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf
return hlInfo;
}
+#define LG_8 3
+/*
+ * if we have DIGITS, where each stores BITS_PER_DIGIT, what's the
+ * biggest number we can store?
+ */
+#define MAX_VAL_WITH_DIGITS(digits, bits_per_digit) \
+ ((digits) * (bits_per_digit) < sizeof (uint64_t) * CHAR_BIT \
+ ? ((uint64_t) 1 << ((digits) * (bits_per_digit))) - 1 : \
+ (uint64_t) -1)
+
+/* -1 here is for trailing NULL */
+#define MAX_OCTAL_SIZE(len) MAX_VAL_WITH_DIGITS (len - 1, LG_8)
+
/* Put an octal string into the specified buffer.
* The number is zero padded and possibly null terminated.
* Stores low-order bits only if whole value does not fit. */
-static void putOctal(char *cp, int len, off_t value)
+static int putOctal(char *cp, int len, off_t value, int negative)
{
char tempBuffer[sizeof(off_t)*3 + 1];
char *tempString = tempBuffer;
int width;
+ if (negative || value > MAX_OCTAL_SIZE(len)) {
+ /* zero for sanity */
+ memset(cp, 0, len);
+ return -1;
+ }
+
width = sprintf(tempBuffer, "%0*"OFF_FMT"o", len, value);
tempString += (width - len);
@@ -178,8 +197,58 @@ static void putOctal(char *cp, int len, off_t value)
/* Copy the string to the field */
memcpy(cp, tempString, len);
+ return 0;
}
-#define PUT_OCTAL(a, b) putOctal((a), sizeof(a), (b))
+
+#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
+/* GNU tar allows some fields to be bigger by using base-256 encoding */
+
+#define LG_256 8
+/* -1 here is for top-byte header, see below */
+#define MAX_B256_SIZE(len) MAX_VAL_WITH_DIGITS (len - 1, LG_256)
+
+static void toBase256(int negative, off_t value,
+ char *cp, int len) {
+ off_t v = value;
+ off_t propogated_sign_bits =
+ ((off_t) - negative << (CHAR_BIT * sizeof(v) - LG_256));
+ size_t i = len;
+
+ /*
+ * top byte is a marker; first bit is always set to indicate
+ * this is base-256; positive values are \200, negative \377
+ */
+ cp[0] = negative ? -1 : 1 << (LG_256 - 1);
+
+ do {
+ cp[--i] = v & ((1 << LG_256) - 1);
+ v = propogated_sign_bits | (v >> LG_256);
+ }
+ while (i);
+}
+
+static int putValue(char *cp, int len, off_t value, int negative)
+{
+ /* put as octal by default, but that can't represent negative values */
+ if (!negative && value < MAX_OCTAL_SIZE(len)) {
+ return putOctal(cp, len, value, negative);
+ }
+
+ /* otherwise, see if it fits in base-256 encoding */
+ if ((negative ? -1 - value : value) <= MAX_B256_SIZE(len)) {
+ toBase256(negative, value, cp, len);
+ return 0;
+ }
+ /* now what? zero out for sanity */
+ memset(cp, 0, len);
+
+ return -1;
+}
+
+#define STORE_VALUE(a, b) putValue((a), sizeof(a), (b), ((b) < 0 ? 1 : 0))
+#else
+#define STORE_VALUE(a, b) putOctal((a), sizeof(a), (b), ((b) < 0 ? 1 : 0))
+#endif
static void chksum_and_xwrite(int fd, struct tar_header_t* hp)
{
@@ -201,7 +270,7 @@ static void chksum_and_xwrite(int fd, struct tar_header_t* hp)
chksum = 0;
size = sizeof(*hp);
do { chksum += *cp++; } while (--size);
- putOctal(hp->chksum, sizeof(hp->chksum)-1, chksum);
+ putOctal(hp->chksum, sizeof(hp->chksum)-1, chksum, 0);
/* Now write the header out to disk */
xwrite(fd, hp, sizeof(*hp));
@@ -233,7 +302,7 @@ static void writeLongname(int fd, int type, const char *name, int dir)
memset(&header, 0, sizeof(header));
strcpy(header.name, "././@LongLink");
memcpy(header.mode, prefilled.mode, sizeof(prefilled));
- PUT_OCTAL(header.size, size);
+ STORE_VALUE(header.size, size);
header.typeflag = type;
chksum_and_xwrite(fd, &header);
@@ -260,11 +329,15 @@ static int writeTarHeader(struct TarBallInfo *tbInfo,
strncpy(header.name, header_name, sizeof(header.name));
/* POSIX says to mask mode with 07777. */
- PUT_OCTAL(header.mode, statbuf->st_mode & 07777);
- PUT_OCTAL(header.uid, statbuf->st_uid);
- PUT_OCTAL(header.gid, statbuf->st_gid);
+ STORE_VALUE(header.mode, statbuf->st_mode & 07777);
+ STORE_VALUE(header.uid, statbuf->st_uid);
+ STORE_VALUE(header.gid, statbuf->st_gid);
memset(header.size, '0', sizeof(header.size)-1); /* Regular file size is handled later */
- PUT_OCTAL(header.mtime, statbuf->st_mtime);
+ /* This may be < 0 with signed time_t -- without GNU
+ * extensions we can't store it. The field will be zeroed */
+ if (STORE_VALUE(header.mtime, statbuf->st_mtime) < 0) {
+ bb_error_msg("Can not store negative time_t for %s", fileName);
+ }
/* Enter the user and group names */
safe_strncpy(header.uname, get_cached_username(statbuf->st_uid), sizeof(header.uname));
@@ -307,24 +380,24 @@ static int writeTarHeader(struct TarBallInfo *tbInfo,
header.name[strlen(header.name)] = '/';
} else if (S_ISCHR(statbuf->st_mode)) {
header.typeflag = CHRTYPE;
- PUT_OCTAL(header.devmajor, major(statbuf->st_rdev));
- PUT_OCTAL(header.devminor, minor(statbuf->st_rdev));
+ STORE_VALUE(header.devmajor, major(statbuf->st_rdev));
+ STORE_VALUE(header.devminor, minor(statbuf->st_rdev));
} else if (S_ISBLK(statbuf->st_mode)) {
header.typeflag = BLKTYPE;
- PUT_OCTAL(header.devmajor, major(statbuf->st_rdev));
- PUT_OCTAL(header.devminor, minor(statbuf->st_rdev));
+ STORE_VALUE(header.devmajor, major(statbuf->st_rdev));
+ STORE_VALUE(header.devminor, minor(statbuf->st_rdev));
} else if (S_ISFIFO(statbuf->st_mode)) {
header.typeflag = FIFOTYPE;
} else if (S_ISREG(statbuf->st_mode)) {
- if (sizeof(statbuf->st_size) > 4
- && statbuf->st_size > (off_t)0777777777777LL
- ) {
+ /* POSIX has limited encodings due to the octal
+ * format. With GNU extensions we will store this in
+ * base-256, which can hold much larger file types. */
+ if (STORE_VALUE(header.size, statbuf->st_size) < 0) {
bb_error_msg_and_die("can't store file '%s' "
- "of size %"OFF_FMT"u, aborting",
- fileName, statbuf->st_size);
+ "of size %"OFF_FMT"u, aborting",
+ fileName, statbuf->st_size);
}
header.typeflag = REGTYPE;
- PUT_OCTAL(header.size, statbuf->st_size);
} else {
bb_error_msg("%s: unknown file type", fileName);
return FALSE;
--
1.7.4.4
More information about the busybox
mailing list