tar: add support for --strip-components=N
function old new delta
data_extract_all 882 995 +113
tar_longopts 290 309 +19
tar_main 938 942 +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 136/0) Total: 136 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c
index 45776dc..bd51d2a 100644
--- a/archival/libarchive/data_extract_all.c
+++ b/archival/libarchive/data_extract_all.c
@@ -8,9 +8,17 @@
void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
{
+
file_header_t *file_header = archive_handle->file_header;
int dst_fd;
int res;
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ char *dst_name;
+ char *dst_link;
+#else
+# define dst_name (file_header->name)
+# define dst_link (file_header->link_target)
+#endif
#if ENABLE_FEATURE_TAR_SELINUX
char *sctx = archive_handle->tar__sctx[PAX_NEXT_FILE];
@@ -23,11 +31,47 @@
}
#endif
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ dst_name = file_header->name;
+ dst_link = file_header->link_target;
+ if (archive_handle->tar__strip_components) {
+ unsigned n = archive_handle->tar__strip_components;
+ do {
+ dst_name = strchr(dst_name, '/');
+ if (!dst_name || dst_name[1] == '\0') {
+ data_skip(archive_handle);
+ return;
+ }
+ dst_name++;
+ /*
+ * Link target is shortened only for hardlinks:
+ * softlinks restored unchanged.
+ */
+ if (S_ISREG(file_header->mode)
+ && file_header->size == 0
+ && dst_link
+ ) {
+// GNU tar 1.26 does not check that we reached end of link name:
+// if "dir/hardlink" is hardlinked to "file",
+// tar xvf a.tar --strip-components=1 says:
+// tar: hardlink: Cannot hard link to '': No such file or directory
+// and continues processing. We silently skip such entries.
+ dst_link = strchr(dst_link, '/');
+ if (!dst_link || dst_link[1] == '\0') {
+ data_skip(archive_handle);
+ return;
+ }
+ dst_link++;
+ }
+ } while (--n != 0);
+ }
+#endif
+
if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
- char *slash = strrchr(file_header->name, '/');
+ char *slash = strrchr(dst_name, '/');
if (slash) {
*slash = '\0';
- bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
+ bb_make_directory(dst_name, -1, FILEUTILS_RECUR);
*slash = '/';
}
}
@@ -38,8 +82,8 @@
/* Is it hardlink?
* We encode hard links as regular files of size 0 with a symlink */
if (S_ISREG(file_header->mode)
- && file_header->link_target
&& file_header->size == 0
+ && dst_link
) {
/* Ugly special case:
* tar cf t.tar hardlink1 hardlink2 hardlink1
@@ -48,22 +92,22 @@
* hardlink2 -> hardlink1
* hardlink1 -> hardlink1 <== !!!
*/
- if (strcmp(file_header->link_target, file_header->name) == 0)
+ if (strcmp(dst_link, dst_name) == 0)
goto ret;
}
/* Proceed with deleting */
- if (unlink(file_header->name) == -1
+ if (unlink(dst_name) == -1
&& errno != ENOENT
) {
bb_perror_msg_and_die("can't remove old file %s",
- file_header->name);
+ dst_name);
}
}
}
else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
/* Remove the existing entry if its older than the extracted entry */
struct stat existing_sb;
- if (lstat(file_header->name, &existing_sb) == -1) {
+ if (lstat(dst_name, &existing_sb) == -1) {
if (errno != ENOENT) {
bb_perror_msg_and_die("can't stat old file");
}
@@ -73,30 +117,30 @@
&& !S_ISDIR(file_header->mode)
) {
bb_error_msg("%s not created: newer or "
- "same age file exists", file_header->name);
+ "same age file exists", dst_name);
}
data_skip(archive_handle);
goto ret;
}
- else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
+ else if ((unlink(dst_name) == -1) && (errno != EISDIR)) {
bb_perror_msg_and_die("can't remove old file %s",
- file_header->name);
+ dst_name);
}
}
/* Handle hard links separately
* We encode hard links as regular files of size 0 with a symlink */
if (S_ISREG(file_header->mode)
- && file_header->link_target
&& file_header->size == 0
+ && dst_link
) {
- /* hard link */
- res = link(file_header->link_target, file_header->name);
- if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
+ /* Hard link */
+ res = link(dst_link, dst_name);
+ if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
bb_perror_msg("can't create %slink "
"from %s to %s", "hard",
- file_header->name,
- file_header->link_target);
+ dst_name,
+ dst_link);
}
/* Hardlinks have no separate mode/ownership, skip chown/chmod */
goto ret;
@@ -106,17 +150,17 @@
switch (file_header->mode & S_IFMT) {
case S_IFREG: {
/* Regular file */
- char *dst_name;
+ char *dst_nameN;
int flags = O_WRONLY | O_CREAT | O_EXCL;
if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
flags = O_WRONLY | O_CREAT | O_TRUNC;
- dst_name = file_header->name;
+ dst_nameN = dst_name;
#ifdef ARCHIVE_REPLACE_VIA_RENAME
if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME)
/* rpm-style temp file name */
- dst_name = xasprintf("%s;%x", dst_name, (int)getpid());
+ dst_nameN = xasprintf("%s;%x", dst_name, (int)getpid());
#endif
- dst_fd = xopen3(dst_name,
+ dst_fd = xopen3(dst_nameN,
flags,
file_header->mode
);
@@ -124,32 +168,32 @@
close(dst_fd);
#ifdef ARCHIVE_REPLACE_VIA_RENAME
if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) {
- xrename(dst_name, file_header->name);
- free(dst_name);
+ xrename(dst_nameN, dst_name);
+ free(dst_nameN);
}
#endif
break;
}
case S_IFDIR:
- res = mkdir(file_header->name, file_header->mode);
+ res = mkdir(dst_name, file_header->mode);
if ((res == -1)
&& (errno != EISDIR) /* btw, Linux doesn't return this */
&& (errno != EEXIST)
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
- bb_perror_msg("can't make dir %s", file_header->name);
+ bb_perror_msg("can't make dir %s", dst_name);
}
break;
case S_IFLNK:
/* Symlink */
//TODO: what if file_header->link_target == NULL (say, corrupted tarball?)
- res = symlink(file_header->link_target, file_header->name);
- if ((res == -1)
+ res = symlink(file_header->link_target, dst_name);
+ if (res != 0
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
bb_perror_msg("can't create %slink "
"from %s to %s", "sym",
- file_header->name,
+ dst_name,
file_header->link_target);
}
break;
@@ -157,11 +201,11 @@
case S_IFBLK:
case S_IFCHR:
case S_IFIFO:
- res = mknod(file_header->name, file_header->mode, file_header->device);
+ res = mknod(dst_name, file_header->mode, file_header->device);
if ((res == -1)
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
- bb_perror_msg("can't create node %s", file_header->name);
+ bb_perror_msg("can't create node %s", dst_name);
}
break;
default:
@@ -186,20 +230,20 @@
}
#endif
/* GNU tar 1.15.1 uses chown, not lchown */
- chown(file_header->name, uid, gid);
+ chown(dst_name, uid, gid);
}
/* uclibc has no lchmod, glibc is even stranger -
* it has lchmod which seems to do nothing!
* so we use chmod... */
if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
- chmod(file_header->name, file_header->mode);
+ chmod(dst_name, file_header->mode);
}
if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
struct timeval t[2];
t[1].tv_sec = t[0].tv_sec = file_header->mtime;
t[1].tv_usec = t[0].tv_usec = 0;
- utimes(file_header->name, t);
+ utimes(dst_name, t);
}
}
diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c
index fb68673..ac2be72 100644
--- a/archival/libarchive/get_header_tar.c
+++ b/archival/libarchive/get_header_tar.c
@@ -418,6 +418,7 @@
/* Everything up to and including last ".." component is stripped */
overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
+//TODO: do the same for file_header->link_target?
/* Strip trailing '/' in directories */
/* Must be done after mode is set as '/' is used to check if it's a directory */
diff --git a/archival/tar.c b/archival/tar.c
index aa03ba9..566ba34 100644
--- a/archival/tar.c
+++ b/archival/tar.c
@@ -152,9 +152,12 @@
# define FNM_LEADING_DIR 0
#endif
-
-//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
-#define DBG(...) ((void)0)
+#if 0
+# define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
+#else
+# define DBG(...) ((void)0)
+#endif
+#define DBG_OPTION_PARSING 0
#define block_buf bb_common_bufsiz1
@@ -855,6 +858,7 @@
IF_FEATURE_SEAMLESS_Z( OPTBIT_COMPRESS ,)
IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,)
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ OPTBIT_STRIP_COMPONENTS,
OPTBIT_NORECURSION,
IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND ,)
OPTBIT_NUMERIC_OWNER,
@@ -879,12 +883,13 @@
OPT_GZIP = IF_FEATURE_SEAMLESS_GZ( (1 << OPTBIT_GZIP )) + 0, // z
OPT_XZ = IF_FEATURE_SEAMLESS_XZ( (1 << OPTBIT_XZ )) + 0, // J
OPT_COMPRESS = IF_FEATURE_SEAMLESS_Z( (1 << OPTBIT_COMPRESS )) + 0, // Z
- OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
- OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion
- OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command
- OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner
- OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
- OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite
+ OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
+ OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components
+ OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion
+ OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command
+ OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner
+ OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
+ OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite
OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS),
};
@@ -928,6 +933,7 @@
# if ENABLE_FEATURE_TAR_NOPRESERVE_TIME
"touch\0" No_argument "m"
# endif
+ "strip-components\0" Required_argument "\xf9"
"no-recursion\0" No_argument "\xfa"
# if ENABLE_FEATURE_TAR_TO_COMMAND
"to-command\0" Required_argument "\xfb"
@@ -973,11 +979,15 @@
"tt:vv:" // count -t,-v
IF_FEATURE_TAR_FROM("X::T::") // cumulative lists
#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
- "\xff::" // cumulative lists for --exclude
+ "\xff::" // --exclude=PATTERN is a list
#endif
IF_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd
IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive
- IF_NOT_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive
+ IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ ":\xf9+" // --strip-components=NUM
+#endif
+ ;
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
applet_long_options = tar_longopts;
#endif
@@ -1018,10 +1028,14 @@
IF_FEATURE_SEAMLESS_XZ( "J" )
IF_FEATURE_SEAMLESS_Z( "Z" )
IF_FEATURE_TAR_NOPRESERVE_TIME("m")
+ IF_FEATURE_TAR_LONG_OPTIONS("\xf9:") // --strip-components
, &base_dir // -C dir
, &tar_filename // -f filename
IF_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T
IF_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ , &tar_handle->tar__strip_components // --strip-components
+#endif
IF_FEATURE_TAR_TO_COMMAND(, &(tar_handle->tar__to_command)) // --to-command
#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
, &excludes // --exclude
@@ -1029,11 +1043,49 @@
, &verboseFlag // combined count for -t and -v
, &verboseFlag // combined count for -t and -v
);
- //bb_error_msg("opt:%08x", opt);
+#if DBG_OPTION_PARSING
+ bb_error_msg("opt: 0x%08x", opt);
+# define showopt(o) bb_error_msg("opt & %s(%x): %x", #o, o, opt & o);
+ showopt(OPT_TEST );
+ showopt(OPT_EXTRACT );
+ showopt(OPT_BASEDIR );
+ showopt(OPT_TARNAME );
+ showopt(OPT_2STDOUT );
+ showopt(OPT_NOPRESERVE_OWNER);
+ showopt(OPT_P );
+ showopt(OPT_VERBOSE );
+ showopt(OPT_KEEP_OLD );
+ showopt(OPT_CREATE );
+ showopt(OPT_DEREFERENCE );
+ showopt(OPT_BZIP2 );
+ showopt(OPT_LZMA );
+ showopt(OPT_INCLUDE_FROM );
+ showopt(OPT_EXCLUDE_FROM );
+ showopt(OPT_GZIP );
+ showopt(OPT_XZ );
+ showopt(OPT_COMPRESS );
+ showopt(OPT_NOPRESERVE_TIME );
+ showopt(OPT_STRIP_COMPONENTS);
+ showopt(OPT_NORECURSION );
+ showopt(OPT_2COMMAND );
+ showopt(OPT_NUMERIC_OWNER );
+ showopt(OPT_NOPRESERVE_PERM );
+ showopt(OPT_OVERWRITE );
+ showopt(OPT_ANY_COMPRESS );
+ bb_error_msg("base_dir:'%s'", base_dir);
+ bb_error_msg("tar_filename:'%s'", tar_filename);
+ bb_error_msg("verboseFlag:%d", verboseFlag);
+ bb_error_msg("tar_handle->tar__to_command:'%s'", tar_handle->tar__to_command);
+ bb_error_msg("tar_handle->tar__strip_components:%u", tar_handle->tar__strip_components);
+ return 0;
+# undef showopt
+#endif
argv += optind;
- if (verboseFlag) tar_handle->action_header = header_verbose_list;
- if (verboseFlag == 1) tar_handle->action_header = header_list;
+ if (verboseFlag)
+ tar_handle->action_header = header_verbose_list;
+ if (verboseFlag == 1)
+ tar_handle->action_header = header_list;
if (opt & OPT_EXTRACT)
tar_handle->action_data = data_extract_all;
diff --git a/include/bb_archive.h b/include/bb_archive.h
index 2329d02..10969b5 100644
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@@ -77,6 +77,9 @@
off_t offset;
/* Archiver specific. Can make it a union if it ever gets big */
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+ unsigned tar__strip_components;
+#endif
#define PAX_NEXT_FILE 0
#define PAX_GLOBAL 1
#if ENABLE_TAR || ENABLE_DPKG || ENABLE_DPKG_DEB
diff --git a/testsuite/tar.tests b/testsuite/tar.tests
index 4929f4e..383a464 100755
--- a/testsuite/tar.tests
+++ b/testsuite/tar.tests
@@ -53,6 +53,15 @@
"" ""
SKIP=
+# "tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input":
+# GNU tar 1.26 records as hardlinks:
+# input_hard2 -> input_hard1
+# input_hard1 -> input_hard1 (!!!)
+# input_dir/file -> input_dir/file
+# input -> input
+# As of 1.24.0, we don't record last two: for them, nlink==1
+# and we check for "hardlink"ness only files with nlink!=1
+# We also don't use "hrw-r--r--" notation for hardlinks in "tar tv" listing.
optional FEATURE_TAR_CREATE FEATURE_LS_SORTFILES
testing "tar hardlinks and repeated files" '\
rm -rf input_* test.tar 2>/dev/null
@@ -64,6 +73,7 @@
chmod 755 input_dir
tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input
tar tvf test.tar | sed "s/.*[0-9] input/input/"
+rm -rf input_dir
tar xf test.tar 2>&1
echo Ok: $?
ls -l . input_dir/* | grep input_ | sed "s/\\(^[^ ]*\\) .* input/\\1 input/"