From 977bf2a49484239f7a7b6ce08bfa9da413a27ead Mon Sep 17 00:00:00 2001
From: Florian Pritz <bluewind@xinu.at>
Date: Sat, 1 Mar 2014 17:21:47 +0100
Subject: [PATCH] mtree: Make reading additional information from the fs
 optional

This feature is not needed if users just want to read in the content of
an mtree file and do validation against the file system themselves.

It is needed for `bsdtar cvf out.tar @input.mtree` which is why the
option is enabled in bsdtar.

Since the mtree tests rely on this feature, this patch also enables it
there.

Signed-off-by: Florian Pritz <bluewind@xinu.at>
---
 libarchive/archive_read_support_format_mtree.c | 290 ++++++++++++++-----------
 libarchive/test/test_read_format_mtree.c       |  20 ++
 tar/write.c                                    |   1 +
 3 files changed, 179 insertions(+), 132 deletions(-)

diff --git a/libarchive/archive_read_support_format_mtree.c b/libarchive/archive_read_support_format_mtree.c
index 44799df..d82d4c1 100644
--- a/libarchive/archive_read_support_format_mtree.c
+++ b/libarchive/archive_read_support_format_mtree.c
@@ -104,6 +104,7 @@ struct mtree {
 	struct archive_entry_linkresolver *resolver;
 
 	int64_t			 cur_size;
+	char checkfs;
 };
 
 static int	bid_keycmp(const char *, const char *, ssize_t);
@@ -174,6 +175,29 @@ static int	read_header(struct archive_read *,
 #endif
 }
 
+static int
+archive_read_format_mtree_options(struct archive_read *a,
+    const char *key, const char *val)
+{
+	struct mtree *mtree;
+
+	mtree = (struct mtree *)(a->format->data);
+	if (strcmp(key, "checkfs")  == 0) {
+		/* Allows to read information missing from the mtree from the file system */
+		if (val == NULL || val[0] == 0) {
+			mtree->checkfs = 0;
+		} else {
+			mtree->checkfs = 1;
+		}
+		return (ARCHIVE_OK);
+	}
+
+	/* Note: The "warn" return is just to inform the options
+	 * supervisor that we didn't handle it.  It will generate
+	 * a suitable error if no one used this option. */
+	return (ARCHIVE_WARN);
+}
+
 static void
 free_options(struct mtree_option *head)
 {
@@ -206,7 +230,7 @@ static int	read_header(struct archive_read *,
 	mtree->fd = -1;
 
 	r = __archive_read_register_format(a, mtree, "mtree",
-	    mtree_bid, NULL, read_header, read_data, skip, NULL, cleanup);
+           mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup);
 
 	if (r != ARCHIVE_OK)
 		free(mtree);
@@ -1104,162 +1128,164 @@ static int	read_header(struct archive_read *,
 			mtree->current_dir.length = n;
 	}
 
-	/*
-	 * Try to open and stat the file to get the real size
-	 * and other file info.  It would be nice to avoid
-	 * this here so that getting a listing of an mtree
-	 * wouldn't require opening every referenced contents
-	 * file.  But then we wouldn't know the actual
-	 * contents size, so I don't see a really viable way
-	 * around this.  (Also, we may want to someday pull
-	 * other unspecified info from the contents file on
-	 * disk.)
-	 */
-	mtree->fd = -1;
-	if (archive_strlen(&mtree->contents_name) > 0)
-		path = mtree->contents_name.s;
-	else
-		path = archive_entry_pathname(entry);
-
-	if (archive_entry_filetype(entry) == AE_IFREG ||
-	    archive_entry_filetype(entry) == AE_IFDIR) {
-		mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
-		__archive_ensure_cloexec_flag(mtree->fd);
-		if (mtree->fd == -1 &&
-		    (errno != ENOENT ||
-		     archive_strlen(&mtree->contents_name) > 0)) {
-			archive_set_error(&a->archive, errno,
-			    "Can't open %s", path);
-			r = ARCHIVE_WARN;
+	if (mtree->checkfs) {
+		/*
+		 * Try to open and stat the file to get the real size
+		 * and other file info.  It would be nice to avoid
+		 * this here so that getting a listing of an mtree
+		 * wouldn't require opening every referenced contents
+		 * file.  But then we wouldn't know the actual
+		 * contents size, so I don't see a really viable way
+		 * around this.  (Also, we may want to someday pull
+		 * other unspecified info from the contents file on
+		 * disk.)
+		 */
+		mtree->fd = -1;
+		if (archive_strlen(&mtree->contents_name) > 0)
+			path = mtree->contents_name.s;
+		else
+			path = archive_entry_pathname(entry);
+
+		if (archive_entry_filetype(entry) == AE_IFREG ||
+				archive_entry_filetype(entry) == AE_IFDIR) {
+			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
+			__archive_ensure_cloexec_flag(mtree->fd);
+			if (mtree->fd == -1 &&
+					(errno != ENOENT ||
+					 archive_strlen(&mtree->contents_name) > 0)) {
+				archive_set_error(&a->archive, errno,
+						"Can't open %s", path);
+				r = ARCHIVE_WARN;
+			}
 		}
-	}
 
-	st = &st_storage;
-	if (mtree->fd >= 0) {
-		if (fstat(mtree->fd, st) == -1) {
-			archive_set_error(&a->archive, errno,
-			    "Could not fstat %s", path);
-			r = ARCHIVE_WARN;
-			/* If we can't stat it, don't keep it open. */
-			close(mtree->fd);
-			mtree->fd = -1;
+		st = &st_storage;
+		if (mtree->fd >= 0) {
+			if (fstat(mtree->fd, st) == -1) {
+				archive_set_error(&a->archive, errno,
+						"Could not fstat %s", path);
+				r = ARCHIVE_WARN;
+				/* If we can't stat it, don't keep it open. */
+				close(mtree->fd);
+				mtree->fd = -1;
+				st = NULL;
+			}
+		} else if (lstat(path, st) == -1) {
 			st = NULL;
 		}
-	} else if (lstat(path, st) == -1) {
-		st = NULL;
-	}
 
-	/*
-	 * Check for a mismatch between the type in the specification and
-	 * the type of the contents object on disk.
-	 */
-	if (st != NULL) {
-		if (
-		    ((st->st_mode & S_IFMT) == S_IFREG &&
-		     archive_entry_filetype(entry) == AE_IFREG)
+		/*
+		 * Check for a mismatch between the type in the specification and
+		 * the type of the contents object on disk.
+		 */
+		if (st != NULL) {
+			if (
+					((st->st_mode & S_IFMT) == S_IFREG &&
+					 archive_entry_filetype(entry) == AE_IFREG)
 #ifdef S_IFLNK
-		    || ((st->st_mode & S_IFMT) == S_IFLNK &&
-			archive_entry_filetype(entry) == AE_IFLNK)
+					|| ((st->st_mode & S_IFMT) == S_IFLNK &&
+						archive_entry_filetype(entry) == AE_IFLNK)
 #endif
 #ifdef S_IFSOCK
-		    || ((st->st_mode & S_IFSOCK) == S_IFSOCK &&
-			archive_entry_filetype(entry) == AE_IFSOCK)
+					|| ((st->st_mode & S_IFSOCK) == S_IFSOCK &&
+						archive_entry_filetype(entry) == AE_IFSOCK)
 #endif
 #ifdef S_IFCHR
-		    || ((st->st_mode & S_IFMT) == S_IFCHR &&
-			archive_entry_filetype(entry) == AE_IFCHR)
+					|| ((st->st_mode & S_IFMT) == S_IFCHR &&
+						archive_entry_filetype(entry) == AE_IFCHR)
 #endif
 #ifdef S_IFBLK
-		    || ((st->st_mode & S_IFMT) == S_IFBLK &&
-			archive_entry_filetype(entry) == AE_IFBLK)
+					|| ((st->st_mode & S_IFMT) == S_IFBLK &&
+						archive_entry_filetype(entry) == AE_IFBLK)
 #endif
-		    || ((st->st_mode & S_IFMT) == S_IFDIR &&
-			archive_entry_filetype(entry) == AE_IFDIR)
+					|| ((st->st_mode & S_IFMT) == S_IFDIR &&
+						archive_entry_filetype(entry) == AE_IFDIR)
 #ifdef S_IFIFO
-		    || ((st->st_mode & S_IFMT) == S_IFIFO &&
-			archive_entry_filetype(entry) == AE_IFIFO)
+					|| ((st->st_mode & S_IFMT) == S_IFIFO &&
+							archive_entry_filetype(entry) == AE_IFIFO)
 #endif
-		    ) {
-			/* Types match. */
-		} else {
-			/* Types don't match; bail out gracefully. */
-			if (mtree->fd >= 0)
-				close(mtree->fd);
-			mtree->fd = -1;
-			if (parsed_kws & MTREE_HAS_OPTIONAL) {
-				/* It's not an error for an optional entry
-				   to not match disk. */
-				*use_next = 1;
-			} else if (r == ARCHIVE_OK) {
-				archive_set_error(&a->archive,
-				    ARCHIVE_ERRNO_MISC,
-				    "mtree specification has different type for %s",
-				    archive_entry_pathname(entry));
-				r = ARCHIVE_WARN;
-			}
-			return r;
+					) {
+						/* Types match. */
+					} else {
+						/* Types don't match; bail out gracefully. */
+						if (mtree->fd >= 0)
+							close(mtree->fd);
+						mtree->fd = -1;
+						if (parsed_kws & MTREE_HAS_OPTIONAL) {
+							/* It's not an error for an optional entry
+							   to not match disk. */
+							*use_next = 1;
+						} else if (r == ARCHIVE_OK) {
+							archive_set_error(&a->archive,
+									ARCHIVE_ERRNO_MISC,
+									"mtree specification has different type for %s",
+									archive_entry_pathname(entry));
+							r = ARCHIVE_WARN;
+						}
+						return r;
+					}
 		}
-	}
 
-	/*
-	 * If there is a contents file on disk, pick some of the metadata
-	 * from that file.  For most of these, we only set it from the contents
-	 * if it wasn't already parsed from the specification.
-	 */
-	if (st != NULL) {
-		if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
-		     (parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
-		    (archive_entry_filetype(entry) == AE_IFCHR ||
-		     archive_entry_filetype(entry) == AE_IFBLK))
-			archive_entry_set_rdev(entry, st->st_rdev);
-		if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME)) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
-			archive_entry_set_gid(entry, st->st_gid);
-		if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME)) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
-			archive_entry_set_uid(entry, st->st_uid);
-		if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
+		/*
+		 * If there is a contents file on disk, pick some of the metadata
+		 * from that file.  For most of these, we only set it from the contents
+		 * if it wasn't already parsed from the specification.
+		 */
+		if (st != NULL) {
+			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
+						(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
+					(archive_entry_filetype(entry) == AE_IFCHR ||
+					 archive_entry_filetype(entry) == AE_IFBLK))
+				archive_entry_set_rdev(entry, st->st_rdev);
+			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME)) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0)
+				archive_entry_set_gid(entry, st->st_gid);
+			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME)) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0)
+				archive_entry_set_uid(entry, st->st_uid);
+			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
-			archive_entry_set_mtime(entry, st->st_mtime,
-			    st->st_mtimespec.tv_nsec);
+				archive_entry_set_mtime(entry, st->st_mtime,
+						st->st_mtimespec.tv_nsec);
 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
-			archive_entry_set_mtime(entry, st->st_mtime,
-			    st->st_mtim.tv_nsec);
+				archive_entry_set_mtime(entry, st->st_mtime,
+						st->st_mtim.tv_nsec);
 #elif HAVE_STRUCT_STAT_ST_MTIME_N
-			archive_entry_set_mtime(entry, st->st_mtime,
-			    st->st_mtime_n);
+				archive_entry_set_mtime(entry, st->st_mtime,
+						st->st_mtime_n);
 #elif HAVE_STRUCT_STAT_ST_UMTIME
-			archive_entry_set_mtime(entry, st->st_mtime,
-			    st->st_umtime*1000);
+				archive_entry_set_mtime(entry, st->st_mtime,
+						st->st_umtime*1000);
 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
-			archive_entry_set_mtime(entry, st->st_mtime,
-			    st->st_mtime_usec*1000);
+				archive_entry_set_mtime(entry, st->st_mtime,
+						st->st_mtime_usec*1000);
 #else
-			archive_entry_set_mtime(entry, st->st_mtime, 0);
+				archive_entry_set_mtime(entry, st->st_mtime, 0);
 #endif
+			}
+			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0)
+				archive_entry_set_nlink(entry, st->st_nlink);
+			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0)
+				archive_entry_set_perm(entry, st->st_mode);
+			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
+					(parsed_kws & MTREE_HAS_NOCHANGE) != 0)
+				archive_entry_set_size(entry, st->st_size);
+			archive_entry_set_ino(entry, st->st_ino);
+			archive_entry_set_dev(entry, st->st_dev);
+
+			archive_entry_linkify(mtree->resolver, &entry, &sparse_entry);
+		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
+			/*
+			 * Couldn't open the entry, stat it or the on-disk type
+			 * didn't match.  If this entry is optional, just ignore it
+			 * and read the next header entry.
+			 */
+			*use_next = 1;
+			return ARCHIVE_OK;
 		}
-		if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
-			archive_entry_set_nlink(entry, st->st_nlink);
-		if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
-			archive_entry_set_perm(entry, st->st_mode);
-		if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
-		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
-			archive_entry_set_size(entry, st->st_size);
-		archive_entry_set_ino(entry, st->st_ino);
-		archive_entry_set_dev(entry, st->st_dev);
-
-		archive_entry_linkify(mtree->resolver, &entry, &sparse_entry);
-	} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
-		/*
-		 * Couldn't open the entry, stat it or the on-disk type
-		 * didn't match.  If this entry is optional, just ignore it
-		 * and read the next header entry.
-		 */
-		*use_next = 1;
-		return ARCHIVE_OK;
 	}
 
 	mtree->cur_size = archive_entry_size(entry);
diff --git a/libarchive/test/test_read_format_mtree.c b/libarchive/test/test_read_format_mtree.c
index 830fa0a..f96529d 100644
--- a/libarchive/test/test_read_format_mtree.c
+++ b/libarchive/test/test_read_format_mtree.c
@@ -58,6 +58,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_filename(a, reffile, 11));
 
 	/*
@@ -209,6 +211,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive, sizeof(archive)));
 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualInt(archive_format(a), ARCHIVE_FORMAT_MTREE);
@@ -246,6 +250,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive, sizeof(archive)));
 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualString(archive_entry_pathname(ae), "a");
@@ -299,6 +305,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive, sizeof(archive)));
 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualString(archive_entry_pathname(ae), "./a");	
@@ -365,6 +373,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive, sizeof(archive)));
 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualString(archive_entry_pathname(ae), "./a");
@@ -402,6 +412,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive2, sizeof(archive2)));
 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
 	assertEqualString(archive_entry_pathname(ae), "./a");
@@ -449,6 +461,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_filename(a, reffile, 11));
 
 	/*
@@ -552,6 +566,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_filename(a, reffile, 11));
 
 	/*
@@ -617,6 +633,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_filename(a, reffile, 11));
 
 	/*
@@ -680,6 +698,8 @@
 	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_support_format_all(a));
 	assertEqualIntA(a, ARCHIVE_OK,
+	    archive_read_set_options(a, "mtree:checkfs"));
+	assertEqualIntA(a, ARCHIVE_OK,
 	    archive_read_open_memory(a, archive, sizeof(archive)));
 	assertEqualIntA(a, ARCHIVE_WARN, archive_read_next_header(a, &ae));
 	assert(strlen(archive_error_string(a)) > 0);
diff --git a/tar/write.c b/tar/write.c
index 40d2fb0..7e8cb13 100644
--- a/tar/write.c
+++ b/tar/write.c
@@ -648,6 +648,7 @@ static void		 write_hierarchy(struct bsdtar *, struct archive *,
 	archive_read_support_format_all(ina);
 	archive_read_support_filter_all(ina);
 	set_reader_options(bsdtar, a);
+	archive_read_set_options(ina, "mtree:checkfs");
 	if (archive_read_open_filename(ina, filename,
 					bsdtar->bytes_per_block)) {
 		lafe_warnc(0, "%s", archive_error_string(ina));
-- 
1.8.5.5