diff options
Diffstat (limited to 'tar_fix.py')
-rwxr-xr-x | tar_fix.py | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/tar_fix.py b/tar_fix.py new file mode 100755 index 0000000..d2817a2 --- /dev/null +++ b/tar_fix.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +import tarfile +import os + + +class Tarball: + def __init__(self, infile, outfile): + self.infile = infile + self.outfile = outfile + + def drop_lead_comp(self): + """Removes leading path component (top-level dir) + from input tar file.""" + with tarfile.open(self.infile) as tarin, tarfile.open(self.outfile, 'w:gz') as tarout: + + # Identify common top-level dir for all tarball + # components, and proceed if it's set. + lead_comp_name = os.path.commonpath(tarin.getnames()) + + if lead_comp_name: + prefix_len = len(lead_comp_name + '/') + # Remove top-level dir (eg. "root.x86_64" or "root.i686" + # in Hyperbola bootstrap tarballs) from the archive. + tarin.members.remove(tarin.getmember(lead_comp_name)) + + for m in tarin.members: + # Drop top-level dir prefix in all tarball + # component's paths. + m.path = m.path[prefix_len:] + # If component is a link, don't fetch its content. + # There's no point to that, and it helps avoiding + # KeyError("linkname 'something' not found") on "broken" + # symlinks, which are perfectly normal in a + # root FS tarball. And for hard links, the link + # target needs to be stripped of the prefix same as + # the file name. + if m.linkname: + if m.islnk(): + m.linkname = m.linkname[prefix_len:] + tarout.addfile(m) + else: + tarout.addfile(m, tarin.extractfile(m)) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description="Remove leading path component from input tarball contents and save " + "the result in output tarball.", add_help=False) + + group = parser.add_argument_group("Arguments") + group.add_argument("--help", action='store_true', + help="Show this help message and exit.") + + args = parser.parse_known_args() + + group.add_argument("--input", metavar='PATH', dest='infile', + type=str, help="Input tar[.gz/xz/bz2] file path.", + required=True) + group.add_argument("--output", metavar='PATH', dest='outfile', + type=str, help="Output tar.gz file path.", + required=True) + + if args[0].help: + parser.exit(parser.print_help()) + else: + args = parser.parse_args() + + tarball = Tarball(args.infile, args.outfile) + tarball.drop_lead_comp() + +# An error handling attempt I would like to remember. Note to self: it skips symlinks altogether. +# +# # Handle broken symlinks. They are perfectly normal in a root fs tarball, but tarfile module is not +# # prepared for that. Trying hard not to catch anything other than "linkname 'something' not found". +# try: +# # Write each modified component to output tarball. +# tarout.addfile(m, tarin.extractfile(m)) +# +# except KeyError as error: +# if "linkname '" and "' not found" in str(error): +# print("Warning: the input tarball contains a dead symlink: '%s' to non-existent '%s'. No " +# "biggy, but you might want to know. It will be included in the output tarball as it " +# "is. Proceeding..." % (m.name, m.linkname), file=sys.stderr) +# else: +# raise + +# And a compound list for all tar members: +# +# [m.path[prefix_len:] for m in tarin.members] |