diff options
Diffstat (limited to 'tar_fix.py')
-rw-r--r-- | tar_fix.py | 92 |
1 files changed, 0 insertions, 92 deletions
diff --git a/tar_fix.py b/tar_fix.py deleted file mode 100644 index d2817a2..0000000 --- a/tar_fix.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 - -import tarfile -import os - - -class Tarball: - def __init__(self, infile, outfile): - self.infile = infile - self.outfile = outfile - - def drop_lead_comp(self): - """Removes leading path component (top-level dir) - from input tar file.""" - with tarfile.open(self.infile) as tarin, tarfile.open(self.outfile, 'w:gz') as tarout: - - # Identify common top-level dir for all tarball - # components, and proceed if it's set. - lead_comp_name = os.path.commonpath(tarin.getnames()) - - if lead_comp_name: - prefix_len = len(lead_comp_name + '/') - # Remove top-level dir (eg. "root.x86_64" or "root.i686" - # in Hyperbola bootstrap tarballs) from the archive. - tarin.members.remove(tarin.getmember(lead_comp_name)) - - for m in tarin.members: - # Drop top-level dir prefix in all tarball - # component's paths. - m.path = m.path[prefix_len:] - # If component is a link, don't fetch its content. - # There's no point to that, and it helps avoiding - # KeyError("linkname 'something' not found") on "broken" - # symlinks, which are perfectly normal in a - # root FS tarball. And for hard links, the link - # target needs to be stripped of the prefix same as - # the file name. - if m.linkname: - if m.islnk(): - m.linkname = m.linkname[prefix_len:] - tarout.addfile(m) - else: - tarout.addfile(m, tarin.extractfile(m)) - - -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser( - description="Remove leading path component from input tarball contents and save " - "the result in output tarball.", add_help=False) - - group = parser.add_argument_group("Arguments") - group.add_argument("--help", action='store_true', - help="Show this help message and exit.") - - args = parser.parse_known_args() - - group.add_argument("--input", metavar='PATH', dest='infile', - type=str, help="Input tar[.gz/xz/bz2] file path.", - required=True) - group.add_argument("--output", metavar='PATH', dest='outfile', - type=str, help="Output tar.gz file path.", - required=True) - - if args[0].help: - parser.exit(parser.print_help()) - else: - args = parser.parse_args() - - tarball = Tarball(args.infile, args.outfile) - tarball.drop_lead_comp() - -# An error handling attempt I would like to remember. Note to self: it skips symlinks altogether. -# -# # Handle broken symlinks. They are perfectly normal in a root fs tarball, but tarfile module is not -# # prepared for that. Trying hard not to catch anything other than "linkname 'something' not found". -# try: -# # Write each modified component to output tarball. -# tarout.addfile(m, tarin.extractfile(m)) -# -# except KeyError as error: -# if "linkname '" and "' not found" in str(error): -# print("Warning: the input tarball contains a dead symlink: '%s' to non-existent '%s'. No " -# "biggy, but you might want to know. It will be included in the output tarball as it " -# "is. Proceeding..." % (m.name, m.linkname), file=sys.stderr) -# else: -# raise - -# And a compound list for all tar members: -# -# [m.path[prefix_len:] for m in tarin.members] |