/usr/bin/pegasus-archive is in pegasus-wms 4.4.0+dfsg-5.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
import os
import glob
import tarfile
import shutil
from optparse import OptionParser
# Module-wide verbosity flag. main() overwrites it from the -v/--verbose
# option; archive() consults it to print per-entry progress, and the
# script entry point re-raises errors (instead of printing only the
# message) when it is true.
VERBOSE = False
def extract(submitdir):
    """Restore a previously archived submit directory in place.

    Unpacks ``archive.tar.gz`` located in *submitdir* back into
    *submitdir* and deletes the archive once extraction has succeeded.

    Args:
        submitdir: Path of the workflow submit directory.

    Raises:
        Exception: If *submitdir* contains no ``braindump.txt`` (it is not
            a submit directory) or no ``archive.tar.gz`` (it was never
            archived). Errors from tarfile/os propagate unchanged.
    """
    # Locate braindump file
    braindump = os.path.join(submitdir, "braindump.txt")
    if not os.path.isfile(braindump):
        raise Exception("Not a submit directory: braindump.txt missing")

    # Locate archive file
    archname = os.path.join(submitdir, "archive.tar.gz")
    if not os.path.isfile(archname):
        raise Exception("Submit dir not archived")

    tar = tarfile.open(archname, "r|gz")
    try:
        tar.extractall(path=submitdir)
    finally:
        # Release the file handle even when extraction fails part-way.
        # try/finally is used rather than "with" so the code keeps working
        # on interpreters where TarFile is not a context manager.
        tar.close()

    # Only reached when every member was extracted, so a failed run
    # never loses the archive.
    os.remove(archname)
def archive(submitdir):
    """Pack the contents of a submit directory into ``archive.tar.gz``.

    Every top-level entry of *submitdir* is added to a gzip-compressed tar
    archive created inside *submitdir*, and is then deleted. The following
    entries are left in place and are not archived: ``braindump.txt``, the
    ``monitord.started`` / ``monitord.done`` / ``monitord.log`` files, any
    ``*.stampede.db`` file, any ``pegasus.*.properties`` file, and the
    archive itself.

    Args:
        submitdir: Path of the workflow submit directory.

    Raises:
        Exception: If *submitdir* contains no ``braindump.txt`` (it is not
            a submit directory) or already contains ``archive.tar.gz``.
            Errors from tarfile/os/shutil propagate unchanged; when
            building the archive fails, the partial archive is removed
            first.
    """
    exclude = set()

    # Locate and exclude braindump file
    braindump = os.path.join(submitdir, "braindump.txt")
    if not os.path.isfile(braindump):
        raise Exception("Not a submit directory: braindump.txt missing")
    exclude.add(braindump)

    # Ignore monitord files. This is needed so that tools like
    # pegasus-statistics will consider the workflow to be complete
    for name in ["monitord.started", "monitord.done", "monitord.log"]:
        exclude.add(os.path.join(submitdir, name))

    # Exclude stampede db
    exclude.update(glob.glob(os.path.join(submitdir, "*.stampede.db")))

    # Exclude properties file
    exclude.update(glob.glob(os.path.join(submitdir, "pegasus.*.properties")))

    # Locate and exclude archive file
    archname = os.path.join(submitdir, "archive.tar.gz")
    if os.path.exists(archname):
        raise Exception("Submit dir already archived")
    exclude.add(archname)

    # Snapshot the entries to archive exactly once. The removal pass below
    # reuses this list, so anything that appears in the directory after
    # this point is never deleted without having been archived.
    entries = []
    for name in os.listdir(submitdir):
        path = os.path.join(submitdir, name)
        if path not in exclude:
            entries.append((name, path))

    # Archive the files
    try:
        tar = tarfile.open(name=archname, mode="w|gz")
        try:
            for name, path in entries:
                if VERBOSE:
                    print("Archiving %s" % name)
                tar.add(name=path, arcname=name)
        finally:
            tar.close()
    except Exception:
        # A partial archive would make every later run fail with
        # "Submit dir already archived", so drop it before re-raising.
        # Nothing has been deleted from the submit dir at this point.
        if os.path.exists(archname):
            os.remove(archname)
        raise

    # Remove the files and directories
    # We do this here, instead of doing it in the loop above
    # because we want to make sure there are no errors in creating
    # the archive before we start removing files
    for name, path in entries:
        if VERBOSE:
            print("Removing %s" % name)
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            # Regular files, symlinks (including links to directories)
            # and special files are all unlinked directly.
            os.remove(path)
def main():
    """Command-line entry point: archive or extract one submit directory.

    Parses the command line, records the verbosity choice in the
    module-level VERBOSE flag, validates the single SUBMIT_DIR argument,
    and then runs archive() (the default) or extract() (with -x/--extract)
    on its absolute path. Usage problems are reported through the option
    parser's error() method.
    """
    global VERBOSE

    cli = OptionParser(
        usage="Usage: %prog [options] SUBMIT_DIR",
        description="Compress a workflow without causing it to be unusable by analysis tools")
    cli.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=False,
        help="Turn on verbose logging")
    cli.add_option(
        "-x", "--extract",
        action="store_false", dest="archive", default=True,
        help="Extract previously archived submit dir")

    opts, positional = cli.parse_args()

    # Exactly one positional argument is accepted
    if len(positional) != 1:
        cli.error("Specify SUBMIT_DIR")

    VERBOSE = opts.verbose

    target = positional[0]
    if not os.path.exists(target):
        cli.error("SUBMIT_DIR does not exist: %s" % target)
    if not os.path.isdir(target):
        cli.error("SUBMIT_DIR is not a directory: %s" % target)

    # -x stores False into opts.archive, selecting extraction
    action = archive if opts.archive else extract
    action(os.path.abspath(target))
if __name__ == '__main__':
    # Imported here because the module's import block does not provide sys.
    import sys

    try:
        main()
    except Exception as e:
        # With verbose output enabled, let the exception propagate so the
        # full traceback is shown; otherwise print only the message.
        if VERBOSE:
            raise
        print(e)
        # sys.exit instead of the site-provided exit() helper, which is
        # not guaranteed to exist (e.g. when the site module is skipped).
        sys.exit(1)
|