This file is indexed.

/usr/share/pegasus/visualize/dag2dot is in pegasus-wms 4.0.1+dfsg-8.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
#!/usr/bin/env python
import sys
import os
import xml.sax.handler
import xml.sax
from optparse import OptionParser

# Palette of fill colors for the nodes of the generated diagram.
# emit_dot() hands out one entry per distinct transformation name, in
# order of first appearance; once the palette is used up, every further
# transformation shares the last entry.
COLORS = [
	"#1b9e77",
	"#d95f02",
	"#7570b3",
	"#e7298a",
	"#66a61e",
	"#e6ab02",
	"#a6761d",
	"#666666",
	"#8dd3c7",
	"#bebada",
	"#fb8072",
	"#80b1d3",
	"#fdb462",
	"#b3de69",
	"#fccde5",
	"#d9d9d9",
	"#bc80bd",
	"#ccebc5",
	"#ffed6f",
	"#ffffb3"
]

class Job:
	"""
	One node of the workflow graph.

	id:       identifier of the job (None until a parser fills it in)
	xform:    transformation name (None until a parser fills it in)
	label:    text shown on the node (None until a parser fills it in)
	level:    depth assigned while simplifying the graph; starts at 0
	parents:  jobs this job depends on
	children: jobs that depend on this job
	"""
	def __init__(self):
		# Naming attributes are unknown for a freshly created job
		self.id = self.xform = self.label = None
		self.level = 0
		# Two separate lists: the edge lists must never alias each other
		self.parents, self.children = [], []

class DAXHandler(xml.sax.handler.ContentHandler):
	"""
	SAX content handler that builds a DAG from a Pegasus DAX file.

	After parsing, self.dag maps each job id to its Job object, with the
	parents/children lists filled in from the <child>/<parent> elements.
	"""
	def __init__(self):
		# Explicit base-class call (instead of super()) so this also works
		# where ContentHandler is an old-style class.
		xml.sax.handler.ContentHandler.__init__(self)
		self.dag = {}
		# "ref" of the <child> element currently open, None outside of one.
		# It must exist from the start: a <parent> seen before any <child>
		# has to produce the "Invalid DAX" error below, not an
		# AttributeError.
		self.lastchild = None

	def startElement(self, name, attrs):
		"""
		Handle an opening tag.

		<job>, <dax> and <dag> elements create a Job; <child> remembers
		which job the following <parent> elements belong to; <parent>
		adds an edge from the referenced job to the current child.

		Raises Exception for a missing 'id' or job 'name' attribute and
		for a <parent> outside a <child>. A "ref" naming a job that has
		not been declared raises KeyError.
		"""
		if name in ["job","dax","dag"]:
			job = Job()

			job.id = attrs.get("id")
			if job.id is None:
				raise Exception("Invalid DAX: attribute 'id' missing")

			if name == "job":
				job.xform = attrs.get("name")
				if job.xform is None:
					raise Exception("Invalid DAX: job name missing for job %s" % job.id)
				# Full transformation name is namespace::name:version,
				# where namespace and version are optional
				ns = attrs.get("namespace")
				version = attrs.get("version")
				if ns is not None:
					job.xform = ns + "::" + job.xform
				if version is not None:
					job.xform = job.xform + ":" + version
			elif name == "dax":
				job.xform = "pegasus::dax"
			else:
				job.xform = "pegasus::dag"

			# Label preference: node-label, then file, then the
			# transformation name
			job.label = attrs.get("node-label")
			if job.label is None:
				job.label = attrs.get("file")
				if job.label is None:
					job.label = job.xform

			self.dag[job.id] = job
		elif name == "child":
			self.lastchild = attrs.get("ref")
		elif name == "parent":
			if self.lastchild is None:
				raise Exception("Invalid DAX: <parent> outside <child>")
			pid = attrs.get("ref")
			child = self.dag[self.lastchild]
			parent = self.dag[pid]
			child.parents.append(parent)
			parent.children.append(child)

	def endElement(self, name):
		"""
		Handle a closing tag: leaving a <child> ends its parent list.
		"""
		if name == "child":
			self.lastchild = None

def parse_daxfile(fname):
	"""
	Parse DAG from a Pegasus DAX file.

	fname: Path of the DAX (XML) file.
	Returns the dictionary of jobs built by DAXHandler, keyed by job id.
	Errors raised by the XML parser or by the handler propagate to the
	caller.
	"""
	handler = DAXHandler()
	parser = xml.sax.make_parser()
	parser.setContentHandler(handler)
	f = open(fname,"r")
	try:
		parser.parse(f)
	finally:
		# Close the file even when the DAX turns out to be malformed
		f.close()
	return handler.dag
	
def parse_xform_name(path):
	"""
	Parse the transformation name from a submit script. Usually the
	transformation is in a special classad called '+pegasus_wf_xformation'.
	For special pegasus jobs (create_dir, etc.) set the name manually.

	path: Path of the submit script; it does not have to exist.
	Returns the transformation name, or None if it cannot be determined
	(no such file, no classad line, or a classad line without a quoted
	value).
	"""
	# Handle special cases: these jobs are recognized by file name alone
	special_prefixes = (
		("create_dir_", "pegasus::create_dir"),
		("stage_in_", "pegasus::stage_in"),
		("stage_out_", "pegasus::stage_out"),
		("stage_inter_", "pegasus::stage_inter"),
		("stage_worker_", "pegasus::stage_worker"),
		("register_", "pegasus::register"),
		("clean_up_", "pegasus::clean_up"),
	)
	fname = os.path.basename(path)
	for prefix, xform in special_prefixes:
		if fname.startswith(prefix):
			return xform

	# Get it from the submit file
	if os.path.isfile(path):
		f = open(path,'r')
		try:
			for line in f:
				if '+pegasus_wf_xformation' in line:
					# The value is the text between the first pair of
					# double quotes; skip lines that have no quotes
					fields = line.split('"')
					if len(fields) > 1:
						return fields[1]
		finally:
			f.close()

	return None
	
def parse_dagfile(fname):
	"""
	Parse a DAG from a dagfile.

	Only JOB and PARENT ... CHILD ... lines are interpreted; every other
	line (comments, blank lines, other DAGMan commands such as
	JOBSTATE_LOG) is ignored. A PARENT line may list several parents and
	several children; every parent is linked to every child.

	fname: Path of the DAGMan file. Relative submit-file paths are
	       resolved against its directory.
	Returns a dictionary mapping job id -> Job.
	Raises Exception for a malformed JOB or PARENT line, and KeyError if
	a PARENT line names a job that has not been declared.
	"""
	dagdir = os.path.dirname(fname)
	dag = {}
	f = open(fname,'r')
	try:
		for line in f:
			line = line.strip()
			rec = line.split()
			if len(rec) == 0:
				continue
			# Compare the whole keyword: a prefix test would also treat
			# commands like JOBSTATE_LOG as job declarations
			keyword = rec[0]
			if keyword == "JOB":
				job = Job()
				if len(rec) < 3:
					raise Exception("Invalid line:",line)
				job.id = rec[1] # Job id
				subfile = rec[2] # submit script
				if not os.path.isabs(subfile):
					subfile = os.path.join(dagdir,subfile)
				job.xform = parse_xform_name(subfile)
				if job.xform is None:
					# Otherwise just use the ID
					job.xform = job.id
				job.label = job.id
				dag[job.id] = job
			elif keyword == "PARENT":
				if "CHILD" not in rec:
					raise Exception("Invalid line:",line)
				split = rec.index("CHILD")
				parent_ids = rec[1:split]
				child_ids = rec[split+1:]
				if len(parent_ids) == 0 or len(child_ids) == 0:
					raise Exception("Invalid line:",line)
				for parent_id in parent_ids:
					p = dag[parent_id]
					for child_id in child_ids:
						c = dag[child_id]
						p.children.append(c)
						c.parents.append(p)
	finally:
		# Close the file even when a line is rejected
		f.close()

	return dag

def remove_xforms(dag, xforms):
	"""
	Remove transformations in the DAG by name.

	dag: Dictionary mapping job id -> job; modified in place.
	xforms: Collection of transformation names whose jobs are dropped.

	Every edge touching a removed job is removed with it; the parents
	and children of a removed job are not connected to each other.
	A progress line per removed job is written to stderr, so that it
	cannot end up inside a diagram that is written to stdout.
	"""
	if len(xforms) == 0:
		return
	# Iterate over a snapshot of the keys: entries are deleted inside
	# the loop
	for job_id in list(dag.keys()):
		job = dag[job_id]
		if job.xform in xforms:
			sys.stderr.write("Removing %s\n" % job.id)
			for p in job.parents:
				p.children.remove(job)
			for c in job.children:
				c.parents.remove(job)
			del dag[job_id]
			
def inv_reachable(a, b):
	"""
	Is b an ancestor of a, i.e. can b be reached from a by following
	reverse (child->parent) edges? Reverse edges are used because it is
	a little more efficient than forward edges assuming that a node is
	more likely to have children than parents.

	The search is depth-first (the work list is used as a stack). Each
	node is expanded at most once, so the cost stays linear in the size
	of the graph above a even when many paths share ancestors.

	Returns True or False. a itself is never compared with b, so
	inv_reachable(x, x) is False in an acyclic graph.
	"""
	stack = [a]
	# Nodes already scheduled for expansion, tracked by identity so the
	# node objects do not need to be hashable
	seen = set([id(a)])
	while len(stack) > 0:
		n = stack.pop()
		for p in n.parents:
			if p == b: return True
			if id(p) not in seen:
				seen.add(id(p))
				stack.append(p)
	return False
			
def simplify(dag):
	"""
	Simplify a DAG by removing redundant edges. Redundant edges are edges
	that go from a grandparent to a grandchild. In other words, they are
	edges that, if removed, do not change the dependencies in the workflow.
	We want to remove these because they clutter up the diagram and make
	it hard to read.

	dag: Dictionary mapping job id -> job; the jobs are modified in place
	     (level is assigned, redundant edges are dropped from the
	     parents/children lists).
	Returns the same dictionary. Each removed edge is reported on stderr.
	The graph is assumed to be acyclic.
	"""
	# Find roots. They sit at level 1; level 0 is reserved for an
	# imaginary surrogate root above all of them.
	roots = []
	for job_id in dag:
		j = dag[job_id]
		if len(j.parents) == 0:
			j.level = max(j.level, 1)
			roots.append(j)

	# Label every job with the length of the longest path leading to it.
	# A job goes back on the stack only the first time it is met or when
	# its level has just grown; re-pushing unconditionally would walk
	# every path of the graph.
	seen = set()
	for j in roots:
		seen.add(id(j))
	stack = roots[:]
	while len(stack) > 0:
		n = stack.pop()
		for c in n.children:
			wanted = n.level + 1
			if c.level < wanted:
				c.level = wanted
			elif id(c) in seen:
				continue
			seen.add(id(c))
			stack.append(c)

	# Eliminate any redundant edges. Only an edge that skips at least
	# one level can be redundant. Removing redundant edges never changes
	# which jobs can reach which, so each job needs to be examined once.
	done = set()
	stack = roots[:]
	while len(stack) > 0:
		n = stack.pop()
		if id(n) in done:
			continue
		done.add(id(n))
		children = n.children[:]
		for c in children:
			stack.append(c)
			dist = c.level - n.level
			if dist > 1:
				# Take the edge out temporarily; if n is still an
				# ancestor of c the edge was redundant, otherwise it
				# is put back
				c.parents.remove(n)
				if inv_reachable(c, n):
					sys.stderr.write(
						"Removing redunant edge: %s -> %s\n" %
						(n.id, c.id))
					n.children.remove(c)
				else:
					c.parents.append(n)

	return dag
	
def emit_dot(dag, use_xforms=False, outfile="/dev/stdout", width=8.0, height=10.0):
	"""
	Write a DOT-formatted diagram.

	dag: Dictionary mapping job id -> job.
	use_xforms: Use transformation names instead of job names
	outfile: The file name to write the diagram out to.
	width, height: Substituted into the size="W,H" attribute of the graph.

	All jobs with the same transformation name get the same color. Colors
	are taken from COLORS in order of first appearance; when there are
	more transformations than colors the last color is shared.
	"""
	import re

	def quoted(value):
		# Ids and labels are written inside double quotes. A bare double
		# quote would end the DOT string early, so escape every quote
		# that is not already preceded by a backslash.
		return re.sub(r'(?<!\\)"', r'\\"', "%s" % (value,))

	next_color = 0  # Keep track of next color
	xforms = {} # Keep track of transformation names to assign colors

	header = (
		'digraph dag {\n'
		'\tsize="%s,%s"\n'
		'\tratio=fill\n'
		'\tnode [shape=ellipse, style=filled]\n'
		'\tedge [arrowhead=normal, arrowsize=1.0]\n'
		'\t\n'
	)

	out = open(outfile,'w')
	try:
		out.write(header % (width,height))

		# One node statement per job
		for job_id in dag:
			j = dag[job_id]
			if use_xforms:
				label = j.xform
			else:
				label = j.label
			if j.xform not in xforms:
				xforms[j.xform] = next_color
				next_color += 1
				# Just in case we run out of colors
				next_color = min(len(COLORS)-1, next_color)
			color = xforms[j.xform]
			out.write('\t"%s" [color="%s",label="%s"]\n' %
				(quoted(j.id),COLORS[color],quoted(label)))

		out.write('\n')

		# One edge statement per parent -> child dependency
		for job_id in dag:
			j = dag[job_id]
			for c in j.children:
				out.write('\t"%s" -> "%s"\n' % (quoted(j.id),quoted(c.id)))

		out.write("}\n")
	finally:
		# Close the output even when a write fails half way
		out.close()
	
def main():
	"""
	Command-line entry point: read the options, parse the DAGFILE named
	on the command line, optionally drop jobs and redundant edges, and
	write the DOT diagram.
	"""
	parser = OptionParser(
		usage="%prog [options] DAGFILE",
		description="Parses DAGFILE and generates a DOT-formatted\n"
			"graphical representation of the DAG. DAGFILE can be a Condor\n"
			"DAGMan file, or a Pegasus DAX file.")

	# (flags, settings) for each option, in the order they show up in --help
	option_table = [
		(("-s", "--nosimplify"), dict(
			action="store_false", dest="simplify", default=True,
			help="Do not simplify the graph by removing redundant edges")),
		(("-x", "--xforms"), dict(
			action="store_true", dest="xforms", default=False,
			help="Use transformation names as labels instead of the default label")),
		(("-o", "--output"), dict(
			action="store", dest="outfile", metavar="FILE",
			default="/dev/stdout",
			help="Write output to FILE [default: stdout]")),
		(("-r", "--remove"), dict(
			action="append", dest="remove", metavar="XFORM", default=[],
			help="Remove jobs from the workflow by transformation name")),
		(("-W", "--width"), dict(
			action="store", dest="width", default="8.0",
			help="Width of the digraph [default: 8.0]")),
		(("-H", "--height"), dict(
			action="store", dest="height", default="10.0",
			help="Height of the digraph [default: 10.0]")),
	]
	for flags, settings in option_table:
		parser.add_option(*flags, **settings)

	opts, positional = parser.parse_args()

	# Exactly one DAGFILE is expected; parser.error() does not return
	if not positional:
		parser.error("Please specify DAGFILE")
	if len(positional) != 1:
		parser.error("Invalid argument")

	# The file extension decides which parser is used
	path = positional[0]
	if path.endswith(".dag"):
		loader = parse_dagfile
	else:
		loader = parse_daxfile
	graph = loader(path)

	remove_xforms(graph, opts.remove)

	if opts.simplify:
		graph = simplify(graph)

	emit_dot(graph, opts.xforms, opts.outfile, opts.width, opts.height)
	
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
	main()