/usr/lib/python3/dist-packages/csvkit/cleanup.py is in python3-csvkit 1.0.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
from csvkit.exceptions import CSVTestException, LengthMismatchError
def join_rows(rows, joiner=' '):
    """
    Given a series of rows, return them as a single row where the inner edge
    cells are merged. By default joins with a single space character, but you
    can specify new-line, empty string, or anything else with the 'joiner'
    kwarg.

    The last cell of the accumulated row is concatenated with ``joiner`` and
    the first cell of the next row; the remaining cells of that next row are
    appended unchanged. An empty row is treated as a row holding a single
    empty cell, wherever it appears in the sequence.

    :param rows: An iterable of rows (sequences of cells). It is consumed
        once; the input rows themselves are not modified.
    :param joiner: The string placed between the two merged edge cells.
    :returns: A new list with the joined cells. An empty list when ``rows``
        is empty or consists of a single empty row.
    """
    rows = list(rows)

    if not rows:
        return []

    # Copy so that the caller's first row is never mutated.
    fixed_row = list(rows[0])

    for row in rows[1:]:
        first_cell = row[0] if row else ''

        # An empty leading row (e.g. a blank line in the input) has no last
        # cell to merge into; give it an empty one instead of failing on
        # fixed_row[-1].
        if not fixed_row:
            fixed_row.append('')

        fixed_row[-1] += "%s%s" % (joiner, first_cell)
        fixed_row.extend(row[1:])

    return fixed_row
class RowChecker(object):
    """
    Iterate over rows of a CSV producing cleaned rows and storing error rows.

    After :meth:`checked_rows` has been consumed:

    * ``column_names`` holds the first row read from the reader.
    * ``errors`` holds the exceptions for rows that could not be emitted.
    * ``rows_joined`` counts the short rows that were merged into valid rows.
    * ``joins`` counts the merged rows that were emitted.
    """

    def __init__(self, reader):
        """
        :param reader: An iterator of rows that also exposes a ``line_num``
            attribute (as ``csv.reader`` does). Its first row is consumed
            immediately and stored as the column names, so ``next()`` raises
            ``StopIteration`` here if the reader is empty.
        """
        self.reader = reader
        self.column_names = next(reader)
        self.errors = []
        self.rows_joined = 0
        self.joins = 0

    def checked_rows(self):
        """
        A generator which yields rows which are ready to write to output.

        Rows whose length matches the header are yielded unchanged. Rows of
        any other length are recorded in ``self.errors``. Consecutive rows
        that are too short are additionally buffered and joined with
        ``join_rows``; when the joined result has exactly the expected length
        it is yielded and the rows it was built from are removed from
        ``self.errors``.
        """
        length = len(self.column_names)
        # The line number attached to a row's error is the reader's line_num
        # as it was *before* that row was read.
        line_number = self.reader.line_num
        # Errors for the current run of consecutive too-short rows.
        joinable_row_errors = []

        for row in self.reader:
            try:
                if len(row) != length:
                    raise LengthMismatchError(line_number, row, length)

                yield row

                # Don't join rows across valid rows.
                joinable_row_errors = []
            except LengthMismatchError as e:
                self.errors.append(e)

                # Don't join with long rows.
                if len(row) > length:
                    joinable_row_errors = []
                else:
                    joinable_row_errors.append(e)

                    while joinable_row_errors:
                        fixed_row = join_rows(
                            [error.row for error in joinable_row_errors],
                            joiner=' ',
                        )

                        # Still too short: wait for more rows.
                        if len(fixed_row) < length:
                            break

                        if len(fixed_row) == length:
                            self.rows_joined += len(joinable_row_errors)
                            self.joins += 1

                            yield fixed_row

                            # Every buffered row is now part of the emitted
                            # row, so none of them is an error any more.
                            # Iterate without mutating the list being
                            # iterated: removing from it inside the loop
                            # skips every other element and would leave
                            # joined rows behind in self.errors.
                            for fixed in joinable_row_errors:
                                self.errors.remove(fixed)

                            joinable_row_errors = []

                            break

                        # keep trying in case we're too long because of a straggler
                        joinable_row_errors = joinable_row_errors[1:]
            except CSVTestException as e:
                self.errors.append(e)

                # Don't join rows across other errors.
                joinable_row_errors = []

            line_number = self.reader.line_num
|