This file is indexed.

/usr/bin/starch-diff is in bedops 2.4.26+dfsg-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python

#
#    BEDOPS
#    Copyright (C) 2011-2017 Shane Neph, Scott Kuehn and Alex Reynolds
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License along
#    with this program; if not, write to the Free Software Foundation, Inc.,
#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import sys
import os
import argparse
import errno
import subprocess
import json
import logging

name = "starch-diff"
citation = "  citation: http://bioinformatics.oxfordjournals.org/content/28/14/1919.abstract"
authors = "  authors:  Alex Reynolds and Shane Neph"
version = "  version:  2.4.26"
usage = "  $ starch-diff [ --chr <chr> ] starch-file-1 starch-file-2 [ starch-file-3 ... ]"
help = """
  The 'starch-diff' utility compares the signatures of two or more specified 
  Starch v2.2+ archives for all chromosomes, or for a specified chromosome.
"""

default_chromosome = "all"

def main():
    parser = argparse.ArgumentParser(prog=name, usage=usage, add_help=False)
    parser.add_argument('--help', '-h', action='store_true', dest='help')
    parser.add_argument('--chr', '-c', type=str, action="store", default=default_chromosome)
    parser.add_argument('--debug', '-d', action='store_true', dest='debug')
    parser.add_argument('file', type=argparse.FileType('r'), nargs='*')
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)

    if args.help or len(args.file) < 2:
        sys.stdout.write(name + '\n')
        sys.stdout.write(citation + '\n')
        sys.stdout.write(version + '\n')
        sys.stdout.write(authors + '\n\n')
        sys.stdout.write(usage + '\n')
        sys.stdout.write(help)
        if args.help:
            sys.exit(os.EX_OK)
        else:
            sys.stdout.write("\nERROR: Please specify two or more Starch archives as input\n")
            sys.exit(errno.EINVAL)

    all_tests_pass = True

    selected_chromosome = unicode(args.chr)
    if args.debug: logger.info('Selected chromosome [%s]' % (selected_chromosome))

    archive_paths = list()
    for i in range(0, len(args.file)):
        archive_handle = args.file[i]
        if not os.path.exists(archive_handle.name) or not os.path.isfile(archive_handle.name):
            sys.stdout.write("ERROR: Input [%s] is not a file or does not exist\n" % (archive_handle.name))
            sys.stdout.write("%s\n" % (usage))
            sys.exit(errno.ENOENT)
        archive_paths.append(archive_handle.name)
        if args.debug: logger.info('Appended input file to list [%s]' % (archive_handle.name))        
    num_files = len(args.file)
    if args.debug: logger.info('Number of input files [%d]' % (num_files))
    
    if selected_chromosome == default_chromosome:
        if args.debug: logger.info('Examining all chromosomes from all inputs...')
        all_chromosomes = {}
        for archive_path in archive_paths:
            get_archive_version_cmd_components = [
                'unstarch',
                '--list-json',
                archive_path
            ]
            try:
                get_archive_version_cmd_result = subprocess.check_output(get_archive_version_cmd_components)
            except subprocess.CalledProcessError as err:
                get_archive_version_cmd_result = "ERROR: Command '{}' returned with error (code {}): {}".format(err.cmd, err.returncode, err.output)
                raise
            archive_metadata = json.loads(get_archive_version_cmd_result.decode('utf-8'))
            try:
                archive_version = archive_metadata['archive']['version']
            except KeyError as err:
                sys.stderr.write("ERROR: Could not read archive version from Starch metadata\n")
                raise
            if archive_version['major'] < 2 or (archive_version['major'] == 2 and archive_version['minor'] < 2):
                sys.stderr.write("ERROR: Input [%s] must be a v2.2+ Starch archive -- use 'starchcat' or extract/recompress to update archive\n" % (archive_path))
                sys.exit(errno.EINVAL)
                
            list_chromosome_cmd_components = [
                'unstarch',
                '--list-chr',
                archive_path
            ]
            try:
                list_chromosome_cmd_result = subprocess.check_output(list_chromosome_cmd_components)
            except subprocess.CalledProcessError as err:
                list_chromosome_cmd_result = "ERROR: Command '{}' returned with error (code {}): {}".format(err.cmd, err.returncode, err.output)
                raise
            archive_chromosomes = list_chromosome_cmd_result.rstrip('\n').split('\n')
            for chromosome in archive_chromosomes:
                if chromosome not in all_chromosomes:
                    all_chromosomes[chromosome] = 0
                all_chromosomes[chromosome] += 1

        common_chromosomes = [k for k, v in all_chromosomes.iteritems() if v == num_files]
        common_chromosomes.sort()
        if args.debug: logger.info('Common chromosomes [%s]' % (','.join(common_chromosomes)))

        uncommon_chromosomes = [k for k, v in all_chromosomes.iteritems() if v != num_files]
        uncommon_chromosomes.sort()
        if args.debug: logger.info('Uncommon chromosomes [%s]' % (','.join(uncommon_chromosomes)))        

        if not common_chromosomes:
            sys.stderr.write("ERROR: Inputs share no records with a common chromosome name\n")
            sys.exit(errno.EINVAL)
    else:
        common_chromosomes = [selected_chromosome]

    tests_to_run = {}
    for common_chromosome in common_chromosomes:
        if common_chromosome not in tests_to_run: 
            tests_to_run[common_chromosome] = []

        for idx, archive_path in enumerate(archive_paths):
            get_chromosome_signature_cmd_components = [
                'unstarch',
                common_chromosome,
                '--signature',
                archive_path
            ]
            try:
                get_chromosome_signature_cmd_result = subprocess.check_output(get_chromosome_signature_cmd_components)
            except subprocess.CalledProcessError as err:
                sys.stderr.write("ERROR: Command [%s] returned with error (code %d)\n" % (' '.join(err.cmd), err.returncode))
                sys.exit(errno.EINVAL)

            chromosome_signature = get_chromosome_signature_cmd_result.rstrip('\n')
            if len(chromosome_signature) == 0:
                chromosome_signature = None

            result = {}
            result['archive'] = archive_path
            result['signature'] = chromosome_signature
            tests_to_run[common_chromosome].append(result)
            if args.debug: logger.info('Test to run: [%s] [%s]' % (common_chromosome, str(result)))

    # for a given chromosome, we have a set of objects to test for equality
    for test_chromosome in sorted(tests_to_run.keys()):
        per_chromosome_tests_pass = True
        previous_signature = None
        current_signature = None
        previous_archive = None
        current_archive = None
        none_signature_found = False
        test_list = tests_to_run[test_chromosome]
        for test_item in test_list:
            test_signature = test_item['signature']
            test_archive = test_item['archive']
            if args.debug: logger.info('Examining archive [%s]' % (test_archive))
            if args.debug: logger.info('Setting current signature [%s] to [%s]' % (current_signature, test_signature))
            current_signature = test_signature
            current_archive = test_archive
            if not test_signature:
                none_signature_found = True
                per_chromosome_tests_pass = False
                break
            elif not previous_signature:
                if args.debug: logger.info('Setting previous signature [%s] to [%s]' % (previous_signature, test_signature))
                previous_signature = test_signature
                previous_archive = test_archive
            if args.debug: logger.info('Comparing chr [%s] previous signature [%s] current signature [%s]' % (test_chromosome, previous_signature, current_signature))
            if current_signature != previous_signature:
                per_chromosome_tests_pass = False
            if not per_chromosome_tests_pass:
                all_tests_pass = False
                if none_signature_found:
                    sys.stderr.write('WARNING: One or more signatures are not available for chromosome [%s] in archive [%s]\n' % (test_chromosome, current_archive))
                else:
                    sys.stderr.write('WARNING: Signatures do not match for chromosome [%s] between archives [%s] and [%s]\n' % (test_chromosome, previous_archive, current_archive))

    if not all_tests_pass:
        sys.exit(errno.EINVAL)

    if selected_chromosome == default_chromosome:
        sys.stderr.write('Compressed genomic streams in input files are identical\n')
    else:
        sys.stderr.write('Compressed genomic streams in input files are identical for chromosome [%s]\n' % (selected_chromosome))

    sys.exit(os.EX_OK)

if __name__ == "__main__":
    main()