This file is indexed.

/usr/include/anfo/output_streams.h is in libanfo0-dev 0.98-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
//    Copyright 2009 Udo Stenzel
//    This file is part of ANFO
//
//    ANFO is free software: you can redistribute it and/or modify
//    it under the terms of the GNU General Public License as published by
//    the Free Software Foundation, either version 3 of the License, or
//    (at your option) any later version.
//
//    Anfo is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU General Public License for more details.
//
//    You should have received a copy of the GNU General Public License
//    along with Anfo.  If not, see <http://www.gnu.org/licenses/>.

#ifndef INCLUDED_OUTPUT_STREAMS_H
#define INCLUDED_OUTPUT_STREAMS_H

#include "index.h"
#include "stream.h"

#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <fstream>
#include <ios>
#include <map>

// Forward declarations of output::Hit and config::Config; only the names
// are introduced here, the class definitions live elsewhere.
namespace output { class Hit ; }
namespace config { class Config ; }

namespace streams {

	// NOTE(review): this using-directive lives in a header, so every file
	// including it sees the google::protobuf::io names unqualified inside
	// namespace streams.
	using namespace google::protobuf::io ;

//! \brief writes in (a modification of) Google's text format
//!
//! This is essentially the human readable version of the native format.
//! Additionally, alignment strings and a CLUSTAL-style 'conservation'
//! line are added, provided they were looked up using the reference
//! genome beforehand, and some fields, notably the CIGAR line, are
//! printed in a more compact format.
//!
//! The writer owns its output stream: the pointer passed to the
//! constructor is stored in a std::auto_ptr and is therefore deleted
//! when the writer is destroyed.
class TextWriter : public Stream
{
	private:
		//! Output stream, owned by this writer.  Qualified with std:: like
		//! in the other writers of this header, so the declaration does not
		//! depend on a using-directive from some other header.
		std::auto_ptr< std::ostream > out_ ;

		//! Helper taking a protobuf message; defined out of line.
		//! NOTE(review): presumably prints the message to out_ -- confirm
		//! against the implementation.
		void print_msg( const google::protobuf::Message& ) ;

	public:
		//! \param p pair of output stream and string.  Ownership of the
		//!          stream (p.first) passes to the writer; the string
		//!          (p.second) is not used by this class.
		TextWriter( const pair< std::ostream*, string > &p ) : out_( p.first ) {}

		virtual void put_header( const Header& ) ;
		virtual void put_result( const Result& ) ;
		virtual void put_footer( const Footer& ) ;

		//! \return the literal name "TextWriter"
		virtual string type_name() const { return "TextWriter" ; }
} ;

//! \brief writes in SAM format
//!
//! Every hit for every sequence is written.  A sequence without hits is
//! written out anyway.  If that's not desired, the input stream must be
//! filtered appropriately.
//!
//! The writer owns its output stream: the pointer passed to the
//! constructor is stored in a std::auto_ptr and is therefore deleted
//! when the writer is destroyed.
//!
//! \todo Writing SAM is probably a bad idea in the long run.  Directly
//!       generating BAM is more sustainable
//! \todo Creation of correctly sorted SAM files is next to impossible.
//!       Maybe creation of BAM files is better anyway.
class SamWriter : public Stream
{
	private:
		//! Outcome codes of protoHit_2_bam_Hit().  \c goodness is zero, so
		//! any other value tests true and is counted in \c discarded;
		//! \c bad_stuff_max is only the number of codes.
		enum bad_stuff { goodness = 0, no_hit, multiple_hits, no_seqid, no_seq, bad_cigar, bad_stuff_max } ;

		//! Defined out of line.
		//! NOTE(review): presumably one description per bad_stuff code --
		//! confirm against the definition.
		static const char *descr[] ;

		std::auto_ptr< std::ostream > out_ ;	//!< output stream, owned
		string nm_ ;							//!< second element of the constructor's pair
		int discarded[bad_stuff_max] ;			//!< per-code counters, see put_result()

		//! Bit values of the SAM/BAM FLAG field.
		enum bam_flags {
			bam_fpaired        = 1,   // read is paired in sequencing
			bam_fproper_pair   = 2,   // read is mapped in proper pair
			bam_funmap         = 4,   // query seq. is unmapped
			bam_fmunmap        = 8,   // mate is unmapped
			bam_freverse      = 16,   // strand of query (0 - fwd, 1 - rev)
			bam_fmreverse     = 32,   // strand of mate
			bam_fread1        = 64,   // read is 1. in pair
			bam_fread2       = 128,   // read is 2. in pair
			bam_fsecondary   = 256,   // alignment is NOT primary
			bam_fqcfail      = 512,   // read fails due to low quality
			bam_fdup        = 1024    // read is duplicate
		} ;

		//! Processes one result; defined out of line.
		//! \return \c goodness, or the code under which the result is to
		//!         be counted in \c discarded.
		bad_stuff protoHit_2_bam_Hit( const output::Result& ) ;

	public:
		//! \param p pair of output stream and string.  Ownership of the
		//!          stream (p.first) passes to the writer; the string
		//!          (p.second) is copied into nm_.
		SamWriter( const pair< ostream*, string > &p ) : out_( p.first ), nm_( p.second )
		{
			// Zero the counters with a plain loop: unlike memset() this
			// does not rely on <cstring> being pulled in by another header.
			for( int i = 0 ; i != bad_stuff_max ; ++i ) discarded[i] = 0 ;
		}

		//! Forwards the header to Stream::put_header(), then writes the
		//! "@HD" line (with "SO:queryname" if the header says the input is
		//! sorted by name) and a "@PG" line carrying the version taken
		//! from the header.
		virtual void put_header( const Header& h )
		{
			Stream::put_header( h ) ;
			*out_ << "@HD\tVN:1.0" ;
			if( h.is_sorted_by_name() ) *out_ << "\tSO:queryname" ;
			*out_ << "\n@PG\tID:ANFO\tVN:" << h.version() << '\n' ;
		}

		//! Hands the result to protoHit_2_bam_Hit() and, if that returns
		//! anything but \c goodness, bumps the matching counter.
		virtual void put_result( const Result& res )
		{
			if (bad_stuff r = protoHit_2_bam_Hit( res )) discarded[r]++;
		}

		virtual void put_footer( const Footer& ) ;
} ;


//! \brief emits alignments as pairs of FASTA records
//!
//! For each alignment two sequences are written, the reference followed
//! by the query.  The header encodes score, coordinates and whether an
//! adapter was trimmed.  Treat this as a legacy format whose main
//! remaining use is the production of substitution graphs.
//!
//! The stream pointer handed to the constructor is held in a
//! std::auto_ptr, i.e. the writer deletes it on destruction.
class FastaAlnWriter : public Stream
{
	std::auto_ptr< std::ostream > out_ ;	//!< destination stream, owned

public:
	//! Takes over the stream in \c p.first; \c p.second is not used here.
	FastaAlnWriter( const pair< ostream*, string > &p )
		: out_( p.first )
	{
	}

	//! Handles one result; implemented out of line.
	virtual void put_result( const Result& ) ;
} ;

//! \brief emits sequences in FASTQ format
//!
//! The intent is to reproduce the input given to ANFO, so the ugly
//! FASTQ files can be thrown away.  Each sequence gets a header followed
//! by the sequence at 50 bases per line; the header is then *not*
//! repeated, and the quality scores follow in the same layout.
//!
//! The stream pointer handed to the constructor is held in a
//! std::auto_ptr, i.e. the writer deletes it on destruction.
class FastqWriter : public Stream
{
	std::auto_ptr< std::ostream > out_ ;	//!< destination stream, owned
	bool with_qual_ ;						//!< flag given to the constructor

public:
	//! \param p pair whose first element is the stream to take over;
	//!          the second element is not used here.
	//! \param q stored in with_qual_.
	//!          NOTE(review): presumably selects whether quality scores
	//!          are written -- confirm against put_result().
	FastqWriter( const pair< ostream*, string > &p, bool q )
		: out_( p.first )
		, with_qual_( q )
	{
	}

	//! Handles one result; implemented out of line.
	virtual void put_result( const Result& ) ;
} ;

//! \brief stream sink writing to an owned std::ostream
//!
//! NOTE(review): judging by the name, results are written as a table;
//! the actual layout is determined by the out-of-line put_result().
//!
//! The stream pointer handed to the constructor is held in a
//! std::auto_ptr, i.e. the writer deletes it on destruction.
class TableWriter : public Stream
{
	std::auto_ptr< std::ostream > out_ ;	//!< destination stream, owned

public:
	//! Takes over the stream in \c p.first; \c p.second is not used here.
	TableWriter( const pair< ostream*, string > &p )
		: out_( p.first )
	{
	}

	//! Handles one result; implemented out of line.
	virtual void put_result( const Result& ) ;
} ;

//! \brief filter configured with a context size and a strictness flag
//!
//! NOTE(review): judging by the name, xform() generates the textual
//! alignment for a result; what exactly \c context and \c strict control
//! is decided by the out-of-line implementation -- confirm there.
class GenTextAlignment : public Filter
{
	int  context_ ;		//!< value of the constructor's \c context argument
	bool strict_ ;		//!< value of the constructor's \c strict argument

public:
	//! Stores both arguments unchanged for later use.
	GenTextAlignment( int context, bool strict )
		: context_( context )
		, strict_( strict )
	{
	}

	//! Processes one result in place; implemented out of line.
	virtual bool xform( Result& ) ;
} ;

//! \brief emits depth of coverage in WIG format
//!
//! Only meaningful once DuctTaper has been applied; the writer then
//! extracts the depth of coverage (aka the number of observations) for
//! each position.  Be aware that the output gets huge, this being a
//! text based format.
//!
//! The stream pointer handed to the constructor is held in a
//! std::auto_ptr, i.e. the writer deletes it on destruction.
class WigCoverageWriter : public Stream
{
	std::auto_ptr< std::ostream > out_ ;	//!< destination stream, owned

public:
	//! Takes over the stream in \c p.first; \c p.second is not used here.
	WigCoverageWriter( const pair< ostream*, string > &p )
		: out_( p.first )
	{
	}

	//! Handles one result; implemented out of line.
	virtual void put_result( const Result& ) ;
} ;

} // namespace

#endif