/usr/include/anfo/output_streams.h is in libanfo0-dev 0.98-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// Copyright 2009 Udo Stenzel
// This file is part of ANFO
//
// ANFO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Anfo is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Anfo. If not, see <http://www.gnu.org/licenses/>.
#ifndef INCLUDED_OUTPUT_STREAMS_H
#define INCLUDED_OUTPUT_STREAMS_H
#include "index.h"
#include "stream.h"
#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <fstream>
#include <ios>
#include <map>
// Forward declarations of classes living in the `output` and `config`
// namespaces.
// NOTE(review): none of the declarations visible in this header refers to
// output::Hit or config::Config by name (SamWriter uses output::Result) --
// confirm whether these forward declarations are still required.
namespace output { class Hit ; }
namespace config { class Config ; }
namespace streams {
using namespace google::protobuf::io ;
//! \brief writes in (a modification of) Google's text format
//! This is essentially the human readable version of the native format.
//! Additionally, alignment strings and a CLUSTAL-style 'conservation'
//! line are added, provided they were looked up using the reference
//! genome beforehand, and some fields, notably the CIGAR line, are
//! printed in a more compact format.
class TextWriter : public Stream
{
    // Stream handed over at construction time.  It is held in an auto_ptr,
    // so this object is responsible for it from then on.
    auto_ptr< std::ostream > out_ ;

    // Takes an arbitrary protobuf message; the body is defined out of line.
    void print_msg( const google::protobuf::Message& ) ;

public:
    //! \param p pair of (output stream, string).  Only the stream is
    //!          stored; the string component is not used by this
    //!          constructor.
    TextWriter( const pair< std::ostream*, string > &p )
        : out_( p.first )
    {
    }

    // Handlers for the three parts of a stream; all defined out of line.
    virtual void put_header( const Header& ) ;
    virtual void put_result( const Result& ) ;
    virtual void put_footer( const Footer& ) ;

    //! \return the constant string "TextWriter"
    virtual string type_name() const
    {
        return "TextWriter" ;
    }
} ;
//! \brief writes in SAM format
//! Every hit for every sequence is written. A sequence without hits is
//! written out anyway. If that's not desired, the input stream must be
//! filtered appropriately.
//! \todo Writing SAM is probably a bad idea in the long run. Directly
//! generating BAM is more sustainable
//! \todo Creation of correctly sorted SAM files is next to impossible.
//! Maybe creation of BAM files is better anyway.
class SamWriter : public Stream
{
private:
    //! Result codes of protoHit_2_bam_Hit().  `goodness` (zero) is the
    //! only code that put_result() does not count; `bad_stuff_max` is
    //! not a real code, it only sizes the counter array.
    enum bad_stuff { goodness = 0, no_hit, multiple_hits, no_seqid, no_seq, bad_cigar, bad_stuff_max } ;

    //! Table of C strings, defined out of line.
    //! NOTE(review): presumably one description per bad_stuff code --
    //! confirm against the definition.
    static const char *descr[] ;

    //! Output stream taken from the constructor argument; held in an
    //! auto_ptr, so this object is responsible for it.
    std::auto_ptr< std::ostream > out_ ;

    //! String component of the constructor argument.
    string nm_ ;

    //! One counter per bad_stuff code, incremented by put_result().
    int discarded[bad_stuff_max] ;

    enum bam_flags {
        bam_fpaired      =    1, // read is paired in sequencing
        bam_fproper_pair =    2, // read is mapped in proper pair
        bam_funmap       =    4, // query seq. is unmapped
        bam_fmunmap      =    8, // mate is unmapped
        bam_freverse     =   16, // strand of query (0 - fwd, 1 - rev)
        bam_fmreverse    =   32, // strand of mate
        bam_fread1       =   64, // read is 1. in pair
        bam_fread2       =  128, // read is 2. in pair
        bam_fsecondary   =  256, // alignment is NOT primary
        bam_fqcfail      =  512, // read fails due to low quality
        bam_fdup         = 1024  // read is duplicate
    } ;

    //! Processes one result (defined out of line).
    //! \return `goodness` on success, otherwise the code under which
    //!         put_result() counts the record in discarded[].
    bad_stuff protoHit_2_bam_Hit( const output::Result& ) ;

public:
    //! \param p pair of (output stream, string); the stream becomes out_,
    //!          the string becomes nm_.
    //!
    //! All counters start at zero.  The array is value-initialised in the
    //! initialiser list instead of being cleared with memset(), so this
    //! header no longer depends on <cstring> being pulled in by some
    //! other include.
    SamWriter( const pair< ostream*, string > &p )
        : out_( p.first ), nm_( p.second ), discarded()
    {
    }

    //! Forwards the header to Stream::put_header(), then writes the
    //! "@HD" line (with "SO:queryname" if the header reports sorting by
    //! name) followed by a "@PG" line carrying the header's version.
    virtual void put_header( const Header& h )
    {
        Stream::put_header( h ) ;

        std::ostream &os = *out_ ;
        os << "@HD\tVN:1.0" ;
        if( h.is_sorted_by_name() ) os << "\tSO:queryname" ;
        os << "\n@PG\tID:ANFO\tVN:" << h.version() << '\n' ;
    }

    //! Hands the result to protoHit_2_bam_Hit() and, unless that
    //! returns `goodness`, bumps the counter for the returned code.
    virtual void put_result( const Result& res )
    {
        const bad_stuff verdict = protoHit_2_bam_Hit( res ) ;
        if( verdict != goodness ) ++discarded[verdict] ;
    }

    virtual void put_footer( const Footer& ) ;
} ;
//! \brief writes in aligned FASTA-format
//! Each alignment appears as a pair of sequences, reference first,
//! query last. Score, coordinates and whether an adapter was trimmed
//! are encoded in the header. This is considered a legacy format,
//! mostly useful to make substitution graphs from.
class FastaAlnWriter : public Stream
{
    // Output stream taken from the constructor argument and held in an
    // auto_ptr from then on.
    std::auto_ptr< std::ostream > out_ ;

public:
    //! \param p pair of (output stream, string); only the stream is
    //!          stored, the string is not used by this constructor.
    FastaAlnWriter( const pair< ostream*, string > &p )
        : out_( p.first )
    {
    }

    // The only handler this class overrides; defined out of line.
    virtual void put_result( const Result& ) ;
} ;
//! \brief writes in FASTQ format
//! This should simply reproduce the input to ANFO, so I can throw away
//! the ugly FASTQ files. Every sequence gets a header, then the
//! sequence (50 bases per line), the header is then *not* repeated and
//! the quality scores follow in the same layout.
class FastqWriter : public Stream
{
    // Output stream taken from the constructor argument and held in an
    // auto_ptr from then on.
    std::auto_ptr< std::ostream > out_ ;

    // Copy of the constructor's boolean argument.
    // NOTE(review): presumably selects whether quality scores are
    // written -- confirm against put_result()'s definition.
    bool with_qual_ ;

public:
    //! \param p pair of (output stream, string); only the stream is
    //!          stored, the string is not used by this constructor.
    //! \param q stored as with_qual_
    FastqWriter( const pair< ostream*, string > &p, bool q )
        : out_( p.first )
        , with_qual_( q )
    {
    }

    // The only handler this class overrides; defined out of line.
    virtual void put_result( const Result& ) ;
} ;
// Output stream class that overrides only put_result().
// NOTE(review): going by the name, this presumably emits results as a
// table -- confirm against the out-of-line definition of put_result().
class TableWriter : public Stream
{
    // Output stream taken from the constructor argument and held in an
    // auto_ptr from then on.
    std::auto_ptr< std::ostream > out_ ;

public:
    //! \param p pair of (output stream, string); only the stream is
    //!          stored, the string is not used by this constructor.
    TableWriter( const pair< ostream*, string > &p )
        : out_( p.first )
    {
    }

    // Defined out of line.
    virtual void put_result( const Result& ) ;
} ;
// Filter that carries two settings, an integer `context` and a boolean
// `strict`, and transforms results in xform() (defined out of line).
// NOTE(review): the name suggests it generates textual alignments, with
// `context` being an amount of surrounding sequence -- confirm against
// the definition of xform().
class GenTextAlignment : public Filter
{
    int context_ ;  // copy of the constructor's `context` argument
    bool strict_ ;  // copy of the constructor's `strict` argument

public:
    //! \param context stored as context_
    //! \param strict  stored as strict_
    GenTextAlignment( int context, bool strict )
        : context_( context )
        , strict_( strict )
    {
    }

    // Receives the result by non-const reference; defined out of line.
    virtual bool xform( Result& ) ;
} ;
//! \brief writes out coverage depth in WIG format
//! This only works after DuctTaper has been applied, afterwards it
//! extracts the depth of coverage (aka number of observations) per
//! position.
//! Note that the output file will get huge; we're talking about a text
//! based format...
class WigCoverageWriter : public Stream
{
    // Output stream taken from the constructor argument and held in an
    // auto_ptr from then on.
    std::auto_ptr< std::ostream > out_ ;

public:
    //! \param p pair of (output stream, string); only the stream is
    //!          stored, the string is not used by this constructor.
    WigCoverageWriter( const pair< ostream*, string > &p )
        : out_( p.first )
    {
    }

    // The only handler this class overrides; defined out of line.
    virtual void put_result( const Result& ) ;
} ;
} // namespace
#endif
|