/usr/lib/emboss/include/ajseqbam.h is in emboss-lib 6.6.0+dfsg-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 | /* @include ajseqbam **********************************************************
**
** AJAX BAM format sequence processing functions
**
** These functions control all aspects of AJAX BAM file processing
**
** @author Copyright (C) 2010 Peter Rice ported from samtools
** @version $Revision: 1.21 $
** @modified 2010-2011 Peter Rice
** @modified $Date: 2012/07/02 17:24:52 $ by $Author: rice $
** @@
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public
** License as published by the Free Software Foundation; either
** version 2.1 of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free Software
** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
** MA 02110-1301, USA.
**
******************************************************************************/
/* The MIT License
**
** Copyright (c) 2008 Genome Research Ltd (GRL).
**
** Permission is hereby granted, free of charge, to any person obtaining
** a copy of this software and associated documentation files (the
** "Software"), to deal in the Software without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Software, and to
** permit persons to whom the Software is furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be
** included in all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
** NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
** BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
** ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
** SOFTWARE.
*/
/* Contact: Heng Li <lh3@sanger.ac.uk> */
/*
** much modified for EMBOSS by Peter Rice pmr@ebi.ac.uk May 2010
** lists changed to AjPList
** hashes changed to AjPTable
** strings changed to AjPStr
** fixed-length datatypes int32_t etc. changed to EMBOSS types
*/
#ifndef AJSEQBAM_H
#define AJSEQBAM_H
/*
** BAM library provides I/O and various operations on manipulating files
** in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)
** format. It now supports importing from or exporting to TAM, sorting,
** merging, generating pileup, and quickly retrieval of reads overlapped
** with a specified region.
**
** copyright Genome Research Ltd.
*/
/* ========================================================================= */
/* ============================= include files ============================= */
/* ========================================================================= */
#include "ajdefine.h"
#include "ajlist.h"
#include "ajtable.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "zlib.h"
AJ_BEGIN_DECLS
/* ========================================================================= */
/* =============================== constants =============================== */
/* ========================================================================= */
#ifdef WIN32
#define inline __inline
#endif /* WIN32 */
/* The read is paired in sequencing, no matter whether it is mapped in a pair */
#define BAM_FPAIRED 1
/* The read is mapped in a proper pair */
#define BAM_FPROPER_PAIR 2
/* The read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
#define BAM_FUNMAP 4
/* The mate is unmapped */
#define BAM_FMUNMAP 8
/* The read is mapped to the reverse strand */
#define BAM_FREVERSE 16
/* The mate is mapped to the reverse strand */
#define BAM_FMREVERSE 32
/* This is read1 */
#define BAM_FREAD1 64
/* This is read2 */
#define BAM_FREAD2 128
/* Not primary alignment */
#define BAM_FSECONDARY 256
/* QC failure */
#define BAM_FQCFAIL 512
/* Optical or PCR duplicate */
#define BAM_FDUP 1024
#define BAM_OFDEC 0
#define BAM_OFHEX 1
#define BAM_OFSTR 2
/* Default mask for pileup */
#define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
#define BAM_CORE_SIZE sizeof(AjOSeqBamCore)
/*
** Describing how CIGAR operation/length is packed in a 32-bit integer.
*/
#define BAM_CIGAR_SHIFT 4
#define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1)
/*
** CIGAR operations.
*/
/* CIGAR: M match or mismatch */
#define BAM_CMATCH 0
/* CIGAR: I insertion to the reference */
#define BAM_CINS 1
/* CIGAR: D deletion from the reference */
#define BAM_CDEL 2
/* CIGAR: N skip on the reference (e.g. spliced alignment) */
#define BAM_CREF_SKIP 3
/* CIGAR: S clip on the read with clipped sequence present in qseq */
#define BAM_CSOFT_CLIP 4
/* CIGAR: H clip on the read with clipped sequence trimmed off */
#define BAM_CHARD_CLIP 5
/* CIGAR: P padding */
#define BAM_CPAD 6
/* CIGAR: = match */
#define BAM_CEQUAL 7
/* CIGAR: X mismatch */
#define BAM_CDIFF 8
extern const char* cigarcode;
extern const char* bam_nt16_rev_table;
/* ========================================================================= */
/* ============================== public data ============================== */
/* ========================================================================= */
/* @data AjPSeqBamBgzf ********************************************************
**
** BGZF file handling object
**
** @alias AjOSeqBamBgzf
** @alias AjSSeqBamBgzf
**
** @attr file [FILE*] File object
** @attr cache [AjPTable] Block cache
** @attr uncompressed_block [void*] Uncompressed block data
** @attr compressed_block [void*] Compressed block data
** @attr error [const char*] Error description
** @attr block_address [ajlong] Block offset
** @attr file_descriptor [int] File descriptor
** @attr cache_size [int] Cache size
** @attr uncompressed_block_size [int] Uncompressed block size
** @attr compressed_block_size [int] Compressed block size
** @attr block_length [int] Block length
** @attr block_offset [int] Block offset
** @attr open_mode [char] Open_mode 'r' or 'w'
** @attr owned_file [char] Boolean
** @attr is_uncompressed [char] Boolean
** @attr Padding [char[5]] Padding
**
******************************************************************************/
typedef struct AjSSeqBamBgzf
{
FILE* file;
AjPTable cache;
void* uncompressed_block;
void* compressed_block;
const char* error;
ajlong block_address;
int file_descriptor;
int cache_size;
int uncompressed_block_size;
int compressed_block_size;
int block_length;
int block_offset;
char open_mode;
char owned_file;
char is_uncompressed;
char Padding[5];
} AjOSeqBamBgzf;
#define AjPSeqBamBgzf AjOSeqBamBgzf*
#define BAM_VIRTUAL_OFFSET16
/* #abstract BAM file handler */
/* @data AjPSeqBamHeader ******************************************************
**
** BAM alignment file header data
**
** @attr target_name [char**] names of the reference sequences
** @attr target_len [ajuint*] lengths of the reference sequences
** @attr dict [AjPList] header dictionary
** @attr hash [AjPTable] hash table for fast name lookup
** @attr rg2lib [AjPTable] hash table for @RG-ID -> LB lookup
** @attr text [char*] plain text
** @attr n_targets [ajint] number of reference sequences
** @attr l_text [ajint] length of the plain text in the header
**
** @@
** discussion Field hash points to null by default. It is a private
** member.
******************************************************************************/
typedef struct AjSSeqBamheader
{
char **target_name;
ajuint *target_len;
AjPList dict;
AjPTable hash;
AjPTable rg2lib;
char *text;
ajint n_targets;
ajint l_text;
} AjOSeqBamHeader;
#define AjPSeqBamHeader AjOSeqBamHeader*
/* @data AjPSeqBamCore ********************************************************
**
** Structure for core alignment information.
**
** @attr tid [ajint] read ID, defined by AjPSeqBamheader
** @attr pos [ajint] 0-based leftmost coordinate
** @attr bin [ajushort] bin calculated by ajSeqBamReg2bin()
** @attr qual [unsigned char] mapping quality
** @attr l_qname [unsigned char] length of the query name
** @attr flag [ajushort] bitwise flag
** @attr n_cigar [ajushort] number of CIGAR operations
** @attr l_qseq [ajint] length of the query sequence (read)
** @attr mtid [ajint] paired read (mate) ID
** @attr mpos [ajint] paired read (mate) position
** @attr isize [ajint] insert size for paired reads
******************************************************************************/
typedef struct AjSBamSeqCore
{
ajint tid;
ajint pos;
ajushort bin;
unsigned char qual;
unsigned char l_qname;
ajushort flag;
ajushort n_cigar;
ajint l_qseq;
ajint mtid;
ajint mpos;
ajint isize;
} AjOSeqBamCore;
#define AjPSeqBamCore AjOSeqBamCore*
/* @data AjPSeqBam ************************************************************
**
** Structure for one alignment.
**
** @alias AjSSeqBam
** @alias AjOSeqBam
**
** @attr core [AjOSeqBamCore] core information about the alignment
** @attr data [unsigned char*] all variable-length data, concatenated;
** structure: cigar-qname-seq-qual-aux
** @attr l_aux [int] length of auxiliary data
** @attr data_len [int] current length of data
** @attr m_data [int] maximum reserved size of data
** @attr Padding [int] Padding to alignment boundary
**
** @@
** discussion Notes:
**
** 1. qname is zero tailed and core.l_qname includes the tailing '\0'.
**
** 2. l_qseq is calculated from the total length of an alignment block
** on reading or from CIGAR.
******************************************************************************/
typedef struct AjSSeqBam
{
AjOSeqBamCore core;
unsigned char *data;
int l_aux;
int data_len;
int m_data;
int Padding;
} AjOSeqBam;
#define AjPSeqBam AjOSeqBam*
/* ========================================================================= */
/* =========================== public functions ============================ */
/* ========================================================================= */
#define MAJSEQBAMSTRAND(b) (((b)->core.flag&BAM_FREVERSE) != 0)
#define MAJSEQBAMMSTRAND(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
/*
** Get the CIGAR array
** param b pointer to an alignment
** return pointer to the CIGAR array
**
** In the CIGAR array, each element is a 32-bit integer. The
** lower 4 bits gives a CIGAR operation and the higher 28 bits keep the
** length of a CIGAR.
*/
#define MAJSEQBAMCIGAR(b) ((ajuint*)((b)->data + (b)->core.l_qname))
/*
** Get the name of the query
** param b pointer to an alignment
** return pointer to the name string, null terminated
*/
#define MAJSEQBAMQNAME(b) ((char*)((b)->data))
/*
** Get query sequence
** param b pointer to an alignment
** return pointer to sequence
**
** Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
** 8 for T and 15 for N. Two bases are packed in one byte with the base
** at the higher 4 bits having smaller coordinate on the read. It is
** recommended to use bam1_seqi() macro to get the base.
*/
#define MAJSEQBAMSEQ(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname)
/*
** Get query quality
** param b pointer to an alignment
** return pointer to quality string
*/
#define MAJSEQBAMQUAL(b) ((b)->data + (b)->core.n_cigar*4 + \
(b)->core.l_qname + ((b)->core.l_qseq + 1)/2)
/*
** Get a base on read
** param s Query sequence returned by bam1_seq()
** param i The i-th position, 0-based
** return 4-bit integer representing the base.
*/
#define MAJSEQBAMSEQI(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
/*
** Get pointer to the list of auxiliary data
** param b pointer to an alignment
** return pointer to the concatenated auxiliary data
*/
#define MAJSEQBAMAUX(b) ((b)->data + (b)->core.n_cigar*4 + \
(b)->core.l_qname + \
(b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
#ifndef kroundup32
/*
** Round an integer to the next closest power-2 integer.
** param x integer to be rounded (in place)
** x will be modified.
*/
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, \
(x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif /* !kroundup32 */
/*
** Prototype definitions
*/
void ajSeqBamAuxAppend(AjPSeqBam b, const char tag[2],
char type, int len, const unsigned char* data);
AjPSeqBamBgzf ajSeqBamBgzfNew(FILE* file, const char* mode);
AjPSeqBamBgzf ajSeqBamBgzfOpenfd(int fd, const char *mode);
AjPSeqBamBgzf ajSeqBamBgzfOpenC(const char* path, const char *mode);
ajuint ajSeqBamCalend(const AjPSeqBamCore c, const ajuint *cigar);
int ajSeqBamBgzfClose(AjPSeqBamBgzf fp);
int ajSeqBamBgzfEof(AjPSeqBamBgzf fp);
int ajSeqBamBgzfFlush(AjPSeqBamBgzf fp);
int ajSeqBamBgzfRead(AjPSeqBamBgzf fp, void* data, int length);
ajlong ajSeqBamBgzfSeek(AjPSeqBamBgzf fp, ajlong pos, int where);
AjBool ajSeqBamBgzfSetInfile(AjPSeqBamBgzf gzfile, AjPFile outf);
AjBool ajSeqBamBgzfSetOutfile(AjPSeqBamBgzf gzfile, AjPFile outf);
int ajSeqBamBgzfWrite(AjPSeqBamBgzf fp, const void* data, int length);
void ajSeqBamDel(AjPSeqBam *Pbam);
AjPSeqBamHeader ajSeqBamHeaderNew(void);
AjPSeqBamHeader ajSeqBamHeaderNewN(ajint n);
AjPSeqBamHeader ajSeqBamHeaderNewTextC(const char* txt);
AjPSeqBamHeader ajSeqBamHeaderRead(AjPSeqBamBgzf gzfile);
void ajSeqBamHeaderDel(AjPSeqBamHeader *Pheader);
int ajSeqBamHeaderWrite(AjPSeqBamBgzf fp, const AjPSeqBamHeader header);
AjPTable ajSeqBamHeaderGetRefseqTags(const AjPSeqBamHeader header);
AjPTable ajSeqBamHeaderGetReadgroupTags(const AjPSeqBamHeader header);
const char* ajSeqBamHeaderGetSortorder(const AjPSeqBamHeader header);
void ajSeqBamHeaderSetTextC(AjPSeqBamHeader header, const char* txt);
int ajSeqBamRead(AjPSeqBamBgzf fp, AjPSeqBam b);
int ajSeqBamReg2bin(ajuint beg, ajuint end);
int ajSeqBamValidate(const AjPSeqBamHeader header, const AjPSeqBam b);
int ajSeqBamWrite(AjPSeqBamBgzf fp, const AjPSeqBam b);
const char *ajSeqBamGetLibrary(AjPSeqBamHeader header, const AjPSeqBam b);
AjPTable ajSeqBamHeaderLineParse(const char *headerLine);
/*
** End of prototype definitions
*/
AJ_END_DECLS
#endif /* !AJSEQBAM_H */
|