/*
** Packaging note: this is /usr/lib/emboss/include/ajseqbam.h as shipped in
** emboss-lib 6.6.0+dfsg-6 (owned by root:root, mode 0644).
*/

/* @include ajseqbam **********************************************************
**
** AJAX BAM format sequence processing functions
**
** These functions control all aspects of AJAX BAM file processing
**
** @author Copyright (C) 2010 Peter Rice ported from samtools
** @version $Revision: 1.21 $
** @modified 2010-2011 Peter Rice
** @modified $Date: 2012/07/02 17:24:52 $ by $Author: rice $
** @@
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public
** License as published by the Free Software Foundation; either
** version 2.1 of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free Software
** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
** MA  02110-1301,  USA.
**
******************************************************************************/

/* The MIT License
**
**   Copyright (c) 2008 Genome Research Ltd (GRL).
**
**   Permission is hereby granted, free of charge, to any person obtaining
**   a copy of this software and associated documentation files (the
**   "Software"), to deal in the Software without restriction, including
**   without limitation the rights to use, copy, modify, merge, publish,
**   distribute, sublicense, and/or sell copies of the Software, and to
**   permit persons to whom the Software is furnished to do so, subject to
**   the following conditions:
**
**   The above copyright notice and this permission notice shall be
**   included in all copies or substantial portions of the Software.
**
**   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
**   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
**   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
**   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
**   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
**   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
**   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
**   SOFTWARE.
*/

/* Contact: Heng Li <lh3@sanger.ac.uk> */

/*
** much modified for EMBOSS by Peter Rice pmr@ebi.ac.uk May 2010
** lists changed to AjPList
** hashes changed to AjPTable
** strings changed to AjPStr
** fixed-length datatypes int32_t etc. changed to EMBOSS types
*/

#ifndef AJSEQBAM_H
#define AJSEQBAM_H

/*
**  The BAM library provides I/O and various operations for manipulating
**  files in the BAM (Binary Alignment/Mapping) or SAM (Sequence
**  Alignment/Map) format. It now supports importing from or exporting to
**  TAM, sorting, merging, generating pileup, and quick retrieval of reads
**  overlapping a specified region.
**
**  copyright Genome Research Ltd.
*/

/* ========================================================================= */
/* ============================= include files ============================= */
/* ========================================================================= */

#include "ajdefine.h"
#include "ajlist.h"
#include "ajtable.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "zlib.h"

AJ_BEGIN_DECLS




/* ========================================================================= */
/* =============================== constants =============================== */
/* ========================================================================= */




/*
** When WIN32 is defined, map the 'inline' keyword onto the
** compiler-specific '__inline' spelling.
*/
#ifdef WIN32
#define inline __inline
#endif /* WIN32 */

/*
** Bit values for the alignment flag field (core.flag of an alignment).
** Each value is a distinct power of two, so flags can be combined with
** bitwise OR and tested with bitwise AND.
*/

/* The read is paired in sequencing, whether or not it is mapped in a pair */
#define BAM_FPAIRED        1

/* The read is mapped in a proper pair */
#define BAM_FPROPER_PAIR   2

/* The read itself is unmapped; conflicts with BAM_FPROPER_PAIR */
#define BAM_FUNMAP         4

/* The mate is unmapped */
#define BAM_FMUNMAP        8

/* The read is mapped to the reverse strand */
#define BAM_FREVERSE      16

/* The mate is mapped to the reverse strand */
#define BAM_FMREVERSE     32

/* This is read1 */
#define BAM_FREAD1        64

/* This is read2 */
#define BAM_FREAD2       128

/* Not primary alignment */
#define BAM_FSECONDARY   256

/* QC failure */
#define BAM_FQCFAIL      512

/* Optical or PCR duplicate */
#define BAM_FDUP        1024


/*
** BAM_OF* selector values (0, 1, 2). Nothing in this header uses them.
** NOTE(review): by name these presumably select decimal / hexadecimal /
** string output formats - confirm against the callers.
*/
#define BAM_OFDEC          0
#define BAM_OFHEX          1
#define BAM_OFSTR          2


/*
** Default mask for pileup: the unmapped, secondary, QC-failure and
** duplicate flag bits combined.
*/
#define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)

/*
** Size in bytes of the fixed-length core alignment record.
** A macro is only expanded where it is used, so AjOSeqBamCore needs to be
** declared before BAM_CORE_SIZE is used, not before this line.
*/
#define BAM_CORE_SIZE   sizeof(AjOSeqBamCore)

/*
** Describes how a CIGAR operation and its length are packed in a 32-bit
** integer: the operation code occupies the low BAM_CIGAR_SHIFT (4) bits,
** selected by BAM_CIGAR_MASK (0xf), and the length occupies the remaining
** higher bits.
*/

#define BAM_CIGAR_SHIFT 4
#define BAM_CIGAR_MASK  ((1 << BAM_CIGAR_SHIFT) - 1)

/*
**  CIGAR operation codes (values 0 to 8). Each fits in the low 4 bits of
**  a packed CIGAR element (see BAM_CIGAR_MASK).
*/

/* CIGAR: M match or mismatch */
#define BAM_CMATCH      0

/* CIGAR: I insertion to the reference */
#define BAM_CINS        1

/* CIGAR: D deletion from the reference */
#define BAM_CDEL        2

/* CIGAR: N skip on the reference (e.g. spliced alignment) */
#define BAM_CREF_SKIP   3

/* CIGAR: S clip on the read with clipped sequence present in qseq */
#define BAM_CSOFT_CLIP  4

/* CIGAR: H clip on the read with clipped sequence trimmed off */
#define BAM_CHARD_CLIP  5

/* CIGAR: P padding */
#define BAM_CPAD        6

/* CIGAR: = match */
#define BAM_CEQUAL        7

/* CIGAR: X mismatch */
#define BAM_CDIFF        8


/*
** Character tables defined outside this header.
** NOTE(review): by name, cigarcode presumably maps a CIGAR operation code
** (BAM_CMATCH ...) to its letter, and bam_nt16_rev_table a 4-bit base code
** to a nucleotide character - confirm against the definitions.
*/
extern const char* cigarcode;
extern const char* bam_nt16_rev_table;




/* ========================================================================= */
/* ============================== public data ============================== */
/* ========================================================================= */




/* @data AjPSeqBamBgzf ********************************************************
**
** BGZF file handling object
**
** @alias AjOSeqBamBgzf
** @alias AjSSeqBamBgzf
**
** @attr file [FILE*] File object
** @attr cache [AjPTable] Block cache
** @attr uncompressed_block [void*] Uncompressed block data
** @attr compressed_block [void*] Compressed block data
** @attr error [const char*] Error description
** @attr block_address [ajlong] Block offset
** @attr file_descriptor [int] File descriptor
** @attr cache_size [int] Cache size
** @attr uncompressed_block_size [int] Uncompressed block size
** @attr compressed_block_size [int] Compressed block size
** @attr block_length [int] Block length
** @attr block_offset [int] Block offset
** @attr open_mode [char] Open mode, 'r' or 'w'
** @attr owned_file [char] Boolean
** @attr is_uncompressed [char] Boolean
** @attr Padding [char[5]] Padding; with the three char members above this
**                         makes eight bytes
**
** @@
** discussion block_address and block_offset are both described as
**  "Block offset". NOTE(review): presumably block_address is the position
**  of the current block in the file and block_offset the position inside
**  that block - confirm against the implementation.
******************************************************************************/

typedef struct AjSSeqBamBgzf
{
    FILE* file;
    AjPTable cache;
    void* uncompressed_block;
    void* compressed_block;
    const char* error;
    ajlong block_address;
    int file_descriptor;
    int cache_size;
    int uncompressed_block_size;
    int compressed_block_size;
    int block_length;
    int block_offset;
    char open_mode;
    char owned_file;
    char is_uncompressed;
    char Padding[5];
} AjOSeqBamBgzf;

/*
** The pointer type is a macro, not a typedef: "const AjPSeqBamBgzf p"
** expands to "const AjOSeqBamBgzf* p", i.e. a pointer to a const object
** rather than a const pointer.
*/
#define AjPSeqBamBgzf AjOSeqBamBgzf*




/*
** Defined with no value and not referenced elsewhere in this header.
** NOTE(review): presumably a feature switch tested with #ifdef in the
** implementation - confirm.
*/
#define BAM_VIRTUAL_OFFSET16




/* #abstract BAM file handler */

/* @data AjPSeqBamHeader ******************************************************
**
** BAM alignment file header data
**
** @alias AjOSeqBamHeader
** @alias AjSSeqBamheader
**
** @attr  target_name [char**] names of the reference sequences
** @attr  target_len  [ajuint*] lengths of the reference sequences
** @attr  dict        [AjPList] header dictionary
** @attr  hash        [AjPTable] hash table for fast name lookup
** @attr  rg2lib      [AjPTable] hash table for @RG-ID -> LB lookup
** @attr  text        [char*] plain text
** @attr  n_targets   [ajint] number of reference sequences
** @attr  l_text      [ajint] length of the plain text in the header
**
** @@
** discussion Field hash is null by default. It is a private member.
**
**  The struct tag is spelled AjSSeqBamheader (lower-case 'h'), unlike the
**  AjOSeqBamHeader / AjPSeqBamHeader type names.
******************************************************************************/

typedef struct AjSSeqBamheader
{
    char **target_name;
    ajuint *target_len;
    AjPList dict;
    AjPTable hash;
    AjPTable rg2lib;
    char *text;
    ajint n_targets;
    ajint l_text;
} AjOSeqBamHeader;

/* Macro, not typedef: "const AjPSeqBamHeader h" is a pointer to const */
#define AjPSeqBamHeader AjOSeqBamHeader*




/* @data AjPSeqBamCore ********************************************************
**
** Structure for core alignment information.
**
** @alias AjOSeqBamCore
** @alias AjSBamSeqCore
**
** @attr  tid     [ajint]  read ID, defined by AjPSeqBamHeader
** @attr  pos     [ajint]  0-based leftmost coordinate
** @attr  bin     [ajushort]  bin calculated by ajSeqBamReg2bin()
** @attr  qual    [unsigned char]  mapping quality
** @attr  l_qname [unsigned char]  length of the query name
** @attr  flag    [ajushort]  bitwise flag (BAM_F* values)
** @attr  n_cigar [ajushort]  number of CIGAR operations
** @attr  l_qseq  [ajint]  length of the query sequence (read)
** @attr  mtid    [ajint]  paired read (mate) ID
** @attr  mpos    [ajint]  paired read (mate) position
** @attr  isize   [ajint]  insert size for paired reads
**
** @@
** discussion BAM_CORE_SIZE is sizeof this structure, so the member order
**  and types must not be changed.
**
**  The struct tag is spelled AjSBamSeqCore ("BamSeq"), unlike the
**  AjOSeqBamCore / AjPSeqBamCore type names.
**
**  NOTE(review): tid and mtid are presumably indexes into the header's
**  target_name / target_len arrays (reference sequences) - confirm.
******************************************************************************/

typedef struct AjSBamSeqCore
{
    ajint tid;
    ajint pos;
    ajushort bin;
    unsigned char qual;
    unsigned char l_qname;
    ajushort flag;
    ajushort n_cigar;
    ajint l_qseq;
    ajint mtid;
    ajint mpos;
    ajint isize;
} AjOSeqBamCore;

/* Macro, not typedef: "const AjPSeqBamCore c" is a pointer to const */
#define AjPSeqBamCore AjOSeqBamCore*




/* @data AjPSeqBam ************************************************************
**
** Structure for one alignment.
**
** @alias AjSSeqBam
** @alias AjOSeqBam
**
** @attr  core      [AjOSeqBamCore]  core information about the alignment
** @attr  data      [unsigned char*] all variable-length data, concatenated;
**                             structure: qname-cigar-seq-qual-aux
** @attr  l_aux      [int]  length of auxiliary data
** @attr  data_len   [int]  current length of data
** @attr  m_data     [int]  maximum reserved size of data
** @attr  Padding    [int]  Padding to alignment boundary
**
** @@
** discussion Notes:
**
**   1. qname is null-terminated and core.l_qname includes the terminating
**      '\0'.
**
**   2. l_qseq is calculated from the total length of an alignment block
**      on reading or from CIGAR.
**
**   3. The order within data is the one the MAJSEQBAM* accessor macros
**      use: the query name starts at data, the CIGAR array at
**      data + l_qname, the packed sequence after n_cigar*4 further bytes,
**      then the qualities and finally the auxiliary data.
******************************************************************************/

typedef struct AjSSeqBam
{
    AjOSeqBamCore core;
    unsigned char *data;
    int l_aux;
    int data_len;
    int m_data;
    int Padding;
} AjOSeqBam;

/* Macro, not typedef: "const AjPSeqBam b" is a pointer to const */
#define AjPSeqBam AjOSeqBam*



/* ========================================================================= */
/* =========================== public functions ============================ */
/* ========================================================================= */




/*
**  Strand tests on the alignment flag
**  param  b  pointer to an alignment
**  return    1 if the reverse-strand bit is set, 0 otherwise
**
**  MAJSEQBAMSTRAND tests the read itself (BAM_FREVERSE);
**  MAJSEQBAMMSTRAND tests its mate (BAM_FMREVERSE).
*/
#define MAJSEQBAMSTRAND(b)  (((b)->core.flag & BAM_FREVERSE) ? 1 : 0)
#define MAJSEQBAMMSTRAND(b) (((b)->core.flag & BAM_FMREVERSE) ? 1 : 0)




/*
**  Locate the CIGAR array of an alignment
**  param  b  pointer to an alignment
**  return    pointer (ajuint*) to the first CIGAR element
**
**  The array starts core.l_qname bytes into the data buffer, directly
**  after the query name. Every element is one 32-bit integer: the low
**  4 bits hold the CIGAR operation and the high 28 bits hold its length.
*/
#define MAJSEQBAMCIGAR(b) ((ajuint*)(&(b)->data[(b)->core.l_qname]))


/*
**  Locate the query name of an alignment
**  param  b  pointer to an alignment
**  return    the start of the data buffer as a char*; the name stored
**            there is null terminated
*/
#define MAJSEQBAMQNAME(b) ((char*) (b)->data)


/*
**  Locate the packed query sequence of an alignment
**  param  b  pointer to an alignment
**  return    pointer to the first sequence byte
**
**  The sequence follows the query name (core.l_qname bytes) and the CIGAR
**  array (4 bytes for each of the core.n_cigar elements).
**
**  Bases use 4 bits each: 1 for A, 2 for C, 4 for G, 8 for T and 15 for N.
**  A byte carries two bases, and the base in the higher 4 bits is the one
**  with the smaller coordinate on the read. Using the MAJSEQBAMSEQI()
**  macro to extract a base is recommended.
*/
#define MAJSEQBAMSEQ(b)                                                 \
    ((b)->data + (b)->core.n_cigar * 4 + (b)->core.l_qname)


/*
**  Locate the query quality values of an alignment
**  param  b  pointer to an alignment
**  return    pointer to the quality string
**
**  The qualities follow the packed sequence, which takes
**  (core.l_qseq + 1)/2 bytes because two bases share one byte.
*/
#define MAJSEQBAMQUAL(b)                                                \
    ((b)->data + (b)->core.n_cigar * 4 + (b)->core.l_qname +            \
     ((b)->core.l_qseq + 1) / 2)

/*
**  Extract one base from a packed query sequence
**  param  s  Query sequence as returned by MAJSEQBAMSEQ()
**  param  i  Position of the base, 0-based
**  return    4-bit integer representing the base.
**
**  Byte i/2 holds the base; an even i selects the high 4 bits (shift
**  by 4) and an odd i the low 4 bits (shift by 0). The argument i is
**  expanded twice, so it must not have side effects.
*/
#define MAJSEQBAMSEQI(s, i) (((s)[(i) / 2] >> (4 * (1 - (i) % 2))) & 0xf)

/*
**  Locate the auxiliary data of an alignment
**  param  b  pointer to an alignment
**  return    pointer to the concatenated auxiliary data
**
**  The auxiliary data is the last section of the data buffer. It follows
**  the query name, the CIGAR array (4 bytes per element), the packed
**  sequence ((core.l_qseq + 1)/2 bytes) and core.l_qseq bytes of
**  quality values.
*/
#define MAJSEQBAMAUX(b)                                                 \
    ((b)->data + (b)->core.n_cigar * 4 + (b)->core.l_qname +            \
     (b)->core.l_qseq + ((b)->core.l_qseq + 1) / 2)

#ifndef kroundup32
/*
**  Round a 32-bit integer up to a power of two.
**  param  x  integer lvalue to be rounded; it is modified in place
**
**  After the initial decrement, the highest set bit is copied into every
**  lower bit position (shifts by 1, 2, 4, 8 and 16 cover 32 bits); the
**  final increment then gives the power of two. A value that is already
**  a power of two is left unchanged.
**
**  Only defined here when no kroundup32 macro already exists.
*/
#define kroundup32(x)                                                   \
    (--(x),                                                             \
     (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4,                 \
     (x) |= (x) >> 8, (x) |= (x) >> 16,                                 \
     ++(x))
#endif /* !kroundup32 */




/*
** Prototype definitions
**
** AjPSeqBam, AjPSeqBamCore, AjPSeqBamHeader and AjPSeqBamBgzf are macros
** expanding to "AjOSeq...*", so a parameter written "const AjPSeqBam b"
** is a pointer to a const AjOSeqBam.
*/

/* Auxiliary data of an alignment */
void ajSeqBamAuxAppend(AjPSeqBam b, const char tag[2],
		       char type, int len, const unsigned char* data);

/* BGZF file objects: constructors */
AjPSeqBamBgzf ajSeqBamBgzfNew(FILE* file, const char* mode);

AjPSeqBamBgzf ajSeqBamBgzfOpenfd(int fd, const char *mode);
AjPSeqBamBgzf ajSeqBamBgzfOpenC(const char* path, const char *mode);

ajuint ajSeqBamCalend(const AjPSeqBamCore c, const ajuint *cigar);

/* BGZF file objects: operations on an open file */
int ajSeqBamBgzfClose(AjPSeqBamBgzf fp);
int ajSeqBamBgzfEof(AjPSeqBamBgzf fp);
int ajSeqBamBgzfFlush(AjPSeqBamBgzf fp);
int ajSeqBamBgzfRead(AjPSeqBamBgzf fp, void* data, int length);
ajlong ajSeqBamBgzfSeek(AjPSeqBamBgzf fp, ajlong pos, int where);
/* NOTE(review): the file parameter of the SetInfile variant is also named */
/* "outf" - presumably it is an input file; confirm against the definition */
AjBool ajSeqBamBgzfSetInfile(AjPSeqBamBgzf gzfile, AjPFile outf);
AjBool ajSeqBamBgzfSetOutfile(AjPSeqBamBgzf gzfile, AjPFile outf);
int ajSeqBamBgzfWrite(AjPSeqBamBgzf fp, const void* data, int length);

/* Alignment destructor; takes the address of the pointer */
void ajSeqBamDel(AjPSeqBam *Pbam);

/* BAM header objects */
AjPSeqBamHeader ajSeqBamHeaderNew(void);
AjPSeqBamHeader ajSeqBamHeaderNewN(ajint n);
AjPSeqBamHeader ajSeqBamHeaderNewTextC(const char* txt);
AjPSeqBamHeader ajSeqBamHeaderRead(AjPSeqBamBgzf gzfile);
void ajSeqBamHeaderDel(AjPSeqBamHeader *Pheader);
int ajSeqBamHeaderWrite(AjPSeqBamBgzf fp, const AjPSeqBamHeader header);
AjPTable ajSeqBamHeaderGetRefseqTags(const AjPSeqBamHeader header);
AjPTable ajSeqBamHeaderGetReadgroupTags(const AjPSeqBamHeader header);
const char* ajSeqBamHeaderGetSortorder(const AjPSeqBamHeader header);
void ajSeqBamHeaderSetTextC(AjPSeqBamHeader header, const char* txt);

/* Alignment records */
int ajSeqBamRead(AjPSeqBamBgzf fp, AjPSeqBam b);

/* Computes the value stored in the core record's bin member */
int ajSeqBamReg2bin(ajuint beg, ajuint end);

int ajSeqBamValidate(const AjPSeqBamHeader header, const AjPSeqBam b);
int ajSeqBamWrite(AjPSeqBamBgzf fp, const AjPSeqBam b);

/* Takes a non-const header, unlike the other header query functions */
const char *ajSeqBamGetLibrary(AjPSeqBamHeader header, const AjPSeqBam b);

AjPTable ajSeqBamHeaderLineParse(const char *headerLine);

/*
** End of prototype definitions
*/




AJ_END_DECLS

#endif /* !AJSEQBAM_H */