This file is indexed.

/usr/include/Bpp/Seq/SequenceTools.h is in libbpp-seq-dev 2.1.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
//
// File: SequenceTools.h
// Authors: Guillaume Deuchst
//          Julien Dutheil
//          Sylvain Gaillard
// Created on: Tue Aug 21 2003
//

/*
   Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

   This software is a computer program whose purpose is to provide classes
   for sequences analysis.

   This software is governed by the CeCILL  license under French law and
   abiding by the rules of distribution of free software.  You can  use,
   modify and/ or redistribute the software under the terms of the CeCILL
   license as circulated by CEA, CNRS and INRIA at the following URL
   "http://www.cecill.info".

   As a counterpart to the access to the source code and  rights to copy,
   modify and redistribute granted by the license, users are provided only
   with a limited warranty  and the software's author,  the holder of the
   economic rights,  and the successive licensors  have only  limited
   liability.

   In this respect, the user's attention is drawn to the risks associated
   with loading,  using,  modifying and/or developing or reproducing the
   software by the user in light of its specific status of free software,
   that may mean  that it is complicated to manipulate,  and  that  also
   therefore means  that it is reserved for developers  and  experienced
   professionals having in-depth computer knowledge. Users are therefore
   encouraged to load and test the software's suitability as regards their
   requirements in conditions enabling the security of their systems and/or
   data to be ensured and,  more generally, to use and operate it in the
   same conditions as regards security.

   The fact that you are presently reading this means that you have had
   knowledge of the CeCILL license and that you accept its terms.
 */

#ifndef _SEQUENCETOOLS_H_
#define _SEQUENCETOOLS_H_

#include "Alphabet/Alphabet.h"
#include "Alphabet/DNA.h"
#include "Alphabet/RNA.h"
#include "Alphabet/RNY.h"
#include "Sequence.h"
#include "SymbolListTools.h"
#include "NucleicAcidsReplication.h"
#include <Bpp/Exceptions.h>
#include <Bpp/Numeric/Random/RandomTools.h>
#include <Bpp/Numeric/Stat/StatTest.h>

// From the STL:
#include <string>
#include <map>
#include <vector>
#include <algorithm>

namespace bpp
{
/**
 * @brief Bowker's homogeneity test results class.
 */
class BowkerTest :
  public StatTest
{
private:
  double pvalue_;
  double stat_;

public:
  BowkerTest() : pvalue_(1.),
    stat_(0.) {}

  virtual ~BowkerTest() {}

  BowkerTest* clone() const { return new BowkerTest(*this); }

public:
  std::string getName() const { return "Bowker's test for homogeneity."; }
  double getStatistic() const { return stat_; }
  double getPValue() const { return pvalue_; }

  void setStatistic(double stat) { stat_ = stat; }
  void setPValue(double pvalue) { pvalue_ = pvalue; }
};

/**
 * @brief SequenceTools static class
 *
 * Implement methods to manipulate sequences
 */
class SequenceTools :
  public SymbolListTools
{
private:
  static DNA _DNA;
  static RNA _RNA;
  static RNY _RNY;
  static NucleicAcidsReplication _DNARep;
  static NucleicAcidsReplication _RNARep;
  static NucleicAcidsReplication _transc;

public:
  SequenceTools() {}
  virtual ~SequenceTools() {}

public:
  /**
   * @brief Get a sub-sequence.
   *
   * @param sequence The sequence to trunc.
   * @param begin The first position of the subsequence.
   * @param end   The last position of the subsequence.
   * @return A new sequence object with the given subsequence.
   * @throw IndexOutOfBoundsException, Exception In case of bad indices.
   */
  static Sequence* subseq(const Sequence& sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception);

  /**
   * @brief Concatenate two sequences.
   *
   * Sequences must have the same name and alphabets.
   * Only first sequence's commentaries are kept.
   *
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @return A new sequence object with the concatenation of the two sequences.
   * @throw AlphabetMismatchException If the two alphabets do not match.
   * @throw Exception If the sequence names do not match.
   */
  static Sequence* concatenate(const Sequence& seq1, const Sequence& seq2)
  throw (AlphabetMismatchException, Exception);

  /**
   * @brief Complement the nucleotide sequence itself
   *
   * @param seq The sequence to be complemented.
   * @return A ref toward the complemented sequence.
   * @throw AlphabetException if the sequence is not a nucleotide sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& complement(Sequence& seq) throw (AlphabetException);

  /**
   * @brief Get the complementary sequence of a nucleotide sequence.
   *
   * @see DNAReplication
   * @return A new sequence object with the complementary sequence.
   * @param sequence The sequence to complement.
   * @throw AlphabetException If the sequence is not a nucleotide sequence.
   */
  static Sequence* getComplement(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Get the transcription sequence of a DNA sequence.
   *
   * Translate DNA sequence into RNA sequence.
   *
   * @see DNAReplication
   * @return sequence A new sequence object with the transcription sequence.
   * @param sequence The sequence to transcript.
   * @throw AlphabetException If the sequence is not a DNA sequence.
   */
  static Sequence* transcript(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Get the reverse-transcription sequence of a RNA sequence.
   *
   * Translate RNA sequence into DNA sequence.
   *
   * @see DNAReplication
   * @return sequence A new sequence object with the reverse-transcription sequence.
   * @param sequence The sequence to reverse-transcript.
   * @throw AlphabetException If the sequence is not a RNA sequence.
   */
  static Sequence* reverseTranscript(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
   *
   * ABCDEF becomes FEDCBA, and the sense attribute is changed (may be
   * inhibited).
   *
   * @param seq The sequence to inverse.
   * @return A ref toward the sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& invert(Sequence& seq);

  /**
   * @brief Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
   *
   * ABCDEF becomes FEDCBA, and the sense attribute is changed (may be
   * inhibited).
   *
   * @param sequence The sequence to inverse.
   * @return A new sequence object containing the inverted sequence.
   * @author Sylvain Gaillard
   */
  static Sequence* getInvert(const Sequence& sequence);

  /**
   * @brief Inverse and complement a sequence.
   *
   * This methode is more accurate than calling invert and complement
   * separatly.
   *
   * @param seq The sequence to inverse and complement.
   * @return A ref toward the sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& invertComplement(Sequence& seq);

  /**
   * @return The identity percent of 2 sequence.
   * One match is counted if the two sequences have identical states.
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @param ignoreGaps If true, only positions without gaps will be used for the counting.
   * @throw AlphabetMismatchException If the two sequences do not have the same alphabet.
   * @throw SequenceNotAlignedException If the two sequences do not have the same length.
   */
  static double getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps = false) throw (AlphabetMismatchException, SequenceNotAlignedException);

  /**
   * @return The number of sites in the sequences, <i>i.e.</i> all positions without gaps.
   *
   * @param seq The sequence to analyse.
   */
  static size_t getNumberOfSites(const Sequence& seq);

  /**
   * @return The number of complete sites in the sequences, <i>i.e.</i> all positions without gaps and unresolved states (generic characters).
   *
   * @param seq The sequence to analyse.
   */
  static size_t getNumberOfCompleteSites(const Sequence& seq);

  /**
   * @return The number of unresolved sites in the sequence.
   *
   * @param seq The sequence to analyse.
   *
   * @author Sylvain Gaillard
   */
  static size_t getNumberOfUnresolvedSites(const Sequence& seq);

  /**
   * @brief Remove gaps from a sequence.
   *
   * The deleteElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   */
  static void removeGaps(Sequence& seq);

  /**
   * @brief Get a copy of the sequence without gaps.
   *
   * A whole new sequence will be created by adding all non-gap positions.
   * The original sequence will be cloned to serve as a template.
   *
   * @param seq The sequence to analyse.
   * @return A new sequence object without gaps.
   */
  static Sequence* getSequenceWithoutGaps(const Sequence& seq);

  /**
   * @brief Remove stops from a codon sequence.
   *
   * The deleteElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   * @throw Exception if the input sequence does not have a codon alphabet.
   */
  static void removeStops(Sequence& seq) throw (Exception);

  /**
   * @brief Get a copy of the codon sequence without stops.
   *
   * A whole new sequence will be created by adding all non-stop positions.
   * The original sequence will be cloned to serve as a template.
   *
   * @param seq The sequence to analyse.
   * @return A new sequence object without stops.
   * @throw Exception if the input sequence does not have a codon alphabet.
   */
  static Sequence* getSequenceWithoutStops(const Sequence& seq) throw (Exception);

  /**
   * @brief Replace stop codons by gaps.
   *
   * The setElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   * @throw Exception if the input sequence does not have a codon alphabet.
   */
  static void replaceStopsWithGaps(Sequence& seq) throw (Exception);

  /**
   * @brief Bowker's test for homogeneity.
   *
   * Computes the contingency table of occurrence of all pairs of states and test its symmetry using Bowker's (1948) test.
   *
   * Reference:<br>
   * @code
   * Ababneh F. Bioinformatics 2006 22(10) 1225-1231
   * @endcode
   *
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @return A BowkerTest object with the computed statistic and p-value (computed from a chi square distribution).
   * @throw SequenceNotAlignedException If the two sequences do not have the same length.
   */
  static BowkerTest* bowkerTest(const Sequence& seq1, const Sequence& seq2) throw (SequenceNotAlignedException);

  /**
   * @brief Get all putatives haplotypes from an heterozygous sequence.
   *
   * @param seq The sequence to resolve
   * @param hap The vector to fill with the new sequences
   * @param level The maximum number of states that a generic char must code
   * (if this number is higher than level, the state will not be resolved).
   * For instance if level = 3 and Alphabet is DNA, all generic char will be
   * resolved but N.
   *
   * @author Sylvain Gaillard
   */
  static void getPutativeHaplotypes(const Sequence& seq, std::vector<Sequence*>& hap, unsigned int level = 2);

  /**
   * @brief Combine two sequences.
   *
   * @author Sylvain Gaillard
   */

  static Sequence* combineSequences(const Sequence& s1, const Sequence& s2) throw (AlphabetMismatchException);

  /**
   * @brief Subtract haplotype from an heterozygous sequence.
   *
   * Subtract an haplotype (i.e. a fully resolved sequence) from an heterozygous
   * sequence to get the other haplotype. The new haplotype could be an unresolved
   * sequence if unresolved characters in the sequence code for more than 2 states.
   *
   * For example:<br>
   * @code
   * >heterozygous sequence
   * ATTCGGGKWTATRYRM
   * >haplotype
   * ATTCGGGTATATGCAA
   * >subtracted haplotype
   * ATTCGGGGTTATATGC
   * @endcode
   *
   * @param s The heterozygous sequence.
   * @param h The haplotype to subtract.
   * @param name The name of the new computed haplotype.
   * @param level The number of states from which the site is set to fully unresolved.
   * @throw SequenceNotAlignedException if s and h don't have the same size.
   *
   * @author Sylvain Gaillard
   */
  static Sequence* subtractHaplotype(const Sequence& s, const Sequence& h, std::string name = "", unsigned int level = 1) throw (SequenceNotAlignedException);

  /**
   * @brief Get the RNY decomposition of a DNA sequence; with a given
   * phase between 1 and 3, it gives the decomposition in this phase;
   * in phase 1, the first triplet is centered on the first character.
   * Without a phase the function gives the alternative succession in
   * phases 1, 2 and 3.
   *
   * @return sequence A new sequence object with the transcription sequence.
   * @param sequence The sequence to transcript.
   * @param ph The phase to use (1,2 or 3).
   * @throw AlphabetException If the sequence is not a DNA sequence.
   *
   * @author Laurent Guéguen
   */
  static Sequence* RNYslice(const Sequence& sequence, int ph) throw (AlphabetException);
  static Sequence* RNYslice(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Extract CDS part from a codon sequence. Optionally check for intiator and stop codons, or both.
   *
   * @param sequence The sequence to be reduced to CDS part.
   * @param checkInit If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false.
   * @param checkStop If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false.
   * @param includeInit Tell if initiator codon should be kept or removed. No effect if checkInit is false.
   * @param includeStop Tell if stop codon should be kept or removed. No effect if checkStop is false.
   */
  static void getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);

  /**
   * @brief Find the position of a motif in a sequence
   *
   * @param seq The reference sequence
   * @param motif The motif to find
   * @param strict If true (default) find exactly the motif
   *               If false find compatible match
   * @return The position of the first occurence of the motif or the seq
   * length.
   */
  static size_t findFirstOf(const Sequence& seq, const Sequence& motif, bool strict = true);
};
} // end of namespace bpp.

#endif // _SEQUENCETOOLS_H_