This file is indexed.

/usr/include/rdkit/GraphMol/FileParsers/MolSupplier.h is in librdkit-dev 201603.5-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
//
//  Copyright (C) 2002-2013 greg landrum, Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#ifndef _RD_MOLSUPPLIER_H
#define _RD_MOLSUPPLIER_H

#include <RDGeneral/types.h>

#include <string>
#include <list>
#include <vector>
#include <iostream>
#include <GraphMol/ROMol.h>

namespace RDKit {
std::string strip(const std::string &orig);

/*!
//
//  Here are a couple of ways one can interact with MolSuppliers:
//
//  1) Lazy (ForwardIterator):
//     while(!supplier.atEnd()){
//       ROMol *mol = supplier.next();
//       if(mol){
//           do something;
//       }
//     }
//  2) Random Access:
//     for(int i=0;i<supplier.length();i++){
//       ROMol *mol = supplier[i];
//       if(mol){
//           do something;
//       }
//     }
//
//
*/
class MolSupplier {
  // this is an abstract base class to supply molecules one at a time
 public:
  MolSupplier(){};
  virtual ~MolSupplier(){};
  virtual void init() = 0;
  virtual void reset() = 0;
  virtual bool atEnd() = 0;
  virtual ROMol *next() = 0;

 private:
  // disable automatic copy constructors and assignment operators
  // for this class and its subclasses.  They will likely be
  // carrying around stream pointers and copying those is a recipe
  // for disaster.
  MolSupplier(const MolSupplier &);
  MolSupplier &operator=(const MolSupplier &);

 protected:
  // stream to read the molecules from:
  std::istream *dp_inStream;
  // do we own dp_inStream?
  bool df_owner;
};

// \brief a supplier from an SD file that only reads forward:
class ForwardSDMolSupplier : public MolSupplier {
  /*************************************************************************
   * A lazy mol supplier from a SD file.
   *  - When new molecules are read using "next" their positions in the file are
   *noted.
   ***********************************************************************************/
 public:
  ForwardSDMolSupplier() { init(); };

  explicit ForwardSDMolSupplier(std::istream *inStream,
                                bool takeOwnership = true, bool sanitize = true,
                                bool removeHs = true,
                                bool strictParsing = false);

  virtual ~ForwardSDMolSupplier() {
    if (df_owner && dp_inStream) {
      delete dp_inStream;
      df_owner = false;
      dp_inStream = NULL;
    }
  };

  virtual void init();
  virtual void reset();
  virtual ROMol *next();
  virtual bool atEnd();

 protected:
  virtual void checkForEnd();
  ROMol *_next();
  virtual void readMolProps(ROMol *);
  bool df_end;
  int d_line;  // line number we are currently on
  bool df_sanitize, df_removeHs, df_strictParsing;
};

// \brief a lazy supplier from an SD file
class SDMolSupplier : public ForwardSDMolSupplier {
  /*************************************************************************
   * A lazy mol supplier from a SD file.
   *  - When new molecules are read using "next" their positions in the file are
   *noted.
   *  - A call to the "length" will automatically parse the entire file and
   *cache all the mol
   *    block positions
   *  - [] operator is used to access a molecule at "idx", calling next
   *following this will result
   *    in the next molecule after "idx"
   ***********************************************************************************/

 public:
  SDMolSupplier() { init(); };

  /*!
   *   \param fileName - the name of the SD file
   *   \param sanitize - if true sanitize the molecule before returning it
   *   \param removeHs - if true remove Hs from the molecule before returning it
   *                     (triggers sanitization)
   *   \param strictParsing - if not set, the parser is more lax about
   * correctness
   *                          of the contents.
   */
  explicit SDMolSupplier(const std::string &fileName, bool sanitize = true,
                         bool removeHs = true, bool strictParsing = true);

  explicit SDMolSupplier(std::istream *inStream, bool takeOwnership = true,
                         bool sanitize = true, bool removeHs = true,
                         bool strictParsing = true);

  ~SDMolSupplier(){};
  void init();
  void reset();
  ROMol *next();
  bool atEnd();
  void moveTo(unsigned int idx);
  ROMol *operator[](unsigned int idx);
  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx);
  unsigned int length();
  void setData(const std::string &text, bool sanitize = true,
               bool removeHs = true);
  void setData(const std::string &text, bool sanitize, bool removeHs,
               bool strictParsing);

  /*! Resets our internal state and sets the indices of molecules in the stream.
   *  The client should be *very* careful about calling this method, as it's
   *trivial
   *  to end up with a completely useless supplier.
   *
   *   \param locs - the vector of stream positions.
   *
   *  Note that this can be used not only to make reading selected molecules
   *from a
   *  large SD file much faster, but it can also allow subsetting an SD file or
   *  rearranging the order of the molecules.
   */
  void setStreamIndices(const std::vector<std::streampos> &locs);

 private:
  void checkForEnd();
  void setDataCommon(const std::string &text, bool sanitize, bool removeHs);
  int d_len;   // total number of mol blocks in the file (initialized to -1)
  int d_last;  // the molecule we are ready to read
  std::vector<std::streampos> d_molpos;
};

//! lazy file parser for Smiles tables
class SmilesMolSupplier : public MolSupplier {
  /**************************************************************************
   * Lazy file parser for Smiles table file, similar to the lazy SD
   * file parser above
   * - As an when new molecules are read using "next" their
   *    positions in the file are noted.
   *  - A call to the "length" will autamatically parse the entire
   *    file and cache all the mol block positions
   *  - [] operator is used to access a molecule at "idx", calling
   *    next following this will result in the next molecule after
   *    "idx"
   ***************************************************************************/
 public:
  /*!
   *   \param fileName - the name of smiles table file
   *   \param delimiter - delimiting characters between records on a each
   *     line NOTE that this is not a string, the tokenizer looks for
   *     the individual characters in delimiter, not the full string
   *     itself.  So the default delimiter: " \t", means " " or "\t".
   *   \param smilesColumn - column number for the SMILES string (defaults
   *     to the first column)
   *   \param nameColumn - column number for the molecule name (defaults to
   *     the second column) If set to -1 we assume that no name is
   *     available for the molecule and the name is defaulted to the
   *     smiles string
   *   \param titleLine - if true, the first line is assumed to list the
   *     names of properties in order seperated by 'delimiter'. It is
   *     also assume that the 'SMILES' column and the 'name' column
   *     are not specified here if false - no title line is assumed
   *     and the properties are recorded as the "columnX" where "X" is
   *     the column number
   *   \param sanitize - if true sanitize the molecule before returning it
   */
  explicit SmilesMolSupplier(const std::string &fileName,
                             const std::string &delimiter = " \t",
                             int smilesColumn = 0, int nameColumn = 1,
                             bool titleLine = true, bool sanitize = true);
  SmilesMolSupplier();
  explicit SmilesMolSupplier(std::istream *inStream, bool takeOwnership = true,
                             const std::string &delimiter = " \t",
                             int smilesColumn = 0, int nameColumn = 1,
                             bool titleLine = true, bool sanitize = true);

  ~SmilesMolSupplier();
  void setData(const std::string &text, const std::string &delimiter = " ",
               int smilesColumn = 0, int nameColumn = 1, bool titleLine = true,
               bool sanitize = true);
  void init();
  void reset();
  ROMol *next();
  bool atEnd();
  void moveTo(unsigned int idx);
  ROMol *operator[](unsigned int idx);
  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx);
  unsigned int length();

 private:
  ROMol *processLine(std::string inLine);
  void processTitleLine();
  std::string nextLine();
  long int skipComments();
  void checkForEnd();

  bool df_end;  // have we reached the end of the file?
  int d_len;    // total number of smiles in the file
  int d_next;   // the  molecule we are ready to read
  int d_line;   // line number we are currently on
  std::vector<std::streampos>
      d_molpos;  // vector of positions in the file for molecules
  std::vector<int> d_lineNums;
  std::string d_delim;  // the delimiter string
  bool df_sanitize;     // sanitize molecules before returning them?
  STR_VECT d_props;     // vector of property names
  bool df_title;        // do we have a title line?
  int d_smi;            // column id for the smile string
  int d_name;           // column id for the name
};

//! lazy file parser for TDT files
class TDTMolSupplier : public MolSupplier {
  /**************************************************************************
   * Lazy file parser for TDT files, similar to the lazy SD
   * file parser above
   * - As an when new molecules are read using "next" their
   *    positions in the file are noted.
   *  - A call to the "length" will autamatically parse the entire
   *    file and cache all the mol block positions
   *  - [] operator is used to access a molecule at "idx", calling
   *    next following this will result in the next molecule after
   *    "idx"
   ***************************************************************************/
 public:
  /*!
   *   \param fileName - the name of the TDT file
   *   \param nameRecord - property name for the molecule name.
   *     If empty (the default), the name defaults to be empty
   *   \param confId2D - if >=0 and 2D coordinates are provided, the 2D
   *                   structure (depiction) in the input will be read into the
   *                   corresponding conformer id.
   *   \param confId3D - if >=0 and 3D coordinates are provided, the 3D
   *                   structure (depiction) in the input will be read into the
   *                   corresponding conformer id.
   *   \param sanitize - if true sanitize the molecule before returning it
   */
  explicit TDTMolSupplier(const std::string &fileName,
                          const std::string &nameRecord = "", int confId2D = -1,
                          int confId3D = 0, bool sanitize = true);
  explicit TDTMolSupplier(std::istream *inStream, bool takeOwnership = true,
                          const std::string &nameRecord = "", int confId2D = -1,
                          int confId3D = 0, bool sanitize = true);
  TDTMolSupplier();
  ~TDTMolSupplier();
  void setData(const std::string &text, const std::string &nameRecord = "",
               int confId2D = -1, int confId3D = 0, bool sanitize = true);
  void init();
  void reset();
  ROMol *next();
  bool atEnd();
  void moveTo(unsigned int idx);
  ROMol *operator[](unsigned int idx);
  /*! \brief returns the text block for a particular item
   *
   *  \param idx - which item to return
   */
  std::string getItemText(unsigned int idx);
  unsigned int length();

 private:
  bool advanceToNextRecord();
  void checkForEnd();
  ROMol *parseMol(std::string inLine);

  bool df_end;     // have we reached the end of the file?
  int d_len;       // total number of mols in the file
  int d_last;      // the molecule we are ready to read
  int d_line;      // line number we are currently on
  int d_confId2D;  // id to use for 2D conformers
  int d_confId3D;  // id to use for 3D conformers
  std::vector<std::streampos>
      d_molpos;            // vector of positions in the file for molecules
  bool df_sanitize;        // sanitize molecules before returning them?
  std::string d_nameProp;  // local storage for the property providing mol names
};

//! lazy file parser for PDB files
class PDBMolSupplier : public MolSupplier {
 public:
  explicit PDBMolSupplier(std::istream *inStream, bool takeOwnership = true,
                          bool sanitize = true, bool removeHs = true,
                          unsigned int flavor = 0);
  explicit PDBMolSupplier(const std::string &fname, bool sanitize = true,
                          bool removeHs = true, unsigned int flavor = 0);

  virtual ~PDBMolSupplier() {
    if (df_owner && dp_inStream) delete dp_inStream;
  };

  virtual void init();
  virtual void reset();
  virtual ROMol *next();
  virtual bool atEnd();

 protected:
  bool df_sanitize, df_removeHs;
  unsigned int d_flavor;
};
}

#endif