This file is indexed.

/usr/include/shogun/lib/DelimiterTokenizer.h is in libshogun-dev 3.2.0-7.5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Evangelos Anagnostopoulos
 * Copyright (C) 2013 Evangelos Anagnostopoulos
 */

#ifndef _DELIMITERTOKENIZER__H__
#define	_DELIMITERTOKENIZER__H__

#include <shogun/lib/Tokenizer.h>

namespace shogun
{
/* Forward declaration of the base class. CDelimiterTokenizer derives from
 * CTokenizer below, which requires the complete type; presumably that is
 * supplied by the <shogun/lib/Tokenizer.h> include above -- TODO confirm.
 */
class CTokenizer;

/** @brief The class CDelimiterTokenizer is used to tokenize
 *  a SGVector<char> into tokens, using custom characters as delimiters.
 *
 *  Delimiters are selected by setting to 1 the appropriate index of the
 *  public field delimiters. E.g. to use the character ':' as a delimiter,
 *  one should do: tokenizer->delimiters[':'] = 1;
 *
 *  Typical use, as far as this header shows: hand over the text with
 *  set_text(), then call next_token_idx() for as long as has_next()
 *  returns true.
 */
class CDelimiterTokenizer: public CTokenizer
{
public:
	/** default constructor
	 *
	 * NOTE(review): the constructor is not declared explicit, so a bool
	 * converts implicitly to a CDelimiterTokenizer.
	 *
	 * @param skip_delimiters whether to skip consecutive delimiters or not
	 *        (defaults to false)
	 */
	CDelimiterTokenizer(bool skip_delimiters = false);

	/** copy constructor
	 *
	 * @param orig the original DelimiterTokenizer to copy from
	 */
	CDelimiterTokenizer(const CDelimiterTokenizer& orig);

	/** destructor (virtual, empty body defined inline) */
	virtual ~CDelimiterTokenizer() {}

	/** Sets the char array that requires tokenization.
	 *
	 * @param txt the text to tokenize
	 */
	virtual void set_text(SGVector<char> txt);

	/** Tells whether there exists another token in the text.
	 *
	 * @return true if another token exists, false otherwise
	 */
	virtual bool has_next();

	/** Returns the indices, start and end, of the next token in line.
	 *  The start index is handed back through the reference parameter,
	 *  the end index through the return value.
	 *  If the next token starts with a delimiter and
	 *  skip_consecutive_delimiters is false, the same index is returned
	 *  for both start and end.
	 *
	 * @param start output parameter: the token's starting index
	 * @return the token's ending index (exclusive)
	 */
	virtual index_t next_token_idx(index_t& start);

	/** Returns the name of the SGSerializable instance.  It MUST BE
	 * the CLASS NAME without the prefixed 'C'.
	 *
	 * @return name of the SGSerializable
	 */
	virtual const char* get_name() const;

	/** Makes the tokenizer use ' ' or '\\t'
	 *  as the delimiters for the tokenization process.
	 */
	void init_for_whitespace();

	/** Returns a pointer to a CDelimiterTokenizer.
	 *
	 * NOTE(review): judging by the name this is a newly created copy of
	 * this tokenizer; the implementation is not part of this header, so
	 * what exactly is copied and who owns / releases the result should be
	 * confirmed against the .cpp file.
	 *
	 * @return pointer to a CDelimiterTokenizer
	 */
	CDelimiterTokenizer* get_copy();

	/** Resets the delimiters */
	void clear_delimiters();

	/** Get skip_consecutive_delimiters
	 *
	 * @return whether skipping of consecutive delimiters is enabled
	 */
	bool get_skip_delimiters() const;

	/** Set the value of skip_consecutive_delimiters
	 *
	 * @param skip_delimiters whether to skip consecutive delimiters or not
	 */
	void set_skip_delimiters(bool skip_delimiters);

private:
	/** Private helper whose implementation is not visible in this header;
	 *  presumably the common member setup shared by the constructors --
	 *  TODO confirm against the .cpp file.
	 */
	void init();

public:
	/** delimiters: a character is treated as a delimiter when the entry
	 *  at its index is set (e.g. delimiters[':'] = 1). The vector's length
	 *  is not established by this header.
	 */
	SGVector<bool> delimiters;

protected:
	/** index of last token */
	index_t last_idx;

	/** whether to skip consecutive delimiters or not */
	bool skip_consecutive_delimiters;
};
}
#endif	/* _DELIMITERTOKENIZER__H__ */