This file is indexed.

/usr/include/dcmtk/dcmdata/dcspchrs.h is in libdcmtk-dev 3.6.1~20160216-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/*
 *
 *  Copyright (C) 2011-2015, OFFIS e.V.
 *  All rights reserved.  See COPYRIGHT file for details.
 *
 *  This software and supporting documentation were developed by
 *
 *    OFFIS e.V.
 *    R&D Division Health
 *    Escherweg 2
 *    D-26121 Oldenburg, Germany
 *
 *
 *  Module:  dcmdata
 *
 *  Author:  Joerg Riesmeier
 *
 *  Purpose: Class for supporting the Specific Character Set attribute
 *
 */


#ifndef DCSPCHRS_H
#define DCSPCHRS_H

#include "dcmtk/config/osconfig.h"

#include "dcmtk/ofstd/ofchrenc.h"
#include "dcmtk/ofstd/ofmap.h"
#include "dcmtk/dcmdata/dcdefine.h"


// forward declaration
class DcmItem;


/** A class for managing and converting between different DICOM character sets.
 *  The conversion relies on the OFCharacterEncoding class, which again relies
 *  on the libiconv toolkit (if available).
 *  @note Please note that a current limitation is that only a single value is
 *    allowed for the destination character set (i.e. no code extensions).  Of
 *    course, for the source character set, also multiple values are supported.
 */
class DCMTK_DCMDATA_EXPORT DcmSpecificCharacterSet
{

  public:

    /** constructor. Initializes the member variables.
     */
    DcmSpecificCharacterSet();

    /** destructor
     */
    ~DcmSpecificCharacterSet();

    /** clear the internal state.  This also forgets about the currently
     *  selected character sets, so selectCharacterSet() has to be called again
     *  before a string can be converted with convertString().
     */
    void clear();

    /** get currently selected source DICOM character set(s).  Please note that
     *  the returned string can contain multiple values (defined terms separated
     *  by a backslash) if code extension techniques are used.  Furthermore,
     *  the returned string is always normalized, i.e. leading and trailing
     *  spaces have been removed.
     *  @return currently selected source DICOM character set(s) or an empty
     *    string if none is selected (identical to ASCII, which is the default)
     */
    const OFString &getSourceCharacterSet() const;

    /** get currently selected destination DICOM character set.  Please note
     *  that the returned string, which contains a defined term, is always
     *  normalized, i.e. leading and trailing spaces have been removed.
     *  @return currently selected destination DICOM character set or an empty
     *    string if none is selected (identical to ASCII, which is the default)
     */
    const OFString &getDestinationCharacterSet() const;

    /** get currently selected destination encoding, i.e. the name of the
     *  character set as used by libiconv for the conversion.  If code
     *  extension techniques are used to switch between different character
     *  encodings, the main/default encoding is returned.
     *  @return currently selected destination encoding or an empty string if
     *    none is selected
     */
    const OFString &getDestinationEncoding() const;

    /** get mode specifying whether a character that cannot be represented in
     *  the destination character encoding is approximated through one or more
     *  characters that look similar to the original one.  See
     *  selectCharacterSet().
     *  @return current value of the mode.  OFTrue means that the mode is
     *    enabled, OFFalse means disabled.
     */
    OFBool getTransliterationMode() const;

    /** get mode specifying whether characters that cannot be represented in
     *  the destination character encoding will be silently discarded
     *  @return current value of the mode.  OFTrue means that the mode is
     *    enabled, OFFalse means disabled.
     */
    OFBool getDiscardIllegalSequenceMode() const;

    /** select DICOM character sets for the input and output string, between
     *  which subsequent calls of convertString() convert.  The defined terms
     *  for a particular character set can be found in the DICOM standard, e.g.
     *  "ISO_IR 100" for ISO 8859-1 (Latin 1) or "ISO_IR 192" for Unicode in
     *  UTF-8.  An empty string denotes the default character repertoire, which
     *  is ASCII (7-bit).  If multiple values are given for 'fromCharset'
     *  (separated by a backslash) code extension techniques are used and
     *  escape sequences may be encountered in the source string to switch
     *  between the specified character sets.
     *  @param  fromCharset     name of the source character set(s) used for the
     *                          input string as given in the DICOM attribute
     *                          Specific Character Set (0008,0005).  Leading and
     *                          trailing spaces are removed automatically (if
     *                          present).
     *  @param  toCharset       name of the destination character set used for
     *                          the output string.  Only a single value is
     *                          permitted (no code extensions).  Leading and
     *                          trailing spaces are removed automatically (if
     *                          present).  The default value is "ISO_IR 192"
     *                          (Unicode in UTF-8).
     *  @param  transliterate   mode specifying whether a character that cannot
     *                          be represented in the destination character
     *                          encoding is approximated through one or more
     *                          characters that look similar to the original
     *                          one.  By default, this mode is disabled.
     *  @param  discardIllegal  mode specifying whether characters that cannot
     *                          be represented in the destination character
     *                          encoding will be silently discarded.  By
     *                          default, this mode is disabled.
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition selectCharacterSet(const OFString &fromCharset,
                                   const OFString &toCharset = "ISO_IR 192",
                                   const OFBool transliterate = OFFalse,
                                   const OFBool discardIllegal = OFFalse);

    /** select DICOM character sets for the input and output string, between
     *  which subsequent calls of convertString() convert.  The source
     *  character set is determined from the DICOM element Specific Character
     *  Set (0008,0005) stored in the given dataset/item.  The defined terms
     *  for the destination character set can be found in the DICOM standard,
     *  e.g. "ISO_IR 100" for ISO 8859-1 (Latin 1) or "ISO_IR 192" for Unicode
     *  in UTF-8.  An empty string denotes the default character repertoire,
     *  which is ASCII (7-bit).  If multiple values are found in the Specific
     *  Character Set element of the given 'dataset' (separated by a backslash)
     *  code extension techniques are used and escape sequences may be
     *  encountered in the source string to switch between the specified
     *  character sets.
     *  @param  dataset         DICOM dataset or item from which the source
     *                          character set should be retrieved.  If the data
     *                          element Specific Character Set (0008,0005) is
     *                          empty or missing, the default character set
     *                          (i.e. ASCII) is used.
     *  @param  toCharset       name of the destination character set used for
     *                          the output string.  Only a single value is
     *                          permitted (no code extensions).  Leading and
     *                          trailing spaces are removed automatically (if
     *                          present).  The default value is "ISO_IR 192"
     *                          (Unicode in UTF-8).
     *  @param  transliterate   mode specifying whether a character that cannot
     *                          be represented in the destination character
     *                          encoding is approximated through one or more
     *                          characters that look similar to the original
     *                          one.  By default, this mode is disabled.
     *  @param  discardIllegal  mode specifying whether characters that cannot
     *                          be represented in the destination character
     *                          encoding will be silently discarded.  By
     *                          default, this mode is disabled.
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition selectCharacterSet(DcmItem &dataset,
                                   const OFString &toCharset = "ISO_IR 192",
                                   const OFBool transliterate = OFFalse,
                                   const OFBool discardIllegal = OFFalse);

    /** convert the given string from the selected source character set(s) to
     *  the selected destination character set.  That means selectCharacterSet()
     *  has to be called prior to this method.
     *  @param  fromString  input string to be converted (using the currently
     *                      selected source character set)
     *  @param  toString    reference to variable where the converted string
     *                      (using the currently selected destination character
     *                      set) is stored
     *  @param  delimiters  optional string of characters that are regarded as
     *                      delimiters, i.e.\ when found the character set is
     *                      switched back to the default.  CR, LF and FF are
     *                      always regarded as delimiters (see DICOM PS 3.5).
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition convertString(const OFString &fromString,
                              OFString &toString,
                              const OFString &delimiters = "");

    /** convert the given string from the selected source character set(s) to
     *  the selected destination character set. That means selectCharacterSet()
     *  has to be called prior to this method.  Since the length of the input
     *  string has to be specified explicitly, the string can contain more than
     *  one NULL byte.
     *  @param  fromString  input string to be converted (using the currently
     *                      selected character set)
     *  @param  fromLength  length of the input string (number of bytes without
     *                      the trailing NULL byte)
     *  @param  toString    reference to variable where the converted string
     *                      (using the currently selected destination character
     *                      set) is stored
     *  @param  delimiters  optional string of characters that are regarded as
     *                      delimiters, i.e.\ when found the character set is
     *                      switched back to the default.  CR, LF and FF are
     *                      always regarded as delimiters (see DICOM PS 3.5).
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition convertString(const char *fromString,
                              const size_t fromLength,
                              OFString &toString,
                              const OFString &delimiters = "");

    // --- static helper functions ---

    /** check whether the underlying character set conversion library is
     *  available.  If the library is not available, no conversion between
     *  different character sets will be possible.
     *  @return OFTrue if the character set conversion library is available,
     *    OFFalse otherwise
     */
    static OFBool isConversionLibraryAvailable();

    /** count characters in given UTF-8 string and return the resulting number
     *  of so-called "code points".  Please note that invalid UTF-8 encodings
     *  are not handled properly.  ASCII strings (7-bit) are also supported,
     *  although OFString::length() is probably much faster.
     *  @param  utf8String  valid character string with UTF-8 encoding
     *  @return number of characters (code points) in given UTF-8 string
     */
    static size_t countCharactersInUTF8String(const OFString &utf8String);


  protected:

    /// type definition of a map storing the identifier (key) of a character
    /// set and the associated conversion descriptor
    typedef OFMap<OFString, OFCharacterEncoding::T_Descriptor> T_DescriptorMap;

    /** determine the destination character encoding (as used by libiconv) from
     *  the given DICOM defined term (specific character set), and set the
     *  member variables accordingly.
     *  @param  toCharset  name of the destination character set used for the
     *                     output string
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition determineDestinationEncoding(const OFString &toCharset);

    /** select a particular DICOM character set without code extensions for
     *  subsequent conversions.  The corresponding DICOM defined term for the
     *  source character set is determined from the member variable
     *  'SourceCharacterSet'.
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition selectCharacterSetWithoutCodeExtensions();

    /** select a particular DICOM character set with code extensions for
     *  subsequent conversions.  The corresponding DICOM defined terms for the
     *  source character set are determined from the member variable
     *  'SourceCharacterSet'.
     *  @param  sourceVM  value multiplicity of the member variable
     *                    'SourceCharacterSet'.  Usually, this value has
     *                    already been determined by the calling method.
     *  @return status, EC_Normal if successful, an error code otherwise
     */
    OFCondition selectCharacterSetWithCodeExtensions(const unsigned long sourceVM);

    /** close any currently open character set conversion descriptor(s).
     *  Afterwards, no conversion descriptor is selected, pretty much like
     *  after the initialization with the constructor.
     */
    void closeConversionDescriptors();

    /** check whether the given string contains at least one escape character
     *  (ESC), because it is used for code extension techniques like ISO 2022
     *  @param  strValue   input string to be checked for any escape character
     *  @param  strLength  length of the input string
     *  @return OFTrue if an escape character has been found, OFFalse otherwise
     */
    OFBool checkForEscapeCharacter(const char *strValue,
                                   const size_t strLength) const;

    /** convert given string to octal format, i.e.\ all non-ASCII and control
     *  characters are converted to their octal representation.  The total
     *  length of the string is always limited to a particular maximum (see
     *  implementation).  If the converted string would be longer, it is
     *  cropped and "..." is appended to indicate this cropping.
     *  @param  strValue   input string to be converted and possibly cropped
     *  @param  strLength  length of the input string
     *  @return resulting string in octal format
     */
    OFString convertToLengthLimitedOctalString(const char *strValue,
                                               const size_t strLength) const;


  private:

    // private undefined copy constructor
    DcmSpecificCharacterSet(const DcmSpecificCharacterSet &);

    // private undefined assignment operator
    DcmSpecificCharacterSet &operator=(const DcmSpecificCharacterSet &);

    /// selected source character set(s) based on one or more DICOM defined terms
    OFString SourceCharacterSet;

    /// selected destination character set based on a single DICOM defined term
    OFString DestinationCharacterSet;

    /// selected destination encoding based on names supported by the libiconv toolkit
    OFString DestinationEncoding;

    /// character encoding converter
    OFCharacterEncoding EncodingConverter;

    /// map of character set conversion descriptors
    /// (only used if multiple character sets are needed)
    T_DescriptorMap ConversionDescriptors;
};


#endif