/usr/include/dcmtk/dcmdata/dcspchrs.h is in libdcmtk-dev 3.6.1~20160216-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
*
* Copyright (C) 2011-2015, OFFIS e.V.
* All rights reserved. See COPYRIGHT file for details.
*
* This software and supporting documentation were developed by
*
* OFFIS e.V.
* R&D Division Health
* Escherweg 2
* D-26121 Oldenburg, Germany
*
*
* Module: dcmdata
*
* Author: Joerg Riesmeier
*
* Purpose: Class for supporting the Specific Character Set attribute
*
*/
#ifndef DCSPCHRS_H
#define DCSPCHRS_H
#include "dcmtk/config/osconfig.h"
#include "dcmtk/ofstd/ofchrenc.h"
#include "dcmtk/ofstd/ofmap.h"
#include "dcmtk/dcmdata/dcdefine.h"
// forward declaration
class DcmItem;
/** A class for managing and converting between different DICOM character sets.
* The conversion relies on the OFCharacterEncoding class, which in turn relies
* on the libiconv toolkit (if available).
* The intended call order is: select the source and destination character
* sets once with selectCharacterSet(), then call convertString() for each
* string that has to be converted. After clear(), the selection has to be
* repeated.
* Instances cannot be copied, since the copy constructor and the assignment
* operator are declared private and left undefined.
* @note Please note that a current limitation is that only a single value is
* allowed for the destination character set (i.e. no code extensions). Of
* course, for the source character set, also multiple values are supported.
* @note NOTE(review): the destructor is not virtual although the class offers
* a protected interface; presumably objects of derived classes are not meant
* to be deleted through a pointer to this class - to be confirmed.
*/
class DCMTK_DCMDATA_EXPORT DcmSpecificCharacterSet
{
public:
/** constructor. Initializes the member variables.
*/
DcmSpecificCharacterSet();
/** destructor
*/
~DcmSpecificCharacterSet();
/** clear the internal state. This also forgets about the currently
* selected character sets, so selectCharacterSet() has to be called again
* before a string can be converted with convertString().
*/
void clear();
/** get currently selected source DICOM character set(s). Please note that
* the returned string can contain multiple values (defined terms separated
* by a backslash) if code extension techniques are used. Furthermore,
* the returned string is always normalized, i.e. leading and trailing
* spaces have been removed.
* @return currently selected source DICOM character set(s) or an empty
* string if none is selected (identical to ASCII, which is the default)
*/
const OFString &getSourceCharacterSet() const;
/** get currently selected destination DICOM character set. Please note
* that the returned string, which contains a single defined term, is
* always normalized, i.e. leading and trailing spaces have been removed.
* @return currently selected destination DICOM character set or an empty
* string if none is selected (identical to ASCII, which is the default)
*/
const OFString &getDestinationCharacterSet() const;
/** get currently selected destination encoding, i.e. the name of the
* character set as used by libiconv for the conversion. If code
* extension techniques are used to switch between different character
* encodings, the main/default encoding is returned.
* @return currently selected destination encoding or an empty string if
* none is selected
*/
const OFString &getDestinationEncoding() const;
/** get mode specifying whether a character that cannot be represented in
* the destination character encoding is approximated through one or more
* characters that look similar to the original one. See
* selectCharacterSet().
* @return current value of the mode. OFTrue means that the mode is
* enabled, OFFalse means disabled.
*/
OFBool getTransliterationMode() const;
/** get mode specifying whether characters that cannot be represented in
* the destination character encoding will be silently discarded. See
* selectCharacterSet().
* @return current value of the mode. OFTrue means that the mode is
* enabled, OFFalse means disabled.
*/
OFBool getDiscardIllegalSequenceMode() const;
/** select DICOM character sets for the input and output string, between
* which subsequent calls of convertString() convert. The defined terms
* for a particular character set can be found in the DICOM standard, e.g.
* "ISO_IR 100" for ISO 8859-1 (Latin 1) or "ISO_IR 192" for Unicode in
* UTF-8. An empty string denotes the default character repertoire, which
* is ASCII (7-bit). If multiple values are given for 'fromCharset'
* (separated by a backslash), code extension techniques are used and
* escape sequences may be encountered in the source string to switch
* between the specified character sets.
* @param fromCharset name of the source character set(s) used for the
* input string as given in the DICOM attribute
* Specific Character Set (0008,0005). Leading and
* trailing spaces are removed automatically (if
* present).
* @param toCharset name of the destination character set used for
* the output string. Only a single value is
* permitted (no code extensions). Leading and
* trailing spaces are removed automatically (if
* present). The default value is "ISO_IR 192"
* (Unicode in UTF-8).
* @param transliterate mode specifying whether a character that cannot
* be represented in the destination character
* encoding is approximated through one or more
* characters that look similar to the original
* one. By default, this mode is disabled.
* @param discardIllegal mode specifying whether characters that cannot
* be represented in the destination character
* encoding will be silently discarded. By
* default, this mode is disabled.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition selectCharacterSet(const OFString &fromCharset,
const OFString &toCharset = "ISO_IR 192",
const OFBool transliterate = OFFalse,
const OFBool discardIllegal = OFFalse);
/** select DICOM character sets for the input and output string, between
* which subsequent calls of convertString() convert. In contrast to the
* overload taking 'fromCharset', the source character set is determined
* from the DICOM element Specific Character Set (0008,0005) stored in the
* given dataset/item. The defined terms for the destination character
* set can be found in the DICOM standard, e.g. "ISO_IR 100" for
* ISO 8859-1 (Latin 1) or "ISO_IR 192" for Unicode in UTF-8. An empty
* string denotes the default character repertoire, which is ASCII
* (7-bit). If multiple values are found in the Specific Character Set
* element of the given 'dataset' (separated by a backslash), code
* extension techniques are used and escape sequences may be encountered
* in the source string to switch between the specified character sets.
* @param dataset DICOM dataset or item from which the source
* character set should be retrieved. If the data
* element Specific Character Set (0008,0005) is
* empty or missing, the default character set
* (i.e. ASCII) is used.
* @param toCharset name of the destination character set used for
* the output string. Only a single value is
* permitted (no code extensions). Leading and
* trailing spaces are removed automatically (if
* present). The default value is "ISO_IR 192"
* (Unicode in UTF-8).
* @param transliterate mode specifying whether a character that cannot
* be represented in the destination character
* encoding is approximated through one or more
* characters that look similar to the original
* one. By default, this mode is disabled.
* @param discardIllegal mode specifying whether characters that cannot
* be represented in the destination character
* encoding will be silently discarded. By
* default, this mode is disabled.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition selectCharacterSet(DcmItem &dataset,
const OFString &toCharset = "ISO_IR 192",
const OFBool transliterate = OFFalse,
const OFBool discardIllegal = OFFalse);
/** convert the given string from the selected source character set(s) to
* the selected destination character set. That means selectCharacterSet()
* has to be called prior to this method.
* @param fromString input string to be converted (using the currently
* selected source character set)
* @param toString reference to variable where the converted string
* (using the currently selected destination character
* set) is stored
* @param delimiters optional string of characters that are regarded as
* delimiters, i.e.\ when found the character set is
* switched back to the default. CR, LF and FF are
* always regarded as delimiters (see DICOM PS 3.5).
* The default value is an empty string, i.e. no
* additional delimiters.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition convertString(const OFString &fromString,
OFString &toString,
const OFString &delimiters = "");
/** convert the given string from the selected source character set(s) to
* the selected destination character set. That means selectCharacterSet()
* has to be called prior to this method. Since the length of the input
* string has to be specified explicitly, the string can contain more than
* one NUL byte.
* @param fromString input string to be converted (using the currently
* selected source character set)
* @param fromLength length of the input string (number of bytes without
* the trailing NUL byte)
* @param toString reference to variable where the converted string
* (using the currently selected destination character
* set) is stored
* @param delimiters optional string of characters that are regarded as
* delimiters, i.e.\ when found the character set is
* switched back to the default. CR, LF and FF are
* always regarded as delimiters (see DICOM PS 3.5).
* The default value is an empty string, i.e. no
* additional delimiters.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition convertString(const char *fromString,
const size_t fromLength,
OFString &toString,
const OFString &delimiters = "");
// --- static helper functions ---
/** check whether the underlying character set conversion library is
* available. If the library is not available, no conversion between
* different character sets will be possible.
* @return OFTrue if the character set conversion library is available,
* OFFalse otherwise
*/
static OFBool isConversionLibraryAvailable();
/** count characters in given UTF-8 string and return the resulting number
* of so-called "code points". Please note that invalid UTF-8 encodings
* are not handled properly, so the caller has to pass a valid string.
* ASCII strings (7-bit) are also supported, although OFString::length()
* is probably much faster.
* @param utf8String valid character string with UTF-8 encoding
* @return number of characters (code points) in given UTF-8 string
*/
static size_t countCharactersInUTF8String(const OFString &utf8String);
protected:
/// type definition of a map storing the identifier (key) of a character
/// set and the associated conversion descriptor (value)
typedef OFMap<OFString, OFCharacterEncoding::T_Descriptor> T_DescriptorMap;
/** determine the destination character encoding (as used by libiconv) from
* the given DICOM defined term (specific character set), and set the
* member variables accordingly.
* @param toCharset name of the destination character set used for the
* output string
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition determineDestinationEncoding(const OFString &toCharset);
/** select a particular DICOM character set without code extensions for
* subsequent conversions. The corresponding DICOM defined term for the
* source character set is determined from the member variable
* 'SourceCharacterSet'.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition selectCharacterSetWithoutCodeExtensions();
/** select a particular DICOM character set with code extensions for
* subsequent conversions. The corresponding DICOM defined terms for the
* source character set are determined from the member variable
* 'SourceCharacterSet'.
* @param sourceVM value multiplicity of the member variable
* 'SourceCharacterSet'. Usually, this value has
* already been determined by the calling method.
* @return status, EC_Normal if successful, an error code otherwise
*/
OFCondition selectCharacterSetWithCodeExtensions(const unsigned long sourceVM);
/** close any currently open character set conversion descriptor(s).
* Afterwards, no conversion descriptor is selected, pretty much like
* after the initialization with the constructor.
*/
void closeConversionDescriptors();
/** check whether the given string contains at least one escape character
* (ESC), because it is used for code extension techniques like ISO 2022.
* The length is passed explicitly instead of relying on a terminator.
* @param strValue input string to be checked for any escape character
* @param strLength length of the input string
* @return OFTrue if an escape character has been found, OFFalse otherwise
*/
OFBool checkForEscapeCharacter(const char *strValue,
const size_t strLength) const;
/** convert given string to octal format, i.e.\ all non-ASCII and control
* characters are converted to their octal representation. The total
* length of the string is always limited to a particular maximum (see
* implementation). If the converted string would be longer, it is
* cropped and "..." is appended to indicate this cropping.
* @param strValue input string to be converted and possibly cropped
* @param strLength length of the input string
* @return resulting string in octal format
*/
OFString convertToLengthLimitedOctalString(const char *strValue,
const size_t strLength) const;
private:
// private undefined copy constructor (instances are not copyable)
DcmSpecificCharacterSet(const DcmSpecificCharacterSet &);
// private undefined assignment operator (instances are not assignable)
DcmSpecificCharacterSet &operator=(const DcmSpecificCharacterSet &);
/// selected source character set(s) based on one or more DICOM defined terms
OFString SourceCharacterSet;
/// selected destination character set based on a single DICOM defined term
OFString DestinationCharacterSet;
/// selected destination encoding based on names supported by the libiconv toolkit
OFString DestinationEncoding;
/// character encoding converter
OFCharacterEncoding EncodingConverter;
/// map of character set conversion descriptors
/// (only used if multiple character sets are needed)
T_DescriptorMap ConversionDescriptors;
};
#endif
|