This file is indexed.

/usr/lib/python2.7/dist-packages/dicom/charset.py is in python-dicom 0.9.9-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# charset.py
"""Handle alternate character sets for character strings."""
#
# Copyright (c) 2008-2012 Darcy Mason
# This file is part of pydicom, released under a modified MIT license.
#    See the file license.txt included with this distribution, also
#    available at http://pydicom.googlecode.com
#

import logging
logger = logging.getLogger('pydicom')
from dicom.valuerep import PersonNameUnicode, text_VRs
from dicom import in_py3

# Map DICOM Specific Character Set to python equivalent
python_encoding = {
    '': 'iso8859',           # default character set for DICOM
    'ISO_IR 6': 'iso8859',   # alias for latin_1 too
    'ISO_IR 100': 'latin_1',
    'ISO 2022 IR 87': 'iso2022_jp',
    'ISO 2022 IR 13': 'shift_jis',
    'ISO 2022 IR 149': 'euc_kr',  # needs cleanup via clean_escseq()
    'ISO_IR 192': 'UTF8',     # from Chinese example, 2008 PS3.5 Annex J p1-4
    'GB18030': 'GB18030',
    'ISO_IR 126': 'iso_ir_126',  # Greek
    'ISO_IR 127': 'iso_ir_127',  # Arab
    'ISO_IR 138': 'iso_ir_138',  # Hebrew
    'ISO_IR 144': 'iso_ir_144',  # Russian
}

default_encoding = "iso8859"


def clean_escseq(element, encodings):
    """Remove escape sequences that Python does not remove from
       Korean encoding ISO 2022 IR 149 due to the G1 code element.
    """
    if 'euc_kr' in encodings:
        return element.replace(
            "\x1b\x24\x29\x43", "").replace("\x1b\x28\x42", "")
    else:
        return element

# DICOM PS3.5-2008 6.1.1 (p 18) says:
#   default is ISO-IR 6 G0, equiv to common chr set of ISO 8859 (PS3.5 6.1.2.1)
#    (0008,0005)  value 1 can *replace* the default encoding...
#           for VRs of SH, LO, ST, LT, PN and UT (PS3.5 6.1.2.3)...
#           with a single-byte character encoding
#  if (0008,0005) is multi-valued, then value 1 (or default if blank)...
#           is used until code extension escape sequence is hit,
#          which can be at start of string, or after CR/LF, FF, or
#          in Person Name PN, after ^ or =
# NOTE also that 7.5.3 SEQUENCE INHERITANCE states that if (0008,0005)
#       is not present in a sequence item then it is inherited from its parent.


def convert_encodings(encodings):
    """Converts DICOM encodings into corresponding python encodings"""

    # If a list if passed, we don't want to modify the list in place so copy it
    encodings = encodings[:]

    if isinstance(encodings, basestring):
        encodings = [encodings]
    elif not encodings[0]:
        encodings[0] = 'ISO_IR 6'

    try:
        encodings = [python_encoding[x] for x in encodings]
    except KeyError:  # Assume that it is already the python encoding (is there a way to check this?)
        pass

    if len(encodings) == 1:
        encodings = [encodings[0]] * 3
    elif len(encodings) == 2:
        encodings.append(encodings[1])

    return encodings


def decode(data_element, dicom_character_set):
    """Apply the DICOM character encoding to the data element

    data_element -- DataElement instance containing a value to convert
    dicom_character_set -- the value of Specific Character Set (0008,0005),
                    which may be a single value,
                    a multiple value (code extension), or
                    may also be '' or None.
                    If blank or None, ISO_IR 6 is used.

    """
    if not dicom_character_set:
        dicom_character_set = ['ISO_IR 6']

    encodings = convert_encodings(dicom_character_set)

    # decode the string value to unicode
    # PN is special case as may have 3 components with differenct chr sets
    if data_element.VR == "PN":
        # logger.warn("%s ... type: %s" %(str(data_element), type(data_element.VR)))
        if in_py3:
            if data_element.VM == 1:
                data_element.value = data_element.value.decode(encodings)
            else:
                data_element.value = [val.decode(encodings) for val in data_element.value]
        else:
            if data_element.VM == 1:
                data_element.value = PersonNameUnicode(data_element.value, encodings)
            else:
                data_element.value = [PersonNameUnicode(value, encodings)
                                      for value in data_element.value]
    if data_element.VR in text_VRs:
        # Remove the first encoding if this is a multi-byte encoding
        if len(encodings) > 1:
            del encodings[0]

        # You can't re-decode unicode (string literals in py3)
        if data_element.VM == 1:
            if isinstance(data_element.value, unicode):
                return
            data_element.value = clean_escseq(
                data_element.value.decode(encodings[0]), encodings)
        else:

            output = list()

            for value in data_element.value:
                if isinstance(value, unicode):
                    output.append(value)
                else:
                    output.append(clean_escseq(value.decode(encodings[0]), encodings))

            data_element.value = output