This file is indexed.

/usr/lib/python3/dist-packages/pytds/collate.py is in python3-tds 1.8.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
import codecs
import struct

# Integer identifiers for character sets. They are not referenced anywhere
# else in this file; presumably other modules consume them -- TODO confirm.
TDS_CHARSET_ISO_8859_1 = 1
TDS_CHARSET_CP1251 = 2
TDS_CHARSET_CP1252 = 3
TDS_CHARSET_UCS_2LE = 4

TDS_CHARSET_UNICODE = 5

# Codec for little-endian UTF-16, looked up once at import time.
ucs2_codec = codecs.lookup('utf_16_le')


# Pairs of (code page name, SQL sort ids that use it).
#
# Source comment in the original code: the table comes from the Microsoft
# SQL Server reference "Windows Collation Designators" and from
# "NLS Information for Microsoft Windows XP".
_SORT_ID_CHARSETS = (
    ('CP437', (
        30,  # SQL_Latin1_General_CP437_BIN
        31,  # SQL_Latin1_General_CP437_CS_AS
        32,  # SQL_Latin1_General_CP437_CI_AS
        33,  # SQL_Latin1_General_Pref_CP437_CI_AS
        34,  # SQL_Latin1_General_CP437_CI_AI
    )),
    ('CP850', (
        40,  # SQL_Latin1_General_CP850_BIN
        41,  # SQL_Latin1_General_CP850_CS_AS
        42,  # SQL_Latin1_General_CP850_CI_AS
        43,  # SQL_Latin1_General_Pref_CP850_CI_AS
        44,  # SQL_Latin1_General_CP850_CI_AI
        49,  # SQL_1xCompat_CP850_CI_AS
        55,  # SQL_AltDiction_CP850_CS_AS
        56,  # SQL_AltDiction_Pref_CP850_CI_AS
        57,  # SQL_AltDiction_CP850_CI_AI
        58,  # SQL_Scandinavian_Pref_CP850_CI_AS
        59,  # SQL_Scandinavian_CP850_CS_AS
        60,  # SQL_Scandinavian_CP850_CI_AS
        61,  # SQL_AltDiction_CP850_CI_AS
    )),
    ('CP1250', (
        80,  # SQL_Latin1_General_1250_BIN
        81,  # SQL_Latin1_General_CP1250_CS_AS
        82,  # SQL_Latin1_General_Cp1250_CI_AS_KI_WI
        83,  # SQL_Czech_Cp1250_CS_AS_KI_WI
        84,  # SQL_Czech_Cp1250_CI_AS_KI_WI
        85,  # SQL_Hungarian_Cp1250_CS_AS_KI_WI
        86,  # SQL_Hungarian_Cp1250_CI_AS_KI_WI
        87,  # SQL_Polish_Cp1250_CS_AS_KI_WI
        88,  # SQL_Polish_Cp1250_CI_AS_KI_WI
        89,  # SQL_Romanian_Cp1250_CS_AS_KI_WI
        90,  # SQL_Romanian_Cp1250_CI_AS_KI_WI
        91,  # SQL_Croatian_Cp1250_CS_AS_KI_WI
        92,  # SQL_Croatian_Cp1250_CI_AS_KI_WI
        93,  # SQL_Slovak_Cp1250_CS_AS_KI_WI
        94,  # SQL_Slovak_Cp1250_CI_AS_KI_WI
        95,  # SQL_Slovenian_Cp1250_CS_AS_KI_WI
        96,  # SQL_Slovenian_Cp1250_CI_AS_KI_WI
    )),
    ('CP1251', (
        104,  # SQL_Latin1_General_1251_BIN
        105,  # SQL_Latin1_General_CP1251_CS_AS
        106,  # SQL_Latin1_General_CP1251_CI_AS
        107,  # SQL_Ukrainian_Cp1251_CS_AS_KI_WI
        108,  # SQL_Ukrainian_Cp1251_CI_AS_KI_WI
    )),
    ('CP1252', (
        51,  # SQL_Latin1_General_Cp1_CS_AS_KI_WI
        52,  # SQL_Latin1_General_Cp1_CI_AS_KI_WI
        53,  # SQL_Latin1_General_Pref_Cp1_CI_AS_KI_WI
        54,  # SQL_Latin1_General_Cp1_CI_AI_KI_WI
        183,  # SQL_Danish_Pref_Cp1_CI_AS_KI_WI
        184,  # SQL_SwedishPhone_Pref_Cp1_CI_AS_KI_WI
        185,  # SQL_SwedishStd_Pref_Cp1_CI_AS_KI_WI
        186,  # SQL_Icelandic_Pref_Cp1_CI_AS_KI_WI
    )),
    ('CP1253', (
        112,  # SQL_Latin1_General_1253_BIN
        113,  # SQL_Latin1_General_CP1253_CS_AS
        114,  # SQL_Latin1_General_CP1253_CI_AS
        120,  # SQL_MixDiction_CP1253_CS_AS
        121,  # SQL_AltDiction_CP1253_CS_AS
        122,  # SQL_AltDiction2_CP1253_CS_AS
        124,  # SQL_Latin1_General_CP1253_CI_AI
    )),
    ('CP1254', (
        128,  # SQL_Latin1_General_1254_BIN
        129,  # SQL_Latin1_General_Cp1254_CS_AS_KI_WI
        130,  # SQL_Latin1_General_Cp1254_CI_AS_KI_WI
    )),
    ('CP1255', (
        136,  # SQL_Latin1_General_1255_BIN
        137,  # SQL_Latin1_General_CP1255_CS_AS
        138,  # SQL_Latin1_General_CP1255_CI_AS
    )),
    ('CP1256', (
        144,  # SQL_Latin1_General_1256_BIN
        145,  # SQL_Latin1_General_CP1256_CS_AS
        146,  # SQL_Latin1_General_CP1256_CI_AS
    )),
    ('CP1257', (
        152,  # SQL_Latin1_General_1257_BIN
        153,  # SQL_Latin1_General_CP1257_CS_AS
        154,  # SQL_Latin1_General_CP1257_CI_AS
        155,  # SQL_Estonian_Cp1257_CS_AS_KI_WI
        156,  # SQL_Estonian_Cp1257_CI_AS_KI_WI
        157,  # SQL_Latvian_Cp1257_CS_AS_KI_WI
        158,  # SQL_Latvian_Cp1257_CI_AS_KI_WI
        159,  # SQL_Lithuanian_Cp1257_CS_AS_KI_WI
        160,  # SQL_Lithuanian_Cp1257_CI_AS_KI_WI
    )),
)


def sortid2charset(sort_id):
    """Return the code page name (e.g. ``'CP1252'``) for a SQL sort id.

    :param sort_id: integer sort id to look up.
    :returns: the code page name as a string.
    :raises Exception: if ``sort_id`` is not one of the known ids.
    """
    for charset, known_ids in _SORT_ID_CHARSETS:
        if sort_id in known_ids:
            return charset
    raise Exception("Invalid collation: 0x%X" % (sort_id, ))


# Pairs of (code page name, LCIDs that use it). The "??" and TODO markers
# are carried over from the original table and flag entries its author was
# unsure about.
_LCID_CHARSETS = (
    ('CP1250', (
        0x405,
        0x40e,  # 0x1040e
        0x415, 0x418, 0x41a, 0x41b, 0x41c, 0x424,
        # 0x81a, seem wrong in XP table TODO check
        0x104e,
    )),
    ('CP1251', (
        0x402, 0x419, 0x422, 0x423, 0x42f, 0x43f,
        0x440, 0x444, 0x450,
        0x81a,  # ??
        0x82c, 0x843, 0xc1a,
    )),
    ('CP1252', (
        0x1007, 0x1009, 0x100a, 0x100c, 0x1407,
        0x1409, 0x140a, 0x140c, 0x1809, 0x180a,
        0x180c, 0x1c09, 0x1c0a, 0x2009, 0x200a,
        0x2409, 0x240a, 0x2809, 0x280a, 0x2c09,
        0x2c0a, 0x3009, 0x300a, 0x3409, 0x340a,
        0x380a, 0x3c0a, 0x400a, 0x403, 0x406,
        0x407,  # 0x10407
        0x409, 0x40a, 0x40b, 0x40c, 0x40f, 0x410,
        0x413, 0x414, 0x416, 0x41d, 0x421, 0x42d,
        0x436,
        0x437,  # 0x10437
        0x438,
        # 0x439,  ??? Unicode only
        0x43e, 0x440a, 0x441, 0x456, 0x480a,
        0x4c0a, 0x500a, 0x807, 0x809, 0x80a,
        0x80c, 0x810, 0x813, 0x814, 0x816,
        0x81d, 0x83e, 0xc07, 0xc09, 0xc0a, 0xc0c,
    )),
    ('CP1253', (0x408,)),
    ('CP1254', (0x41f, 0x42c, 0x443)),
    ('CP1255', (0x40d,)),
    ('CP1256', (
        0x1001, 0x1401, 0x1801, 0x1c01, 0x2001,
        0x2401, 0x2801, 0x2c01, 0x3001, 0x3401,
        0x3801, 0x3c01, 0x4001, 0x401, 0x420,
        0x429, 0x801, 0xc01,
    )),
    ('CP1257', (
        0x425, 0x426, 0x427,
        0x827,  # ??
    )),
    ('CP1258', (0x42a,)),
    ('CP874', (0x41e,)),
    ('CP932', (0x411,)),  # 0x10411
    ('CP936', (
        0x1004,
        0x804,  # 0x20804
    )),
    ('CP949', (0x412,)),  # 0x10412
    ('CP950', (
        0x1404,
        0x404,  # 0x30404
        0xc04,
    )),
)


def lcid2charset(lcid):
    """Return the code page name (e.g. ``'CP1251'``) for a locale id.

    :param lcid: integer locale identifier.
    :returns: the matching code page name; ``'CP1252'`` when the LCID is
        not listed in the table.
    """
    for charset, known_lcids in _LCID_CHARSETS:
        if lcid in known_lcids:
            return charset
    # Unknown locale ids fall back to CP1252 instead of raising.
    return 'CP1252'


class Collation(object):
    """Collation descriptor with a 5-byte packed representation.

    The packed form is a little-endian unsigned 32-bit word followed by one
    unsigned byte (struct format ``'<LB'``). The low 20 bits of the word hold
    ``lcid``, bits 20-25 hold the boolean flags below, and the byte holds
    ``sort_id``.
    """

    _coll_struct = struct.Struct('<LB')
    # Number of bytes produced by pack() and consumed by unpack().
    wire_size = _coll_struct.size

    # Single-bit masks inside the 32-bit word.
    f_ignore_case = 1 << 20
    f_ignore_accent = 1 << 21
    f_ignore_width = 1 << 22
    f_ignore_kana = 1 << 23
    f_binary = 1 << 24
    f_binary2 = 1 << 25

    def __init__(self, lcid, sort_id, ignore_case, ignore_accent, ignore_width, ignore_kana, binary, binary2, version):
        """Store every field as given; no validation or conversion is done."""
        self.lcid, self.sort_id = lcid, sort_id
        self.ignore_case, self.ignore_accent = ignore_case, ignore_accent
        self.ignore_width, self.ignore_kana = ignore_width, ignore_kana
        self.binary, self.binary2 = binary, binary2
        self.version = version

    def __repr__(self):
        """Return a ``Collation(field=value, ...)`` string listing all fields."""
        field_names = ('lcid', 'sort_id', 'ignore_case', 'ignore_accent',
                       'ignore_width', 'ignore_kana', 'binary', 'binary2',
                       'version')
        template = 'Collation(lcid={0}, sort_id={1}, ignore_case={2}, ignore_accent={3}, ignore_width={4}, ignore_kana={5}, binary={6}, binary2={7}, version={8})'
        return template.format(*[getattr(self, name) for name in field_names])

    @classmethod
    def unpack(cls, b):
        """Build an instance from the first ``wire_size`` bytes of ``b``.

        :param b: bytes-like buffer that starts with a packed collation.
        :returns: a new instance of ``cls``; the flag fields are ``bool``.
        """
        word, sort_id = cls._coll_struct.unpack_from(b)

        def is_set(mask):
            return bool(word & mask)

        return cls(
            lcid=word & 0xfffff,
            sort_id=sort_id,
            ignore_case=is_set(cls.f_ignore_case),
            ignore_accent=is_set(cls.f_ignore_accent),
            ignore_width=is_set(cls.f_ignore_width),
            ignore_kana=is_set(cls.f_ignore_kana),
            binary=is_set(cls.f_binary),
            binary2=is_set(cls.f_binary2),
            # NOTE(review): the mask selects bits 28-31 but the shift is 26,
            # so the stored value is the masked nibble times 4. pack() uses
            # the same shift, so round-trips are consistent -- confirm
            # against the TDS collation layout before changing either side.
            version=(word & 0xf0000000) >> 26,
        )

    def pack(self):
        """Return the ``wire_size``-byte packed form of this collation."""
        word = (self.lcid & 0xfffff) | ((self.version << 26) & 0xf0000000)
        flag_bits = (
            (self.ignore_case, self.f_ignore_case),
            (self.ignore_accent, self.f_ignore_accent),
            (self.ignore_width, self.f_ignore_width),
            (self.ignore_kana, self.f_ignore_kana),
            (self.binary, self.f_binary),
            (self.binary2, self.f_binary2),
        )
        for enabled, mask in flag_bits:
            if enabled:
                word |= mask
        return self._coll_struct.pack(word, self.sort_id)

    def get_charset(self):
        """Return the code page name for this collation.

        A truthy ``sort_id`` is resolved through ``sortid2charset``;
        otherwise ``lcid`` is resolved through ``lcid2charset``.
        """
        if not self.sort_id:
            return lcid2charset(self.lcid)
        return sortid2charset(self.sort_id)

    def get_codec(self):
        """Return ``codecs.lookup`` of the name given by ``get_charset()``."""
        charset = self.get_charset()
        return codecs.lookup(charset)


# Shared module-level Collation whose lcid, sort_id, flags and version are
# all 0; it packs to five zero bytes.
raw_collation = Collation(0, 0, 0, 0, 0, 0, 0, 0, 0)