This file is indexed.

/usr/share/pyshared/meliae/scanner.py is in python-meliae 0.4.0-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# Copyright (C) 2009, 2010, 2011 Canonical Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Some bits for helping to scan objects looking for referenced memory."""

import gc
import types

from meliae import (
    _intset,
    _scanner,
    )


size_of = _scanner.size_of
get_referents = _scanner.get_referents
add_special_size = _scanner.add_special_size

def _size_of_ndarray(ndarray_obj):
    """
    Return the size of a Numpy ndarray's internal storage.

    Doesn't yet handle views into other arrays.
    """

    return ndarray_obj.nbytes

add_special_size("numpy.ndarray", _size_of_ndarray, _size_of_ndarray)


def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, str):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms say "yes you
    #       definitely are not present", but only "you might already be
    #       present", collisions cause false positives.)
    #       However, you can get by with 8-10bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)


def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects().
    """
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(xrange(-5, 256))
    nodump.extend([chr(c) for c in xrange(256)])
    nodump.extend([t for t in types.__dict__.itervalues()
                      if type(t) is types.TypeType])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(('__doc__', 'self', 'operator', '__init__', 'codecs',
                   '__new__', '__builtin__', '__builtins__', 'error', 'len',
                   'errors', 'keys', 'None', '__module__', 'file', 'name', '',
                   'sys', 'True', 'False'))
    nodump.extend((BaseException, Exception, StandardError, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add anything
    #       with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf, obj, nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()


def dump_all_objects(outf):
    """Dump everything that is referenced from gc.get_objects()

    This recurses, and tracks dumped objects in an IDSet. Which means it costs
    memory, which is often about 10% of currently active memory. Otherwise,
    this usually results in smaller dump files than dump_gc_objects().

    This also can be faster, because it doesn't dump the same item multiple
    times.
    """
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    all_objs = gc.get_objects()
    dump_all_referenced(outf, all_objs, is_pending=True)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()



def get_recursive_size(obj):
    """Get the memory referenced from this object.

    This returns the memory of the direct object, and all of the memory
    referenced by child objects. It also returns the total number of objects.
    """
    total_size = 0
    pending = [obj]
    last_item = 0
    seen = _intset.IDSet()
    size_of = _scanner.size_of
    while last_item >= 0:
        item = pending[last_item]
        last_item -= 1
        id_item = id(item)
        if id_item in seen:
            continue
        seen.add(id_item)
        total_size += size_of(item)
        for child in get_referents(item):
            if id(child) not in seen:
                last_item += 1
                if len(pending) > last_item:
                    pending[last_item] = child
                else:
                    pending.append(child)
    return len(seen), total_size


def get_recursive_items(obj):
    """Walk all referred items and return the unique list of them."""
    all = []
    pending = [obj]
    last_item = 0
    seen = _intset.IDSet()
    while last_item >= 0:
        item = pending[last_item]
        last_item -= 1
        id_item = id(item)
        if id_item in seen:
            continue
        seen.add(id_item)
        all.append(item)
        for child in get_referents(item):
            if id(child) not in seen:
                last_item += 1
                if len(pending) > last_item:
                    pending[last_item] = child
                else:
                    pending.append(child)
    return all


def find_interned_dict():
    """Go through all gc objects and find the interned python dict."""
    for obj in gc.get_objects():
        if (type(obj) is not dict
            or 'find_interned_dict' not in obj
            or obj['find_interned_dict'] is not 'find_interned_dict'
            or 'get_recursive_items' not in obj
            or obj['get_recursive_items'] is not 'get_recursive_items'):
            # The above check assumes that local strings will be interned,
            # which is the standard cpython behavior, but perhaps not the best
            # to require? However, if we used something like a custom string
            # that we intern() we still could have problems with locals(), etc.
            continue
        return obj
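
For context, this module is normally driven from the application being inspected: dump_all_objects() writes one JSON record per object reachable from gc.get_objects(), and the companion meliae.loader module reads the dump back for analysis. The sketch below is a minimal usage example, not part of the packaged file above; it follows meliae's documented workflow (Python 2, matching the code shown), and the loader-side calls are assumptions based on that documentation rather than anything defined in scanner.py.

    # Usage sketch for meliae 0.4.0 (Python 2); assumes meliae is installed.
    from meliae import scanner, loader

    # Write one JSON record per live object reachable from gc.get_objects().
    scanner.dump_all_objects('/tmp/memory.json')

    # get_recursive_size() walks an object graph in memory without writing
    # a file; it returns (number of objects seen, total bytes).
    num_objects, total_bytes = scanner.get_recursive_size({'a': [1, 2, 3]})
    print num_objects, total_bytes

    # Load the dump back and print a per-type memory summary (loader API
    # as documented for meliae; hypothetical here, not defined in this file).
    om = loader.load('/tmp/memory.json')
    om.summarize()

The scanner side is deliberately cheap (it streams records to disk rather than building structures in memory), so the dump can be taken in the running process and analysed later, or in a separate process, with the loader.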