/usr/share/pyshared/meliae/scanner.py is in python-meliae 0.4.0-1build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright (C) 2009, 2010, 2011 Canonical Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Some bits for helping to scan objects looking for referenced memory."""
import gc
import types
from meliae import (
_intset,
_scanner,
)
size_of = _scanner.size_of
get_referents = _scanner.get_referents
add_special_size = _scanner.add_special_size
def _size_of_ndarray(ndarray_obj):
"""
Return the size of a Numpy ndarray's internal storage.
Doesn't yet handle views into other arrays.
"""
return ndarray_obj.nbytes
add_special_size("numpy.ndarray", _size_of_ndarray, _size_of_ndarray)
def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, str):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms say "yes you
    #       definitely are not present", but only "you might already be
    #       present", collisions cause false positives.)
    #       However, you can get by with 8-10bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)
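# Example usage, an illustrative sketch that is not part of the original
# file: dump the graph of objects reachable from a single root, then inspect
# it with meliae.loader. 'my_root' and 'dump.json' are placeholder names.
#
#   from meliae import scanner, loader
#   scanner.dump_all_referenced('dump.json', my_root)
#   om = loader.load('dump.json')
#   s = om.summarize(); s   # per-type memory summary at an interactive prompt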
def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects().
    """
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(xrange(-5, 256))
    nodump.extend([chr(c) for c in xrange(256)])
    nodump.extend([t for t in types.__dict__.itervalues()
                   if type(t) is types.TypeType])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(('__doc__', 'self', 'operator', '__init__', 'codecs',
                   '__new__', '__builtin__', '__builtins__', 'error', 'len',
                   'errors', 'keys', 'None', '__module__', 'file', 'name', '',
                   'sys', 'True', 'False'))
    nodump.extend((BaseException, Exception, StandardError, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add anything
    #       with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf, obj, nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()
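# Example usage, an illustrative sketch that is not part of the original
# file: snapshot every object the garbage collector knows about. With the
# default recurse_depth=1 each object's referents are written alongside it,
# so the same object can appear several times in the dump.
#
#   from meliae import scanner
#   scanner.dump_gc_objects('gc-dump.json')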
def dump_all_objects(outf):
    """Dump everything that is referenced from gc.get_objects()

    This recurses, and tracks dumped objects in an IDSet. Which means it costs
    memory, which is often about 10% of currently active memory. Otherwise,
    this usually results in smaller dump files than dump_gc_objects(). This
    also can be faster, because it doesn't dump the same item multiple times.
    """
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    all_objs = gc.get_objects()
    dump_all_referenced(outf, all_objs, is_pending=True)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()
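# Example usage, an illustrative sketch that is not part of the original
# file: typically used as the entry point for a whole-process snapshot. It
# tracks already-dumped objects in an IDSet, so the file is smaller than what
# dump_gc_objects() produces, at the cost of some extra memory while dumping.
#
#   from meliae import scanner
#   scanner.dump_all_objects('memory.json')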
def get_recursive_size(obj):
    """Get the memory referenced from this object.

    This returns the memory of the direct object, and all of the memory
    referenced by child objects. It also returns the total number of objects.
    """
    total_size = 0
    pending = [obj]
    last_item = 0
    seen = _intset.IDSet()
    size_of = _scanner.size_of
    while last_item >= 0:
        item = pending[last_item]
        last_item -= 1
        id_item = id(item)
        if id_item in seen:
            continue
        seen.add(id_item)
        total_size += size_of(item)
        for child in get_referents(item):
            if id(child) not in seen:
                last_item += 1
                if len(pending) > last_item:
                    pending[last_item] = child
                else:
                    pending.append(child)
    return len(seen), total_size
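# Example usage, an illustrative sketch that is not part of the original
# file: measure one data structure in-process without writing a dump file.
# Shared children are only counted once, and everything reachable (including
# any module or class objects the instances reference) goes into the total.
# 'my_cache' is a placeholder.
#
#   num_objects, num_bytes = get_recursive_size(my_cache)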
def get_recursive_items(obj):
    """Walk all referred items and return the unique list of them."""
    all = []
    pending = [obj]
    last_item = 0
    seen = _intset.IDSet()
    while last_item >= 0:
        item = pending[last_item]
        last_item -= 1
        id_item = id(item)
        if id_item in seen:
            continue
        seen.add(id_item)
        all.append(item)
        for child in get_referents(item):
            if id(child) not in seen:
                last_item += 1
                if len(pending) > last_item:
                    pending[last_item] = child
                else:
                    pending.append(child)
    return all
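# Example usage, an illustrative sketch that is not part of the original
# file: collect the unique reachable objects so they can be inspected
# directly, for instance counted by type. 'my_root' is a placeholder.
#
#   counts = {}
#   for item in get_recursive_items(my_root):
#       counts[type(item)] = counts.get(type(item), 0) + 1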
def find_interned_dict():
    """Go through all gc objects and find the interned python dict."""
    for obj in gc.get_objects():
        if (type(obj) is not dict
            or 'find_interned_dict' not in obj
            or obj['find_interned_dict'] is not 'find_interned_dict'
            or 'get_recursive_items' not in obj
            or obj['get_recursive_items'] is not 'get_recursive_items'):
            # The above check assumes that local strings will be interned,
            # which is the standard cpython behavior, but perhaps not the best
            # to require? However, if we used something like a custom string
            # that we intern() we still could have problems with locals(), etc.
            continue
        return obj
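# Example usage, an illustrative sketch that is not part of the original
# file: the function above identifies the interned-string dict by checking
# that it maps this module's own string literals to the identical (interned)
# string objects; it falls through and returns None if no such dict is found.
#
#   interned = find_interned_dict()
#   if interned is not None:
#       print len(interned), 'interned strings'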