This file is indexed.

/usr/lib/python2.7/dist-packages/datalad/auto.py is in python-datalad 0.9.3-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Proxy basic file operations (e.g. open) to auto-obtain files upon I/O
"""

import sys
# OPT delay import for expensive mock until used
#from mock import patch
from six import PY2
import six.moves.builtins as __builtin__
builtins_name = '__builtin__' if PY2 else 'builtins'

import logging
import io
import os

from os.path import dirname, lexists, realpath
from os.path import exists
from os.path import isabs
from os.path import join as opj
from git.exc import InvalidGitRepositoryError

from .utils import getpwd
from .dochelpers import exc_str
from .support.annexrepo import AnnexRepo
from .cmdline.helpers import get_repo_instance

lgr = logging.getLogger("datalad.auto")

h5py = None
try:
    import h5py
except ImportError:
    pass
except Exception as exc:
    # could happen due to misbehaving handlers provided by git module
    # see https://github.com/gitpython-developers/GitPython/issues/600
    # we could overload the handler by providing a blank one, but I do not
    # think it is worthwhile at this point.  So let's just issue a warning
    lgr.warning(
        "Failed to import h5py, so no automagic handling for it atm: %s",
        exc_str(exc)
    )

lzma = None
try:
    import lzma
except ImportError:
    pass
except Exception as exc:
    lgr.warning(
        "Failed to import lzma, so no automagic handling for it atm: %s",
        exc_str(exc)
    )

# TODO: RF to reduce code duplication among cases, also RF tests for the same reason

class _EarlyExit(Exception):
    """Helper to early escape try/except logic in wrappde open"""
    pass


class AutomagicIO(object):
    """Class to proxy commonly used API for accessing files so they get automatically fetched

    Currently supports builtin open() and h5py.File when those are used for reading
    """

    def __init__(self, autoget=True, activate=False):
        self._active = False
        self._builtin_open = __builtin__.open
        self._io_open = io.open
        self._builtin_exists = os.path.exists
        self._builtin_isfile = os.path.isfile
        if h5py:
            self._h5py_File = h5py.File
        else:
            self._h5py_File = None
        if lzma:
            self._lzma_LZMAFile = lzma.LZMAFile
        else:
            self._lzma_LZMAFile = None
        self._autoget = autoget
        self._in_open = False
        self._log_online = True
        from mock import patch
        self._patch = patch
        if activate:
            self.activate()

    def __enter__(self):
        self.activate()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.deactivate()

    @property
    def autoget(self):
        return self._autoget

    @property
    def active(self):
        return self._active

    def _proxy_open_name_mode(self, origname, origfunc, *args, **kwargs):
        """Proxy for various "open" which have first argument name and 2nd - mode

        """
        # wrap it all for resilience to errors -- proxying must do no harm!
        try:
            if self._in_open:
                raise _EarlyExit
            self._in_open = True  # just in case someone kept alias/assignment
            # restore stock open for the duration of handling so that
            # logging etc. works correctly
            with self._patch(origname, origfunc):
                lgr.log(2, "Proxying open with %r %r", args, kwargs)

                # had to go with *args since in PY2 the argument is 'name', in PY3 it is 'file'
                # deduce arguments
                if len(args) > 0:
                    # name/file was provided
                    file = args[0]
                else:
                    filearg = "name" if PY2 else "file"
                    if filearg not in kwargs:
                        # so the name was missing etc, just proxy into original open call and let it puke
                        lgr.debug("No name/file was given, avoiding proxying")
                        raise _EarlyExit
                    file = kwargs.get(filearg)
                if isinstance(file, int):
                    lgr.debug(
                        "Skipping operation on %i, already a file descriptor", file)
                    raise _EarlyExit
                mode = 'r'
                if len(args) > 1:
                    mode = args[1]
                elif 'mode' in kwargs:
                    mode = kwargs['mode']

                if 'r' in mode:
                    self._dataset_auto_get(file)
                else:
                    lgr.debug("Skipping operation on %s since mode=%r", file, mode)
        except _EarlyExit:
            pass
        except Exception as e:
            # If anything goes wrong -- we should complain and proceed
            with self._patch(origname, origfunc):
                lgr.warning("Failed proxying open with %r, %r: %s", args, kwargs, exc_str(e))
        finally:
            self._in_open = False
        # finally give it back to stock open
        return origfunc(*args, **kwargs)

    def _proxy_open(self, *args, **kwargs):
        return self._proxy_open_name_mode(builtins_name + '.open', self._builtin_open,
                                          *args, **kwargs)

    def _proxy_io_open(self, *args, **kwargs):
        return self._proxy_open_name_mode('io.open', self._io_open,
                                          *args, **kwargs)

    def _proxy_h5py_File(self, *args, **kwargs):
        return self._proxy_open_name_mode('h5py.File', self._h5py_File,
                                          *args, **kwargs)

    def _proxy_lzma_LZMAFile(self, *args, **kwargs):
        return self._proxy_open_name_mode('lzma.LZMAFile', self._lzma_LZMAFile,
                                          *args, **kwargs)

    def _proxy_exists(self, path):
        # TODO: decide whether it should maybe be retrieved right away.
        # For now, report True as long as it is a symlink pointing under .git/annex
        if exists(path):
            return True
        return lexists(path) and 'annex/objects' in str(realpath(path))

    def _proxy_isfile(self, path):
        return self._proxy_open_name_mode(
            'os.path.isfile', self._builtin_isfile, path
        )

    def _dataset_auto_get(self, filepath):
        """Verify that filepath is under annex, and if so and not present - get it"""

        if not self._autoget:
            return
        # if filepath is not there at all (program just "checked" if it could access it)
        if not lexists(filepath):
            lgr.log(2, "Not testing/getting file %s since it is not there", filepath)
            return
        # deduce directory for filepath
        filedir = dirname(filepath)
        try:
            # TODO: verify logic for create -- we shouldn't 'annexify' non-annexified
            # see https://github.com/datalad/datalad/issues/204
            annex = get_repo_instance(filedir)
        except (RuntimeError, InvalidGitRepositoryError) as e:
            # must be not under annex etc
            return
        if not isinstance(annex, AnnexRepo):
            # not an annex -- can do nothing
            return

        # since Git/AnnexRepo functionality treats relative paths as relative to the
        # top of the repository, and the current directory might be outside it, get a full path
        if not isabs(filepath):
            filepath = opj(getpwd(), filepath)

        # "quick" check first if under annex at all
        try:
            # might fail.  TODO: troubleshoot when it does e.g.
            # datalad/tests/test_auto.py:test_proxying_open_testrepobased
            under_annex = annex.is_under_annex(filepath, batch=True)
        except:  # MIH: really? what if MemoryError
            under_annex = None
        # if it is (or might be) under annex and lacks content, fetch it
        if (under_annex or under_annex is None) and not annex.file_has_content(filepath):
            lgr.info("AutomagicIO: retrieving file content of %s", filepath)
            annex.get(filepath)

    def activate(self):
        # we should stay below info for this message. With PR #1630 we
        # start to use this functionality internally, and this will show
        # up frequently even in cases where it does nothing at all
        lgr.debug("Activating DataLad's AutoMagicIO")
        # Some beasts (e.g. tornado used by IPython) override outputs, and
        # provide fileno which throws exception.  In such cases we should not log online
        self._log_online = hasattr(sys.stdout, 'fileno') and hasattr(sys.stderr, 'fileno')
        try:
            if self._log_online:
                sys.stdout.fileno()
                sys.stderr.fileno()
        except:  # MIH: IOError?
            self._log_online = False
        if self.active:
            # this is not a warning, because there is nothing going
            # wrong or being undesired. Nested invocation could happen,
            # caused by independent pieces of code, e.g. user code
            # that invokes our own metadata handling.
            lgr.debug("%s already active. No action taken" % self)
            return
        # overloads
        __builtin__.open = self._proxy_open
        io.open = self._proxy_io_open
        os.path.exists = self._proxy_exists
        os.path.isfile = self._proxy_isfile
        if h5py:
            h5py.File = self._proxy_h5py_File
        if lzma:
            lzma.LZMAFile = self._proxy_lzma_LZMAFile
        self._active = True

    def deactivate(self):
        # just debug level -- see activate()
        lgr.debug("Deactivating DataLad's AutoMagicIO")
        if not self.active:
            lgr.warning("%s is not active, can't deactivate" % self)
            return
        __builtin__.open = self._builtin_open
        io.open = self._io_open
        if h5py:
            h5py.File = self._h5py_File
        if lzma:
            lzma.LZMAFile = self._lzma_LZMAFile
        os.path.exists = self._builtin_exists
        os.path.isfile = self._builtin_isfile
        self._active = False

    def __del__(self):
        try:
            if self._active:
                self.deactivate()
        except:  # MIH: IOError?
            pass
        try:
            super(self.__class__, self).__del__()
        except:
            pass
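
For reference, below is a minimal usage sketch of the AutomagicIO class defined in this module. It relies only on the constructor, the context-manager protocol, and the activate()/deactivate() methods shown above; the dataset layout and file path are hypothetical.

# Minimal usage sketch for datalad.auto.AutomagicIO (datalad 0.9.x).
# 'data/annexed_file.dat' is a hypothetical annexed file inside a
# DataLad/git-annex dataset assumed to be the current working directory.
from datalad.auto import AutomagicIO

# Scoped activation: while the context manager is active, builtin open(),
# io.open(), os.path.exists/isfile and (if importable) h5py.File and
# lzma.LZMAFile are proxied, so reading an annexed file whose content is
# not present locally triggers an annex get first.
with AutomagicIO():
    with open('data/annexed_file.dat', 'rb') as f:
        content = f.read()

# Process-wide activation: the proxies stay installed until deactivate().
aio = AutomagicIO(activate=True)
try:
    with open('data/annexed_file.dat', 'rb') as f:
        content = f.read()
finally:
    aio.deactivate()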