This file is indexed.

/usr/lib/python3/dist-packages/patsy/highlevel.py is in python3-patsy 0.4.1+git34-ga5b54c2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# This file is part of Patsy
# Copyright (C) 2011-2013 Nathaniel Smith <njs@pobox.com>
# See file LICENSE.txt for license information.

# These are made available in the patsy.* namespace:
__all__ = ["dmatrix", "dmatrices",
           "incr_dbuilder", "incr_dbuilders"]

# problems:
#   statsmodels reluctant to pass around separate eval environment, suggesting
#     that design_and_matrices-equivalent should return a formula_like
#   is ModelDesc really the high-level thing?
#   ModelDesign doesn't work -- need to work with the builder set
#   want to be able to return either a matrix or a pandas dataframe

import six
import numpy as np
from patsy import PatsyError
from patsy.design_info import DesignMatrix, DesignInfo
from patsy.eval import EvalEnvironment
from patsy.desc import ModelDesc
from patsy.build import (design_matrix_builders,
                         build_design_matrices)
from patsy.util import (have_pandas, asarray_or_pandas,
                        atleast_2d_column_default)

if have_pandas:
    import pandas

# Tries to build a (lhs, rhs) design given a formula_like and an incremental
# data source. If formula_like is not capable of doing this, then returns
# None.
def _try_incr_builders(formula_like, data_iter_maker, eval_env,
                       NA_action):
    if isinstance(formula_like, DesignInfo):
        return (design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
        and len(formula_like) == 2
        and isinstance(formula_like[0], DesignInfo)
        and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        formula_like = formula_like.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                                % (formula_like,))
        # fallthrough
    if not six.PY3 and isinstance(formula_like, unicode):
        # Included for the convenience of people who are using py2 with
        # __future__.unicode_literals.
        try:
            formula_like = formula_like.encode("ascii")
        except UnicodeEncodeError:
            raise PatsyError(
                "On Python 2, formula strings must be either 'str' objects, "
                "or else 'unicode' objects containing only ascii "
                "characters. You passed a unicode string with non-ascii "
                "characters. I'm afraid you'll have to either switch to "
                "ascii-only, or else upgrade to Python 3.")
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders([formula_like.lhs_termlist,
                                       formula_like.rhs_termlist],
                                      data_iter_maker,
                                      eval_env,
                                      NA_action)
    else:
        return None

def incr_dbuilder(formula_like, data_iter_maker, eval_env=0, NA_action="drop"):
    """Construct a design matrix builder incrementally from a large data set.

    :arg formula_like: Similar to :func:`dmatrix`, except that explicit
      matrices are not allowed. Must be a formula string, a
      :class:`ModelDesc`, a :class:`DesignInfo`, or an object with a
      ``__patsy_get_model_desc__`` method.
    :arg data_iter_maker: A zero-argument callable which returns an iterator
      over dict-like data objects. This must be a callable rather than a
      simple iterator because sufficiently complex formulas may require
      multiple passes over the data (e.g. if there are nested stateful
      transforms).
    :arg eval_env: Either a :class:`EvalEnvironment` which will be used to
      look up any variables referenced in `formula_like` that cannot be
      found in `data`, or else a depth represented as an
      integer which will be passed to :meth:`EvalEnvironment.capture`.
      ``eval_env=0`` means to use the context of the function calling
      :func:`incr_dbuilder` for lookups. If calling this function from a
      library, you probably want ``eval_env=1``, which means that variables
      should be resolved in *your* caller's namespace.
    :arg NA_action: An :class:`NAAction` object or string, used to determine
      what values count as 'missing' for purposes of determining the levels of
      categorical factors.
    :returns: A :class:`DesignInfo`

    Tip: for `data_iter_maker`, write a generator like::

      def iter_maker():
          for data_chunk in my_data_store:
              yield data_chunk

    and pass `iter_maker` (*not* `iter_maker()`).

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.
    """
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    if len(design_infos[0].column_names) > 0:
        raise PatsyError("encountered outcome variables for a model "
                         "that does not expect them")
    return design_infos[1]

def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
                   NA_action="drop"):
    """Construct two design matrix builders incrementally from a large data
    set.

    :func:`incr_dbuilders` is to :func:`incr_dbuilder` as :func:`dmatrices` is
    to :func:`dmatrix`. See :func:`incr_dbuilder` for details.
    """
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    if len(design_infos[0].column_names) == 0:
        raise PatsyError("model is missing required outcome variables")
    return design_infos

# This always returns a length-two tuple,
#   response, predictors
# where
#   response is a DesignMatrix (possibly with 0 columns)
#   predictors is a DesignMatrix
# The input 'formula_like' could be like:
#   (np.ndarray, np.ndarray)
#   (DesignMatrix, DesignMatrix)
#   (None, DesignMatrix)
#   np.ndarray  # for predictor-only models
#   DesignMatrix
#   (None, np.ndarray)
#   "y ~ x"
#   ModelDesc(...)
#   DesignInfo
#   (DesignInfo, DesignInfo)
#   any object with a special method __patsy_get_model_desc__
def _do_highlevel_design(formula_like, data, eval_env,
                         NA_action, return_type):
    if return_type == "dataframe" and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas "
                            "is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError("unrecognized output type %r, should be "
                            "'matrix' or 'dataframe'" % (return_type,))
    def data_iter_maker():
        return iter([data])
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    if design_infos is not None:
        return build_design_matrices(design_infos, data,
                                     NA_action=NA_action,
                                     return_type=return_type)
    else:
        # No builders, but maybe we can still get matrices
        if isinstance(formula_like, tuple):
            if len(formula_like) != 2:
                raise PatsyError("don't know what to do with a length %s "
                                    "matrices tuple"
                                    % (len(formula_like),))
            (lhs, rhs) = formula_like
        else:
            # subok=True is necessary here to allow DesignMatrixes to pass
            # through
            (lhs, rhs) = (None, asarray_or_pandas(formula_like, subok=True))
        # some sort of explicit matrix or matrices were given. Currently we
        # have them in one of these forms:
        #   -- an ndarray or subclass
        #   -- a DesignMatrix
        #   -- a pandas.Series
        #   -- a pandas.DataFrame
        # and we have to produce a standard output format.
        def _regularize_matrix(m, default_column_prefix):
            di = DesignInfo.from_array(m, default_column_prefix)
            if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
                orig_index = m.index
            else:
                orig_index = None
            if return_type == "dataframe":
                m = atleast_2d_column_default(m, preserve_pandas=True)
                m = pandas.DataFrame(m)
                m.columns = di.column_names
                m.design_info = di
                return (m, orig_index)
            else:
                return (DesignMatrix(m, di), orig_index)
        rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
        if lhs is None:
            lhs = np.zeros((rhs.shape[0], 0), dtype=float)
        lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

        assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
        assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
        if lhs.shape[0] != rhs.shape[0]:
            raise PatsyError("shape mismatch: outcome matrix has %s rows, "
                                "predictor matrix has %s rows"
                                % (lhs.shape[0], rhs.shape[0]))
        if rhs_orig_index is not None and lhs_orig_index is not None:
            if not rhs_orig_index.equals(lhs_orig_index):
                raise PatsyError("index mismatch: outcome and "
                                    "predictor have incompatible indexes")
        if return_type == "dataframe":
            if rhs_orig_index is not None and lhs_orig_index is None:
                lhs.index = rhs.index
            if rhs_orig_index is None and lhs_orig_index is not None:
                rhs.index = lhs.index
        return (lhs, rhs)

def dmatrix(formula_like, data={}, eval_env=0,
            NA_action="drop", return_type="matrix"):
    """Construct a single design matrix given a formula_like and data.

    :arg formula_like: An object that can be used to construct a design
      matrix. See below.
    :arg data: A dict-like object that can be used to look up variables
      referenced in `formula_like`.
    :arg eval_env: Either a :class:`EvalEnvironment` which will be used to
      look up any variables referenced in `formula_like` that cannot be
      found in `data`, or else a depth represented as an
      integer which will be passed to :meth:`EvalEnvironment.capture`.
      ``eval_env=0`` means to use the context of the function calling
      :func:`dmatrix` for lookups. If calling this function from a library,
      you probably want ``eval_env=1``, which means that variables should be
      resolved in *your* caller's namespace.
    :arg NA_action: What to do with rows that contain missing values. You can
      ``"drop"`` them, ``"raise"`` an error, or for customization, pass an
      :class:`NAAction` object. See :class:`NAAction` for details on what
      values count as 'missing' (and how to alter this).
    :arg return_type: Either ``"matrix"`` or ``"dataframe"``. See below.

    The `formula_like` can take a variety of forms. You can use any of the
    following:

    * (The most common option) A formula string like ``"x1 + x2"`` (for
      :func:`dmatrix`) or ``"y ~ x1 + x2"`` (for :func:`dmatrices`). For
      details see :ref:`formulas`.
    * A :class:`ModelDesc`, which is a Python object representation of a
      formula. See :ref:`formulas` and :ref:`expert-model-specification` for
      details.
    * A :class:`DesignInfo`.
    * An object that has a method called :meth:`__patsy_get_model_desc__`.
      For details see :ref:`expert-model-specification`.
    * A numpy array_like (for :func:`dmatrix`) or a tuple
      (array_like, array_like) (for :func:`dmatrices`). These will have
      metadata added, representation normalized, and then be returned
      directly. In this case `data` and `eval_env` are
      ignored. There is special handling for two cases:

      * :class:`DesignMatrix` objects will have their :class:`DesignInfo`
        preserved. This allows you to set up custom column names and term
        information even if you aren't using the rest of the patsy
        machinery.
      * :class:`pandas.DataFrame` or :class:`pandas.Series` objects will have
        their (row) indexes checked. If two are passed in, their indexes must
        be aligned. If ``return_type="dataframe"``, then their indexes will be
        preserved on the output.

    Regardless of the input, the return type is always either:

    * A :class:`DesignMatrix`, if ``return_type="matrix"`` (the default)
    * A :class:`pandas.DataFrame`, if ``return_type="dataframe"``.

    The actual contents of the design matrix is identical in both cases, and
    in both cases a :class:`DesignInfo` object will be available in a
    ``.design_info`` attribute on the return value. However, for
    ``return_type="dataframe"``, any pandas indexes on the input (either in
    `data` or directly passed through `formula_like`) will be preserved, which
    may be useful for e.g. time-series models.

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.
    """
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
                                      NA_action, return_type)
    if lhs.shape[1] != 0:
        raise PatsyError("encountered outcome variables for a model "
                            "that does not expect them")
    return rhs

def dmatrices(formula_like, data={}, eval_env=0,
              NA_action="drop", return_type="matrix"):
    """Construct two design matrices given a formula_like and data.

    This function is identical to :func:`dmatrix`, except that it requires
    (and returns) two matrices instead of one. By convention, the first matrix
    is the "outcome" or "y" data, and the second is the "predictor" or "x"
    data.

    See :func:`dmatrix` for details.
    """
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
                                      NA_action, return_type)
    if lhs.shape[1] == 0:
        raise PatsyError("model is missing required outcome variables")
    return (lhs, rhs)