/usr/lib/python3/dist-packages/xarray/backends/rasterio_.py is in python3-xarray 0.10.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
import os
from collections import OrderedDict
from distutils.version import LooseVersion
import warnings
import numpy as np
from .. import DataArray
from ..core import indexing
from ..core.utils import is_scalar
from .common import BackendArray
# Prefer dask's SerializableLock, which survives pickling when tasks are
# shipped to dask workers; fall back to a plain threading lock otherwise.
try:
    from dask.utils import SerializableLock as Lock
except ImportError:
    from threading import Lock

# Global lock serializing access to rasterio datasets across threads.
RASTERIO_LOCK = Lock()

# NOTE(review): _ERROR_MSG is defined but not referenced anywhere in this
# file chunk -- presumably raised by indexing paths elsewhere; verify.
# Note also the missing space between "ds.load()" and "first." in the
# concatenated message (left as-is: runtime string).
_ERROR_MSG = ('The kind of indexing operation you are trying to do is not '
              'valid on rasterio files. Try to load your data with ds.load()'
              'first.')
class RasterioArrayWrapper(BackendArray):
    """A lazy, array-like wrapper around an open rasterio dataset.

    Exposes the dataset as a single 3-D array with dims (band, y, x) and
    translates xarray explicit indexers into rasterio windowed reads.
    """

    def __init__(self, rasterio_ds):
        # rasterio_ds: an already-open rasterio dataset; kept open so
        # __getitem__ can read windows lazily.
        self.rasterio_ds = rasterio_ds
        # Shape is (band, y, x) built from rasterio's count/height/width.
        self._shape = (rasterio_ds.count, rasterio_ds.height,
                       rasterio_ds.width)
        self._ndims = len(self.shape)

    @property
    def dtype(self):
        # The wrapper presents one homogeneous array, so all bands must
        # share a dtype; rasterio reports one dtype string per band.
        dtypes = self.rasterio_ds.dtypes
        if not np.all(np.asarray(dtypes) == dtypes[0]):
            raise ValueError('All bands should have the same dtype')
        return np.dtype(dtypes[0])

    @property
    def shape(self):
        return self._shape

    def _get_indexer(self, key):
        """ Get indexer for rasterio array.

        Parameters
        ----------
        key: ExplicitIndexer

        Returns
        -------
        band_key: an indexer for the 1st dimension
        window: two tuples. Each consists of (start, stop).
        squeeze_axis: axes to be squeezed
        np_ind: indexer for loaded numpy array

        See also
        --------
        indexing.decompose_indexer
        """
        # Split the request into an OUTER indexer rasterio can serve
        # directly plus a numpy indexer applied to the loaded result.
        key, np_inds = indexing.decompose_indexer(
            key, self.shape, indexing.IndexingSupport.OUTER)

        # bands cannot be windowed but they can be listed
        band_key = key.tuple[0]
        new_shape = []
        np_inds2 = []
        # bands (axis=0) cannot be windowed but they can be listed
        if isinstance(band_key, slice):
            start, stop, step = band_key.indices(self.shape[0])
            band_key = np.arange(start, stop, step)
        # be sure we give out a list; the + 1 converts to rasterio's
        # 1-based band indexes (cf. riods.indexes)
        band_key = (np.asarray(band_key) + 1).tolist()
        if isinstance(band_key, list):  # if band_key is not a scalar
            new_shape.append(len(band_key))
            np_inds2.append(slice(None))

        # but other dims can only be windowed
        window = []
        squeeze_axis = []
        for i, (k, n) in enumerate(zip(key.tuple[1:], self.shape[1:])):
            if isinstance(k, slice):
                # step is always positive. see indexing.decompose_indexer
                start, stop, step = k.indices(n)
                np_inds2.append(slice(None, None, step))
                new_shape.append(stop - start)
            elif is_scalar(k):
                # windowed operations will always return an array
                # we will have to squeeze it later
                # i=0 -> axis -2 (y), i=1 -> axis -1 (x) of the read result
                squeeze_axis.append(- (2 - i))
                start = k
                stop = k + 1
            else:
                # fancy (array) indexing: read the covering window, then
                # subset the loaded array with k shifted to window origin
                start, stop = np.min(k), np.max(k) + 1
                np_inds2.append(k - start)
                new_shape.append(stop - start)
            window.append((start, stop))

        # Merge the post-load subset with the indexer deferred earlier.
        np_inds = indexing._combine_indexers(
            indexing.OuterIndexer(tuple(np_inds2)), new_shape, np_inds)
        return band_key, window, tuple(squeeze_axis), np_inds

    def __getitem__(self, key):
        band_key, window, squeeze_axis, np_inds = self._get_indexer(key)

        # Windowed read: rasterio expects ((row_start, row_stop),
        # (col_start, col_stop)).
        out = self.rasterio_ds.read(band_key, window=tuple(window))
        if squeeze_axis:
            out = np.squeeze(out, axis=squeeze_axis)
        return indexing.NumpyIndexingAdapter(out)[np_inds]
def _parse_envi(meta):
"""Parse ENVI metadata into Python data structures.
See the link for information on the ENVI header file format:
http://www.harrisgeospatial.com/docs/enviheaderfiles.html
Parameters
----------
meta : dict
Dictionary of keys and str values to parse, as returned by the rasterio
tags(ns='ENVI') call.
Returns
-------
parsed_meta : dict
Dictionary containing the original keys and the parsed values
"""
def parsevec(s):
return np.fromstring(s.strip('{}'), dtype='float', sep=',')
def default(s):
return s.strip('{}')
parse = {'wavelength': parsevec,
'fwhm': parsevec}
parsed_meta = {k: parse.get(k, default)(v) for k, v in meta.items()}
return parsed_meta
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
                  lock=None):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename : str
        Path to the file to open.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new
        DataArray into a dask array.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.

    Returns
    -------
    data : DataArray
        The newly created DataArray.
    """
    import rasterio
    riods = rasterio.open(filename, mode='r')

    if cache is None:
        # cache in memory by default, unless the caller asked for dask chunks
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError('Unknown dims')
    # rasterio's 1-based band indexes become the 'band' coordinate
    coords['band'] = np.asarray(riods.indexes)

    # Get coordinates
    if LooseVersion(rasterio.__version__) < '1.0':
        # rasterio < 1.0 exposes the affine transform as .affine
        transform = riods.affine
    else:
        transform = riods.transform
    if transform.is_rectilinear:
        # 1d coordinates
        parse = True if parse_coordinates is None else parse_coordinates
        if parse:
            nx, ny = riods.width, riods.height
            # xarray coordinates are pixel centered
            x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * transform
            _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * transform
            coords['y'] = y
            coords['x'] = x
    else:
        # 2d coordinates
        parse = False if (parse_coordinates is None) else parse_coordinates
        if parse:
            warnings.warn("The file coordinates' transformation isn't "
                          "rectilinear: xarray won't parse the coordinates "
                          "in this case. Set `parse_coordinates=False` to "
                          "suppress this warning.",
                          RuntimeWarning, stacklevel=3)

    # Attributes
    attrs = dict()
    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see https://github.com/sgillies/affine)
    attrs['transform'] = tuple(transform)[:6]
    if hasattr(riods, 'crs') and riods.crs:
        # CRS is a dict-like object specific to rasterio
        # If CRS is not None, we convert it back to a PROJ4 string using
        # rasterio itself
        attrs['crs'] = riods.crs.to_string()
    if hasattr(riods, 'res'):
        # (width, height) tuple of pixels in units of CRS
        attrs['res'] = riods.res
    if hasattr(riods, 'is_tiled'):
        # Is the TIF tiled? (bool)
        # We cast it to an int for netCDF compatibility
        attrs['is_tiled'] = np.uint8(riods.is_tiled)
    with warnings.catch_warnings():
        # casting riods.transform to a tuple makes this future proof
        warnings.simplefilter('ignore', FutureWarning)
        if hasattr(riods, 'transform'):
            # Affine transformation matrix (tuple of floats)
            # Describes coefficients mapping pixel coordinates to CRS
            # NOTE(review): this overwrites the 6-element 'transform' set a
            # few lines above with the full tuple(riods.transform) -- confirm
            # the duplicated assignment is intentional.
            attrs['transform'] = tuple(riods.transform)
    if hasattr(riods, 'nodatavals'):
        # The nodata values for the raster bands
        attrs['nodatavals'] = tuple([np.nan if nodataval is None else nodataval
                                     for nodataval in riods.nodatavals])

    # Parse extra metadata from tags, if supported
    parsers = {'ENVI': _parse_envi}

    driver = riods.driver
    if driver in parsers:
        meta = parsers[driver](riods.tags(ns=driver))

        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if isinstance(v, (list, np.ndarray)) and len(v) == riods.count:
                coords[k] = ('band', np.asarray(v))
            else:
                attrs[k] = v

    # Lazy wrapper: data is only read from disk when indexed.
    data = indexing.LazilyOuterIndexedArray(RasterioArrayWrapper(riods))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and (chunks is None):
        data = indexing.MemoryCachedArray(data)

    result = DataArray(data=data, dims=('band', 'y', 'x'),
                       coords=coords, attrs=attrs)

    if chunks is not None:
        from dask.base import tokenize
        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None
        token = tokenize(filename, mtime, chunks)
        name_prefix = 'open_rasterio-%s' % token
        if lock is None:
            lock = RASTERIO_LOCK
        result = result.chunk(chunks, name_prefix=name_prefix, token=token,
                              lock=lock)

    # Make the file closeable
    result._file_obj = riods

    return result