Source code for cw_geodata.vector_label.mask
from ..utils.core import _check_df_load, _check_rasterio_im_load
from ..utils.geo import geometries_internal_intersection, _check_wkt_load
import numpy as np
import pandas as pd
import rasterio
from rasterio import features
from affine import Affine
from skimage.morphology import square, erosion, dilation
def df_to_px_mask(df, channels=['footprint'], out_file=None, reference_im=None,
                  geom_col='geometry', affine_obj=None, shape=(900, 900),
                  out_type='int', burn_value=255, **kwargs):
    """Convert a dataframe of geometries to a (multi-channel) pixel mask.

    Each requested channel is produced by the matching function in this
    module (:func:`footprint_mask`, :func:`boundary_mask`,
    :func:`contact_mask`) and the results are stacked along the last axis.

    Arguments
    ---------
    df : :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame`
        A :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` instance
        with a column containing geometries (identified by `geom_col`).
        NOTE(review): this function forwards `affine_obj` but never passes
        ``do_transform`` to :func:`footprint_mask`, so geometries appear to be
        rasterized as-is (pixel coordinates) - confirm before relying on
        georeferenced input.
    channels : list, optional
        The mask channels to generate. There are three values that this can
        contain:

        - ``"footprint"``: Create a full footprint mask, with 0s at pixels
          that don't fall within geometries and `burn_value` at pixels that
          do.
        - ``"boundary"``: Create a mask with geometries outlined. Use
          `boundary_width` to set how thick the boundary will be drawn.
        - ``"contact"``: Create a mask with regions between >= 2 closely
          juxtaposed geometries labeled. Use `contact_spacing` to set the
          maximum spacing between polygons to be labeled.

        Each channel corresponds to its own `shape` plane in the output. A
        single channel name may be passed as a bare `str`. The default list
        is never mutated by this function.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes). The file is written with the
        reference image's metadata, one band per channel, dtype ``uint8``.
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        affine transformation matrix, the image extent, etc. If provided,
        `shape` is ignored.
    geom_col : str, optional
        The column containing geometries in `df`. Defaults to ``"geometry"``.
    affine_obj : `list` or :class:`affine.Affine`, optional
        Affine transformation forwarded to the per-channel functions.
    shape : tuple, optional
        An ``(x_size, y_size)`` tuple defining the pixel extent of the output
        mask. Ignored if `reference_im` is provided.
    out_type : 'float' or 'int'
        Forwarded unchanged to the per-channel functions.
    burn_value : `int` or `float`
        The value to use for labeling objects in the mask. Defaults to 255 (the
        max value for ``uint8`` arrays).
    kwargs
        Only ``boundary_width`` (default 3), ``boundary_type`` (default
        ``"inner"``) and ``contact_spacing`` (default 10) are read; any other
        keyword is ignored.

    Returns
    -------
    mask : :class:`numpy.array`
        A pixel mask with 0s for non-object pixels and `burn_value` at object
        pixels. Shape will be ``(shape[0], shape[1], len(channels))``, with
        channels ordered per the provided `channels` `list`.

    Raises
    ------
    ValueError
        If `out_file` is given without `reference_im`.
    KeyError
        If `channels` contains a name other than the three listed above.
    """
    # fail fast, before any rasterization work, consistent with the
    # single-channel functions in this module
    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')

    if isinstance(channels, str):  # e.g. if "contact", not ["contact"]
        channels = [channels]

    if reference_im:
        # load once up front so a path is not re-opened by every channel
        # function (they pass already-loaded images through the same helper)
        reference_im = _check_rasterio_im_load(reference_im)

    mask_dict = {}
    if 'footprint' in channels:
        mask_dict['footprint'] = footprint_mask(
            df=df, reference_im=reference_im, geom_col=geom_col,
            affine_obj=affine_obj, shape=shape, out_type=out_type,
            burn_value=burn_value
        )
    if 'boundary' in channels:
        # reuse the footprint channel when it was already computed; otherwise
        # boundary_mask builds its own from the forwarded footprint arguments
        mask_dict['boundary'] = boundary_mask(
            footprint_msk=mask_dict.get('footprint', None),
            reference_im=reference_im, geom_col=geom_col,
            boundary_width=kwargs.get('boundary_width', 3),
            boundary_type=kwargs.get('boundary_type', 'inner'),
            burn_value=burn_value, df=df, affine_obj=affine_obj,
            shape=shape, out_type=out_type
        )
    if 'contact' in channels:
        mask_dict['contact'] = contact_mask(
            df=df, reference_im=reference_im, geom_col=geom_col,
            affine_obj=affine_obj, shape=shape, out_type=out_type,
            contact_spacing=kwargs.get('contact_spacing', 10),
            burn_value=burn_value
        )

    # channels-last stack, in the order the caller requested
    output_arr = np.stack([mask_dict[c] for c in channels], axis=-1)

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=output_arr.shape[-1])
        meta.update(dtype='uint8')
        with rasterio.open(out_file, 'w', **meta) as dst:
            # rasterio band indexes are 1-based; array channels are 0-based
            for band_idx in range(1, 1 + output_arr.shape[-1]):
                dst.write(output_arr[:, :, band_idx - 1], indexes=band_idx)

    return output_arr
def footprint_mask(df, out_file=None, reference_im=None, geom_col='geometry',
                   do_transform=False, affine_obj=None, shape=(900, 900),
                   out_type='int', burn_value=255, burn_field=None):
    """Convert a dataframe of geometries to a filled-footprint pixel mask.

    Arguments
    ---------
    df : :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame`
        A :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` instance
        with a column containing geometries (identified by `geom_col`). If the
        geometries in `df` are not in pixel coordinates, set
        ``do_transform=True`` and pass `affine_obj` or `reference_im` to
        provide the transformation to convert.
        NOTE(review): the `geom_col` column is re-assigned on the object
        returned by ``_check_df_load``; if that is the caller's own frame it
        is modified in place - confirm against that helper.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes).
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        image extent and, when ``do_transform=True``, the affine
        transformation matrix. If provided, `shape` is ignored, and
        `affine_obj` is ignored as well.
    geom_col : str, optional
        The column containing geometries in `df`. Defaults to ``"geometry"``.
    do_transform : bool, optional
        Should the values in `df` be transformed from geospatial coordinates
        to pixel coordinates? Defaults to no (False), in which case an
        identity transform is used. If True, either `reference_im` or
        `affine_obj` must be provided as a source for the required affine
        transformation matrix.
    affine_obj : `list` or :class:`affine.Affine`, optional
        Affine transformation to use to convert from geo coordinates to pixel
        space. Ignored if `reference_im` is provided or if
        ``do_transform=False``.
    shape : tuple, optional
        An ``(x_size, y_size)`` tuple defining the pixel extent of the output
        mask. Ignored if `reference_im` is provided.
    out_type : 'float' or 'int'
        Selects the dtype that `burn_field` values are cast to before
        rasterizing: ``uint8`` for ``'int'``, ``float32`` otherwise. Also
        used as the dtype of the all-zero mask returned for an empty `df`.
    burn_value : `int` or `float`, optional
        The value to use for labeling objects in the mask. Defaults to 255 (the
        max value for ``uint8`` arrays). Ignored if `burn_field` is provided.
    burn_field : str, optional
        Name of a column in `df` that provides values for `burn_value` for each
        independent object. If provided, `burn_value` is ignored.

    Returns
    -------
    mask : :class:`numpy.array`
        A pixel mask with 0s for non-object pixels and the burn value at
        object pixels. An all-zero mask is returned when `df` has no rows.

    Raises
    ------
    ValueError
        If `out_file` is given without `reference_im`, or if
        ``do_transform=True`` but neither `reference_im` nor `affine_obj`
        is supplied.
    """
    # start with required checks and pre-population of values
    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')
    if do_transform and not reference_im and affine_obj is None:
        raise ValueError(
            'If `do_transform` is True, `reference_im` or `affine_obj` must '
            'be provided.')

    df = _check_df_load(df)
    df[geom_col] = df[geom_col].apply(_check_wkt_load)  # load in geoms if wkt

    if not do_transform:
        affine_obj = Affine(1, 0, 0, 0, 1, 0)  # identity transform
    if reference_im:
        reference_im = _check_rasterio_im_load(reference_im)
        shape = reference_im.shape
        if do_transform:
            affine_obj = reference_im.transform

    # dtype for per-object burn values; previously both out_type branches
    # cast to uint8, which made out_type='float' a no-op
    burn_dtype = 'uint8' if out_type == 'int' else 'float32'

    if len(df) == 0:
        # nothing to burn: rasterizing an empty shape list is an error, so
        # return the background-only mask directly
        output_arr = np.zeros(shape=shape, dtype=burn_dtype)
    else:
        # extract geometries and pair them with burn values
        if burn_field:
            burn_values = df[burn_field].astype(burn_dtype)
        else:
            burn_values = [burn_value] * len(df)
        feature_list = list(zip(df[geom_col], burn_values))
        output_arr = features.rasterize(shapes=feature_list, out_shape=shape,
                                        transform=affine_obj)

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=1)
        if out_type == 'int':
            meta.update(dtype='uint8')
        else:
            # keep the file dtype in step with the array actually written
            meta.update(dtype=output_arr.dtype.name)
        with rasterio.open(out_file, 'w', **meta) as dst:
            dst.write(output_arr, indexes=1)

    return output_arr
def boundary_mask(footprint_msk=None, out_file=None, reference_im=None,
                  boundary_width=3, boundary_type='inner', burn_value=255,
                  **kwargs):
    """Create a pixel mask outlining the objects of a footprint mask.

    Notes
    -----
    This function requires creation of a footprint mask before it can operate;
    therefore, if there is no footprint mask already present, it will create
    one. In that case, additional arguments for :func:`footprint_mask` (e.g.
    ``df``) must be passed.

    Arguments
    ---------
    footprint_msk : :class:`numpy.array`, optional
        A filled in footprint mask created using :func:`footprint_mask`. If not
        provided, one will be made by calling :func:`footprint_mask` before
        creating the boundary mask, and the required arguments for that
        function must be provided as kwargs.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes). Written as a single ``uint8`` band.
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        affine transformation matrix, the image extent, etc. Only used here
        for output metadata and when a footprint mask has to be created.
    boundary_width : int, optional
        Side length, in pixels, of the square structuring element used for
        the erosion/dilation. Defaults to 3.
    boundary_type : ``"inner"`` or ``"outer"``, optional
        Where to draw the boundaries: within the object (``"inner"``, via
        erosion) or outside of it (``"outer"``, via dilation). Defaults to
        ``"inner"``.
    burn_value : `int`, optional
        The value to use for labeling boundary pixels in the mask. Defaults to
        255 (the max value for ``uint8`` arrays).
    **kwargs : optional
        Additional arguments to pass to :func:`footprint_mask` if one needs to
        be created. Unused when `footprint_msk` is supplied.

    Returns
    -------
    boundary_mask : :class:`numpy.array`
        A pixel mask with 0s for non-boundary pixels and `burn_value` at the
        boundary pixels of each object.

    Raises
    ------
    ValueError
        If `out_file` is given without `reference_im`, or if `boundary_type`
        is not ``"inner"`` or ``"outer"``.
    """
    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')
    # validate before doing any work; an unknown value previously surfaced
    # as an unbound-local error further down
    if boundary_type not in ('inner', 'outer'):
        raise ValueError(
            '`boundary_type` must be "inner" or "outer", got '
            '{!r}.'.format(boundary_type))

    if reference_im:
        reference_im = _check_rasterio_im_load(reference_im)

    # need to have a footprint mask for this function, so make it if not given
    if footprint_msk is None:
        footprint_msk = footprint_mask(reference_im=reference_im,
                                       burn_value=burn_value, **kwargs)

    # perform dilation or erosion of the footprint to get the shifted edge
    strel = square(boundary_width)
    if boundary_type == 'outer':
        morphed_msk = dilation(footprint_msk, strel)
    else:  # 'inner'
        morphed_msk = erosion(footprint_msk, strel)

    # xor between the morphed and original masks leaves _just_ the pixels
    # that changed, i.e. the boundary band
    edge_msk = morphed_msk ^ footprint_msk
    # binarize first so the result is exactly burn_value, whatever the xor
    # produced
    edge_px = edge_msk > 0
    output_arr = edge_px.astype('uint8') * burn_value

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=1)
        meta.update(dtype='uint8')
        with rasterio.open(out_file, 'w', **meta) as dst:
            dst.write(output_arr, indexes=1)

    return output_arr
def contact_mask(df, out_file=None, reference_im=None, geom_col='geometry',
                 affine_obj=None, shape=(900, 900), out_type='int',
                 contact_spacing=10, burn_value=255, do_transform=False):
    """Create a pixel mask labeling closely juxtaposed objects.

    Notes
    -----
    This function identifies pixels in an image that do not correspond to
    objects, but fall within `contact_spacing` of >1 labeled object. Each
    geometry is buffered by half of `contact_spacing`; the overlap of the
    buffered geometries, minus the original footprints, is the contact area.

    Arguments
    ---------
    df : :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame`
        A :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` instance
        with a column containing geometries (identified by `geom_col`). If the
        geometries in `df` are not in pixel coordinates, set
        ``do_transform=True`` and pass `affine_obj` or `reference_im`.
        NOTE(review): the `geom_col` column is re-assigned on the object
        returned by ``_check_df_load``; if that is the caller's own frame it
        is modified in place - confirm against that helper.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes).
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        affine transformation matrix, the image extent, etc. If provided,
        `shape` is ignored.
    geom_col : str, optional
        The column containing geometries in `df`. Defaults to ``"geometry"``.
    affine_obj : `list` or :class:`affine.Affine`, optional
        Affine transformation to use to convert from geo coordinates to pixel
        space. Only has an effect when ``do_transform=True``; it is forwarded
        to :func:`footprint_mask`.
    shape : tuple, optional
        An ``(x_size, y_size)`` tuple defining the pixel extent of the output
        mask. Ignored if `reference_im` is provided.
    out_type : 'float' or 'int'
        Forwarded to :func:`footprint_mask`; also selects the dtype recorded
        in the output file metadata.
    contact_spacing : `int` or `float`, optional
        The desired maximum distance between adjacent polygons to be labeled
        as contact. `contact_spacing` will be in the same units as `df` 's
        geometries, not necessarily in pixel units.
    burn_value : `int` or `float`, optional
        The value to use for labeling contact pixels in the mask. Defaults to
        255 (the max value for ``uint8`` arrays).
    do_transform : bool, optional
        Forwarded to :func:`footprint_mask`: whether geometries should be
        transformed from geospatial to pixel coordinates. Defaults to False,
        which matches the previous behavior of this function.

    Returns
    -------
    mask : :class:`numpy.array`
        A pixel mask with `burn_value` at contact pixels and 0 elsewhere. An
        all-zero mask is returned when `df` is empty or no buffered
        geometries overlap.

    Raises
    ------
    ValueError
        If `out_file` is given without `reference_im`.
    """
    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')

    df = _check_df_load(df)
    df[geom_col] = df[geom_col].apply(_check_wkt_load)  # load in geoms if wkt

    if reference_im:
        reference_im = _check_rasterio_im_load(reference_im)
        # the reference image dictates the extent, as in footprint_mask
        shape = reference_im.shape

    # default result: no contact anywhere
    contact_px = np.zeros(shape, dtype=bool)

    if len(df) > 0:
        # grow geometries by half `contact_spacing` to find overlaps
        half_spacing = contact_spacing / 2
        buffered_geoms = df[geom_col].apply(lambda g: g.buffer(half_spacing))
        # create a single geometry that covers all of the intersections
        intersect_poly = geometries_internal_intersection(buffered_geoms)

        # presumably a shapely geometry (it is rasterized below); an empty
        # one means nothing overlaps and cannot be rasterized, so skip it
        if not getattr(intersect_poly, 'is_empty', False):
            # create a small df containing the intersections to make a
            # footprint from
            df_for_footprint = pd.DataFrame({'shape_name': ['overlap'],
                                             'geometry': [intersect_poly]})
            # use `footprint_mask` to create the overlap mask
            contact_msk = footprint_mask(
                df_for_footprint, reference_im=reference_im,
                geom_col='geometry', do_transform=do_transform,
                affine_obj=affine_obj, shape=shape, out_type=out_type,
                burn_value=burn_value)
            footprint_msk = footprint_mask(
                df, reference_im=reference_im, geom_col=geom_col,
                do_transform=do_transform, affine_obj=affine_obj,
                shape=shape, out_type=out_type, burn_value=burn_value)
            # contact pixels must not belong to any object themselves
            contact_msk[footprint_msk > 0] = 0
            contact_px = contact_msk > 0

    output_arr = contact_px.astype('uint8') * burn_value

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=1)
        if out_type == 'int':
            meta.update(dtype='uint8')
        else:
            # keep the file dtype in step with the array actually written
            meta.update(dtype=output_arr.dtype.name)
        with rasterio.open(out_file, 'w', **meta) as dst:
            dst.write(output_arr, indexes=1)

    return output_arr