Skip to content

create

MissingColumnsError

Bases: ValueError

Raised when target columns are missing from the GeoDataFrame.

Source code in src/rastr/create.py
44
45
class MissingColumnsError(ValueError):
    """Raised when target columns are missing from the GeoDataFrame."""

NonNumericColumnsError

Bases: ValueError

Raised when target columns contain non-numeric data.

Source code in src/rastr/create.py
48
49
class NonNumericColumnsError(ValueError):
    """Raised when target columns contain non-numeric data."""

OverlappingGeometriesError

Bases: RasterizationError

Raised when geometries overlap, which could lead to data loss.

Source code in src/rastr/create.py
56
57
class OverlappingGeometriesError(RasterizationError):
    """Raised when geometries overlap, which could lead to data loss."""

RasterizationError

Bases: ValueError

Base exception for rasterization errors.

Source code in src/rastr/create.py
52
53
class RasterizationError(ValueError):
    """Base exception for rasterization errors."""

full_raster(raster_meta, *, bounds, fill_value=np.nan)

Create a raster with a specified fill value for all cells.

Source code in src/rastr/create.py
142
143
144
145
146
147
148
149
150
151
def full_raster(
    raster_meta: RasterMeta,
    *,
    bounds: tuple[float, float, float, float],
    fill_value: float = np.nan,
) -> Raster:
    """Create a raster with a specified fill value for all cells."""
    shape = get_point_grid_shape(bounds=bounds, cell_size=raster_meta.cell_size)
    arr = np.full(shape, fill_value, dtype=np.float32)
    return Raster(arr=arr, raster_meta=raster_meta)

raster_distance_from_polygon(polygon, *, raster_meta, extent_polygon=None, snap_raster=None, show_pbar=False)

Make a raster where each cell's value is its centre's distance to a polygon.

The raster should use a projected coordinate system.

Parameters:

Name Type Description Default
polygon Polygon

Polygon to measure distances to.

required
raster_meta RasterMeta

Raster configuration (giving cell_size, CRS, etc.).

required
extent_polygon Polygon | None

Polygon for raster cell extent; The bounding box of this polygon is the bounding box of the output raster. Cells outside this polygon but within the bounding box will be NaN-valued, and cells will not be generated centred outside the bounding box of this polygon.

None
snap_raster Raster | None

An alternative to using the extent_polygon. If provided, the raster must have the exact same cell alignment as the snap_raster.

None
show_pbar bool

Whether to show a progress bar during the distance calculation.

False

Returns:

Type Description
Raster

Array storing the distance between cell centres and the polygon. Cell are

Raster

NaN-valued if they are within the polygon or outside the extent polygon.

Raises:

Type Description
ValueError

If the provided CRS is geographic (lat/lon).

Source code in src/rastr/create.py
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def raster_distance_from_polygon(
    polygon: Polygon,
    *,
    raster_meta: RasterMeta,
    extent_polygon: Polygon | None = None,
    snap_raster: Raster | None = None,
    show_pbar: bool = False,
) -> Raster:
    """Make a raster where each cell's value is its centre's distance to a polygon.

    The raster should use a projected coordinate system.

    Parameters:
        polygon: Polygon to measure distances to.
        raster_meta: Raster configuration (giving cell_size, CRS, etc.).
        extent_polygon: Polygon for raster cell extent; The bounding box of this
                        polygon is the bounding box of the output raster. Cells outside
                        this polygon but within the bounding box will be NaN-valued, and
                        cells will not be generated centred outside the bounding box of
                        this polygon.
        snap_raster: An alternative to using the extent_polygon. If provided, the raster
                     must have the exact same cell alignment as the snap_raster.
        show_pbar: Whether to show a progress bar during the distance calculation.

    Returns:
        Array storing the distance between cell centres and the polygon. Cell are
        NaN-valued if they are within the polygon or outside the extent polygon.

    Raises:
        ValueError: If the provided CRS is geographic (lat/lon).
    """
    if show_pbar and not TQDM_INSTALLED:
        msg = "The 'tqdm' package is not installed. Progress bars will not be shown."
        warnings.warn(msg, UserWarning, stacklevel=2)
        show_pbar = False

    # Check if the provided CRS is projected (cartesian)
    if raster_meta.crs.is_geographic:
        err_msg = (
            "The provided CRS is geographic (lat/lon). Please use a projected CRS."
        )
        raise ValueError(err_msg)

    if extent_polygon is None and snap_raster is None:
        err_msg = "Either 'extent_polygon' or 'snap_raster' must be provided. "
        raise ValueError(err_msg)
    elif extent_polygon is not None and snap_raster is not None:
        err_msg = "Only one of 'extent_polygon' or 'snap_raster' can be provided. "
        raise ValueError(err_msg)
    elif extent_polygon is None and snap_raster is not None:
        # Calculate the coordinates
        x, y = snap_raster.get_xy()

        # Create a mask to identify points for which distance should be calculated
        distance_extent = snap_raster.bbox.difference(polygon)
    elif extent_polygon is not None and snap_raster is None:
        x, y = create_point_grid(
            bounds=extent_polygon.bounds,
            cell_size=raster_meta.cell_size,
        )
        distance_extent = extent_polygon.difference(polygon)
    else:
        raise AssertionError

    pts = [Point(x, y) for x, y in zip(x.flatten(), y.flatten(), strict=True)]

    _pts = _pbar(pts, desc="Finding points within extent") if show_pbar else pts
    mask = [distance_extent.intersects(pt) for pt in _pts]

    _pts = _pbar(pts, desc="Calculating distances") if show_pbar else pts
    distances = np.where(mask, np.array([polygon.distance(pt) for pt in _pts]), np.nan)
    distance_raster = distances.reshape(x.shape)

    return Raster(arr=distance_raster, raster_meta=raster_meta)

raster_from_contours(values, *, geometry, crs=None, cell_size=None)

Create a raster from contour geometries with associated values.

If differently-valued contours intersect, the mean value at the intersection points is used.

Parameters:

Name Type Description Default
values Collection[float]

Collection of contour values.

required
geometry Collection[BaseGeometry] | GeoSeries

Collection contour geometries (e.g., LineStrings).

required
crs CRS | str | None

Coordinate reference system for the (x, y) coordinates.

None
cell_size tuple[float, float] | float | None

Desired cell size for the raster. If None, a heuristic is used based on the spacing between (x, y) points.

None

Returns:

Type Description
Raster

Raster containing the rasterized contour values.

Source code in src/rastr/create.py
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
def raster_from_contours(
    values: Collection[float],
    *,
    geometry: Collection[BaseGeometry] | gpd.GeoSeries,
    crs: CRS | str | None = None,
    cell_size: tuple[float, float] | float | None = None,
) -> Raster:
    """Create a raster from contour geometries with associated values.

    If differently-valued contours intersect, the mean value at the intersection points
    is used.

    Args:
        values: Collection of contour values.
        geometry: Collection contour geometries (e.g., LineStrings).
        crs: Coordinate reference system for the (x, y) coordinates.
        cell_size: Desired cell size for the raster. If None, a heuristic is used based
                   on the spacing between (x, y) points.

    Returns:
        Raster containing the rasterized contour values.
    """
    # Determine CRS
    if GEOPANDAS_INSTALLED:
        import geopandas as gpd

        if isinstance(geometry, gpd.GeoSeries):
            if crs is not None:
                geometry = geometry.to_crs(crs)
            else:
                crs = geometry.crs

    if crs is None:
        msg = "CRS must be provided if geometry has no CRS."
        raise ValueError(msg)

    crs = CRS.from_user_input(crs)

    # Check that values and geometry have the same length
    if len(values) != len(geometry):
        msg = "Values and geometry must have the same length."
        raise ValueError(msg)

    # Check that there are at least two distinct contour values
    distinct_values = set(values)
    if len(distinct_values) < 2:
        msg = "At least two distinct contour values are required."
        raise ValueError(msg)

    if cell_size is None:
        cell_size = _infer_cell_size_from_geometry(geometry)

    cell_size = _ensure_pair(cell_size)

    coords: list[tuple[float, ...]] = []
    z_values: list[float] = []
    for value, geom in zip(values, geometry, strict=True):
        # Extract (x, y, z) points from contour geometries
        # Segmentize to ensure contours are treated like continuous curves instead
        # of a collection of points with gaps.
        geom = segmentize(geom, max_segment_length=min(cell_size) / 2)
        geom_coords = list(_extract_coords(geom))
        coords.extend(geom_coords)

        # Repeat each contour value for all coordinates in that contour
        z_values.extend([value] * len(geom_coords))

    coord_arr = np.asarray(coords)

    x, y = coord_arr[:, 0], coord_arr[:, 1]  # ignore z-coords from geometry
    z = np.asarray(z_values)

    # Contours sometimes touch at a single point, and thus have same (x, y) with
    # different z. We need to mean these before rasterizing.
    # assume x, y, z are numpy arrays
    # This is basically a groupby operation on (x, y) with mean aggregation on z, but
    # using pure numpy.
    points = np.column_stack((x, y))
    unique_points, inverse_idx = np.unique(points, axis=0, return_inverse=True)
    x, y = unique_points[:, 0], unique_points[:, 1]
    z = np.bincount(inverse_idx, weights=z) / np.bincount(inverse_idx)

    raster = raster_from_point_cloud(x=x, y=y, z=z, crs=crs, cell_size=cell_size)

    # Clamp to the nearest contour values to avoid floating point effects when
    # visualizing/thresholding the raster at the contour values
    for value in distinct_values:
        mask = np.isclose(raster.arr, value)
        raster.arr[mask] = value

    return raster

raster_from_point_cloud(x, y, z, *, crs, cell_size=None)

Create a raster from a point cloud via interpolation.

Interpolation is only possible within the convex hull of the points. Outside of this, cells will be NaN-valued.

Duplicate (x, y, z) triples are silently deduplicated. However, duplicate (x, y) points with different z values will raise an error.

Parameters:

Name Type Description Default
x ArrayLike

X coordinates of points.

required
y ArrayLike

Y coordinates of points.

required
z ArrayLike

Values at each (x, y) point to assign the raster.

required
crs CRS | str

Coordinate reference system for the (x, y) coordinates.

required
cell_size tuple[float, float] | float | None

Desired cell size for the raster. If None, a heuristic is used based on the spacing between (x, y) points.

None

Returns:

Type Description
Raster

Raster containing the interpolated values.

Raises:

Type Description
ValueError

If any (x, y) points have different z values, or if they are all collinear.

Source code in src/rastr/create.py
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
def raster_from_point_cloud(
    x: ArrayLike,
    y: ArrayLike,
    z: ArrayLike,
    *,
    crs: CRS | str,
    cell_size: tuple[float, float] | float | None = None,
) -> Raster:
    """Create a raster from a point cloud via interpolation.

    Interpolation is only possible within the convex hull of the points. Outside of
    this, cells will be NaN-valued.

    Duplicate (x, y, z) triples are silently deduplicated. However, duplicate (x, y)
    points with different z values will raise an error.

    Args:
        x: X coordinates of points.
        y: Y coordinates of points.
        z: Values at each (x, y) point to assign the raster.
        crs: Coordinate reference system for the (x, y) coordinates.
        cell_size: Desired cell size for the raster. If None, a heuristic is used based
                   on the spacing between (x, y) points.

    Returns:
        Raster containing the interpolated values.

    Raises:
        ValueError: If any (x, y) points have different z values, or if they are all
                    collinear.
    """

    cell_size = _ensure_pair(cell_size) if cell_size is not None else None

    crs = CRS.from_user_input(crs)
    x, y, z = _validate_xyz(
        np.asarray(x).ravel(), np.asarray(y).ravel(), np.asarray(z).ravel()
    )

    raster_meta, shape = RasterMeta.infer(x, y, cell_size=cell_size, crs=crs)
    arr = interpn_kernel(
        points=np.column_stack((x, y)),
        values=z,
        xi=np.column_stack(_get_grid(raster_meta, shape=shape)),
    ).reshape(shape)

    # We only support float rasters for now; we should preserve the input dtype if
    # possible
    if z.dtype in (np.float16, np.float32, np.float64):
        arr = arr.astype(z.dtype)
    else:
        arr = arr.astype(np.float64)

    return Raster(arr=arr, raster_meta=raster_meta)

rasterize_gdf(gdf, *, raster_meta, target_cols)

Rasterize geometries from a GeoDataFrame.

Supports polygons, points, linestrings, and other geometry types. Gaps will be set as NaN.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The geometries to rasterize (polygons, points, linestrings, etc.).

required
raster_meta RasterMeta

Metadata for the created rasters.

required
target_cols Collection[str]

A list of columns from the GeoDataFrame containing numeric datatypes. Each column will correspond to a separate raster in the output.

required

Returns:

Type Description
list[Raster]

Rasters for each column in target_cols.

Raises:

Type Description
MissingColumnsError

If any of the target columns are not found in the GeoDataFrame.

NonNumericColumnsError

If any of the target columns contain non-numeric data.

OverlappingGeometriesError

If any geometries overlap, which could lead to data loss in the rasterization process.

Source code in src/rastr/create.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def rasterize_gdf(
    gdf: gpd.GeoDataFrame,
    *,
    raster_meta: RasterMeta,
    target_cols: Collection[str],
) -> list[Raster]:
    """Rasterize geometries from a GeoDataFrame.

    Supports polygons, points, linestrings, and other geometry types.
    Gaps will be set as NaN.

    Args:
        gdf: The geometries to rasterize (polygons, points, linestrings, etc.).
        raster_meta: Metadata for the created rasters.
        target_cols: A list of columns from the GeoDataFrame containing numeric
                     datatypes. Each column will correspond to a separate raster
                     in the output.

    Returns:
        Rasters for each column in `target_cols`.

    Raises:
        MissingColumnsError: If any of the target columns are not found in the
                             GeoDataFrame.
        NonNumericColumnsError: If any of the target columns contain non-numeric data.
        OverlappingGeometriesError: If any geometries overlap, which could lead to
                                    data loss in the rasterization process.
    """
    # Validate inputs using helper functions
    _validate_columns_exist(gdf, target_cols)
    _validate_columns_numeric(gdf, target_cols)
    _validate_no_overlapping_geometries(gdf)

    # Get the bounds from the GeoDataFrame and expand them to include potential gaps
    bounds = gdf.total_bounds
    min_x, min_y, max_x, max_y = bounds
    cell_width = raster_meta.cell_width
    cell_height = raster_meta.cell_height

    # Expand bounds by at least one cell size to ensure there are potential gaps
    expanded_bounds = (
        min_x - cell_width,
        min_y - cell_height,
        max_x + cell_width,
        max_y + cell_height,
    )

    # Create point grid to get raster dimensions and transform
    shape = get_point_grid_shape(
        bounds=expanded_bounds, cell_size=raster_meta.cell_size
    )

    # Create the affine transform for rasterization
    xs, ys = get_affine_sign(raster_meta.crs)
    transform = Affine.translation(
        expanded_bounds[0], expanded_bounds[3]
    ) * Affine.scale(xs * cell_width, ys * cell_height)

    # Create rasters for each target column using rasterio.features.rasterize
    rasters = []
    for col in target_cols:
        # Create (geometry, value) pairs for rasterization
        shapes = [
            (geom, value) for geom, value in zip(gdf.geometry, gdf[col], strict=True)
        ]

        # Rasterize the geometries with their values
        raster_array = rasterio.features.rasterize(
            shapes,
            out_shape=shape,
            transform=transform,
            # Fill gaps with NaN
            fill=np.nan,  # type: ignore[reportArgumentType] docstring contradicts inferred annotation
            dtype=np.float32,
        )

        # Create Raster
        raster = Raster(arr=raster_array, raster_meta=raster_meta)
        rasters.append(raster)

    return rasters

rasterize_z_gdf(gdf, *, cell_size, crs, agg='mean')

Rasterize interpolated Z-values from geometries in a GeoDataFrame.

Handles overlapping geometries by aggregating values using a specified method. All geometries must be 3D (have Z coordinates) for interpolation to work.

The Z-value for each cell is interpolated at the cell center.

Parameters:

Name Type Description Default
gdf GeoDataFrame

GeoDataFrame containing 3D geometries with Z coordinates.

required
cell_size tuple[float, float] | float

Desired cell size for the output raster as (width, height) or a single value for square cells.

required
crs CRS | str

Coordinate reference system for the output raster.

required
agg Literal['mean', 'min', 'max']

Aggregation function to use for overlapping values ("mean", "min", "max").

'mean'

Returns:

Type Description
RasterModel

A raster of interpolated Z values.

Raises:

Type Description
ValueError

If any geometries are not 3D.

Source code in src/rastr/create.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def rasterize_z_gdf(
    gdf: gpd.GeoDataFrame,
    *,
    cell_size: tuple[float, float] | float,
    crs: CRS | str,
    agg: Literal["mean", "min", "max"] = "mean",
) -> RasterModel:
    """Rasterize interpolated Z-values from geometries in a GeoDataFrame.

    Handles overlapping geometries by aggregating values using a specified method.
    All geometries must be 3D (have Z coordinates) for interpolation to work.

    The Z-value for each cell is interpolated at the cell center.

    Args:
        gdf: GeoDataFrame containing 3D geometries with Z coordinates.
        cell_size: Desired cell size for the output raster as (width, height) or a
            single value for square cells.
        crs: Coordinate reference system for the output raster.
        agg: Aggregation function to use for overlapping values ("mean", "min", "max").

    Returns:
        A raster of interpolated Z values.

    Raises:
        ValueError: If any geometries are not 3D.
    """
    crs = CRS.from_user_input(crs)
    cell_size = _ensure_pair(cell_size)

    if len(gdf) == 0:
        msg = "Cannot rasterize an empty GeoDataFrame."
        raise ValueError(msg)

    _validate_geometries_are_3d(gdf)

    # Determine the bounds that would encompass the geometry while respecting the grid
    gdf_bounds = gdf.total_bounds
    meta, shape = RasterMeta.infer(
        x=np.array([gdf_bounds[0], gdf_bounds[2]]),
        y=np.array([gdf_bounds[1], gdf_bounds[3]]),
        cell_size=cell_size,
        crs=crs,
    )

    # Generate grid coordinates for interpolation
    x_coords, y_coords = _get_grid(meta, shape=shape)

    # Create 2D accumulation arrays
    z_stack = []
    for geom in gdf.geometry:
        z_vals = _interpolate_z_in_geometry(geom, x_coords, y_coords)
        z_stack.append(z_vals)

    if not z_stack:
        msg = (
            "No valid Z values could be interpolated from the geometries. Raster "
            "will be entirely NaN-valued."
        )
        warnings.warn(msg, stacklevel=2)
        arr = np.full(shape, np.nan, dtype=np.float64)
        return RasterModel(arr=arr, raster_meta=meta)

    z_stack = np.array(z_stack)  # Shape: (N, height * width)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", category=RuntimeWarning, message="All-NaN slice encountered"
        )
        warnings.filterwarnings(
            "ignore", category=RuntimeWarning, message="Mean of empty slice"
        )

        if agg == "mean":
            z_agg = np.nanmean(z_stack, axis=0)
        elif agg == "min":
            z_agg = np.nanmin(z_stack, axis=0)
        elif agg == "max":
            z_agg = np.nanmax(z_stack, axis=0)
        else:
            assert_never(agg)

    arr = np.asarray(z_agg, dtype=np.float64).reshape(shape)

    return RasterModel(arr=arr, raster_meta=meta)