Skip to content

API Reference

geomedian(in_array, **kwargs)

Compute the geomedian of a 4D array (y, x, band, time).

Parameters:

Name Type Description Default
in_array ndarray

Input array with shape (y, x, band, time).

required
**kwargs

Additional keyword arguments for geomedian computation. - num_threads (int, optional): Number of threads to use. Default is 1. - eps (float, optional): Convergence threshold. Default is 1e-6. - maxiters (int, optional): Maximum number of iterations. Default is 1000. - scale (float, optional): Scaling factor for integer arrays. Default is 1.0. - offset (float, optional): Offset for integer arrays. Default is 0.0. - nodata (int or float, optional): Nodata value for the array. Default depends on dtype.

{}

Returns:

Type Description
ndarray

np.ndarray: Geomedian array with shape (y, x, band).

Raises:

Type Description
ValueError

If input array does not have 4 dimensions.

TypeError

If input dtype is not supported.

Source code in datacube_compute/__init__.py
def geomedian(in_array, **kwargs) -> np.ndarray:
    """
    Compute the geomedian of a 4D array (y, x, band, time).

    Args:
        in_array (np.ndarray): Input array with shape (y, x, band, time).
        **kwargs: Additional keyword arguments for geomedian computation.
            - num_threads (int, optional): Number of threads to use. Default is 1.
            - eps (float, optional): Convergence threshold. Default is 1e-6.
            - maxiters (int, optional): Maximum number of iterations. Default is 1000.
            - scale (float, optional): Scaling factor for integer arrays. Default is 1.0.
            - offset (float, optional): Offset for integer arrays. Default is 0.0.
            - nodata (int or float, optional): Nodata value for the array. Default depends on dtype.

    Returns:
        np.ndarray: Geomedian array with shape (y, x, band).

    Raises:
        ValueError: If input array does not have 4 dimensions.
        TypeError: If input dtype is not supported.
    """
    kwargs.setdefault("num_threads", 1)
    kwargs.setdefault("eps", 1e-6)
    kwargs.setdefault("maxiters", 1000)
    kwargs.setdefault("scale", 1.0)
    kwargs.setdefault("offset", 0.0)

    if len(in_array.shape) != 4:
        raise ValueError(
            (
                "in_array: expected array to have 4 dimensions in format"
                f"(y, x, band, time), found {len(in_array.shape)} dimensions."
            )
        )

    if in_array.dtype == np.float32:
        kwargs.setdefault("nodata", np.nan)
        return _geomedian(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["scale"],
            kwargs["offset"],
        )
    elif in_array.dtype == np.int16:
        kwargs.setdefault("nodata", -1)
        return _geomedian_int16(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["nodata"],
            kwargs["scale"],
            kwargs["offset"],
        )
    elif in_array.dtype == np.uint16:
        kwargs.setdefault("nodata", 0)
        return _geomedian_uint16(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["nodata"],
            kwargs["scale"],
            kwargs["offset"],
        )
    else:
        raise TypeError(
            (
                "in_array: expected dtype to be one of "
                "{np.float32}, {np.int16}, {np.uint16}, found {in_array.dtype}."
            )
        )

geomedian_block_processor(input, nodata=None, scale=1, offset=0, eps=1e-06, maxiters=1000, num_threads=1, dim='time', is_float=True)

Process a chunk of an xarray Dataset to compute the geomedian and MADS statistics.

Parameters:

Name Type Description Default
input Dataset

Input dataset chunk.

required
nodata int or float

Nodata value. If None, will be inferred from input attributes.

None
scale float

Scaling factor for integer arrays. Default is 1.

1
offset float

Offset for integer arrays. Default is 0.

0
eps float

Convergence threshold. Default is 1e-6.

1e-06
maxiters int

Maximum number of iterations. Default is 1000.

1000
num_threads int

Number of threads to use. Default is 1.

1
dim str

Name of the time/spec dimension. Default is "time".

'time'
is_float bool

If True, input is assumed to be float32, else integer. Default is True.

True

Returns:

Type Description
Dataset

xr.Dataset: Dataset containing geomedian, emad, smad, bcmad, and count arrays.

Raises:

Type Description
ValueError

If multiple nodata values are found in input data variables.

Source code in datacube_compute/__init__.py
def geomedian_block_processor(
    input: xr.Dataset,
    nodata=None,
    scale=1,
    offset=0,
    eps=1e-6,
    maxiters=1000,
    num_threads=1,
    dim="time",
    is_float=True,
) -> xr.Dataset:
    """
    Process a chunk of an xarray Dataset to compute the geomedian and MADS statistics.

    Args:
        input (xr.Dataset): Input dataset chunk.
        nodata (int or float, optional): Nodata value. If None, will be inferred from input attributes.
        scale (float, optional): Scaling factor for integer arrays. Default is 1.
        offset (float, optional): Offset for integer arrays. Default is 0.
        eps (float, optional): Convergence threshold. Default is 1e-6.
        maxiters (int, optional): Maximum number of iterations. Default is 1000.
        num_threads (int, optional): Number of threads to use. Default is 1.
        dim (str, optional): Name of the time/spec dimension. Default is "time".
        is_float (bool, optional): If True, input is assumed to be float32, else integer. Default is True.

    Returns:
        xr.Dataset: Dataset containing geomedian, emad, smad, bcmad, and count arrays.

    Raises:
        ValueError: If multiple nodata values are found in input data variables.
    """
    array = input.to_array(dim="band").transpose("y", "x", "band", dim)

    if nodata is None:
        nodata = input.attrs.get("nodata", None)

    if nodata is None:
        # Grab the nodata value from our input array
        nodata_vals = set(
            dv.attrs.get("nodata", None) for dv in input.data_vars.values()
        )
        if len(nodata_vals) > 1:
            # Check for the nan case... nans are not equal to themselves
            string_vals = set(str(v) for v in nodata_vals)
            if len(string_vals) == 1 and string_vals.pop() == "nan":
                nodata = np.nan
            else:
                raise ValueError(
                    "Data arrays have more than 1 nodata value across them", nodata_vals
                )
        elif len(nodata_vals) == 1:
            nodata = nodata_vals.pop()

    if nodata is None:
        if is_float:
            nodata = np.nan
        else:
            nodata = 0

    gm_data, mads = geomedian(
        array.data,
        nodata=nodata,
        num_threads=num_threads,
        eps=eps,
        maxiters=maxiters,
        scale=scale,
        offset=offset,
    )

    dims = ("y", "x", "band")
    coords = {k: array.coords[k] for k in dims}
    result = xr.DataArray(
        data=gm_data, dims=dims, coords=coords, attrs=array.attrs
    ).to_dataset("band")

    emad = mads[:, :, 0]
    smad = mads[:, :, 1]
    bcmad = mads[:, :, 2]

    result["emad"] = xr.DataArray(data=emad, dims=dims[:2], coords=result.coords)
    result["smad"] = xr.DataArray(data=smad, dims=dims[:2], coords=result.coords)
    result["bcmad"] = xr.DataArray(data=bcmad, dims=dims[:2], coords=result.coords)

    if np.isnan(nodata):
        count_good = np.all(~np.isnan(array.data), axis=2).sum(axis=2)
    else:
        count_good = np.all(array.data != nodata, axis=2).sum(axis=2)

    result["count"] = xr.DataArray(
        data=count_good, dims=dims[:2], coords=result.coords
    ).astype("uint16")

    # This is required to ensure that nodata is set per-band
    for band_name in result.data_vars:
        band = result[band_name]
        # Don't think these should all be nan
        if band_name in ["emad", "smad", "bcmad"]:
            band.attrs = dict(nodata=np.nan)
        elif band_name == "count":
            band.attrs = dict(nodata=9999)
        else:
            band.attrs = dict(nodata=nodata)

    return result

geomedian_with_mads(src, scale=1.0, offset=0.0, eps=None, maxiters=1000, num_threads=1, work_chunks=(100, 100), is_float=True, nodata=None)

Compute Geomedian on Dask backed Dataset.

Parameters:

Name Type Description Default
src Dataset or DataArray

Input data, bands can be either float or integer with nodata values to indicate gaps in data.

required
scale float

Only used when input contains integer values. Geomedian will run on scaled values scale*X+offset. Only affects internal computation, final result is scaled back to the original value range. Default is 1.0.

1.0
offset float

See scale param above. Default is 0.0.

0.0
eps float

Termination criteria passed on to geomedian algorithm. Default is None.

None
maxiters int

Maximum number of iterations done per output pixel. Default is 1000.

1000
num_threads int

Configure internal concurrency of the Geomedian computation. Default is 1.

1
work_chunks tuple of int

Default is (100, 100), only applicable when input is Dataset.

(100, 100)
is_float bool

If True, input is assumed to be float32, if False, input is integer. Default is True.

True
nodata int or float

Nodata value to use, if not provided it will be taken from input attributes.

None

Returns:

Type Description
Dataset

xr.Dataset: Geomedian and associated statistics as a Dataset.

Source code in datacube_compute/__init__.py
def geomedian_with_mads(
    src: Union[xr.Dataset, xr.DataArray],
    scale: float = 1.0,
    offset: float = 0.0,
    eps: Optional[float] = None,
    maxiters: int = 1000,
    num_threads: int = 1,
    work_chunks: Tuple[int, int] = (100, 100),
    is_float: bool = True,
    nodata: Optional[Union[int, float]] = None,
) -> xr.Dataset:
    """
    Compute Geomedian on Dask backed Dataset.

    Args:
        src (xr.Dataset or xr.DataArray): Input data, bands can be either float or integer with nodata values to indicate gaps in data.
        scale (float, optional): Only used when input contains integer values. Geomedian will run on scaled values `scale*X+offset`. Only affects internal computation, final result is scaled back to the original value range. Default is 1.0.
        offset (float, optional): See scale param above. Default is 0.0.
        eps (float, optional): Termination criteria passed on to geomedian algorithm. Default is None.
        maxiters (int, optional): Maximum number of iterations done per output pixel. Default is 1000.
        num_threads (int, optional): Configure internal concurrency of the Geomedian computation. Default is 1.
        work_chunks (tuple of int, optional): Default is (100, 100), only applicable when input is Dataset.
        is_float (bool, optional): If True, input is assumed to be float32, if False, input is integer. Default is True.
        nodata (int or float, optional): Nodata value to use, if not provided it will be taken from input attributes.

    Returns:
        xr.Dataset: Geomedian and associated statistics as a Dataset.
    """
    ny, nx = work_chunks

    dim = "time"
    if dim not in src.dims:
        if "spec" in src.dims:
            dim = "spec"
        else:
            raise ValueError("Input dataset must have a 'time' or 'spec' dimension")

    chunked = src.chunk({"y": ny, "x": nx, dim: -1})

    # Check the dtype of the first data variable
    is_float = next(iter(src.dtypes.values())) == "f"

    if eps is None:
        # Is it sensible having a float for an int array?
        eps = 1e-4 if is_float else 0.1

    _gm_with_mads = chunked.map_blocks(
        geomedian_block_processor,
        kwargs=dict(
            scale=scale,
            offset=offset,
            eps=eps,
            maxiters=maxiters,
            num_threads=num_threads,
            dim=dim,
            is_float=is_float,
            nodata=nodata,
        ),
    )

    return _gm_with_mads

percentile(in_array, percentiles, nodata=None)

Calculates the percentiles of the input data along the first axis.

Parameters:

Name Type Description Default
in_array ndarray

Input array with shape (t, *other dims).

required
percentiles sequence of float or float

Sequence of percentiles or a single percentile in the [0.0, 1.0] range.

required
nodata same dtype as in_array

The nodata value. Must be provided for integer datatypes. For float types, this value is ignored and nodata is assumed to be NaN.

None

Returns:

Type Description
ndarray

np.ndarray: Array with shape (len(percentiles), *other dims), where the first index corresponds to the percentiles.

Source code in datacube_compute/__init__.py
def percentile(in_array, percentiles, nodata=None) -> np.ndarray:
    """
    Calculates the percentiles of the input data along the first axis.

    Args:
        in_array (np.ndarray): Input array with shape (t, *other dims).
        percentiles (sequence of float or float): Sequence of percentiles or a single percentile in the [0.0, 1.0] range.
        nodata (same dtype as in_array, optional): The nodata value. Must be provided for integer datatypes. For float types, this value is ignored and nodata is assumed to be NaN.

    Returns:
        np.ndarray: Array with shape (len(percentiles), *other dims), where the first index corresponds to the percentiles.
    """

    if isinstance(percentiles, Iterable):
        percentiles = np.array(list(percentiles))
    else:
        percentiles = np.array([percentiles])

    shape = in_array.shape
    in_array = in_array.reshape((shape[0], -1))

    if in_array.dtype == np.uint16:
        out_array = _percentile_uint16(in_array, percentiles, nodata)
    elif in_array.dtype == np.int16:
        out_array = _percentile_int16(in_array, percentiles, nodata)
    elif in_array.dtype == np.uint8:
        out_array = _percentile_uint8(in_array, percentiles, nodata)
    elif in_array.dtype == np.int8:
        out_array = _percentile_int8(in_array, percentiles, nodata)
    elif in_array.dtype == np.float32:
        out_array = _percentile_f32(in_array, percentiles)
    elif in_array.dtype == np.float64:
        out_array = _percentile_f64(in_array, percentiles)
    else:
        raise NotImplementedError

    return out_array.reshape((len(percentiles),) + shape[1:])