API Reference

`geomedian(in_array, **kwargs)`

Compute the geomedian of a 4D array (y, x, band, time).

Parameters:

Name	Type	Description	Default
`in_array`	`ndarray`	Input array with shape (y, x, band, time).	required
`**kwargs`		Additional keyword arguments for geomedian computation. - num_threads (int, optional): Number of threads to use. Default is 1. - eps (float, optional): Convergence threshold. Default is 1e-6. - maxiters (int, optional): Maximum number of iterations. Default is 1000. - scale (float, optional): Scaling factor for integer arrays. Default is 1.0. - offset (float, optional): Offset for integer arrays. Default is 0.0. - nodata (int or float, optional): Nodata value for the array. Default depends on dtype.	`{}`

Returns:

Type	Description
`ndarray`	np.ndarray: Geomedian array with shape (y, x, band).

Raises:

Type	Description
`ValueError`	If input array does not have 4 dimensions.
`TypeError`	If input dtype is not supported.

Source code in datacube_compute/__init__.py

def geomedian(in_array, **kwargs) -> np.ndarray:
    """
    Compute the geomedian of a 4D array (y, x, band, time).

    Args:
        in_array (np.ndarray): Input array with shape (y, x, band, time).
        **kwargs: Additional keyword arguments for geomedian computation.
            - num_threads (int, optional): Number of threads to use. Default is 1.
            - eps (float, optional): Convergence threshold. Default is 1e-6.
            - maxiters (int, optional): Maximum number of iterations. Default is 1000.
            - scale (float, optional): Scaling factor for integer arrays. Default is 1.0.
            - offset (float, optional): Offset for integer arrays. Default is 0.0.
            - nodata (int or float, optional): Nodata value for the array. Default depends on dtype.

    Returns:
        np.ndarray: Geomedian array with shape (y, x, band).

    Raises:
        ValueError: If input array does not have 4 dimensions.
        TypeError: If input dtype is not supported.
    """
    kwargs.setdefault("num_threads", 1)
    kwargs.setdefault("eps", 1e-6)
    kwargs.setdefault("maxiters", 1000)
    kwargs.setdefault("scale", 1.0)
    kwargs.setdefault("offset", 0.0)

    if len(in_array.shape) != 4:
        raise ValueError(
            (
                "in_array: expected array to have 4 dimensions in format"
                f"(y, x, band, time), found {len(in_array.shape)} dimensions."
            )
        )

    if in_array.dtype == np.float32:
        kwargs.setdefault("nodata", np.nan)
        return _geomedian(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["scale"],
            kwargs["offset"],
        )
    elif in_array.dtype == np.int16:
        kwargs.setdefault("nodata", -1)
        return _geomedian_int16(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["nodata"],
            kwargs["scale"],
            kwargs["offset"],
        )
    elif in_array.dtype == np.uint16:
        kwargs.setdefault("nodata", 0)
        return _geomedian_uint16(
            in_array,
            kwargs["maxiters"],
            kwargs["eps"],
            kwargs["num_threads"],
            kwargs["nodata"],
            kwargs["scale"],
            kwargs["offset"],
        )
    else:
        raise TypeError(
            (
                "in_array: expected dtype to be one of "
                "{np.float32}, {np.int16}, {np.uint16}, found {in_array.dtype}."
            )
        )

`geomedian_block_processor(input, nodata=None, scale=1, offset=0, eps=1e-06, maxiters=1000, num_threads=1, dim='time', is_float=True)`

Process a chunk of an xarray Dataset to compute the geomedian and MADS statistics.

Parameters:

Name	Type	Description	Default
`input`	`Dataset`	Input dataset chunk.	required
`nodata`	`int or float`	Nodata value. If None, will be inferred from input attributes.	`None`
`scale`	`float`	Scaling factor for integer arrays. Default is 1.	`1`
`offset`	`float`	Offset for integer arrays. Default is 0.	`0`
`eps`	`float`	Convergence threshold. Default is 1e-6.	`1e-06`
`maxiters`	`int`	Maximum number of iterations. Default is 1000.	`1000`
`num_threads`	`int`	Number of threads to use. Default is 1.	`1`
`dim`	`str`	Name of the time/spec dimension. Default is "time".	`'time'`
`is_float`	`bool`	If True, input is assumed to be float32, else integer. Default is True.	`True`

Returns:

Type	Description
`Dataset`	xr.Dataset: Dataset containing geomedian, emad, smad, bcmad, and count arrays.

Raises:

Type	Description
`ValueError`	If multiple nodata values are found in input data variables.

Source code in datacube_compute/__init__.py

def geomedian_block_processor(
    input: xr.Dataset,
    nodata=None,
    scale=1,
    offset=0,
    eps=1e-6,
    maxiters=1000,
    num_threads=1,
    dim="time",
    is_float=True,
) -> xr.Dataset:
    """
    Process a chunk of an xarray Dataset to compute the geomedian and MADS statistics.

    Args:
        input (xr.Dataset): Input dataset chunk.
        nodata (int or float, optional): Nodata value. If None, will be inferred from input attributes.
        scale (float, optional): Scaling factor for integer arrays. Default is 1.
        offset (float, optional): Offset for integer arrays. Default is 0.
        eps (float, optional): Convergence threshold. Default is 1e-6.
        maxiters (int, optional): Maximum number of iterations. Default is 1000.
        num_threads (int, optional): Number of threads to use. Default is 1.
        dim (str, optional): Name of the time/spec dimension. Default is "time".
        is_float (bool, optional): If True, input is assumed to be float32, else integer. Default is True.

    Returns:
        xr.Dataset: Dataset containing geomedian, emad, smad, bcmad, and count arrays.

    Raises:
        ValueError: If multiple nodata values are found in input data variables.
    """
    array = input.to_array(dim="band").transpose("y", "x", "band", dim)

    if nodata is None:
        nodata = input.attrs.get("nodata", None)

    if nodata is None:
        # Grab the nodata value from our input array
        nodata_vals = set(
            dv.attrs.get("nodata", None) for dv in input.data_vars.values()
        )
        if len(nodata_vals) > 1:
            # Check for the nan case... nans are not equal to themselves
            string_vals = set(str(v) for v in nodata_vals)
            if len(string_vals) == 1 and string_vals.pop() == "nan":
                nodata = np.nan
            else:
                raise ValueError(
                    "Data arrays have more than 1 nodata value across them", nodata_vals
                )
        elif len(nodata_vals) == 1:
            nodata = nodata_vals.pop()

    if nodata is None:
        if is_float:
            nodata = np.nan
        else:
            nodata = 0

    gm_data, mads = geomedian(
        array.data,
        nodata=nodata,
        num_threads=num_threads,
        eps=eps,
        maxiters=maxiters,
        scale=scale,
        offset=offset,
    )

    dims = ("y", "x", "band")
    coords = {k: array.coords[k] for k in dims}
    result = xr.DataArray(
        data=gm_data, dims=dims, coords=coords, attrs=array.attrs
    ).to_dataset("band")

    emad = mads[:, :, 0]
    smad = mads[:, :, 1]
    bcmad = mads[:, :, 2]

    result["emad"] = xr.DataArray(data=emad, dims=dims[:2], coords=result.coords)
    result["smad"] = xr.DataArray(data=smad, dims=dims[:2], coords=result.coords)
    result["bcmad"] = xr.DataArray(data=bcmad, dims=dims[:2], coords=result.coords)

    if np.isnan(nodata):
        count_good = np.all(~np.isnan(array.data), axis=2).sum(axis=2)
    else:
        count_good = np.all(array.data != nodata, axis=2).sum(axis=2)

    result["count"] = xr.DataArray(
        data=count_good, dims=dims[:2], coords=result.coords
    ).astype("uint16")

    # This is required to ensure that nodata is set per-band
    for band_name in result.data_vars:
        band = result[band_name]
        # Don't think these should all be nan
        if band_name in ["emad", "smad", "bcmad"]:
            band.attrs = dict(nodata=np.nan)
        elif band_name == "count":
            band.attrs = dict(nodata=9999)
        else:
            band.attrs = dict(nodata=nodata)

    return result

`geomedian_with_mads(src, scale=1.0, offset=0.0, eps=None, maxiters=1000, num_threads=1, work_chunks=(100, 100), is_float=True, nodata=None)`

Compute Geomedian on Dask backed Dataset.

Parameters:

Name	Type	Description	Default
`src`	`Dataset or DataArray`	Input data, bands can be either float or integer with nodata values to indicate gaps in data.	required
`scale`	`float`	Only used when input contains integer values. Geomedian will run on scaled values `scale*X+offset`. Only affects internal computation, final result is scaled back to the original value range. Default is 1.0.	`1.0`
`offset`	`float`	See scale param above. Default is 0.0.	`0.0`
`eps`	`float`	Termination criteria passed on to geomedian algorithm. Default is None.	`None`
`maxiters`	`int`	Maximum number of iterations done per output pixel. Default is 1000.	`1000`
`num_threads`	`int`	Configure internal concurrency of the Geomedian computation. Default is 1.	`1`
`work_chunks`	`tuple of int`	Default is (100, 100), only applicable when input is Dataset.	`(100, 100)`
`is_float`	`bool`	If True, input is assumed to be float32, if False, input is integer. Default is True.	`True`
`nodata`	`int or float`	Nodata value to use, if not provided it will be taken from input attributes.	`None`

Returns:

Type	Description
`Dataset`	xr.Dataset: Geomedian and associated statistics as a Dataset.

Source code in datacube_compute/__init__.py

def geomedian_with_mads(
    src: Union[xr.Dataset, xr.DataArray],
    scale: float = 1.0,
    offset: float = 0.0,
    eps: Optional[float] = None,
    maxiters: int = 1000,
    num_threads: int = 1,
    work_chunks: Tuple[int, int] = (100, 100),
    is_float: bool = True,
    nodata: Optional[Union[int, float]] = None,
) -> xr.Dataset:
    """
    Compute Geomedian on Dask backed Dataset.

    Args:
        src (xr.Dataset or xr.DataArray): Input data, bands can be either float or integer with nodata values to indicate gaps in data.
        scale (float, optional): Only used when input contains integer values. Geomedian will run on scaled values `scale*X+offset`. Only affects internal computation, final result is scaled back to the original value range. Default is 1.0.
        offset (float, optional): See scale param above. Default is 0.0.
        eps (float, optional): Termination criteria passed on to geomedian algorithm. Default is None.
        maxiters (int, optional): Maximum number of iterations done per output pixel. Default is 1000.
        num_threads (int, optional): Configure internal concurrency of the Geomedian computation. Default is 1.
        work_chunks (tuple of int, optional): Default is (100, 100), only applicable when input is Dataset.
        is_float (bool, optional): If True, input is assumed to be float32, if False, input is integer. Default is True.
        nodata (int or float, optional): Nodata value to use, if not provided it will be taken from input attributes.

    Returns:
        xr.Dataset: Geomedian and associated statistics as a Dataset.
    """
    ny, nx = work_chunks

    dim = "time"
    if dim not in src.dims:
        if "spec" in src.dims:
            dim = "spec"
        else:
            raise ValueError("Input dataset must have a 'time' or 'spec' dimension")

    chunked = src.chunk({"y": ny, "x": nx, dim: -1})

    # Check the dtype of the first data variable
    is_float = next(iter(src.dtypes.values())) == "f"

    if eps is None:
        # Is it sensible having a float for an int array?
        eps = 1e-4 if is_float else 0.1

    _gm_with_mads = chunked.map_blocks(
        geomedian_block_processor,
        kwargs=dict(
            scale=scale,
            offset=offset,
            eps=eps,
            maxiters=maxiters,
            num_threads=num_threads,
            dim=dim,
            is_float=is_float,
            nodata=nodata,
        ),
    )

    return _gm_with_mads

`percentile(in_array, percentiles, nodata=None)`

Calculates the percentiles of the input data along the first axis.

Parameters:

Name	Type	Description	Default
`in_array`	`ndarray`	Input array with shape (t, *other dims).	required
`percentiles`	`sequence of float or float`	Sequence of percentiles or a single percentile in the [0.0, 1.0] range.	required
`nodata`	`same dtype as in_array`	The nodata value. Must be provided for integer datatypes. For float types, this value is ignored and nodata is assumed to be NaN.	`None`

Returns:

Type	Description
`ndarray`	np.ndarray: Array with shape (len(percentiles), *other dims), where the first index corresponds to the percentiles.

Source code in datacube_compute/__init__.py

def percentile(in_array, percentiles, nodata=None) -> np.ndarray:
    """
    Calculates the percentiles of the input data along the first axis.

    Args:
        in_array (np.ndarray): Input array with shape (t, *other dims).
        percentiles (sequence of float or float): Sequence of percentiles or a single percentile in the [0.0, 1.0] range.
        nodata (same dtype as in_array, optional): The nodata value. Must be provided for integer datatypes. For float types, this value is ignored and nodata is assumed to be NaN.

    Returns:
        np.ndarray: Array with shape (len(percentiles), *other dims), where the first index corresponds to the percentiles.
    """

    if isinstance(percentiles, Iterable):
        percentiles = np.array(list(percentiles))
    else:
        percentiles = np.array([percentiles])

    shape = in_array.shape
    in_array = in_array.reshape((shape[0], -1))

    if in_array.dtype == np.uint16:
        out_array = _percentile_uint16(in_array, percentiles, nodata)
    elif in_array.dtype == np.int16:
        out_array = _percentile_int16(in_array, percentiles, nodata)
    elif in_array.dtype == np.uint8:
        out_array = _percentile_uint8(in_array, percentiles, nodata)
    elif in_array.dtype == np.int8:
        out_array = _percentile_int8(in_array, percentiles, nodata)
    elif in_array.dtype == np.float32:
        out_array = _percentile_f32(in_array, percentiles)
    elif in_array.dtype == np.float64:
        out_array = _percentile_f64(in_array, percentiles)
    else:
        raise NotImplementedError

    return out_array.reshape((len(percentiles),) + shape[1:])