Source code for torchgeo.datasets.copernicus.biomass_s3

# Copyright (c) TorchGeo Contributors. All rights reserved.
# Licensed under the MIT License.

"""Copernicus-Bench Biomass-S3 dataset."""

import glob
import os
from collections.abc import Callable, Sequence
from typing import Literal

import pandas as pd
import torch
from torch import Tensor

from ..utils import Path, stack_samples
from .base import CopernicusBenchBase


[docs]class CopernicusBenchBiomassS3(CopernicusBenchBase):
    """Copernicus-Bench Biomass-S3 dataset.

    Biomass-S3 is a regression dataset based on Sentinel-3 OLCI images and CCI biomass.
    The biomass product is part of the European Space Agency's Climate Change Initiative
    (CCI) program and delivers global forest above-ground biomass at 100 m spatial
    resolution.

    This benchmark supports both static (1 image/location) and time series
    (1-4 images/location) modes, the former is used in the original benchmark.

    If you use this dataset in your research, please cite the following papers:

    * https://arxiv.org/abs/2503.11849
    * https://catalogue.ceda.ac.uk/uuid/02e1b18071ad45a19b4d3e8adafa2817/

    .. versionadded:: 0.7
    """

    url = 'https://hf.co/datasets/wangyi111/Copernicus-Bench/resolve/9d252acd3aa0e3da3128e05c6f028647f0e48e5f/l3_biomass_s3/biomass_s3.zip'
    md5 = '4769ab8c2c23cd8957b99e15e071931c'
    zipfile = 'biomass_s3.zip'
    directory = 'biomass_s3'
    filename = 'static_fnames-{}.csv'
    dtype = torch.float
    filename_regex = r'S3[AB]_(?P<date>\d{8}T\d{6})'
    all_bands = (
        'Oa01_radiance',
        'Oa02_radiance',
        'Oa03_radiance',
        'Oa04_radiance',
        'Oa05_radiance',
        'Oa06_radiance',
        'Oa07_radiance',
        'Oa08_radiance',
        'Oa09_radiance',
        'Oa10_radiance',
        'Oa11_radiance',
        'Oa12_radiance',
        'Oa13_radiance',
        'Oa14_radiance',
        'Oa15_radiance',
        'Oa16_radiance',
        'Oa17_radiance',
        'Oa18_radiance',
        'Oa19_radiance',
        'Oa20_radiance',
        'Oa21_radiance',
    )
    rgb_bands = ('Oa08_radiance', 'Oa06_radiance', 'Oa04_radiance')
    cmap = 'YlGn'

[docs]    def __init__(
        self,
        root: Path = 'data',
        split: Literal['train', 'val', 'test'] = 'train',
        mode: Literal['static', 'time-series'] = 'static',
        bands: Sequence[str] | None = None,
        transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new CopernicusBenchBiomassS3 instance.

        Args:
            root: Root directory where dataset can be found.
            split: One of 'train', 'val', or 'test'.
            mode: One of 'static' or 'time-series'.
            bands: Sequence of band names to load (defaults to all bands).
            transforms: A function/transform that takes input sample and its target as
                entry and returns a transformed version.
            download: If True, download dataset and store it in the root directory.
            checksum: If True, check the MD5 of the downloaded files (may be slow).

        Raises:
            DatasetNotFoundError: If dataset is not found and *download* is False.
        """
        self.mode = mode
        super().__init__(root, split, bands, transforms, download, checksum)
        filepath = os.path.join(root, self.directory, self.filename.format(split))
        self.files = pd.read_csv(filepath, header=None)

[docs]    def __getitem__(self, index: int) -> dict[str, Tensor]:
        """Return an index within the dataset.

        Args:
            index: Index to return.

        Returns:
            Data and labels at that index.
        """
        pid, file = self.files.iloc[index]
        match self.mode:
            case 'static':
                path = os.path.join(self.root, self.directory, 's3_olci', pid, file)
                sample = self._load_image(path)
            case 'time-series':
                paths = os.path.join(self.root, self.directory, 's3_olci', pid, '*.tif')
                samples = [self._load_image(path) for path in sorted(glob.glob(paths))]
                sample = stack_samples(samples)

        path = os.path.join(self.root, self.directory, 'biomass', f'{pid}.tif')
        sample |= self._load_mask(path)

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample
Source code for torchgeo.datasets.copernicus.biomass_s3

Docs

Tutorials

Resources