# Copyright (c) TorchGeo Contributors. All rights reserved.
# Licensed under the MIT License.
"""Fully convolutional change detection (FCCD) implementations."""
from collections.abc import Callable, Sequence
from typing import Any
import segmentation_models_pytorch as smp
import torch
from segmentation_models_pytorch import Unet
from segmentation_models_pytorch.base.model import SegmentationModel
from torch import Tensor


class FCSiamConc(SegmentationModel):  # type: ignore[misc]
"""Fully-convolutional Siamese Concatenation (FC-Siam-conc).
If you use this model in your research, please cite the following paper:
* https://doi.org/10.1109/ICIP.2018.8451652
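
    Example (an illustrative sketch; the batch shape and hyperparameters below
    are assumptions, not requirements):

    >>> model = FCSiamConc(encoder_weights=None, in_channels=3, classes=2)
    >>> images = torch.randn(4, 2, 3, 256, 256)
    >>> model(images).shape
    torch.Size([4, 2, 256, 256])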
"""

    def __init__(
self,
encoder_name: str = 'resnet34',
encoder_depth: int = 5,
encoder_weights: str | None = 'imagenet',
decoder_use_batchnorm: bool | str | dict[str, Any] = 'batchnorm',
decoder_channels: Sequence[int] = (256, 128, 64, 32, 16),
decoder_attention_type: str | None = None,
in_channels: int = 3,
classes: int = 1,
activation: str | Callable[[Tensor], Tensor] | None = None,
) -> None:
"""Initialize a new FCSiamConc model.
Args:
            encoder_name: Name of the classification model that will
                be used as an encoder (a.k.a. backbone) to extract features
                of different spatial resolution
            encoder_depth: A number of stages used in encoder in range [3, 5].
                Each stage generates features two times smaller in spatial
                dimensions than the previous one (e.g. for depth 0 we will have
                features with shapes [(N, C, H, W)], for depth 1 -
                [(N, C, H, W), (N, C, H // 2, W // 2)] and so on). Default is 5
encoder_weights: One of **None** (random initialization), **"imagenet"**
(pre-training on ImageNet) and other pretrained weights (see table
with available weights for each encoder_name)
decoder_channels: List of integers which specify **in_channels**
parameter for convolutions used in decoder. Length of the list
should be the same as **encoder_depth**
decoder_use_batchnorm: Specifies normalization between Conv2D and
activation. Accepts the following types:
- **True**: Defaults to `"batchnorm"`.
- **False**: No normalization (`nn.Identity`).
- **str**: Specifies normalization type using default parameters.
Available values: `"batchnorm"`, `"identity"`, `"layernorm"`,
`"instancenorm"`, `"inplace"`.
                - **dict**: Fully customizable normalization settings. Structure:
                  ```python
                  {"type": <norm_type>, **kwargs}
                  ```
                  where `"type"` corresponds to the normalization type (see
                  above), and `kwargs` are passed directly to the normalization
                  layer as defined in PyTorch documentation.

                  **Example**:
                  ```python
                  decoder_use_batchnorm={"type": "layernorm", "eps": 1e-2}
                  ```
            decoder_attention_type: Attention module used in the decoder of the
                model. Available options are **None** and **scse** (SCSE paper:
                https://arxiv.org/abs/1808.08127)
in_channels: A number of input channels for the model,
default is 3 (RGB images)
            classes: A number of classes for the output mask (or you can think
                of it as the number of channels of the output mask)
            activation: An activation function to apply after the final
                convolution layer. Available options are **"sigmoid"**,
                **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**,
                **callable** and **None**. Default is **None**
"""
super().__init__()
self.encoder = smp.encoders.get_encoder(
encoder_name,
in_channels=in_channels,
depth=encoder_depth,
weights=encoder_weights,
)
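        # Every skip connection will carry features from both temporal branches
        # concatenated along the channel dimension (see forward), so all
        # encoder stages except the raw input stage expose twice as many
        # channels.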
encoder_out_channels = [c * 2 for c in self.encoder.out_channels[1:]]
encoder_out_channels.insert(0, self.encoder.out_channels[0])
self.decoder = smp.decoders.unet.decoder.UnetDecoder(
encoder_channels=encoder_out_channels,
decoder_channels=decoder_channels,
n_blocks=encoder_depth,
use_norm=decoder_use_batchnorm,
attention_type=decoder_attention_type,
            add_center_block=encoder_name.startswith('vgg'),
)
self.segmentation_head = smp.base.SegmentationHead(
in_channels=decoder_channels[-1],
out_channels=classes,
activation=activation,
kernel_size=3,
)
self.classification_head = None
self.name = f'u-{encoder_name}'
self.initialize()

    def forward(self, x: Tensor) -> Tensor:
"""Forward pass of the model.
Args:
x: input images of shape (b, t, c, h, w)
Returns:
predicted change masks of size (b, classes, h, w)
"""
        # Split the bitemporal input into "before" and "after" images.
        x1 = x[:, 0]
        x2 = x[:, 1]
        # Both timestamps pass through the same shared-weight (Siamese) encoder.
        features1, features2 = self.encoder(x1), self.encoder(x2)
        # Concatenate per-stage features from both branches along the channel
        # dimension; the identity stage (index 0) comes from x2 alone.
        features = [
            torch.cat([features2[i], features1[i]], dim=1)
            for i in range(1, len(features1))
        ]
        features.insert(0, features2[0])
decoder_output = self.decoder(features)
masks: Tensor = self.segmentation_head(decoder_output)
return masks


class FCSiamDiff(Unet):  # type: ignore[misc]
"""Fully-convolutional Siamese Difference (FC-Siam-diff).
If you use this model in your research, please cite the following paper:
* https://doi.org/10.1109/ICIP.2018.8451652
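
    Example (an illustrative sketch; the batch shape and hyperparameters below
    are assumptions, not requirements):

    >>> model = FCSiamDiff(encoder_weights=None, in_channels=3, classes=1)
    >>> images = torch.randn(4, 2, 3, 256, 256)
    >>> model(images).shape
    torch.Size([4, 1, 256, 256])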
"""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize a new FCSiamDiff model.

        Args:
            *args: Additional arguments passed to
                :class:`~segmentation_models_pytorch.Unet`
            **kwargs: Additional keyword arguments passed to
                :class:`~segmentation_models_pytorch.Unet`
        """
kwargs['aux_params'] = None
super().__init__(*args, **kwargs)

    def forward(self, x: Tensor) -> Tensor:
"""Forward pass of the model.
Args:
x: input images of shape (b, t, c, h, w)
Returns:
predicted change masks of size (b, classes, h, w)
"""
        # Split the bitemporal input into "before" and "after" images.
        x1 = x[:, 0]
        x2 = x[:, 1]
        # Both timestamps pass through the same shared-weight (Siamese) encoder.
        features1, features2 = self.encoder(x1), self.encoder(x2)
        # Skip connections carry the per-stage feature difference; the identity
        # stage (index 0) comes from x2 alone.
        features = [features2[i] - features1[i] for i in range(1, len(features1))]
        features.insert(0, features2[0])
decoder_output = self.decoder(features)
masks: Tensor = self.segmentation_head(decoder_output)
return masks
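

if __name__ == '__main__':
    # A minimal smoke test, not part of the TorchGeo API: the shapes and
    # hyperparameters here are illustrative assumptions. Both models consume a
    # bitemporal batch (b, t=2, c, h, w) and emit masks (b, classes, h, w).
    images = torch.randn(2, 2, 3, 128, 128)
    for model in (
        FCSiamConc(encoder_weights=None, in_channels=3, classes=1),
        FCSiamDiff(encoder_weights=None, in_channels=3, classes=1),
    ):
        with torch.no_grad():
            masks = model(images)
        assert masks.shape == (2, 1, 128, 128)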