# Source code for combustion.models.efficientdet

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from copy import deepcopy
from typing import List, Optional

import torch.nn as nn
from torch import Tensor

from combustion.nn import BiFPN1d, BiFPN2d, BiFPN3d, MobileNetBlockConfig

from .efficientnet import _EfficientNet


class _EfficientDetMeta(type):
    """Metaclass that attaches dimension-specific building blocks to a class.

    The name of the class being created is searched for the substrings ``"3d"``,
    ``"2d"`` and ``"1d"`` (in that order). The first match decides which
    convolution, batch norm, BiFPN and block-factory objects are stored on the
    new class as ``Conv``, ``BatchNorm``, ``BiFPN`` and ``_get_blocks``.

    Raises:
        RuntimeError: If the class name contains none of the recognised
            dimension markers.
    """

    def __new__(cls, name, bases, dct):
        x = super().__new__(cls, name, bases, dct)
        if "3d" in name:
            x.Conv = nn.Conv3d
            x.BatchNorm = nn.BatchNorm3d
            x.BiFPN = BiFPN3d
            x._get_blocks = MobileNetBlockConfig.get_3d_blocks
        elif "2d" in name:
            x.Conv = nn.Conv2d
            x.BatchNorm = nn.BatchNorm2d
            x.BiFPN = BiFPN2d
            x._get_blocks = MobileNetBlockConfig.get_2d_blocks
        elif "1d" in name:
            x.Conv = nn.Conv1d
            x.BatchNorm = nn.BatchNorm1d
            x.BiFPN = BiFPN1d
            x._get_blocks = MobileNetBlockConfig.get_1d_blocks
        else:
            # Report the offending class name; ``cls`` is the metaclass itself,
            # so ``cls.__name__`` would always read "_EfficientDetMeta".
            raise RuntimeError(f"Metaclass: error processing name {name}")
        return x


class _EfficientDet(_EfficientNet):
    r"""Dimension-agnostic EfficientDet body: a backbone inherited from ``_EfficientNet``
    followed by per-level channel-mapping convolutions and a stack of BiFPN layers.

    The ``Conv`` and ``BiFPN`` class attributes used here are not defined on this class;
    ``_EfficientDetMeta`` assigns them on the concrete 1d/2d/3d subclasses.
    """
    __constants__ = ["fpn_levels"]

    def __init__(
        self,
        block_configs: List[MobileNetBlockConfig],
        fpn_levels: Optional[List[int]] = None,
        fpn_filters: int = 64,
        fpn_repeats: int = 3,
        width_coeff: float = 1.0,
        depth_coeff: float = 1.0,
        width_divisor: float = 8.0,
        min_width: Optional[int] = None,
        stem: Optional[nn.Module] = None,
        head: Optional[nn.Module] = None,
        fpn_kwargs: Optional[dict] = None,
    ):
        r"""Builds the backbone (via the parent constructor), the per-level convolutions
        and the BiFPN stack.

        Args:
            block_configs: Configs for the backbone blocks.
            fpn_levels: 1-based backbone levels fed to the BiFPN. Values greater than
                ``len(block_configs)`` request extra levels produced by strided
                convolutions. ``None`` selects ``[3, 5, 7, 8, 9]``.
            fpn_filters: Channel count used throughout the BiFPN (rounded with
                ``round_filters`` using a width coefficient of ``1.0``).
            fpn_repeats: Number of BiFPN layers to stack.
            width_coeff: Width scaling coefficient for the backbone.
            depth_coeff: Depth scaling coefficient for the backbone.
            width_divisor: Divisor passed to ``round_filters``.
            min_width: Minimum width passed to ``round_filters``.
            stem: Optional stem module, forwarded to the parent constructor.
            head: Optional head module, forwarded to the parent constructor.
            fpn_kwargs: Extra keyword arguments for every BiFPN layer. ``None`` means
                no extra arguments.
        """
        super(_EfficientDet, self).__init__(
            block_configs, width_coeff, depth_coeff, width_divisor, min_width, stem, head
        )
        # ``None`` replaces the former mutable defaults; each instance gets its own list
        # so mutating ``fpn_levels`` on one model cannot leak into another.
        fpn_levels = [3, 5, 7, 8, 9] if fpn_levels is None else list(fpn_levels)
        fpn_kwargs = {} if fpn_kwargs is None else fpn_kwargs
        self.fpn_levels = fpn_levels

        # rescale a private copy so the list passed by the caller is left untouched here
        block_configs = deepcopy(block_configs)
        for config in block_configs:
            # update config according to scale coefficients
            config.input_filters = self.round_filters(config.input_filters, width_coeff, width_divisor, min_width)
            config.output_filters = self.round_filters(config.output_filters, width_coeff, width_divisor, min_width)
            config.num_repeats = self.round_repeats(depth_coeff, config.num_repeats)

        output_filters = self.round_filters(fpn_filters, 1.0, width_divisor, min_width)
        num_blocks = len(block_configs)

        # 1x1 convolutions mapping the selected backbone feature maps to a constant number
        # of channels.
        # NOTE(review): these are created in backbone order while ``extract_features``
        # pairs them with features in ``fpn_levels`` order, so ``fpn_levels`` is assumed
        # to be ascending.
        fpn_convs = [
            self.Conv(config.output_filters, output_filters, kernel_size=1)
            for i, config in enumerate(block_configs)
            if i + 1 in fpn_levels
        ]

        # extra, coarser levels created by strided convolutions past the final block.
        # NOTE(review): a level above ``num_blocks + 1`` expects ``output_filters`` input
        # channels, i.e. it assumes level ``num_blocks + 1`` was also requested.
        for level in fpn_levels:
            if level == num_blocks + 1:
                # first extra level consumes the raw output of the last backbone block
                conv = self.Conv(
                    block_configs[-1].output_filters, output_filters, kernel_size=3, stride=2, padding=1
                )
                fpn_convs.append(conv)
            elif level > num_blocks + 1:
                # later extra levels consume the previous extra level
                conv = self.Conv(output_filters, output_filters, kernel_size=3, stride=2, padding=1)
                fpn_convs.append(nn.Sequential(nn.ReLU(), conv))

        self.fpn_convs = nn.ModuleList(fpn_convs)

        self.bifpn_layers = nn.ModuleList(
            [self.BiFPN(output_filters, levels=len(fpn_levels), **fpn_kwargs) for _ in range(fpn_repeats)]
        )

    def extract_features(self, inputs: Tensor) -> List[Tensor]:
        r"""Runs the EfficientDet stem, backbone blocks, per-level convolutions and BiFPN
        layers, returning the list of feature maps produced by the final BiFPN layer.

        Args:

            inputs (:class:`torch.Tensor`):
                Model inputs

        """
        # efficientnet feature extractor
        backbone_features: List[Tensor] = []
        x = self.stem(inputs)
        for block in self.blocks:
            x = block(x)
            backbone_features.append(x)

        # pull out feature maps to be used in BiFPN; levels past the end of the backbone
        # are skipped here and synthesised by the strided convolutions below
        captured_features: List[Tensor] = []
        for i in self.fpn_levels:
            if i - 1 < len(backbone_features):
                captured_features.append(backbone_features[i - 1])

        # map to constant channel number using trivial convs
        for i, conv in enumerate(self.fpn_convs):
            if i < len(captured_features):
                captured_features[i] = conv(captured_features[i])
            else:
                # extra level: chain from the last backbone output / previous extra level
                x = conv(x)
                captured_features.append(x)

        for bifpn in self.bifpn_layers:
            captured_features = bifpn(captured_features)

        return captured_features

    def forward(self, inputs: Tensor) -> List[Tensor]:
        r"""Runs the entire EfficientDet model, including stem, body, and head.
        If no head was supplied, the output of :func:`extract_features` will be returned.
        Otherwise, the output of the given head will be returned.

        .. note::
            The returned output will always be a list of tensors. If a custom head is given
            and it returns a single tensor, that tensor will be wrapped in a list before
            being returned.

        Args:
            inputs (:class:`torch.Tensor`):
                Model inputs
        """
        output = self.extract_features(inputs)
        if self.head is not None:
            output = self.head(output)
            if not isinstance(output, list):
                output = [
                    output,
                ]

        return output

    @classmethod
    def from_predefined(cls, compound_coeff: int, **kwargs) -> "_EfficientDet":
        r"""Creates an EfficientDet model using one of the parameterizations defined in the
        `EfficientDet paper`_. The backbone width/depth coefficients use the scaling bases
        of the `EfficientNet paper`_.

        Args:
            compound_coeff (int):
                Compound scaling parameter :math:`\phi`. For example, to construct EfficientDet-D0, set
                ``compound_coeff=0``.

            **kwargs:
                Additional parameters/overrides for model constructor. Entries here take
                precedence over the values derived from ``compound_coeff``.

        .. _EfficientNet paper:
            https://arxiv.org/abs/1905.11946
        """
        # from paper
        alpha = 1.2
        beta = 1.1
        width_divisor = 8.0

        depth_coeff = alpha ** compound_coeff
        width_coeff = beta ** compound_coeff

        # BiFPN width and depth scaling
        fpn_filters = int(64 * 1.35 ** compound_coeff)
        fpn_repeats = 3 + compound_coeff
        fpn_levels = [3, 5, 7, 8, 9]

        final_kwargs = {
            "block_configs": cls.DEFAULT_BLOCKS,
            "width_coeff": width_coeff,
            "depth_coeff": depth_coeff,
            "width_divisor": width_divisor,
            "fpn_filters": fpn_filters,
            "fpn_repeats": fpn_repeats,
            "fpn_levels": fpn_levels,
        }
        final_kwargs.update(kwargs)
        return cls(**final_kwargs)


class EfficientDet1d(_EfficientDet, metaclass=_EfficientDetMeta):
    """1d variant of EfficientDet.

    Adds no behavior of its own. The ``"1d"`` in the class name makes
    ``_EfficientDetMeta`` bind ``nn.Conv1d``, ``nn.BatchNorm1d``, ``BiFPN1d`` and
    ``MobileNetBlockConfig.get_1d_blocks`` as ``Conv``, ``BatchNorm``, ``BiFPN``
    and ``_get_blocks``.
    """
    pass


class EfficientDet2d(_EfficientDet, metaclass=_EfficientDetMeta):
    r"""Implementation of EfficientDet as described in the `EfficientDet paper`_.
    EfficientDet is built on an EfficientNet backbone
    (see :class:`combustion.models.EfficientNet2d` for details). EfficientDet adds a
    bidirectional feature pyramid network (see :class:`combustion.nn.BiFPN2d`), which
    mixes information across the various feature maps produced by the EfficientNet backbone.

    .. image:: ./efficientdet.png
        :width: 800px
        :align: center
        :height: 300px
        :alt: Diagram of EfficientDet

    The authors of EfficientDet used the default EfficientNet scaling parameters for the backbone:

    .. math::
        \alpha = 1.2 \\
        \beta = 1.1 \\
        \gamma = 1.15


    The BiFPN was scaled as follows:

    .. math::
        W_\text{bifpn} = 64 \cdot \big(1.35^\phi\big) \\
        D_\text{bifpn} = 3 + \phi

    In the original EfficientDet implementation, the authors extract feature maps from levels
    3, 5, and 7 of the backbone. Two additional coarse levels are created by performing additional
    strided convolutions to the final level in the backbone, for a total of 5 levels in the BiFPN.

    .. note::
        Currently, DropConnect ratios are not scaled based on depth of the given block.
        This is a deviation from the true EfficientNet implementation.

    Args:
        block_configs (list of :class:`combustion.nn.MobileNetBlockConfig`):
            Configs for each of the :class:`combustion.nn.MobileNetConvBlock2d` blocks
            used in the model.

        fpn_levels (list of ints):
            Indices of EfficientNet feature levels to include in the BiFPN, starting at index 1.
            Values in ``fpn_levels`` greater than the total number of blocks in the backbone denote
            levels that should be created by applying additional strided convolutions to the final
            level in the backbone.

        fpn_filters (int):
            Number of filters to use for the BiFPN. The filter count given here should be the desired
            number of filters after width scaling.

        fpn_repeats (int):
            Number of repeats to use for the BiFPN. The repeat count given here should be the desired
            number of repeats after depth scaling.

        width_coeff (float):
            The width scaling coefficient. Increasing this increases the width of the model.

        depth_coeff (float):
            The depth scaling coefficient. Increasing this increases the depth of the model.

        width_divisor (float):
            Used in calculating number of filters under width scaling. Filters at each block
            will be a multiple of ``width_divisor``.

        min_width (int):
            The minimum width of the model at any block

        stem (:class:`torch.nn.Module`):
            An optional stem to use for the model. The default stem is a single
            3x3/2 convolution that expects 3 input channels.

        head (:class:`torch.nn.Module`):
            An optional head to use for the model. By default, no head will be used
            and ``forward`` will return a list of tensors containing extracted features.

        fpn_kwargs (dict):
            Keyword args to be passed to all :class:`combustion.nn.BiFPN2d` layers.

    Shapes
        * Input: :math:`(N, C, H, W)`
        * Output: List of tensors of shape :math:`(N, C, H', W')`, where height and width vary
          depending on the amount of downsampling for that feature map.

    .. _EfficientDet paper:
        https://arxiv.org/abs/1911.09070
    """


class EfficientDet3d(_EfficientDet, metaclass=_EfficientDetMeta):
    """3d variant of EfficientDet.

    Adds no behavior of its own. The ``"3d"`` in the class name makes
    ``_EfficientDetMeta`` bind ``nn.Conv3d``, ``nn.BatchNorm3d``, ``BiFPN3d`` and
    ``MobileNetBlockConfig.get_3d_blocks`` as ``Conv``, ``BatchNorm``, ``BiFPN``
    and ``_get_blocks``.
    """
    pass
# Source code for combustion.models.efficientdet
#
# Docs
#
# Tutorials
#
# Resources