
# Source code for combustion.models.efficientdet

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from copy import deepcopy
from typing import List, Optional

import torch.nn as nn
from torch import Tensor

from combustion.nn import BiFPN1d, BiFPN2d, BiFPN3d, MobileNetBlockConfig

from .efficientnet import _EfficientNet

class _EfficientDetMeta(type):
    """Metaclass that binds dimension-specific layer types onto a class.

    The dimensionality is read from the class *name*: a name containing
    ``"3d"``, ``"2d"`` or ``"1d"`` (checked in that order) receives the
    matching ``Conv``, ``BatchNorm`` and ``BiFPN`` class attributes together
    with a ``_get_blocks`` attribute taken from :class:`MobileNetBlockConfig`.

    Raises:
        RuntimeError: If the class name contains none of the recognised
            dimensionality markers.
    """

    def __new__(cls, name, bases, dct):
        x = super().__new__(cls, name, bases, dct)
        if "3d" in name:
            x.Conv = nn.Conv3d
            x.BatchNorm = nn.BatchNorm3d
            x.BiFPN = BiFPN3d
            x._get_blocks = MobileNetBlockConfig.get_3d_blocks
        elif "2d" in name:
            x.Conv = nn.Conv2d
            x.BatchNorm = nn.BatchNorm2d
            x.BiFPN = BiFPN2d
            x._get_blocks = MobileNetBlockConfig.get_2d_blocks
        elif "1d" in name:
            x.Conv = nn.Conv1d
            x.BatchNorm = nn.BatchNorm1d
            x.BiFPN = BiFPN1d
            x._get_blocks = MobileNetBlockConfig.get_1d_blocks
        else:
            # Only unrecognised names are an error; without this ``else`` the
            # raise sat inside the 1d branch and fired for every 1d class.
            # Report the offending class name, not the metaclass name.
            raise RuntimeError(f"Metaclass: error processing name {name}")
        return x

class _EfficientDet(_EfficientNet):
    r"""Dimension-agnostic EfficientDet base class.

    Builds on the ``_EfficientNet`` backbone and adds (a) one convolution per
    requested feature level that maps the level to a constant channel count and
    (b) a stack of BiFPN layers applied to the resulting feature maps.

    The ``Conv`` and ``BiFPN`` layer types are read from class attributes, which
    ``_EfficientDetMeta`` assigns on the concrete 1d/2d/3d subclasses.

    Args:
        block_configs (list of :class:`combustion.nn.MobileNetBlockConfig`)
            Configs for each of the blocks used in the backbone.

        fpn_levels (list of ints):
            Indices of backbone feature levels to include in the BiFPN, starting at index 1.
            Values greater than the total number of blocks in the backbone denote levels that
            are created by applying additional strided convolutions to the final backbone level.

        fpn_filters (int):
            Number of filters to use for the BiFPN (after width scaling).

        fpn_repeats (int):
            Number of BiFPN layers to stack (after depth scaling).

        width_coeff (float):
            The width scaling coefficient.

        depth_coeff (float):
            The depth scaling coefficient.

        width_divisor (float):
            Used in calculating number of filters under width scaling.

        min_width (int):
            The minimum width of the model at any block.

        stem (:class:`torch.nn.Module`):
            An optional stem, forwarded to the backbone constructor.

        head (:class:`torch.nn.Module`):
            An optional head. When ``None``, ``forward`` returns the extracted features.

        fpn_kwargs (dict):
            Keyword args passed to every BiFPN layer.
    """
    __constants__ = ["fpn_levels"]

    def __init__(
        self,
        block_configs: List[MobileNetBlockConfig],
        fpn_levels: List[int] = [3, 5, 7, 8, 9],
        fpn_filters: int = 64,
        fpn_repeats: int = 3,
        width_coeff: float = 1.0,
        depth_coeff: float = 1.0,
        width_divisor: float = 8.0,
        min_width: Optional[int] = None,
        stem: Optional[nn.Module] = None,
        head: Optional[nn.Module] = None,
        fpn_kwargs: dict = {},
    ):
        super(_EfficientDet, self).__init__(
            block_configs, width_coeff, depth_coeff, width_divisor, min_width, stem, head
        )
        # Copy so instances never share (or mutate) the default list object.
        self.fpn_levels = list(fpn_levels)
        # Work on a private copy; the caller's configs must not be rescaled in place.
        block_configs = deepcopy(block_configs)

        for config in block_configs:
            # update config according to scale coefficients
            config.input_filters = self.round_filters(config.input_filters, width_coeff, width_divisor, min_width)
            config.output_filters = self.round_filters(config.output_filters, width_coeff, width_divisor, min_width)
            config.num_repeats = self.round_repeats(depth_coeff, config.num_repeats)

        num_blocks = len(block_configs)

        # convolutions mapping backbone feature maps to constant number of channels
        fpn_convs = []
        output_filters = self.round_filters(fpn_filters, 1.0, width_divisor, min_width)
        for i, config in enumerate(block_configs):
            # fpn_levels is 1-indexed, enumerate is 0-indexed
            if i + 1 in fpn_levels:
                fpn_convs.append(self.Conv(config.output_filters, output_filters, kernel_size=1))

        # extra (coarser) levels produced by strided convs on top of the backbone
        for i in fpn_levels:
            if i == num_blocks + 1:
                # first extra level consumes the raw output of the last backbone block
                input_filters = block_configs[-1].output_filters
                conv = self.Conv(input_filters, output_filters, kernel_size=3, stride=2, padding=1)
                fpn_convs.append(conv)
            elif i > num_blocks + 1:
                # later extra levels consume the previous extra level
                conv = self.Conv(output_filters, output_filters, kernel_size=3, stride=2, padding=1)
                fpn_convs.append(nn.Sequential(nn.ReLU(), conv))

        self.fpn_convs = nn.ModuleList(fpn_convs)

        self.bifpn_layers = nn.ModuleList(
            [self.BiFPN(output_filters, levels=len(fpn_levels), **fpn_kwargs) for _ in range(fpn_repeats)]
        )

    def extract_features(self, inputs: Tensor) -> List[Tensor]:
        r"""Runs the EfficientDet stem and body to extract features, returning a list of
        tensors representing features extracted from each BiFPN level.

        Args:
            inputs (:class:`torch.Tensor`):
                Model inputs
        """
        # efficientnet feature extractor
        backbone_features: List[Tensor] = []
        prev_x = self.stem(inputs)
        for block in self.blocks:
            prev_x = block(prev_x)
            backbone_features.append(prev_x)

        # pull out feature maps to be used in BiFPN
        captured_features: List[Tensor] = []
        for i in self.fpn_levels:
            if i - 1 < len(backbone_features):
                captured_features.append(backbone_features[i - 1])

        # The first ``num_backbone_levels`` convs map captured backbone features to a
        # constant channel count; the remaining convs create the extra strided levels
        # by chaining from the final backbone output.
        num_backbone_levels = len(captured_features)
        for i, conv in enumerate(self.fpn_convs):
            if i < num_backbone_levels:
                captured_features[i] = conv(captured_features[i])
            else:
                prev_x = conv(prev_x)
                captured_features.append(prev_x)

        for bifpn in self.bifpn_layers:
            captured_features = bifpn(captured_features)

        return captured_features

    def forward(self, inputs: Tensor) -> List[Tensor]:
        r"""Runs the entire EfficientDet model, including stem, body, and head.
        If no head was supplied, the output of :func:`extract_features` will be returned.
        Otherwise, the output of the given head will be returned.

        .. note::
            The returned output will always be a list of tensors. If a custom head is given
            and it returns a single tensor, that tensor will be wrapped in a list before
            being returned.

        Args:
            inputs (:class:`torch.Tensor`):
                Model inputs
        """
        output = self.extract_features(inputs)
        if self.head is not None:
            output = self.head(output)
            if not isinstance(output, list):
                output = [output]

        return output

    @classmethod
    def from_predefined(cls, compound_coeff: int, **kwargs) -> "_EfficientDet":
        r"""Creates an EfficientDet model using one of the parameterizations defined in the
        `EfficientDet paper`_.

        Args:
            compound_coeff (int):
                Compound scaling parameter :math:`\phi`. For example, to construct EfficientDet-D0, set
                ``compound_coeff=0``.

            **kwargs:
                Additional parameters/overrides for model constructor.

        .. _EfficientDet paper:
            https://arxiv.org/abs/1911.09070
        """
        # from paper
        alpha = 1.2
        beta = 1.1
        width_divisor = 8.0

        depth_coeff = alpha ** compound_coeff
        width_coeff = beta ** compound_coeff

        fpn_filters = int(64 * 1.35 ** compound_coeff)
        fpn_repeats = 3 + compound_coeff
        fpn_levels = [3, 5, 7, 8, 9]

        final_kwargs = {
            # NOTE(review): DEFAULT_BLOCKS is not defined in this file's visible code;
            # presumably provided by the backbone class hierarchy - confirm.
            "block_configs": cls.DEFAULT_BLOCKS,
            "width_coeff": width_coeff,
            "depth_coeff": depth_coeff,
            "width_divisor": width_divisor,
            "fpn_filters": fpn_filters,
            "fpn_repeats": fpn_repeats,
            "fpn_levels": fpn_levels,
        }
        # caller-supplied kwargs take precedence over the predefined values
        final_kwargs.update(kwargs)
        return cls(**final_kwargs)

class EfficientDet1d(_EfficientDet, metaclass=_EfficientDetMeta):
    r"""1d variant of EfficientDet.

    Layer types are selected by ``_EfficientDetMeta`` from the ``1d`` marker
    in this class's name.
    """

class EfficientDet2d(_EfficientDet, metaclass=_EfficientDetMeta):
    r"""Implementation of EfficientDet as described in the `EfficientDet paper`_.
    EfficientDet is built on an EfficientNet backbone
    (see :class:`combustion.models.EfficientNet2d` for details). EfficientDet adds a
    bidirectional feature pyramid network (see :class:`combustion.nn.BiFPN2d`), which
    mixes information across the various feature maps produced by the EfficientNet backbone.

    .. image:: ./efficientdet.png
        :width: 800px
        :align: center
        :height: 300px
        :alt: Diagram of EfficientDet

    The authors of EfficientDet used the default EfficientNet scaling parameters for the backbone:

    .. math::
        \alpha = 1.2 \\
        \beta = 1.1 \\
        \gamma = 1.15

    The BiFPN was scaled as follows:

    .. math::
        W_\text{bifpn} = 64 \cdot \big(1.35^\phi\big) \\
        D_\text{bifpn} = 3 + \phi

    In the original EfficientDet implementation, the authors extract feature maps from levels
    3, 5, and 7 of the backbone. Two additional coarse levels are created by performing additional
    strided convolutions to the final level in the backbone, for a total of 5 levels in the BiFPN.

    .. note::
        Currently, DropConnect ratios are not scaled based on depth of the given block.
        This is a deviation from the true EfficientNet implementation.

    Args:
        block_configs (list of :class:`combustion.nn.MobileNetBlockConfig`)
            Configs for each of the :class:`combustion.nn.MobileNetConvBlock2d` blocks
            used in the model.

        fpn_levels (list of ints):
            Indices of EfficientNet feature levels to include in the BiFPN, starting at index 1.
            Values in ``fpn_levels`` greater than the total number of blocks in the backbone denote
            levels that should be created by applying additional strided convolutions to the final
            level in the backbone.

        fpn_filters (int):
            Number of filters to use for the BiFPN. The filter count given here should be the desired
            number of filters after width scaling.

        fpn_repeats (int):
            Number of repeats to use for the BiFPN. The repeat count given here should be the desired
            number of repeats after depth scaling.

        width_coeff (float):
            The width scaling coefficient. Increasing this increases the width of the model.

        depth_coeff (float):
            The depth scaling coefficient. Increasing this increases the depth of the model.

        width_divisor (float):
            Used in calculating number of filters under width scaling. Filters at each block
            will be a multiple of ``width_divisor``.

        min_width (int):
            The minimum width of the model at any block

        stem (:class:`torch.nn.Module`):
            An optional stem to use for the model. The default stem is a single
            3x3/2 convolution that expects 3 input channels.

        head (:class:`torch.nn.Module`):
            An optional head to use for the model. By default, no head will be used
            and ``forward`` will return a list of tensors containing extracted features.

        fpn_kwargs (dict):
            Keyword args to be passed to all :class:`combustion.nn.BiFPN2d` layers.

    Shapes
        * Input: :math:`(N, C, H, W)`
        * Output: List of tensors of shape :math:`(N, C, H', W')`, where height and width vary
          depending on the amount of downsampling for that feature map.

    .. _EfficientDet paper:
        https://arxiv.org/abs/1911.09070
    """
class EfficientDet3d(_EfficientDet, metaclass=_EfficientDetMeta):
    r"""3d variant of EfficientDet.

    Layer types are selected by ``_EfficientDetMeta`` from the ``3d`` marker
    in this class's name.
    """

