Source code for vformer.encoder.embedding.linear

from einops.layers.torch import Rearrange
from torch import nn


class LinearEmbedding(nn.Module):
    """
    Projects image patches into the embedding space using a linear layer.

    Parameters
    ----------
    embedding_dim: int
        Dimension of the resultant embedding
    patch_height: int
        Height of the patch
    patch_width: int
        Width of the patch
    patch_dim: int
        Dimension of the patch
    """

    def __init__(
        self,
        embedding_dim,
        patch_height,
        patch_width,
        patch_dim,
    ):
        super().__init__()

        self.patch_embedding = nn.Sequential(
            Rearrange(
                "b c (h p1) (w p2) -> b (h w) (p1 p2 c)",
                p1=patch_height,
                p2=patch_width,
            ),
            nn.Linear(patch_dim, embedding_dim),
        )
    def forward(self, x):
        """
        Parameters
        ----------
        x: torch.Tensor
            Input tensor

        Returns
        -------
        torch.Tensor
            Returns patch embeddings of size `embedding_dim`
        """

        return self.patch_embedding(x)
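
A minimal usage sketch of the class above, assuming 224x224 RGB inputs split into 16x16 patches (so patch_dim = 3 * 16 * 16 = 768); the concrete sizes and the embedding_dim of 512 are illustrative choices, not values prescribed by vformer.

# Usage sketch (not part of the library source); shapes are illustrative assumptions.
import torch

patch_height, patch_width, channels = 16, 16, 3
patch_dim = channels * patch_height * patch_width  # 768 values per flattened patch

embed = LinearEmbedding(
    embedding_dim=512,
    patch_height=patch_height,
    patch_width=patch_width,
    patch_dim=patch_dim,
)

images = torch.randn(2, channels, 224, 224)  # (batch, channels, height, width)
patches = embed(images)                      # 224 / 16 = 14 patches per side -> 196 patches
print(patches.shape)                         # torch.Size([2, 196, 512])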