Source code for vformer.common.blocks

import torch.nn as nn


[docs]class DWConv(nn.Module): """ Depth Wise Convolution Parameters ---------- dim: int Dimension of the input tensor kernel_size_dwconv: int,optional Size of the convolution kernel, default is 3 stride_dwconv: int Stride of the convolution, default is 1 padding_dwconv: int or tuple or str Padding added to all sides of the input, default is 1 bias_dwconv:bool Whether to add learnable bias to the output,default is True. """ def __init__( self, dim, kernel_size_dwconv=3, stride_dwconv=1, padding_dwconv=1, bias_dwconv=True, ): super(DWConv, self).__init__() self.dwconv = nn.Conv2d( dim, dim, kernel_size=kernel_size_dwconv, stride=stride_dwconv, padding=padding_dwconv, bias=bias_dwconv, groups=dim, )
[docs] def forward(self, x, H, W): """ Parameters: ---------- x: torch.Tensor Input tensor H: int Height of image patch W: int Width of image patch Returns: ---------- torch.Tensor Returns output tensor after performing depth-wise convolution operation """ B, N, C = x.shape x = x.transpose(1, 2).view(B, C, H, W) x = self.dwconv(x) x = x.flatten(2).transpose(1, 2) return x