Source code for vformer.utils.window_utils

import torch

from .utils import pair


def cyclicshift(input, shift_size, dims=None):
    """
    Cyclically roll a tensor along the given axes using ``torch.roll``.

    Parameters
    -----------
    input: torch.Tensor
        input tensor
    shift_size: int or tuple(int)
        Number of places by which input tensor is shifted; it is passed
        through ``pair`` before being handed to ``torch.roll``
    dims: int or tuple(int),optional
        Axis along which to roll; ``(1, 2)`` is used when this is ``None``

    Returns
    --------
    torch.Tensor
        The rolled tensor returned by ``torch.roll``
    """
    # Compare against None by identity rather than equality.
    if dims is None:
        dims = (1, 2)

    return torch.roll(input, shifts=pair(shift_size), dims=dims)


def window_partition(x, window_size):
    """
    Cut a feature map into non-overlapping square windows.

    Parameters
    -----------
    x: torch.Tensor
        input tensor of shape (B, H, W, C)
    window_size: int
        side length of each window

    Returns
    --------
    torch.Tensor
        Tensor of shape (-1, window_size, window_size, C), one entry per window
    """
    batch, height, width, channels = x.shape
    rows = height // window_size
    cols = width // window_size

    # Split H into (rows, window_size) and W into (cols, window_size).
    grid = x.view(batch, rows, window_size, cols, window_size, channels)
    # Put the two grid axes next to each other so each window is one tile.
    tiles = grid.permute(0, 1, 3, 2, 4, 5).contiguous()
    return tiles.view(-1, window_size, window_size, channels)


def window_reverse(windows, window_size, H, W):
    """
    Reassemble square windows into a (B, H, W, C) feature map.

    Parameters
    -----------
    windows: torch.Tensor
        Windows of shape (num_windows * B, window_size, window_size, C)
    window_size: int
        Side length of each window
    H: int
        Height of the reassembled feature map
    W: int
        Width of the reassembled feature map

    Returns
    --------
    torch.Tensor
        Tensor of shape (B, H, W, C)

    Raises
    ------
    ValueError
        If H or W is not a positive multiple of ``window_size``
    """
    rows, row_rest = divmod(H, window_size)
    cols, col_rest = divmod(W, window_size)
    if row_rest or col_rest or rows == 0 or cols == 0:
        raise ValueError(
            f"H ({H}) and W ({W}) must be positive multiples of "
            f"window_size ({window_size})"
        )

    # Integer arithmetic keeps the batch size exact (no float rounding).
    B = windows.shape[0] // (rows * cols)
    x = windows.view(B, rows, cols, window_size, window_size, -1)
    # Interleave grid and in-window axes back into (B, rows, ws, cols, ws, C).
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
    return x


def get_relative_position_bias_index(window_size):
    """
    Compute the pairwise relative-position index for a Wh x Ww window.

    For every ordered pair of positions (p, q) in the flattened window, the
    row offset ``dh`` and column offset ``dw`` are shifted to start from 0 and
    folded into one value: ``(dh + Wh - 1) * (2 * Ww - 1) + (dw + Ww - 1)``.

    Parameters
    ----------
    window_size: int or tuple[int]
        Window size; normalised to (Wh, Ww) with ``pair``

    Returns
    -------
    torch.Tensor
        Tensor of shape (Wh*Ww, Wh*Ww) holding the folded index of each pair
    """
    window_size = pair(window_size)
    coords_h = torch.arange(window_size[0])
    coords_w = torch.arange(window_size[1])

    # Row/column coordinate of every flattened position (row-major order).
    # Built by broadcasting instead of torch.meshgrid, whose call without an
    # explicit `indexing` argument warns on newer torch releases.
    rows = coords_h[:, None].expand(-1, coords_w.numel()).reshape(-1)  # Wh*Ww
    cols = coords_w[None, :].expand(coords_h.numel(), -1).reshape(-1)  # Wh*Ww

    # Pairwise offsets, shifted to start from 0.
    row_offset = rows[:, None] - rows[None, :] + (window_size[0] - 1)
    col_offset = cols[:, None] - cols[None, :] + (window_size[1] - 1)

    # Fold the 2-D offset into a single index.
    relative_position_index = row_offset * (2 * window_size[1] - 1) + col_offset
    return relative_position_index


def create_mask(window_size, shift_size, H, W):
    """
    Build the attention mask used for shifted-window attention.

    An H x W map is labelled with nine region ids (three bands along each
    axis), cut into windows, and every pair of positions inside a window is
    compared by label.

    Parameters
    ----------
    window_size: int
        Window Size
    shift_size: int
        Shift_size
    H: int
        Height of the label map
    W: int
        Width of the label map

    Returns
    -------
    torch.Tensor
        Tensor of shape (num_windows, window_size**2, window_size**2) holding
        0.0 where two positions share a region label and -100.0 otherwise
    """
    # The same three bands are applied along both the height and width axes.
    bands = (
        slice(0, -window_size),
        slice(-window_size, -shift_size),
        slice(-shift_size, None),
    )

    region_map = torch.zeros(1, H, W, 1)
    band_pairs = ((rows, cols) for rows in bands for cols in bands)
    for region_id, (rows, cols) in enumerate(band_pairs):
        region_map[:, rows, cols, :] = region_id

    labels = window_partition(region_map, window_size)
    labels = labels.view(-1, window_size * window_size)

    # Non-zero difference means the two positions come from different regions.
    label_diff = labels.unsqueeze(1) - labels.unsqueeze(2)
    differs = label_diff != 0
    blocked = label_diff.masked_fill(differs, float(-100.0))
    return blocked.masked_fill(~differs, float(0.0))