我目前正在研究基于补丁的超分辨率。大多数论文将图像分割成更小的补丁,然后使用这些补丁作为模型的输入。我能够使用自定义数据加载器创建补丁。代码如下:
import torch.utils.data as data
from torchvision.transforms import CenterCrop, ToTensor, Compose, ToPILImage, Resize, RandomHorizontalFlip, RandomVerticalFlip
from os import listdir
from os.path import join
from PIL import Image
import random
import os
import numpy as np
import torch
def is_image_file(filename):
    """Return True when *filename* ends with a supported image extension.

    Supported extensions are ``.png``, ``.jpg``, ``.jpeg`` and ``.bmp``.
    The comparison ignores case so that files such as ``IMG_0001.JPG`` are
    not silently excluded.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    return filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp"))
class TrainDatasetFromFolder(data.Dataset):
    """Dataset yielding stacked low-/high-resolution patches for one image.

    Every item corresponds to one image file found in ``dataset_dir``. The
    image is rescaled, randomly flipped and rotated, then cut into square
    patches on a regular grid. The number of patches depends on the image
    size, so items from different images can have different lengths along
    dimension 0.

    Args:
        dataset_dir: Folder that is scanned (non-recursively) for images.
        patch_size: Side length of the square high-resolution patches.
        is_gray: Stored on the instance; not read by any method of this
            class.
        stride: Step between neighbouring patch origins, in pixels.
    """

    def __init__(self, dataset_dir, patch_size, is_gray, stride):
        super(TrainDatasetFromFolder, self).__init__()
        # Sorted so that index -> file mapping is deterministic.
        self.imageHrfilenames = [
            join(dataset_dir, name)
            for name in sorted(listdir(dataset_dir))
            if is_image_file(name)
        ]
        self.is_gray = is_gray
        self.patchSize = patch_size
        self.stride = stride

    @staticmethod
    def _patch_origins(length, patch_size, stride):
        """Return the start offsets of every full patch along one axis.

        The stop value is ``length - patch_size + 1`` so that the last
        patch that still fits entirely (origin ``length - patch_size``) is
        included. The result is empty when ``length < patch_size``.
        """
        return range(0, length - patch_size + 1, stride)

    def _load_file(self, index):
        """Load, rescale and randomly augment the image at *index*.

        Returns:
            A ``(image, name)`` tuple where ``image`` is the augmented PIL
            image and ``name`` is the file name without directory or
            extension.
        """
        path = self.imageHrfilenames[index]
        downsizes = (1, 0.7, 0.45)
        downsize = 2  # index into ``downsizes``: images are scaled by 0.45
        # The context manager closes the underlying file once the
        # augmented copy has been produced.
        with Image.open(path) as source:
            w_ = int(source.width * downsizes[downsize])
            h_ = int(source.height * downsizes[downsize])
            aug = Compose([Resize([h_, w_], interpolation=Image.BICUBIC),
                           RandomHorizontalFlip(),
                           RandomVerticalFlip()])
            hr = aug(source)
        # randrange(4) draws 0..3, i.e. 0/90/180/270 degrees with equal
        # probability (randint(0, 4) would also draw 360 == 0 degrees).
        quarter_turns = random.randrange(4)
        hr = hr.rotate(90 * quarter_turns, expand=1)
        name = os.path.splitext(os.path.split(path)[-1])[0]
        return hr, name

    def _patching(self, img):
        """Cut *img* into patches and build the matching downscaled patches.

        Args:
            img: PIL image to be cut.

        Returns:
            ``(LR, HR)`` tensors stacked along a new leading dimension; the
            low-resolution patches are resized with ``patchSize // 2``.

        Raises:
            RuntimeError: If the image is smaller than one patch, so no
                patch can be extracted.
        """
        img = ToTensor()(img)
        to_lr = Compose([ToPILImage(),
                         Resize(self.patchSize // 2,
                                interpolation=Image.BICUBIC),
                         ToTensor()])
        size = self.patchSize
        rows = self._patch_origins(img.shape[1], size, self.stride)
        cols = self._patch_origins(img.shape[2], size, self.stride)
        HR_p, LR_p = [], []
        for i in rows:
            for j in cols:
                patch = img[:, i:i + size, j:j + size]
                HR_p.append(patch)
                LR_p.append(to_lr(patch))
        if not HR_p:
            # torch.stack on an empty list raises a RuntimeError as well;
            # raise the same type with a message that names the cause.
            raise RuntimeError(
                "image of size %dx%d is smaller than patch size %d"
                % (img.shape[1], img.shape[2], size))
        return torch.stack(LR_p), torch.stack(HR_p)

    def __getitem__(self, index):
        """Return the ``(LR patches, HR patches)`` pair for image *index*."""
        hr_image, _ = self._load_file(index)
        LR_p, HR_p = self._patching(hr_image)
        return LR_p, HR_p

    def __len__(self):
        """Return the number of image files found in the dataset folder."""
        return len(self.imageHrfilenames)
假设批量大小为 1,数据加载器读取一张图像并给出大小为 [x, 3, patchsize, patchsize] 的输出。当批量大小为 2 时,我会得到两个大小不同的 [x, 3, patchsize, patchsize] 输出(例如图像 1 可能给出 [50, 3, patchsize, patchsize],图像 2 可能给出 [75, 3, patchsize, patchsize])。为了处理这个问题,需要一个自定义的整理函数(collate function),沿着维度 0 拼接这两个输出。整理函数如下:
def my_collate(batch):
    """Merge per-image patch stacks into one pair of tensors.

    Each element of *batch* is indexed as ``item[0]`` (input patches) and
    ``item[1]`` (target patches). The inputs of all elements are
    concatenated along dimension 0, and likewise the targets.

    Returns:
        A two-element list ``[inputs, targets]``.
    """
    input_chunks = [sample[0] for sample in batch]
    merged_inputs = torch.cat(input_chunks, dim=0)

    target_chunks = [sample[1] for sample in batch]
    merged_targets = torch.cat(target_chunks, dim=0)

    return [merged_inputs, merged_targets]
这个整理函数沿着 x 维度进行拼接(在上面的例子中,我最终得到 [125, 3, patchsize, patchsize])。出于训练目的,我需要使用大小为 25 的小批量来训练模型。是否有任何方法或函数,可以让数据加载器按需读取必要数量的图像,并直接输出大小为 [25, 3, patchsize, patchsize] 的批次?