使用PyTorch将文件夹下的图片分为训练集和验证集实例
更新时间:2020年01月08日 14:02:50 作者:xgbm_k
今天小编就为大家分享一篇使用PyTorch将文件夹下的图片分为训练集和验证集实例,具有很好的参考价值,希望对大家有所帮助。一起跟随小编过来看看吧
PyTorch(torchvision.datasets)提供了ImageFolder类,用于加载文件结构如下的图片数据集:
root/dog/xxx.png
root/dog/xxy.png
root/dog/xxz.png

root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
使用这个类的问题在于无法将训练集(training dataset)和验证集(validation dataset)分开。我写了两个类来完成这个工作。
import os

import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Resize, Compose


class ImageFolderSplitter:
    """Split an image folder into training and validation file lists.

    Images should be placed in one sub-folder per class::

        root/
            dogs/image1.png
            dogs/image2.png
            cats/image1.png
            cats/image2.png

    Args:
        path: The root of the image folder.
        train_size: Forwarded to ``train_test_split`` as the share of
            samples placed in the training set.
        random_state: Optional seed forwarded to ``train_test_split`` so
            the split can be reproduced. ``None`` keeps the original
            behaviour of a different shuffle on every run.

    Raises:
        RuntimeError: If the root folder contains files or fewer than two
            class folders, or if a class folder is empty or contains
            sub-folders.
    """

    def __init__(self, path, train_size=0.8, random_state=None):
        self.path = path
        self.train_size = train_size
        self.class2num = {}      # class folder name -> integer label
        self.num2class = {}      # integer label -> class folder name
        self.class_nums = {}     # integer label -> number of images
        self.data_x_path = []    # full path of every image found
        self.data_y_label = []   # integer label of every image found
        self.x_train = []
        self.x_valid = []
        self.y_train = []
        self.y_valid = []

        for root, dirs, files in os.walk(path):
            if not self.class2num:
                # os.walk visits the dataset root first; its sub-folders
                # are the classes.
                if files or len(dirs) < 2:
                    raise RuntimeError("please check the folder structure!")
                # Sorting in place makes the label assignment independent
                # of filesystem order and also fixes the traversal order.
                dirs.sort()
                for index, class_name in enumerate(dirs):
                    self.num2class[index] = class_name
                    self.class2num[class_name] = index
            elif files and not dirs:
                # Look the class up by the folder's own name. A substring
                # test against the whole path picks the wrong class when
                # one class name occurs inside the path or another name.
                label = self.class2num[os.path.basename(root)]
                self.class_nums[label] = len(files)
                for file_name in sorted(files):
                    self.data_x_path.append(os.path.join(root, file_name))
                    self.data_y_label.append(label)
            else:
                raise RuntimeError("please check the folder structure!")

        self.x_train, self.x_valid, self.y_train, self.y_valid = train_test_split(
            self.data_x_path,
            self.data_y_label,
            shuffle=True,
            train_size=self.train_size,
            random_state=random_state,
        )

    def getTrainingDataset(self):
        """Return ``(paths, labels)`` of the training split."""
        return self.x_train, self.y_train

    def getValidationDataset(self):
        """Return ``(paths, labels)`` of the validation split."""
        return self.x_valid, self.y_valid


class DatasetFromFilename(Dataset):
    """Dataset that loads images lazily from a list of file paths.

    Args:
        x: A list of image file full paths.
        y: A list of image categories, aligned with ``x``.
        transforms: Callable applied to each RGB ``PIL.Image``. Defaults
            to ``ToTensor()`` when ``None``.
    """

    def __init__(self, x, y, transforms=None):
        super().__init__()
        self.x = x
        self.y = y
        self.transforms = ToTensor() if transforms is None else transforms

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_tensor)`` for sample ``idx``."""
        # convert() returns a fully loaded copy, so the file handle can be
        # closed immediately instead of lingering until garbage collection.
        with Image.open(self.x[idx]) as source:
            img = source.convert("RGB")
        return self.transforms(img), torch.tensor([[self.y[idx]]])


# Example usage:
# splitter = ImageFolderSplitter("for_test")
# transforms = Compose([Resize((51, 51)), ToTensor()])
# x_train, y_train = splitter.getTrainingDataset()
# training_dataset = DatasetFromFilename(x_train, y_train, transforms=transforms)
# training_dataloader = DataLoader(training_dataset, batch_size=2, shuffle=True)
# x_valid, y_valid = splitter.getValidationDataset()
# validation_dataset = DatasetFromFilename(x_valid, y_valid, transforms=transforms)
# validation_dataloader = DataLoader(validation_dataset, batch_size=2, shuffle=True)
# for x, y in training_dataloader:
#     print(x.shape, y.shape)
更多的代码可以在我的GitHub repo下找到。
微信公众号搜索 “ 脚本之家 ” ,选择关注
程序猿的那些事、送书等活动等着你
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。
如若内容造成侵权/违法违规/事实不符,请将相关资料发送至 reterry123@163.com 进行投诉反馈,一经查实,立即处理!
相关文章
一文详解Python中的Map,Filter和Reduce函数
这篇文章主要介绍了一文详解Python中的Map,Filter和Reduce函数,本文重点介绍Python中的三个特殊函数Map,Filter和Reduce,以及如何使用它们进行代码编程2022-08-08
最新评论