import os

import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
class CustomImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs listed in a CSV.

    Column 0 of the annotation table holds each image's file name (joined
    onto ``img_dir``) and column 1 holds its label. The CSV is parsed with
    ``pandas.read_csv`` defaults, so its first row is treated as a header.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        """Read the annotation table and remember where the images live.

        Args:
            csv_file: Path of the annotation CSV, passed to ``pd.read_csv``.
            img_dir: Directory that file names from column 0 are joined onto.
            transform: Optional callable applied to every loaded PIL image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Row position of the sample; a Python int or a
                single-element tensor.

        Returns:
            A tuple ``(image, label)`` where ``image`` is the PIL image (or
            the result of ``transform`` when one was given) and ``label`` is
            column 1 of the row wrapped in a ``torch.Tensor``.
        """
        # Some callers hand over tensor indices; iloc needs a plain integer.
        if torch.is_tensor(idx):
            idx = idx.item()

        img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])

        # Image.open is lazy: force the pixel data to be read while the file
        # is open so the handle is released here instead of lingering until
        # the image is first used (or garbage-collected).
        with Image.open(img_path) as image:
            image.load()

        label = torch.tensor(self.annotations.iloc[idx, 1])

        # Compare against None explicitly so a callable that happens to be
        # falsy (e.g. an empty container module) is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label
# Example usage: build the dataset from an annotation CSV and an image folder,
# converting every image to a tensor as it is loaded.
# NOTE(review): `transforms` is not imported in the visible part of this file;
# presumably it is `torchvision.transforms` -- confirm and import it.
dataset = CustomImageDataset(
    csv_file='labels.csv',
    img_dir='images/',
    transform=transforms.ToTensor(),
)
print(f"Dataset size: {len(dataset)}")
# Create DataLoaders with optimizations

# Hold out a fixed fraction of `dataset` for validation. The seeded generator
# keeps the train/validation membership identical from run to run.
VAL_FRACTION = 0.2
num_val_samples = int(len(dataset) * VAL_FRACTION)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [len(dataset) - num_val_samples, num_val_samples],
    generator=torch.Generator().manual_seed(42),
)

# Page-locked host memory only pays off when batches are copied to a CUDA
# device; requesting it on a CPU-only machine just triggers a warning.
use_pinned_memory = torch.cuda.is_available()

# NOTE: num_workers > 0 starts worker processes. On platforms that spawn
# rather than fork (Windows, macOS) this module-level code must run under an
# `if __name__ == "__main__":` guard.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,                  # Randomize order every epoch
    num_workers=4,                 # Parallel loading
    pin_memory=use_pinned_memory,  # Speed up host-to-GPU transfer
    drop_last=True,                # Drop the final incomplete batch
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=64,                 # Larger batch: no gradients are kept
    shuffle=False,                 # Keep order stable for validation
    num_workers=2,
    pin_memory=use_pinned_memory,
)
# Training loop: one optimizer step per mini-batch, repeated for every epoch.
# NOTE(review): `num_epochs`, `device`, `model`, `criterion` and `optimizer`
# are not defined in the visible part of this file and must be set up first.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Transfer the inputs and their labels to the compute device.
        data, targets = data.to(device), targets.to(device)

        # Clear the gradients left over from the previous step so the
        # upcoming backward pass does not accumulate onto them.
        optimizer.zero_grad()

        # Forward pass: predictions, then the loss against the labels.
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward pass: compute gradients and update the parameters.
        loss.backward()
        optimizer.step()