January 18, 2025

Writing a training program in PyTorch is arguably much easier than in TensorFlow. However, because so many excellent pretrained models are available on Hugging Face, opportunities to train a model from scratch are becoming rarer. To avoid forgetting how training works in PyTorch, I have summarized its basic structure here. Below is a program for fine-tuning a TorchVision model.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.datasets as datasets
import torchvision.models as models
#https://pytorch.org/vision/stable/models.html
def _collate_keep_images(batch):
    """Collate ``(image, label)`` samples without stacking the images.

    The default collate function cannot batch the PIL images that
    ``ImageFolder`` yields when no transform is set, so the images are kept
    as a plain list and only the integer labels are packed into a tensor.
    Defined at module level so it stays picklable for worker processes.
    """
    images = [image for image, _ in batch]
    labels = torch.tensor([label for _, label in batch])
    return images, labels


def load_dataset(cfg, transform=None):
    """Build training and validation loaders from an image folder.

    The folder at ``cfg.dir`` is read with ``ImageFolder`` and randomly split
    80% / 20% into training and validation subsets.

    Args:
        cfg: Dataset config providing ``dir``, ``batchsize`` and
            ``num_workers``.
        transform: Optional per-sample transform passed to ``ImageFolder``.
            When omitted, each batch is ``(list_of_images, label_tensor)``
            and the caller is expected to preprocess and stack the images.

    Returns:
        Tuple ``(loader_train, loader_val)``.
    """
    full_dataset = datasets.ImageFolder(cfg.dir, transform=transform)
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    # random_split already returns the two Subset objects; no manual
    # index slicing is needed afterwards.
    train_dataset, val_dataset = torch.utils.data.random_split(
        full_dataset, [train_size, val_size]
    )
    # Untransformed samples are PIL images, which the default collate
    # function rejects; fall back to the list-preserving collate then.
    collate_fn = _collate_keep_images if transform is None else None
    loader_train = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=cfg.batchsize,
        shuffle=True,
        num_workers=cfg.num_workers,
        collate_fn=collate_fn,
    )
    loader_val = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=cfg.batchsize,
        # Ordering does not affect the validation loss, so skip shuffling.
        shuffle=False,
        num_workers=cfg.num_workers,
        collate_fn=collate_fn,
    )
    return loader_train, loader_val
def load_model(cfg):
    """Create a pretrained TorchVision model set up for fine-tuning.

    The final classifier layer is replaced by a fresh ``nn.Linear`` with
    ``cfg.num_classes`` outputs. Only VGG-family models are handled, because
    the position of the last layer differs between architectures.

    Args:
        cfg: Model config providing ``model_name`` and ``num_classes``.

    Returns:
        Tuple ``(model, loss_func, optimizer, scheduler)``.

    Raises:
        ValueError: If ``cfg.model_name`` does not start with ``"vgg"``.
    """
    # Validate first so an unsupported name fails before weights are fetched.
    if not cfg.model_name.startswith("vgg"):
        raise ValueError(
            f"unsupported model_name {cfg.model_name!r}: "
            "only 'vgg*' models are handled"
        )
    model = models.get_model(cfg.model_name, weights="DEFAULT")
    # Swap the last classifier layer so the output size matches the dataset.
    in_feat_num = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(in_feat_num, cfg.num_classes)
    # The matching input preprocessing for the pretrained weights is available
    # via models.get_model_weights(cfg.model_name).DEFAULT.transforms().
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    return model, loss_func, optimizer, scheduler
def main(cfg):
    """Fine-tune the configured model and track the best validation loss.

    Args:
        cfg: Config providing ``model`` (passed to ``load_model``),
            ``dataset`` (passed to ``load_dataset``) and ``num_epochs``.
    """
    model, loss_func, optimizer, scheduler = load_model(cfg.model)
    loader_train, loader_val = load_dataset(cfg.dataset)
    # Preprocessing (resize / crop / normalize) bundled with the pretrained
    # weights of the selected architecture.
    preprocess = models.get_model_weights(
        cfg.model.model_name
    ).DEFAULT.transforms()
    loss_val_best = float("inf")
    for epoch in range(cfg.num_epochs):
        # ---- training ----
        model.train()
        loss_sum = 0.0
        sample_count = 0
        for inputs, labels in loader_train:
            # The model needs one batched tensor, not a list of images.
            images = torch.stack([preprocess(i) for i in inputs])
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            # NOTE(review): assumes loss_func returns the batch mean (the
            # PyTorch default reduction); weighting by batch size then gives
            # a true per-sample average even when the last batch is smaller.
            loss_sum += loss.item() * len(labels)
            sample_count += len(labels)
        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        loss_train_ave = loss_sum / max(sample_count, 1)
        scheduler.step()

        # ---- validation ----
        model.eval()
        loss_sum = 0.0
        sample_count = 0
        with torch.no_grad():
            for inputs, labels in loader_val:
                images = torch.stack([preprocess(i) for i in inputs])
                outputs = model(images)
                loss = loss_func(outputs, labels)
                loss_sum += loss.item() * len(labels)
                sample_count += len(labels)
        loss_val_ave = loss_sum / max(sample_count, 1)
        print(
            f"epoch {epoch}: "
            f"train loss {loss_train_ave:.4f}, val loss {loss_val_ave:.4f}"
        )
        if loss_val_ave < loss_val_best:
            loss_val_best = loss_val_ave
            # get metrics and save the model if it is good

References

[1] https://pytorch.org/vision/stable/models.html

[2] https://pystyle.info/pytorch-split-dataset/