Train a model on the Wine Quality dataset using ordinal loss and the Kappa metric in PyTorch

Date: 2024-12-18

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import DataLoader, TensorDataset

# Load the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(url, sep=';')

# Define features and target
X = data.drop('quality', axis=1)
y = data['quality'] - 3  # quality scores run 3-8; shift to class indices 0-5

# Standardize features (note: fitting the scaler on the full dataset before
# splitting leaks test statistics into training; fitting on the training
# split only is the stricter choice)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Create DataLoader for training and testing
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# Set the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
Using device: cuda

Define the Model

The model is a small feed-forward network: two hidden layers (64 and 32 units) with ReLU activations and a linear output layer producing one logit per quality class.

import torch.nn as nn

class OrdinalNN(nn.Module):
    def __init__(self):
        super(OrdinalNN, self).__init__()
        self.fc1 = nn.Linear(X.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 6)  # 6 classes: quality 3-8 mapped to 0-5

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = OrdinalNN().to(device)

Define the Focal Loss

class OrdinalFocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=0.25, num_classes=6):
        super(OrdinalFocalLoss, self).__init__()
        self.gamma = gamma  # Focusing parameter
        self.alpha = alpha  # Weighting factor
        self.num_classes = num_classes

    def forward(self, outputs, targets):
        """
        Compute the Ordinal Focal Loss

        :param outputs: Predicted logits from the model (batch_size, num_classes)
        :param targets: Ground truth labels (batch_size)
        :return: Loss value
        """
        # Convert targets to one-hot encoding
        targets_one_hot = torch.zeros(targets.size(0), self.num_classes).to(targets.device)
        targets_one_hot.scatter_(1, targets.unsqueeze(1), 1)

        # Apply softmax to outputs to get class probabilities
        probs = torch.softmax(outputs, dim=1)

        # Calculate the probability of the true class
        p_t = torch.sum(probs * targets_one_hot, dim=1)  # This is p_t for each instance

        # Compute the focal loss
        loss = -self.alpha * (1 - p_t) ** self.gamma * torch.log(p_t + 1e-8)  # Add epsilon to avoid log(0)

        # Return the average loss
        return torch.mean(loss)
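
The loss above penalizes every misclassification the same way, regardless of how far the predicted class is from the true one. Since wine quality is ordinal, a natural extension, sketched below purely as an illustration (this class is not part of the original notebook), is to scale each sample's focal term by the squared distance between the expected class under the softmax distribution and the true label:

class DistanceWeightedFocalLoss(nn.Module):
    """Illustrative sketch: focal loss scaled by squared ordinal distance."""
    def __init__(self, gamma=2, alpha=0.25, num_classes=6):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.num_classes = num_classes

    def forward(self, outputs, targets):
        probs = torch.softmax(outputs, dim=1)
        # Probability assigned to the true class for each sample
        p_t = probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        focal = -self.alpha * (1 - p_t) ** self.gamma * torch.log(p_t + 1e-8)
        # Expected class index under the predicted distribution
        classes = torch.arange(self.num_classes, device=outputs.device, dtype=probs.dtype)
        expected = (probs * classes).sum(dim=1)
        # Samples predicted far from the true class are penalized more
        distance = (expected - targets.float()) ** 2
        return (focal * (1.0 + distance)).mean()

The 1 + distance factor leaves the focal term intact for near-correct predictions and grows quadratically for far-off ones; cumulative-link formulations such as CORAL are another common way to encode the ordering.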
Train the Model

We first train with plain cross-entropy as a baseline; the focal loss defined above is used in the comparison section further down.

from sklearn.metrics import cohen_kappa_score

# Initialize the criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Function to compute the Kappa score
def compute_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true.cpu(), y_pred.cpu())
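
Unweighted Cohen's kappa, as used here, ignores the ordering of the classes. For ordinal targets the quadratic-weighted variant is common; scikit-learn exposes it through the weights parameter of cohen_kappa_score (the helper name below is our own):

def compute_quadratic_kappa(y_true, y_pred):
    # Quadratic weights penalize large ordinal disagreements more heavily
    return cohen_kappa_score(y_true.cpu(), y_pred.cpu(), weights='quadratic')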

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    y_true = []
    y_pred = []
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Collect true and predicted labels for Kappa score
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
    
    # Calculate Kappa score
    kappa_score = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}, Kappa: {kappa_score}")
Epoch 1/100, Loss: 1.5258033365011214, Kappa: 0.027160395525093195
Epoch 2/100, Loss: 1.14848440438509, Kappa: 0.1941581189042062
Epoch 3/100, Loss: 1.0254976019263267, Kappa: 0.29289946055146165
Epoch 4/100, Loss: 0.988839827477932, Kappa: 0.3383639594251141
Epoch 5/100, Loss: 0.965928153693676, Kappa: 0.3631989771546358
Epoch 6/100, Loss: 0.9539458021521569, Kappa: 0.3677904087658541
Epoch 7/100, Loss: 0.9409917533397675, Kappa: 0.3671246515932114
Epoch 8/100, Loss: 0.9327839985489845, Kappa: 0.3751479492595736
Epoch 9/100, Loss: 0.9210372045636177, Kappa: 0.3685858232131014
Epoch 10/100, Loss: 0.922743383049965, Kappa: 0.3722848662886773
Epoch 11/100, Loss: 0.9097453355789185, Kappa: 0.3813580749062848
Epoch 12/100, Loss: 0.9032061487436295, Kappa: 0.37044592857844605
Epoch 13/100, Loss: 0.8965445622801781, Kappa: 0.3797972038405859
Epoch 14/100, Loss: 0.8881877571344375, Kappa: 0.4032125634822731
Epoch 15/100, Loss: 0.8835199296474456, Kappa: 0.40018818898663533
Epoch 16/100, Loss: 0.8790309250354766, Kappa: 0.3903325462495525
Epoch 17/100, Loss: 0.8702574223279953, Kappa: 0.40721159778533744
Epoch 18/100, Loss: 0.8697167977690696, Kappa: 0.4072256904559757
Epoch 19/100, Loss: 0.8619306489825249, Kappa: 0.4140637008713334
Epoch 20/100, Loss: 0.8568678289651871, Kappa: 0.4211031088992041
Epoch 21/100, Loss: 0.8524924352765083, Kappa: 0.41451338248257064
Epoch 22/100, Loss: 0.8480689927935601, Kappa: 0.41419673103023
Epoch 23/100, Loss: 0.8440612345933914, Kappa: 0.44178941625750134
Epoch 24/100, Loss: 0.8406461104750633, Kappa: 0.4034622168334888
Epoch 25/100, Loss: 0.8361509755253792, Kappa: 0.43327084440256236
Epoch 26/100, Loss: 0.8344028279185295, Kappa: 0.41844363720856625
Epoch 27/100, Loss: 0.8277643755078316, Kappa: 0.4222757717701464
Epoch 28/100, Loss: 0.8244240581989288, Kappa: 0.43849674868294786
Epoch 29/100, Loss: 0.8242936119437217, Kappa: 0.42040352491703603
Epoch 30/100, Loss: 0.8143198490142822, Kappa: 0.43104302757442947
Epoch 31/100, Loss: 0.8092397406697274, Kappa: 0.44060655010864125
Epoch 32/100, Loss: 0.8103402808308602, Kappa: 0.445418000079017
Epoch 33/100, Loss: 0.8025514364242554, Kappa: 0.4359494216495535
Epoch 34/100, Loss: 0.801366850733757, Kappa: 0.45975261807226997
Epoch 35/100, Loss: 0.7979567974805832, Kappa: 0.4482904623702938
Epoch 36/100, Loss: 0.7909636944532394, Kappa: 0.4499257267039999
Epoch 37/100, Loss: 0.7835334852337837, Kappa: 0.46419209788669
Epoch 38/100, Loss: 0.7814476490020752, Kappa: 0.4648827375611594
Epoch 39/100, Loss: 0.7808900579810143, Kappa: 0.46110282448596707
Epoch 40/100, Loss: 0.7702956169843673, Kappa: 0.4892929788113062
Epoch 41/100, Loss: 0.7734991297125816, Kappa: 0.47925739353272945
Epoch 42/100, Loss: 0.7667867332696915, Kappa: 0.48040533912521344
Epoch 43/100, Loss: 0.7656202584505081, Kappa: 0.475668626326781
Epoch 44/100, Loss: 0.7598542019724845, Kappa: 0.4858225595558915
Epoch 45/100, Loss: 0.7581931084394455, Kappa: 0.49598754033499703
Epoch 46/100, Loss: 0.7500043898820877, Kappa: 0.4938932823137663
Epoch 47/100, Loss: 0.7498278692364693, Kappa: 0.4952857622244943
Epoch 48/100, Loss: 0.743773840367794, Kappa: 0.5079226029464861
Epoch 49/100, Loss: 0.7405090779066086, Kappa: 0.5198281901732583
Epoch 50/100, Loss: 0.7364834100008011, Kappa: 0.5023553770687714
Epoch 51/100, Loss: 0.7336597308516503, Kappa: 0.5167070843799515
Epoch 52/100, Loss: 0.7293936885893345, Kappa: 0.508031801733295
Epoch 53/100, Loss: 0.7258813440799713, Kappa: 0.5246337519963721
Epoch 54/100, Loss: 0.7226672306656837, Kappa: 0.5265172456118816
Epoch 55/100, Loss: 0.7217974692583085, Kappa: 0.5230864181656874
Epoch 56/100, Loss: 0.712223195284605, Kappa: 0.5266455229603828
Epoch 57/100, Loss: 0.7112390361726284, Kappa: 0.5342339017248592
Epoch 58/100, Loss: 0.7029153138399125, Kappa: 0.5468456178231427
Epoch 59/100, Loss: 0.7027535423636436, Kappa: 0.5393219619781618
Epoch 60/100, Loss: 0.7004692874848842, Kappa: 0.553876299131837
Epoch 61/100, Loss: 0.6959677867591381, Kappa: 0.5544725929676446
Epoch 62/100, Loss: 0.6945664048194885, Kappa: 0.5516791604790041
Epoch 63/100, Loss: 0.6882325552403927, Kappa: 0.5410228096173484
Epoch 64/100, Loss: 0.6827452167868614, Kappa: 0.552530873054403
Epoch 65/100, Loss: 0.6810622230172158, Kappa: 0.5720976915356306
Epoch 66/100, Loss: 0.6765776731073856, Kappa: 0.5615048120704035
Epoch 67/100, Loss: 0.6707129381597042, Kappa: 0.5688434451478712
Epoch 68/100, Loss: 0.6698078818619251, Kappa: 0.5779857384414895
Epoch 69/100, Loss: 0.6700948402285576, Kappa: 0.5699745672150069
Epoch 70/100, Loss: 0.6645593464374542, Kappa: 0.5621607359002477
Epoch 71/100, Loss: 0.6604863002896308, Kappa: 0.5721880442236442
Epoch 72/100, Loss: 0.6597750805318355, Kappa: 0.5939745996643764
Epoch 73/100, Loss: 0.6539112649857998, Kappa: 0.5778682427539623
Epoch 74/100, Loss: 0.6505351833999157, Kappa: 0.5958212350004243
Epoch 75/100, Loss: 0.6485376708209515, Kappa: 0.5947310051377226
Epoch 76/100, Loss: 0.6440189868211746, Kappa: 0.590627979197287
Epoch 77/100, Loss: 0.6387162208557129, Kappa: 0.5899935591403664
Epoch 78/100, Loss: 0.6353652991354466, Kappa: 0.5983798594678247
Epoch 79/100, Loss: 0.6351467996835709, Kappa: 0.6006119625320245
Epoch 80/100, Loss: 0.631549759209156, Kappa: 0.6093738987430988
Epoch 81/100, Loss: 0.6277068927884102, Kappa: 0.61762519112683
Epoch 82/100, Loss: 0.6236165843904018, Kappa: 0.6089009813451609
Epoch 83/100, Loss: 0.6264286696910858, Kappa: 0.6153901337182963
Epoch 84/100, Loss: 0.6178285926580429, Kappa: 0.6276718056686829
Epoch 85/100, Loss: 0.6154053710401058, Kappa: 0.6062257547132006
Epoch 86/100, Loss: 0.6118617177009582, Kappa: 0.5975909896858916
Epoch 87/100, Loss: 0.6087865322828293, Kappa: 0.6062408326862287
Epoch 88/100, Loss: 0.6076791845262051, Kappa: 0.6200215390448939
Epoch 89/100, Loss: 0.6064654208719731, Kappa: 0.6229728790088249
Epoch 90/100, Loss: 0.600296714156866, Kappa: 0.6301554272539882
Epoch 91/100, Loss: 0.6021047808229923, Kappa: 0.6221714061388228
Epoch 92/100, Loss: 0.5982880525290966, Kappa: 0.6244946251032697
Epoch 93/100, Loss: 0.590181715041399, Kappa: 0.6320299679124919
Epoch 94/100, Loss: 0.5918105013668538, Kappa: 0.6247179516270536
Epoch 95/100, Loss: 0.5913812078535556, Kappa: 0.6348833139597976
Epoch 96/100, Loss: 0.5805985651910305, Kappa: 0.6416545681465067
Epoch 97/100, Loss: 0.5784861400723458, Kappa: 0.6488824583376482
Epoch 98/100, Loss: 0.5749207615852356, Kappa: 0.6498347050063702
Epoch 99/100, Loss: 0.5697298489511013, Kappa: 0.652995478504598
Epoch 100/100, Loss: 0.5659848034381867, Kappa: 0.6566265295483256
# Evaluate the model
model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(inputs)
        
        # Collect true and predicted labels for Kappa score
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(outputs.argmax(dim=1).cpu().numpy())

# Calculate Kappa score on the test set
test_kappa = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
print(f"Test Kappa Score: {test_kappa}")
Test Kappa Score: 0.38975208678525664
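
The training kappa climbs to about 0.66 while the test kappa is only about 0.39, a clear sign of overfitting. One remedy is early stopping on a held-out split; a minimal sketch, assuming a validation loader named val_loader that is not part of the original code:

best_kappa, patience, wait = -1.0, 10, 0
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

    # Check kappa on the held-out validation split after each epoch
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in val_loader:  # hypothetical validation loader
            outputs = model(inputs.to(device))
            y_true.extend(labels.numpy())
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
    val_kappa = cohen_kappa_score(y_true, y_pred)

    if val_kappa > best_kappa:
        best_kappa, wait = val_kappa, 0
        torch.save(model.state_dict(), 'best_model.pt')  # keep the best weights
    else:
        wait += 1
        if wait >= patience:
            print(f"Early stopping at epoch {epoch+1}, best val kappa: {best_kappa:.4f}")
            break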

Comparing Cross-Entropy Loss and Focal Loss

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import cohen_kappa_score, accuracy_score

# Assuming OrdinalNN and OrdinalFocalLoss are defined as above
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cross-entropy loss
cross_entropy_criterion = nn.CrossEntropyLoss().to(device)

# Focal loss
focal_loss_criterion = OrdinalFocalLoss().to(device)

# Helper function for Kappa Score and Accuracy
def compute_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true.cpu(), y_pred.cpu())
    kappa = cohen_kappa_score(y_true.cpu(), y_pred.cpu())
    return accuracy, kappa

# Training function for comparison
def train_model(criterion, model, num_epochs=100, train_loader=None, test_loader=None):

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        y_true = []
        y_pred = []
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            
            # Calculate loss
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            # Collect true and predicted labels for Kappa score
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())
        
        # Calculate Kappa score
        kappa_score = compute_kappa(torch.tensor(y_true), torch.tensor(y_pred))
        # print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}, Kappa: {kappa_score}")

    # Evaluate on test set
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.argmax(dim=1).cpu().numpy())

    accuracy, kappa = compute_metrics(torch.tensor(y_true), torch.tensor(y_pred))
    print(f"Test Accuracy: {accuracy:.4f}, Test Kappa: {kappa:.4f}")
    return accuracy, kappa  # return the metrics so callers can aggregate them

# Run training and evaluation for both loss functions, re-initializing the
# model before each run so the second run does not start from already-trained weights
print("Training with CrossEntropyLoss:")
model = OrdinalNN().to(device)
train_model(cross_entropy_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)

print("\nTraining with Focal Loss:")
model = OrdinalNN().to(device)
train_model(focal_loss_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)
Training with CrossEntropyLoss:
Test Accuracy: 0.6125, Test Kappa: 0.3739

Training with Focal Loss:
Test Accuracy: 0.5969, Test Kappa: 0.3695
Add Dropout for Regularization

Given the overfitting observed above, a variant of the model with dropout between the fully connected layers may generalize better.

class OrdinalModelWithDropout(nn.Module):
    def __init__(self, input_size, num_classes, dropout_rate=0.5):
        super(OrdinalModelWithDropout, self).__init__()
        
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        
        # Adding dropout layers between fully connected layers
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout after the first layer
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)  # Apply dropout after the second layer
        x = self.fc3(x)
        return x
# Run training and evaluation for both loss functions, again re-initializing
# the model before each run for a fair comparison
print("Training with CrossEntropyLoss:")
model = OrdinalModelWithDropout(input_size=X.shape[1], num_classes=6).to(device)
train_model(cross_entropy_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)

print("\nTraining with Focal Loss:")
model = OrdinalModelWithDropout(input_size=X.shape[1], num_classes=6).to(device)
train_model(focal_loss_criterion, model=model, num_epochs=100, train_loader=train_loader, test_loader=test_loader)
Training with CrossEntropyLoss:
Test Accuracy: 0.6188, Test Kappa: 0.3709

Training with Focal Loss:
Test Accuracy: 0.6438, Test Kappa: 0.4167
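
With dropout, focal loss comes out ahead in this run, but single runs are noisy: swings of a few points of kappa can come from initialization and batch order alone. A sketch for averaging over seeds (the mean_kappa helper is our own, and it relies on train_model returning its metrics, as edited above):

import numpy as np

def mean_kappa(criterion, seeds=(0, 1, 2, 3, 4)):
    kappas = []
    for seed in seeds:
        torch.manual_seed(seed)  # fixes weight init and DataLoader shuffling
        m = OrdinalModelWithDropout(input_size=X.shape[1], num_classes=6).to(device)
        _, kappa = train_model(criterion, model=m, num_epochs=100,
                               train_loader=train_loader, test_loader=test_loader)
        kappas.append(kappa)
    return np.mean(kappas)

print("Cross-entropy mean kappa:", mean_kappa(cross_entropy_criterion))
print("Focal loss mean kappa:", mean_kappa(focal_loss_criterion))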