Deep Dive into Stacking Model for Multi-Class Classification

Overview

This notebook demonstrates how to implement a stacking model for multi-class classification, driven by a configuration dictionary that specifies several base models and a meta-model (final estimator). Stacking combines the predictions of the base models into meta-features that the final estimator learns from, which can improve performance over any single model.
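The core idea: each base model's class probabilities become columns of a meta-feature matrix, and the final estimator is trained on those columns. Below is a minimal sketch of that data flow on a hypothetical toy dataset (not the wine data used in this notebook); for brevity it fits the base models on the full data and skips the cross-validation that the real implementation uses to avoid leakage.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Toy 3-class problem (purely illustrative)
X_toy, y_toy = make_classification(n_samples=200, n_classes=3, n_informative=4, random_state=0)

# Each base model contributes 3 probability columns; hstack gives a (200, 6) meta-feature matrix
base_models = [LogisticRegression(max_iter=500), DecisionTreeClassifier(max_depth=3)]
meta_features = np.hstack([m.fit(X_toy, y_toy).predict_proba(X_toy) for m in base_models])
print(meta_features.shape)  # (200, 6)

# The meta-model (final estimator) learns from the stacked probabilities
final = LogisticRegression(max_iter=500).fit(meta_features, y_toy)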

Step 1: Import Libraries

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

Step 2: Load Wine Quality Data

# Load the wine quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Convert target to multi-class (low, medium, high quality)
def quality_label(q):
    if q <= 5:
        return 'low'
    elif q == 6:
        return 'medium'
    else:
        return 'high'

data['quality_label'] = data['quality'].apply(quality_label)
data.drop(columns=['quality'], inplace=True)

# Encode target labels
label_encoder = LabelEncoder()
data['quality_label'] = label_encoder.fit_transform(data['quality_label'])

# Features and target
X = data.drop(columns=['quality_label'])
y = data['quality_label']

Step 3: Define Model Configurations

model_configs = {
    'model_1': {
        'feature_names': ['fixed acidity', 'volatile acidity'],
        'hyperparameters': {'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100},
        'estimators': LogisticRegression
    },
    'model_2': {
        'feature_names': ['citric acid', 'residual sugar'],
        'hyperparameters': {'n_estimators': 200, 'learning_rate': 0.05, 'verbosity': -1},
        'estimators': lgb.LGBMClassifier
    },
    'model_3': {
        'feature_names': ['chlorides', 'free sulfur dioxide'],
        'hyperparameters': None,
        'estimators': None
    }
}

final_estimator = lgb.LGBMClassifier(n_estimators=50, learning_rate=0.1, verbosity=-1)
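Each config entry stores the estimator class (not an instance) together with its hyperparameters, so the stacking code can instantiate base models lazily; 'estimators': None marks features that are forwarded to the meta-model unchanged. A quick, purely illustrative sanity check of that contract (the StackModel class below does the same internally):

# Instantiate one base model directly from its config entry
cfg = model_configs['model_1']
estimator = cfg['estimators'](**cfg['hyperparameters'])  # LogisticRegression with the configured hyperparameters
print(estimator)

# model_3 has no estimator: its two features are passed through to the final estimator as-is
print(model_configs['model_3']['estimators'] is None)  # True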

Step 4: Define the Stack Model Class

from sklearn.model_selection import cross_val_predict
from sklearn.base import BaseEstimator
import numpy as np
import pandas as pd

class StackModel:
    def __init__(self, model_configs, final_estimator, cv=None):
        """
        Initialize the stacking model.

        Args:
            model_configs (dict): Configuration for base models. Each key is a model name, and 
                the value is a dictionary with the following keys:
                - 'feature_names': List of feature names used by the model.
                - 'estimators': The model class (e.g., sklearn classifier or regressor).
                - 'hyperparameters': Dictionary of hyperparameters for the estimator.
            final_estimator (BaseEstimator): Meta-model for stacking.
            cv (int, cross-validation generator, or None): Cross-validation strategy for 
                generating meta-features. Default is None (5-fold CV).
        """
        self.model_configs = model_configs
        self.final_estimator = final_estimator
        self.cv = cv or 5
        self.models = {}

    def fit(self, X, y):
        """
        Train the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.
            y (pd.Series or np.ndarray): Target vector.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("X must be a pandas DataFrame.")
        if not isinstance(y, (pd.Series, np.ndarray)):
            raise ValueError("y must be a pandas Series or a numpy array.")

        self.models = {}
        meta_features_list = []

        # Train base models and generate cross-validated meta-features
        for model_name, config in self.model_configs.items():
            features = config['feature_names']
            if config['estimators'] is not None:
                # Instantiate the estimator class with its hyperparameters (tolerate hyperparameters=None)
                estimator = config['estimators'](**(config['hyperparameters'] or {}))
                
                # Generate cross-validated meta-features
                meta_features = cross_val_predict(
                    estimator, X[features], y, cv=self.cv, method='predict_proba'
                )
                meta_features_list.append(meta_features)

                # Train the model on the full dataset
                estimator.fit(X[features], y)

                self.models[model_name] = {
                    'features': features,
                    'model': estimator
                }
            else:
                # Use raw features directly for models without estimators
                meta_features = X[features].values
                meta_features_list.append(meta_features)

                self.models[model_name] = {
                    'features': features,
                    'model': None
                }

        # Combine all meta-features
        self.meta_features = np.hstack(meta_features_list)

        # Train the final estimator using meta-features
        self.final_estimator.fit(self.meta_features, y)

        return self

    def predict(self, X):
        """
        Predict class labels using the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Predicted class labels.
        """
        meta_features = self.transform(X)
        return self.final_estimator.predict(meta_features)

    def predict_proba(self, X):
        """
        Predict probabilities using the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Predicted probabilities.
        """
        meta_features = self.transform(X)
        return self.final_estimator.predict_proba(meta_features)

    def transform(self, X):
        """
        Generate meta-features for a given dataset, transforming the input features.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Transformed meta-features as a numpy array.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("X must be a pandas DataFrame.")

        meta_features = []

        for model_name, model_info in self.models.items():
            features = model_info['features']
            if model_info['model'] is not None:
                model = model_info['model']
                meta_features.append(model.predict_proba(X[features]))
            else:
                meta_features.append(X[features].values)

        return np.hstack(meta_features)

Step 5: Train and Evaluate the Stack Model

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
stack_model = StackModel(model_configs, final_estimator)
stack_model.fit(X_train, y_train)

# Predict and evaluate
predictions = stack_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

print(f"Accuracy of the stacking model: {accuracy:.4f}")
Accuracy of the stacking model: 0.5583
# Export meta-features for inspection
meta_features_train = stack_model.transform(X_train)
meta_features_test = stack_model.transform(X_test)
pd.DataFrame(meta_features_train)
             0         1         2         3         4         5      6     7
0     0.324457  0.215730  0.459813  0.411349  0.134452  0.454199  0.064  53.0
1     0.185403  0.419977  0.394620  0.036433  0.646075  0.317492  0.071   6.0
2     0.102198  0.483839  0.413963  0.000267  0.861533  0.138200  0.084  12.0
3     0.055714  0.592318  0.351968  0.004361  0.420008  0.575631  0.045  19.0
4     0.088008  0.520087  0.391905  0.086474  0.548249  0.365277  0.077  27.0
...        ...       ...       ...       ...       ...       ...    ...   ...
1114  0.088350  0.542695  0.368955  0.010300  0.650717  0.338983  0.058   5.0
1115  0.072132  0.569988  0.357881  0.008834  0.430833  0.560332  0.073  25.0
1116  0.072850  0.553664  0.373486  0.019392  0.769255  0.211352  0.077  15.0
1117  0.330940  0.202417  0.466644  0.542588  0.052167  0.405246  0.054   7.0
1118  0.231645  0.267904  0.500451  0.006196  0.557755  0.436049  0.063   3.0

1119 rows × 8 columns

The eight meta-feature columns are the three class probabilities from model_1 (columns 0-2), the three class probabilities from model_2 (columns 3-5), and the two raw passthrough features of model_3, 'chlorides' and 'free sulfur dioxide' (columns 6-7).

Step 6: Compare the results of StackingClassifier (from sklearn) and the custom StackModel

Step 6.1: Compare using only base models with estimators (no passthrough features)

model_configs_passthrough = {
    'model_1': model_configs['model_1'],
    'model_2': model_configs['model_2']
}
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier

# Define feature selectors for each model
model_1_features = model_configs_passthrough['model_1']['feature_names']
model_2_features = model_configs_passthrough['model_2']['feature_names']

# Create pipelines for each base model
model_1_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_1_features)])),
    ('model', LogisticRegression(**model_configs_passthrough['model_1']['hyperparameters']))
])

model_2_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_2_features)])),
    ('model', lgb.LGBMClassifier(**model_configs_passthrough['model_2']['hyperparameters']))
])


# Define base models
base_models = [
    ('model_1', model_1_pipeline),
    ('model_2', model_2_pipeline)
]

# Define the stacking classifier
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=final_estimator,
    passthrough=False
)
from sklearn.metrics import classification_report

# Custom StackModel
stack_model = StackModel(model_configs_passthrough, final_estimator)
stack_model.fit(X_train, y_train)

# Predict and evaluate using the custom StackModel
custom_predictions = stack_model.predict(X_test)
custom_accuracy = accuracy_score(y_test, custom_predictions)
custom_report = classification_report(y_test, custom_predictions, target_names=label_encoder.classes_)

print("Custom StackModel Results:")
print(f"Accuracy: {custom_accuracy:.4f}")
print("Classification Report:")
print(custom_report)

# Sklearn StackingClassifier
stacking_clf.fit(X_train, y_train)

# Predict and evaluate using the sklearn StackingClassifier
sklearn_predictions = stacking_clf.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, sklearn_predictions)
sklearn_report = classification_report(y_test, sklearn_predictions, target_names=label_encoder.classes_)

print("\nSklearn StackingClassifier Results:")
print(f"Accuracy: {sklearn_accuracy:.4f}")
print("Classification Report:")
print(sklearn_report)

# Compare Results
comparison = {
    "Metric": ["Accuracy"],
    "Custom StackModel": [custom_accuracy],
    "Sklearn StackingClassifier": [sklearn_accuracy]
}
comparison_df = pd.DataFrame(comparison)
print("\nComparison of Results:")
print(comparison_df)
Custom StackModel Results:
Accuracy: 0.5229
Classification Report:
              precision    recall  f1-score   support

        high       0.50      0.34      0.41        67
         low       0.56      0.64      0.60       213
      medium       0.48      0.46      0.47       200

    accuracy                           0.52       480
   macro avg       0.51      0.48      0.49       480
weighted avg       0.52      0.52      0.52       480


Sklearn StackingClassifier Results:
Accuracy: 0.5229
Classification Report:
              precision    recall  f1-score   support

        high       0.50      0.34      0.41        67
         low       0.56      0.64      0.60       213
      medium       0.48      0.46      0.47       200

    accuracy                           0.52       480
   macro avg       0.51      0.48      0.49       480
weighted avg       0.52      0.52      0.52       480


Comparison of Results:
     Metric  Custom StackModel  Sklearn StackingClassifier
0  Accuracy           0.522917                    0.522917

==> OK, the results are identical.

Step 6.2: Use a config with estimators = None (passthrough features)

Use PassthroughClassifier to ensure compatibility: by implementing the standard scikit-learn methods (fit, predict, predict_proba, transform), this class can be plugged into pipelines or ensembles (such as StackingClassifier) that expect these methods, even though it performs no actual prediction or transformation; it simply forwards its input features unchanged.

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

class PassthroughClassifier(BaseEstimator, ClassifierMixin):
    """A no-op 'classifier' that forwards its input features unchanged."""

    def fit(self, X, y):
        # Record the class labels so scikit-learn treats this as a fitted classifier
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        # Not a real prediction; StackingClassifier never calls this here because
        # predict_proba is available (stack_method='auto' prefers it)
        return X

    def predict_proba(self, X):
        # Return the raw features so they become meta-features for the final estimator
        return X

    def transform(self, X):
        # Return the original X unchanged
        return X


# Define feature selectors for each model
model_1_features = model_configs['model_1']['feature_names']
model_2_features = model_configs['model_2']['feature_names']
model_3_features = model_configs['model_3']['feature_names']

# Create pipelines for each base model
model_1_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_1_features)])),
    ('model', LogisticRegression(**model_configs['model_1']['hyperparameters']))
])

model_2_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_2_features)])),
    ('model', lgb.LGBMClassifier(**model_configs['model_2']['hyperparameters']))
])

model_3_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_3_features)])),
    ('model', PassthroughClassifier())
])

# Define base models
base_models = [
    ('model_1', model_1_pipeline),
    ('model_2', model_2_pipeline),
    ('model_3', model_3_pipeline)
]

# Define the stacking classifier
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=final_estimator,
    passthrough=False
)
from sklearn.metrics import classification_report

# Custom StackModel
stack_model = StackModel(model_configs, final_estimator)
stack_model.fit(X_train, y_train)

# Predict and evaluate using the custom StackModel
custom_predictions = stack_model.predict(X_test)
custom_accuracy = accuracy_score(y_test, custom_predictions)
custom_report = classification_report(y_test, custom_predictions, target_names=label_encoder.classes_)

print("Custom StackModel Results:")
print(f"Accuracy: {custom_accuracy:.4f}")
print("Classification Report:")
print(custom_report)

# Sklearn StackingClassifier
stacking_clf.fit(X_train, y_train)

# Predict and evaluate using the sklearn StackingClassifier
sklearn_predictions = stacking_clf.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, sklearn_predictions)
sklearn_report = classification_report(y_test, sklearn_predictions, target_names=label_encoder.classes_)

print("\nSklearn StackingClassifier Results:")
print(f"Accuracy: {sklearn_accuracy:.4f}")
print("Classification Report:")
print(sklearn_report)

# Compare Results
comparison = {
    "Metric": ["Accuracy"],
    "Custom StackModel": [custom_accuracy],
    "Sklearn StackingClassifier": [sklearn_accuracy]
}
comparison_df = pd.DataFrame(comparison)
print("\nComparison of Results:")
print(comparison_df)
Custom StackModel Results:
Accuracy: 0.5583
Classification Report:
              precision    recall  f1-score   support

        high       0.47      0.33      0.39        67
         low       0.60      0.71      0.65       213
      medium       0.52      0.47      0.49       200

    accuracy                           0.56       480
   macro avg       0.53      0.50      0.51       480
weighted avg       0.55      0.56      0.55       480


Sklearn StackingClassifier Results:
Accuracy: 0.5583
Classification Report:
              precision    recall  f1-score   support

        high       0.47      0.33      0.39        67
         low       0.60      0.71      0.65       213
      medium       0.52      0.47      0.49       200

    accuracy                           0.56       480
   macro avg       0.53      0.50      0.51       480
weighted avg       0.55      0.56      0.55       480


Comparison of Results:
     Metric  Custom StackModel  Sklearn StackingClassifier
0  Accuracy           0.558333                    0.558333

Step 7: Data input for each model in the stacking process

import pandas as pd
import numpy as np

def generate_meta_features_clf(stacking_clf, X):
    """
    Generate meta-features from the base models in a stacking classifier.

    Args:
        stacking_clf: A fitted StackingClassifier instance.
        X (pd.DataFrame or np.ndarray): The input feature matrix.

    Returns:
        pd.DataFrame: A DataFrame of meta-features generated by base models.
    """
    meta_features = []
    columns = []

    for name, model in stacking_clf.named_estimators_.items():
        if hasattr(model, 'predict_proba'):  # Probability-based model
            proba = model.predict_proba(X)
            meta_features.append(proba)
            columns.extend([f"{name}_class_{i}" for i in range(proba.shape[1])])
        elif hasattr(model, 'decision_function'):  # Decision function-based model
            decision_scores = model.decision_function(X)
            if len(decision_scores.shape) == 1:  # Binary classification case
                decision_scores = decision_scores.reshape(-1, 1)
            meta_features.append(decision_scores)
            columns.append(f"{name}_decision")
        elif hasattr(model, 'predict'):  # Regressor or non-probability classifier
            preds = model.predict(X).reshape(-1, 1)
            meta_features.append(preds)
            columns.append(f"{name}_pred")

    # Stack the meta-features and convert to a DataFrame
    meta_features_array = np.hstack(meta_features)
    return pd.DataFrame(meta_features_array, columns=columns)
proba_train_clf = generate_meta_features_clf(stacking_clf, X_train)
proba_test_clf = generate_meta_features_clf(stacking_clf, X_test)
proba_train_clf.head()
   model_1_class_0  model_1_class_1  model_1_class_2  model_2_class_0  model_2_class_1  model_2_class_2  model_3_class_0  model_3_class_1
0         0.324457         0.215730         0.459813         0.411349         0.134452         0.454199            0.064             53.0
1         0.185403         0.419977         0.394620         0.036433         0.646075         0.317492            0.071              6.0
2         0.102198         0.483839         0.413963         0.000267         0.861533         0.138200            0.084             12.0
3         0.055714         0.592318         0.351968         0.004361         0.420008         0.575631            0.045             19.0
4         0.088008         0.520087         0.391905         0.086474         0.548249         0.365277            0.077             27.0
pd.DataFrame(meta_features_train).head()
          0         1         2         3         4         5      6     7
0  0.324457  0.215730  0.459813  0.411349  0.134452  0.454199  0.064  53.0
1  0.185403  0.419977  0.394620  0.036433  0.646075  0.317492  0.071   6.0
2  0.102198  0.483839  0.413963  0.000267  0.861533  0.138200  0.084  12.0
3  0.055714  0.592318  0.351968  0.004361  0.420008  0.575631  0.045  19.0
4  0.088008  0.520087  0.391905  0.086474  0.548249  0.365277  0.077  27.0
proba_test_clf.head()
   model_1_class_0  model_1_class_1  model_1_class_2  model_2_class_0  model_2_class_1  model_2_class_2  model_3_class_0  model_3_class_1
0         0.096785         0.503089         0.400127         0.404486         0.374178         0.221336            0.114             14.0
1         0.123998         0.449699         0.426303         0.075519         0.901799         0.022682            0.082             21.0
2         0.070821         0.607823         0.321356         0.001647         0.461695         0.536658            0.107             17.0
3         0.150150         0.415067         0.434782         0.047777         0.138113         0.814110            0.078             32.0
4         0.136066         0.411397         0.452537         0.006221         0.597996         0.395783            0.077             18.0
pd.DataFrame(meta_features_test).head()
          0         1         2         3         4         5      6     7
0  0.096785  0.503089  0.400127  0.404486  0.374178  0.221336  0.114  14.0
1  0.123998  0.449699  0.426303  0.075519  0.901799  0.022682  0.082  21.0
2  0.070821  0.607823  0.321356  0.001647  0.461695  0.536658  0.107  17.0
3  0.150150  0.415067  0.434782  0.047777  0.138113  0.814110  0.078  32.0
4  0.136066  0.411397  0.452537  0.006221  0.597996  0.395783  0.077  18.0

Conclusion

  1. The custom StackModel is equivalent to StackingClassifier:
    • The custom StackModel has been verified to behave the same as scikit-learn's StackingClassifier, producing matching outputs (predictions and probabilities) when both use the same configuration.
  2. The custom StackModel only supports stack_method = 'predict_proba':
    • Currently, StackModel only supports base models that expose predict_proba. Extending it to decision_function or predict would require additional logic similar to StackingClassifier's (see the sketch after this list).
  3. The custom StackModel is equivalent to StackingClassifier(passthrough=False):
    • With passthrough=False, only the meta-features produced by the base models (e.g., class probabilities or decision scores) are used as input to the final_estimator. The custom StackModel follows the same mechanism.
  4. Using base-model outputs in the multi-class setting:
    • For multi-class classification, the stacked model takes each base model's output (e.g., the per-class probabilities from predict_proba) and creates n_classes input features per base model for the final_estimator.
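As noted in point 2, supporting base models that lack predict_proba would require a fallback when building meta-features. Below is a minimal sketch of such a helper, mirroring the preference order used by StackingClassifier with stack_method='auto' (predict_proba, then decision_function, then predict); it is an illustrative extension, not part of the implementation above.

def _base_model_output(model, X):
    """Meta-features from a fitted base model, preferring class probabilities."""
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)
    if hasattr(model, 'decision_function'):
        scores = model.decision_function(X)
        # Binary decision_function returns a 1-D array; reshape it to one column
        return scores.reshape(-1, 1) if scores.ndim == 1 else scores
    # Last resort: use hard predictions as a single meta-feature column
    return model.predict(X).reshape(-1, 1)

Inside StackModel.fit, the matching change would be to pass method='decision_function' or method='predict' to cross_val_predict, depending on which of these methods the estimator exposes.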