Deep Dive into Stacking Model for Multi-Class Classification
Overview
This notebook demonstrates how to implement a stacking model for multi-class classification, driven by per-model configurations for several base models and a meta-model (final estimator). Stacking combines the predictions of the base models into meta-features and trains the meta-model on them to improve overall performance.
Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
Step 2: Load Wine Quality Data
# Load the wine quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Convert target to multi-class (low, medium, high quality)
def quality_label(q):
    if q <= 5:
        return 'low'
    elif q == 6:
        return 'medium'
    else:
        return 'high'

data['quality_label'] = data['quality'].apply(quality_label)
data.drop(columns=['quality'], inplace=True)

# Encode target labels
label_encoder = LabelEncoder()
data['quality_label'] = label_encoder.fit_transform(data['quality_label'])

# Features and target
X = data.drop(columns=['quality_label'])
y = data['quality_label']
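The steps below assume a model_configs dictionary and a final_estimator have been defined. A minimal sketch of such a configuration is shown here; the feature groupings and hyperparameters are illustrative assumptions, with model_1 a logistic regression, model_2 a LightGBM classifier, and model_3 an entry with estimators=None whose raw features are forwarded unchanged to the meta-model.

# Minimal configuration sketch (feature splits and hyperparameters are assumptions).
# Each entry names the columns a base model sees, the estimator class, and its settings.
model_configs = {
    'model_1': {
        'feature_names': ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar'],
        'estimators': LogisticRegression,
        'hyperparameters': {'max_iter': 1000, 'random_state': 42}
    },
    'model_2': {
        'feature_names': ['density', 'pH', 'sulphates', 'alcohol'],
        'estimators': lgb.LGBMClassifier,
        'hyperparameters': {'n_estimators': 100, 'random_state': 42}
    },
    'model_3': {
        # No estimator: these raw columns are passed through as meta-features.
        'feature_names': ['chlorides', 'free sulfur dioxide'],
        'estimators': None,
        'hyperparameters': {}
    }
}

# Meta-model (final estimator) that combines the base-model outputs
final_estimator = LogisticRegression(max_iter=1000, random_state=42)

With estimators=None, the StackModel class defined next skips training for that entry and forwards the listed columns directly as meta-features.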
from sklearn.model_selection import cross_val_predict
from sklearn.base import BaseEstimator
import numpy as np
import pandas as pd


class StackModel:
    def __init__(self, model_configs, final_estimator, cv=None):
        """
        Initialize the stacking model.

        Args:
            model_configs (dict): Configuration for base models. Each key is a model name,
                and the value is a dictionary with the following keys:
                - 'feature_names': List of feature names used by the model.
                - 'estimators': The model class (e.g., sklearn classifier or regressor).
                - 'hyperparameters': Dictionary of hyperparameters for the estimator.
            final_estimator (BaseEstimator): Meta-model for stacking.
            cv (int, cross-validation generator, or None): Cross-validation strategy for
                generating meta-features. Default is None (5-fold CV).
        """
        self.model_configs = model_configs
        self.final_estimator = final_estimator
        self.cv = cv or 5
        self.models = {}

    def fit(self, X, y):
        """
        Train the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.
            y (pd.Series or np.ndarray): Target vector.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("X must be a pandas DataFrame.")
        if not isinstance(y, (pd.Series, np.ndarray)):
            raise ValueError("y must be a pandas Series or a numpy array.")

        self.models = {}
        meta_features_list = []

        # Train base models and generate cross-validated meta-features
        for model_name, config in self.model_configs.items():
            features = config['feature_names']
            if config['estimators'] is not None:
                estimator = config['estimators'](**config['hyperparameters'])

                # Generate cross-validated (out-of-fold) meta-features
                meta_features = cross_val_predict(
                    estimator, X[features], y, cv=self.cv, method='predict_proba'
                )
                meta_features_list.append(meta_features)

                # Train the model on the full dataset
                estimator.fit(X[features], y)
                self.models[model_name] = {
                    'features': features,
                    'model': estimator
                }
            else:
                # Use raw features directly for models without estimators
                meta_features = X[features].values
                meta_features_list.append(meta_features)
                self.models[model_name] = {
                    'features': features,
                    'model': None
                }

        # Combine all meta-features
        self.meta_features = np.hstack(meta_features_list)

        # Train the final estimator using meta-features
        self.final_estimator.fit(self.meta_features, y)

    def predict(self, X):
        """
        Predict class labels using the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Predicted class labels.
        """
        meta_features = self.transform(X)
        return self.final_estimator.predict(meta_features)

    def predict_proba(self, X):
        """
        Predict probabilities using the stacking model.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Predicted probabilities.
        """
        meta_features = self.transform(X)
        return self.final_estimator.predict_proba(meta_features)

    def transform(self, X):
        """
        Generate meta-features for a given dataset.

        Args:
            X (pd.DataFrame): Feature matrix.

        Returns:
            np.ndarray: Transformed meta-features as a numpy array.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("X must be a pandas DataFrame.")

        meta_features = []
        for model_name, model_info in self.models.items():
            features = model_info['features']
            if model_info['model'] is not None:
                model = model_info['model']
                meta_features.append(model.predict_proba(X[features]))
            else:
                meta_features.append(X[features].values)
        return np.hstack(meta_features)
Step 5: Train and Evaluate the Stack Model
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
stack_model = StackModel(model_configs, final_estimator)
stack_model.fit(X_train, y_train)

# Predict and evaluate
predictions = stack_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy of the stacking model: {accuracy:.4f}")
Accuracy of the stacking model: 0.5583
# Export meta-features for inspection
meta_features_train = stack_model.transform(X_train)
meta_features_test = stack_model.transform(X_test)
pd.DataFrame(meta_features_train)
             0         1         2         3         4         5      6     7
0     0.324457  0.215730  0.459813  0.411349  0.134452  0.454199  0.064  53.0
1     0.185403  0.419977  0.394620  0.036433  0.646075  0.317492  0.071   6.0
2     0.102198  0.483839  0.413963  0.000267  0.861533  0.138200  0.084  12.0
3     0.055714  0.592318  0.351968  0.004361  0.420008  0.575631  0.045  19.0
4     0.088008  0.520087  0.391905  0.086474  0.548249  0.365277  0.077  27.0
...        ...       ...       ...       ...       ...       ...    ...   ...
1114  0.088350  0.542695  0.368955  0.010300  0.650717  0.338983  0.058   5.0
1115  0.072132  0.569988  0.357881  0.008834  0.430833  0.560332  0.073  25.0
1116  0.072850  0.553664  0.373486  0.019392  0.769255  0.211352  0.077  15.0
1117  0.330940  0.202417  0.466644  0.542588  0.052167  0.405246  0.054   7.0
1118  0.231645  0.267904  0.500451  0.006196  0.557755  0.436049  0.063   3.0

[1119 rows × 8 columns]
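Each row above concatenates the class probabilities produced by the probabilistic base models with the columns forwarded by the passthrough entry. Assuming the configuration sketched earlier, the eight meta-feature columns can be given descriptive names to make this layout explicit:

# Build descriptive column names for the meta-feature matrix (assumes the earlier
# model_configs sketch: probabilistic models contribute one column per class,
# passthrough entries contribute their raw feature columns).
meta_columns = []
for model_name, config in model_configs.items():
    if config['estimators'] is not None:
        meta_columns.extend(
            f"{model_name}_proba_{cls}" for cls in label_encoder.classes_
        )
    else:
        meta_columns.extend(f"{model_name}_{feat}" for feat in config['feature_names'])

meta_df = pd.DataFrame(meta_features_train, columns=meta_columns)
print(meta_df.head())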
Step 6: Compare the results of StackingClassifier (from sklearn) and the custom StackModel
Use PassthroughClassifier to ensure compatibility: by implementing the standard scikit-learn methods (fit, predict, predict_proba, transform), this class can be used in pipelines or ensembles (such as StackingClassifier) that expect these methods, even though it performs no actual prediction or transformation; it simply returns its input unchanged so the selected raw features reach the final estimator.
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np


class PassthroughClassifier(BaseEstimator, ClassifierMixin):
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.named_estimators_ = {'passthrough': self}
        return self

    def predict(self, X):
        return X

    def predict_proba(self, X):
        return X

    def transform(self, X):
        # Return the original X unchanged
        return X


# Define feature selectors for each model
model_1_features = model_configs['model_1']['feature_names']
model_2_features = model_configs['model_2']['feature_names']
model_3_features = model_configs['model_3']['feature_names']

# Create pipelines for each base model
model_1_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_1_features)])),
    ('model', LogisticRegression(**model_configs['model_1']['hyperparameters']))
])

model_2_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_2_features)])),
    ('model', lgb.LGBMClassifier(**model_configs['model_2']['hyperparameters']))
])

model_3_pipeline = Pipeline([
    ('selector', ColumnTransformer([('select', 'passthrough', model_3_features)])),
    ('model', PassthroughClassifier())
])

# Define base models
base_models = [
    ('model_1', model_1_pipeline),
    ('model_2', model_2_pipeline),
    ('model_3', model_3_pipeline)
]

# Define the stacking classifier
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=final_estimator,
    passthrough=False
)
from sklearn.metrics import classification_report

# Custom StackModel
stack_model = StackModel(model_configs, final_estimator)
stack_model.fit(X_train, y_train)

# Predict and evaluate using the custom StackModel
custom_predictions = stack_model.predict(X_test)
custom_accuracy = accuracy_score(y_test, custom_predictions)
custom_report = classification_report(y_test, custom_predictions, target_names=label_encoder.classes_)

print("Custom StackModel Results:")
print(f"Accuracy: {custom_accuracy:.4f}")
print("Classification Report:")
print(custom_report)

# Sklearn StackingClassifier
stacking_clf.fit(X_train, y_train)

# Predict and evaluate using the sklearn StackingClassifier
sklearn_predictions = stacking_clf.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, sklearn_predictions)
sklearn_report = classification_report(y_test, sklearn_predictions, target_names=label_encoder.classes_)

print("\nSklearn StackingClassifier Results:")
print(f"Accuracy: {sklearn_accuracy:.4f}")
print("Classification Report:")
print(sklearn_report)

# Compare results
comparison = {
    "Metric": ["Accuracy"],
    "Custom StackModel": [custom_accuracy],
    "Sklearn StackingClassifier": [sklearn_accuracy]
}
comparison_df = pd.DataFrame(comparison)
print("\nComparison of Results:")
print(comparison_df)
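Beyond comparing accuracy, the two implementations can also be checked against each other on their predicted probabilities. Exact equality is not guaranteed (cross-validation fold assignments and estimator randomness can differ), so a tolerance-style comparison is the more reasonable sanity check; this sketch assumes the variables from the cell above are still in scope.

# Sanity check: compare predicted labels and probabilities of the two stackers.
# Small numerical differences are possible, so report an agreement rate and the
# maximum absolute probability gap rather than requiring strict equality.
custom_proba = stack_model.predict_proba(X_test)
sklearn_proba = stacking_clf.predict_proba(X_test)

label_agreement = np.mean(custom_predictions == sklearn_predictions)
max_proba_diff = np.max(np.abs(custom_proba - sklearn_proba))

print(f"Label agreement between implementations: {label_agreement:.4f}")
print(f"Maximum absolute difference in probabilities: {max_proba_diff:.6f}")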
Step 7: Inspect the data input for each model in the stacking process
import pandas as pd
import numpy as np


def generate_meta_features_clf(stacking_clf, X):
    """
    Generate meta-features from the base models in a stacking classifier.

    Args:
        stacking_clf: A fitted StackingClassifier instance.
        X (pd.DataFrame or np.ndarray): The input feature matrix.

    Returns:
        pd.DataFrame: A DataFrame of meta-features generated by base models.
    """
    meta_features = []
    columns = []

    for name, model in stacking_clf.named_estimators_.items():
        if hasattr(model, 'predict_proba'):  # Probability-based model
            proba = model.predict_proba(X)
            meta_features.append(proba)
            columns.extend([f"{name}_class_{i}" for i in range(proba.shape[1])])
        elif hasattr(model, 'decision_function'):  # Decision function-based model
            decision_scores = model.decision_function(X)
            if len(decision_scores.shape) == 1:  # Binary classification case
                decision_scores = decision_scores.reshape(-1, 1)
            meta_features.append(decision_scores)
            columns.append(f"{name}_decision")
        elif hasattr(model, 'predict'):  # Regressor or non-probability classifier
            preds = model.predict(X).reshape(-1, 1)
            meta_features.append(preds)
            columns.append(f"{name}_pred")

    # Stack the meta-features and convert to a DataFrame
    meta_features_array = np.hstack(meta_features)
    return pd.DataFrame(meta_features_array, columns=columns)
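As a usage sketch, the helper can be applied to the fitted StackingClassifier from Step 6 to inspect exactly what the final_estimator receives for each test sample (column names follow the name_class_i pattern used in the function above).

# Inspect the meta-features fed to the final_estimator of the fitted StackingClassifier
meta_features_df = generate_meta_features_clf(stacking_clf, X_test)
print(meta_features_df.shape)
print(meta_features_df.head())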
The custom StackModel has been verified to behave equivalently to scikit-learn's StackingClassifier: with the same configuration, the outputs (predictions and probabilities) match.
The custom StackModel only supports stack_method = 'predict_proba':
Currently, StackModel only supports base models that expose predict_proba. Extending it to decision_function or predict would require additional logic similar to StackingClassifier's (a sketch follows these notes).
When passthrough=False, only the meta-features from the base models (e.g., class probabilities or decision scores) are used as input to the final_estimator. The custom StackModel follows this same mechanism.
Using the outputs of the base models in the multi-class setting:
In a multi-class classification problem, the stacking models take each base model's output (e.g., the per-class probabilities from predict_proba) and create n_classes corresponding input variables for the final_estimator.
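A possible extension along these lines, shown only as a sketch, would dispatch on whichever method each fitted base model exposes, mirroring the fallback order used by generate_meta_features_clf above; StackModel.fit would also need to pass the matching method argument to cross_val_predict.

# Hypothetical helper sketching how StackModel.transform could support base models
# without predict_proba; the fallback order mirrors generate_meta_features_clf.
def _base_model_meta_features(model, X_subset):
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X_subset)
    if hasattr(model, 'decision_function'):
        scores = model.decision_function(X_subset)
        # Binary decision_function returns a 1-D array; reshape it to a column
        return scores.reshape(-1, 1) if scores.ndim == 1 else scores
    # Last resort: plain predictions as a single meta-feature column
    return model.predict(X_subset).reshape(-1, 1)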