模型评估与超参数调优-平芜编程栈

模型评估与超参数调优

1. 技术分析

1.1 模型评估概述

模型评估是机器学习的关键步骤：

评估指标：
- 分类指标：准确率、精确率、召回率、F1、AUC
- 回归指标：MAE、MSE、RMSE、R²
- 排序指标：MAP、NDCG

评估方法：
- 交叉验证
- 时间序列分割
- 分层抽样

1.2 超参数调优

调优方法：
- 网格搜索：穷举搜索
- 随机搜索：随机采样
- 贝叶斯优化：概率模型
- 遗传算法：进化优化

调优策略：
- 粗调：大范围搜索
- 微调：精细搜索

1.3 评估指标对比

指标	适用任务	特点
准确率	分类	不平衡数据有偏差
F1分数	分类	平衡精确率和召回率
AUC-ROC	分类	评估排序能力
RMSE	回归	对异常值敏感
R²	回归	解释方差比例

2. 核心功能实现

2.1 分类评估指标

import numpy as np
from sklearn.metrics import confusion_matrix


class ClassificationEvaluator:
    """Compute binary-classification metrics from labels and predictions.

    Labels are expected to be encoded as 0 (negative) and 1 (positive). The
    confusion matrix is always 2x2 and laid out as ``[[tn, fp], [fn, tp]]``.
    """

    def __init__(self, y_true, y_pred, y_proba=None):
        """Store the inputs as arrays and build the confusion matrix.

        Args:
            y_true: Ground-truth labels (0/1).
            y_pred: Predicted labels (0/1).
            y_proba: Optional positive-class scores; required by ``auc_roc``.
        """
        # Arrays make ``==`` element-wise even when plain lists are passed in.
        self.y_true = np.asarray(y_true)
        self.y_pred = np.asarray(y_pred)
        self.y_proba = (
            None if y_proba is None else np.asarray(y_proba, dtype=float)
        )
        # Pinning the labels keeps the matrix 2x2 even when one class is
        # missing, so the [row, col] lookups below can never go out of range.
        self.confusion = confusion_matrix(
            self.y_true, self.y_pred, labels=[0, 1]
        )

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0 for an empty denominator."""
        return numerator / denominator if denominator > 0 else 0

    def accuracy(self):
        """Return the fraction of predictions that equal the true label."""
        return np.mean(self.y_true == self.y_pred)

    def precision(self):
        """Return tp / (tp + fp), or 0 when nothing was predicted positive."""
        tp = self.confusion[1, 1]
        fp = self.confusion[0, 1]
        return self._safe_ratio(tp, tp + fp)

    def recall(self):
        """Return tp / (tp + fn), or 0 when there are no positive samples."""
        tp = self.confusion[1, 1]
        fn = self.confusion[1, 0]
        return self._safe_ratio(tp, tp + fn)

    def f1_score(self):
        """Return the harmonic mean of precision and recall (0 if both are 0)."""
        p = self.precision()
        r = self.recall()
        return self._safe_ratio(2 * p * r, p + r)

    def specificity(self):
        """Return tn / (tn + fp), or 0 when there are no negative samples."""
        tn = self.confusion[0, 0]
        fp = self.confusion[0, 1]
        return self._safe_ratio(tn, tn + fp)

    def auc_roc(self):
        """Return the area under the ROC curve built from ``y_proba``.

        One ROC point is produced per distinct score (prediction is positive
        when ``score >= threshold``), the curve is anchored at (0, 0), and the
        area is integrated with the trapezoidal rule.

        Returns:
            The AUC as a float; 0.0 when either class is absent from
            ``y_true`` because the curve is undefined in that case.

        Raises:
            ValueError: If no scores were supplied.
        """
        if self.y_proba is None:
            raise ValueError("需要提供预测概率")

        is_pos = self.y_true == 1
        is_neg = self.y_true == 0
        n_pos = int(is_pos.sum())
        n_neg = int(is_neg.sum())
        if n_pos == 0 or n_neg == 0:
            return 0.0

        # Walk the samples from the highest score down; cumulative counts
        # then give tp/fp for every threshold in a single pass.
        order = np.argsort(-self.y_proba, kind="stable")
        scores = self.y_proba[order]
        # Last index of each run of equal scores: ties share one ROC point.
        cuts = np.r_[np.flatnonzero(np.diff(scores)), scores.size - 1]
        tp = np.cumsum(is_pos[order])[cuts]
        fp = np.cumsum(is_neg[order])[cuts]

        # Prepend the origin so the first segment of the curve is counted.
        tpr = np.r_[0.0, tp / n_pos]
        fpr = np.r_[0.0, fp / n_neg]
        # Explicit trapezoidal rule; np.trapz is deprecated in NumPy 2.
        return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

    def report(self):
        """Return all metrics in one dict (``auc_roc`` is None without scores)."""
        return {
            'accuracy': self.accuracy(),
            'precision': self.precision(),
            'recall': self.recall(),
            'f1_score': self.f1_score(),
            'specificity': self.specificity(),
            'auc_roc': self.auc_roc() if self.y_proba is not None else None,
            'confusion_matrix': self.confusion.tolist()
        }

2.2 回归评估指标

class RegressionEvaluator:
    """Compute common regression error metrics for targets and predictions."""

    def __init__(self, y_true, y_pred):
        """Store both inputs as float arrays.

        Args:
            y_true: Ground-truth target values.
            y_pred: Predicted values, aligned with ``y_true``.
        """
        # Float arrays make the arithmetic below work for plain lists too.
        self.y_true = np.asarray(y_true, dtype=float)
        self.y_pred = np.asarray(y_pred, dtype=float)

    def mae(self):
        """Return the mean absolute error."""
        return np.mean(np.abs(self.y_true - self.y_pred))

    def mse(self):
        """Return the mean squared error."""
        return np.mean((self.y_true - self.y_pred) ** 2)

    def rmse(self):
        """Return the square root of the mean squared error."""
        return np.sqrt(self.mse())

    def mape(self):
        """Return the mean absolute percentage error, in percent.

        Samples whose true value is 0 are left out because their relative
        error is undefined; NaN is returned when no sample remains.
        """
        nonzero = self.y_true != 0
        if not np.any(nonzero):
            return float("nan")
        truth = self.y_true[nonzero]
        relative_error = np.abs((truth - self.y_pred[nonzero]) / truth)
        return np.mean(relative_error) * 100

    def r2_score(self):
        """Return the coefficient of determination.

        For a constant target (zero total variance) the ratio is undefined;
        1.0 is returned for a perfect fit and 0.0 otherwise.
        """
        ss_res = np.sum((self.y_true - self.y_pred) ** 2)
        ss_tot = np.sum((self.y_true - np.mean(self.y_true)) ** 2)
        if ss_tot > 0:
            return 1 - (ss_res / ss_tot)
        is_perfect = self.y_true.size > 0 and ss_res == 0
        return 1.0 if is_perfect else 0.0

    def report(self):
        """Return every metric in a single dict."""
        return {
            'mae': self.mae(),
            'mse': self.mse(),
            'rmse': self.rmse(),
            'mape': self.mape(),
            'r2_score': self.r2_score()
        }

2.3 交叉验证

class CrossValidation:
    """Plain K-fold cross-validation over sample indices."""

    def __init__(self, n_folds=5, shuffle=True, random_state=None):
        """Configure the splitter.

        Args:
            n_folds: Number of folds; must be at least 2.
            shuffle: Whether to shuffle the sample indices before splitting.
            random_state: Optional seed for a reproducible shuffle. When None
                the global NumPy random state is used.
        """
        self.n_folds = n_folds
        self.shuffle = shuffle
        self.random_state = random_state

    def split(self, X, y):
        """Return a list of ``(train_indices, val_indices)`` pairs.

        Fold sizes differ by at most one sample; the first
        ``n_samples % n_folds`` folds receive the extra sample.

        Args:
            X: Unused; accepted for interface symmetry with ``evaluate``.
            y: Targets; only ``len(y)`` is used.

        Raises:
            ValueError: If ``n_folds < 2`` or there are fewer samples than
                folds (which would leave some validation folds empty).
        """
        n_samples = len(y)
        if self.n_folds < 2:
            raise ValueError(f"n_folds must be at least 2, got {self.n_folds}")
        if n_samples < self.n_folds:
            raise ValueError(
                f"cannot split {n_samples} samples into {self.n_folds} folds"
            )

        indices = np.arange(n_samples)
        if self.shuffle:
            if self.random_state is None:
                # Global state: keeps honouring a caller's np.random.seed().
                np.random.shuffle(indices)
            else:
                np.random.default_rng(self.random_state).shuffle(indices)

        # array_split spreads the remainder instead of dumping it on one fold.
        chunks = np.array_split(indices, self.n_folds)
        folds = []
        for i, val_indices in enumerate(chunks):
            train_indices = np.concatenate(chunks[:i] + chunks[i + 1:])
            folds.append((train_indices, val_indices))
        return folds

    def evaluate(self, model, X, y, evaluator_func):
        """Fit and score ``model`` on every fold.

        Args:
            model: Object with ``fit(X, y)`` and ``predict(X)``; it is refitted
                in place on each training split.
            X: Feature container indexable by an integer index array.
            y: Target container indexable by an integer index array.
            evaluator_func: Callable ``(y_true, y_pred) -> score``.

        Returns:
            Dict with the ``mean`` and ``std`` of the fold scores and the raw
            ``scores`` list.
        """
        # Plain sequences do not support index-array lookup; convert only
        # those so other array-like containers are passed through untouched.
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)
        if isinstance(y, (list, tuple)):
            y = np.asarray(y)

        scores = []
        for train_idx, val_idx in self.split(X, y):
            model.fit(X[train_idx], y[train_idx])
            y_pred = model.predict(X[val_idx])
            scores.append(evaluator_func(y[val_idx], y_pred))

        return {
            'mean': np.mean(scores),
            'std': np.std(scores),
            'scores': scores
        }

2.4 超参数调优

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint, uniform


class HyperparameterTuner:
    """Tune an estimator with grid, random or Bayesian search."""

    def __init__(self, model, param_grid, method='grid', scoring='accuracy'):
        """Configure the tuner.

        Args:
            model: Base estimator to tune.
            param_grid: Mapping of parameter name to candidate values.
            method: One of ``'grid'``, ``'random'`` or ``'bayesian'``.
            scoring: Scoring name handed to the cross-validated searches.
        """
        self.model = model
        self.param_grid = param_grid
        self.method = method
        self.scoring = scoring
        self.best_model = None
        self.best_params = None

    def grid_search(self, X, y, cv=5):
        """Exhaustively search ``param_grid`` with ``cv``-fold validation.

        Returns:
            Dict with ``best_score``, ``best_params`` and ``cv_results``.
        """
        search = GridSearchCV(
            self.model,
            self.param_grid,
            cv=cv,
            scoring=self.scoring,
            n_jobs=-1
        )
        search.fit(X, y)

        self.best_model = search.best_estimator_
        self.best_params = search.best_params_
        return {
            'best_score': search.best_score_,
            'best_params': search.best_params_,
            'cv_results': search.cv_results_
        }

    def random_search(self, X, y, n_iter=100, cv=5):
        """Sample ``n_iter`` candidates from ``param_grid``.

        Returns:
            Dict with ``best_score`` and ``best_params``.
        """
        search = RandomizedSearchCV(
            self.model,
            self.param_grid,
            n_iter=n_iter,
            cv=cv,
            scoring=self.scoring,
            n_jobs=-1,
            random_state=42
        )
        search.fit(X, y)

        self.best_model = search.best_estimator_
        self.best_params = search.best_params_
        return {
            'best_score': search.best_score_,
            'best_params': search.best_params_
        }

    @staticmethod
    def _is_number(value):
        """Return True for real numbers, excluding booleans."""
        numeric = (int, float, np.integer, np.floating)
        return isinstance(value, numeric) and not isinstance(value, bool)

    def bayesian_optimization(self, X, y, n_iter=50, cv=5):
        """Search the numeric ranges of ``param_grid`` with Bayesian optimization.

        Only parameters given as non-empty lists of numbers are searched,
        between their minimum and maximum; every other parameter keeps the
        value set on the base model. Parameters whose candidates are all
        integers are rounded to int before they reach the estimator, because
        the optimizer proposes floats.

        Returns:
            Dict with ``best_score`` and ``best_params``.

        Raises:
            ValueError: If ``param_grid`` contains no numeric list to search.
        """
        from bayes_opt import BayesianOptimization
        from sklearn.base import clone
        from sklearn.model_selection import cross_val_score

        bounds = {}
        integer_params = set()
        for param, values in self.param_grid.items():
            if not isinstance(values, list) or not values:
                continue
            if not all(self._is_number(v) for v in values):
                continue
            bounds[param] = (min(values), max(values))
            if all(isinstance(v, (int, np.integer)) for v in values):
                integer_params.add(param)
        if not bounds:
            raise ValueError(
                "param_grid has no numeric list-valued parameters to optimize"
            )

        def to_model_params(params):
            return {
                name: int(round(value)) if name in integer_params else value
                for name, value in params.items()
            }

        def objective(**params):
            # Clone so the base model's fixed (non-searched) settings survive.
            candidate = clone(self.model).set_params(**to_model_params(params))
            fold_scores = cross_val_score(
                candidate, X, y, cv=cv, scoring=self.scoring
            )
            return float(fold_scores.mean())

        optimizer = BayesianOptimization(
            f=objective,
            pbounds=bounds,
            random_state=42
        )
        optimizer.maximize(n_iter=n_iter)

        self.best_params = to_model_params(optimizer.max['params'])
        self.best_model = clone(self.model).set_params(**self.best_params)
        self.best_model.fit(X, y)

        return {
            'best_score': optimizer.max['target'],
            'best_params': self.best_params
        }

    def tune(self, X, y):
        """Run the search selected by ``method`` and return its result dict.

        Raises:
            ValueError: If ``method`` is not a supported search strategy.
        """
        if self.method == 'grid':
            return self.grid_search(X, y)
        if self.method == 'random':
            return self.random_search(X, y)
        if self.method == 'bayesian':
            return self.bayesian_optimization(X, y)
        raise ValueError(
            f"unknown tuning method {self.method!r}; "
            "expected 'grid', 'random' or 'bayesian'"
        )

3. 性能对比

3.1 调优方法对比

方法	效率	效果	复杂度
网格搜索	低	中	低
随机搜索	中	中	低
贝叶斯优化	高	高	高
遗传算法	中	高	很高

3.2 评估指标对比

指标	用途	优点	缺点
准确率	整体评估	简单	不平衡数据有偏差
F1分数	不平衡数据	平衡	只关注正类
AUC-ROC	排序能力	全面	需要概率输出

3.3 交叉验证策略对比

方法	适用场景	稳定性
K-fold	通用	高
Stratified K-fold	不平衡数据	高
TimeSeriesSplit	时间序列	中

4. 最佳实践

4.1 模型评估流程

def evaluate_model(model, X_train, y_train, X_test, y_test, task_type='classification'):
    """Fit ``model`` and report its metrics on the training and test sets.

    Args:
        model: Estimator with ``fit`` and ``predict``; ``predict_proba`` is
            used for the positive-class score when it is available.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        task_type: ``'classification'`` selects the classification metrics;
            any other value selects the regression metrics.

    Returns:
        Dict with the ``'train'`` and ``'test'`` report dicts, so callers can
        use the numbers that are also printed.
    """
    model.fit(X_train, y_train)

    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    if task_type == 'classification':
        if hasattr(model, 'predict_proba'):
            # Column 1 holds the score of the positive class.
            y_proba_train = model.predict_proba(X_train)[:, 1]
            y_proba_test = model.predict_proba(X_test)[:, 1]
        else:
            y_proba_train = None
            y_proba_test = None

        train_eval = ClassificationEvaluator(y_train, y_pred_train, y_proba_train)
        test_eval = ClassificationEvaluator(y_test, y_pred_test, y_proba_test)
    else:
        train_eval = RegressionEvaluator(y_train, y_pred_train)
        test_eval = RegressionEvaluator(y_test, y_pred_test)

    # Build each report once, then print and return the same objects.
    train_report = train_eval.report()
    test_report = test_eval.report()

    print("训练集评估:")
    print(train_report)
    print("\n测试集评估:")
    print(test_report)

    return {'train': train_report, 'test': test_report}

4.2 超参数调优流程

def tune_hyperparameters(model, X, y, param_grid, method='random'):
    """Tune ``model`` over ``param_grid``, print the outcome, return the best model.

    Args:
        model: Base estimator to tune.
        X: Training features.
        y: Training targets.
        param_grid: Mapping of parameter name to candidate values.
        method: Search strategy: ``'grid'``, ``'random'`` or ``'bayesian'``.

    Returns:
        The best estimator found by the search.

    Raises:
        ValueError: If ``method`` is not a supported search strategy.
    """
    # Fail before any fitting; otherwise an unknown method only surfaces
    # later as an obscure error while the result is being printed.
    supported_methods = ('grid', 'random', 'bayesian')
    if method not in supported_methods:
        raise ValueError(
            f"unknown tuning method {method!r}; expected one of {supported_methods}"
        )

    tuner = HyperparameterTuner(model, param_grid, method=method)
    result = tuner.tune(X, y)

    print(f"最佳分数: {result['best_score']:.4f}")
    print("最佳参数:")
    for param, value in result['best_params'].items():
        print(f" {param}: {value}")

    return tuner.best_model