模型评估与超参数调优
1. 技术分析
1.1 模型评估概述
模型评估是机器学习的关键步骤:
评估指标
- 分类指标: 准确率、精确率、召回率、F1、AUC
- 回归指标: MAE、MSE、RMSE、R²
- 排序指标: MAP、NDCG

评估方法
- 交叉验证
- 时间序列分割
- 分层抽样

1.2 超参数调优
调优方法
- 网格搜索: 穷举搜索
- 随机搜索: 随机采样
- 贝叶斯优化: 概率模型
- 遗传算法: 进化优化

调优策略
- 粗调: 大范围搜索
- 微调: 精细搜索

1.3 评估指标对比
| 指标 | 适用任务 | 特点 |
|---|---|---|
| 准确率 | 分类 | 不平衡数据有偏差 |
| F1分数 | 分类 | 平衡精确率和召回率 |
| AUC-ROC | 分类 | 评估排序能力 |
| RMSE | 回归 | 对异常值敏感 |
| R² | 回归 | 解释方差比例 |
2. 核心功能实现
2.1 分类评估指标
import numpy as np
from sklearn.metrics import confusion_matrix


class ClassificationEvaluator:
    """Compute binary-classification metrics from labels and predictions.

    The confusion matrix is built once in ``__init__``. Row/column 0 is
    treated as the negative class and row/column 1 as the positive class,
    i.e. the positive class is the second label in sorted order (``1`` for
    the usual 0/1 encoding).

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels, same length as ``y_true``.
        y_proba: Optional 1-D scores for the positive class. Only
            ``auc_roc`` needs them.
    """

    def __init__(self, y_true, y_pred, y_proba=None):
        # Store arrays so that ``==`` in accuracy() is element-wise even when
        # the caller passes plain lists.
        self.y_true = np.asarray(y_true)
        self.y_pred = np.asarray(y_pred)
        self.y_proba = None if y_proba is None else np.asarray(y_proba)

        labels = np.union1d(self.y_true, self.y_pred)
        if labels.size < 2:
            # With a single observed class the matrix would be 1x1 and every
            # [1, x] lookup below would raise IndexError. Fall back to the
            # 0/1 encoding the metric formulas assume.
            labels = np.array([0, 1])
        self._positive = labels[1]
        self.confusion = confusion_matrix(
            self.y_true, self.y_pred, labels=labels
        )

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0.0 for a zero denominator."""
        return numerator / denominator if denominator > 0 else 0.0

    def accuracy(self):
        """Return the fraction of predictions equal to the true label."""
        return np.mean(self.y_true == self.y_pred)

    def precision(self):
        """Return TP / (TP + FP), or 0 when nothing was predicted positive."""
        tp = self.confusion[1, 1]
        fp = self.confusion[0, 1]
        return self._safe_ratio(tp, tp + fp)

    def recall(self):
        """Return TP / (TP + FN), or 0 when there are no positive samples."""
        tp = self.confusion[1, 1]
        fn = self.confusion[1, 0]
        return self._safe_ratio(tp, tp + fn)

    def f1_score(self):
        """Return the harmonic mean of precision and recall (0 if both are 0)."""
        p = self.precision()
        r = self.recall()
        return self._safe_ratio(2 * p * r, p + r)

    def specificity(self):
        """Return TN / (TN + FP), or 0 when there are no negative samples."""
        tn = self.confusion[0, 0]
        fp = self.confusion[0, 1]
        return self._safe_ratio(tn, tn + fp)

    def auc_roc(self):
        """Return the area under the ROC curve of ``y_proba``.

        One ROC point is produced per distinct score (threshold ``>=``), the
        curve is anchored at (0, 0), and the area is integrated with the
        trapezoidal rule.

        Returns:
            The AUC as a float; 0.0 when either class is absent from
            ``y_true`` (the curve is degenerate in that case).

        Raises:
            ValueError: If no ``y_proba`` was supplied.
        """
        if self.y_proba is None:
            raise ValueError("需要提供预测概率")

        scores = np.asarray(self.y_proba, dtype=float)
        is_positive = self.y_true == self._positive
        n_pos = int(np.count_nonzero(is_positive))
        n_neg = is_positive.size - n_pos
        if n_pos == 0 or n_neg == 0:
            return 0.0

        # Rank samples by decreasing score; cumulative counts then give the
        # TP/FP totals at every threshold in a single pass.
        order = np.argsort(-scores, kind="mergesort")
        ranked_scores = scores[order]
        ranked_hits = is_positive[order]

        # Keep only the last index of each run of equal scores so tied
        # samples enter the curve together.
        cuts = np.append(
            np.flatnonzero(np.diff(ranked_scores)), ranked_scores.size - 1
        )
        true_pos = np.cumsum(ranked_hits)[cuts]
        false_pos = (cuts + 1) - true_pos

        # Prepend the (0, 0) origin; without it the area left of the first
        # threshold is lost whenever the top score is shared by both classes.
        tpr = np.concatenate(([0.0], true_pos / n_pos))
        fpr = np.concatenate(([0.0], false_pos / n_neg))

        # Explicit trapezoidal rule: ``np.trapz`` is deprecated in NumPy 2.
        return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

    def report(self):
        """Return all metrics in a dict; ``auc_roc`` is None without scores."""
        return {
            'accuracy': self.accuracy(),
            'precision': self.precision(),
            'recall': self.recall(),
            'f1_score': self.f1_score(),
            'specificity': self.specificity(),
            'auc_roc': self.auc_roc() if self.y_proba is not None else None,
            'confusion_matrix': self.confusion.tolist(),
        }


# 2.2 回归评估指标
class RegressionEvaluator:
    """Compute common regression error metrics for a set of predictions.

    Args:
        y_true: Ground-truth target values (array-like).
        y_pred: Predicted values, broadcast-compatible with ``y_true``.
    """

    def __init__(self, y_true, y_pred):
        # Convert up front so plain Python lists support the arithmetic below
        # (``list - list`` would raise TypeError).
        self.y_true = np.asarray(y_true, dtype=float)
        self.y_pred = np.asarray(y_pred, dtype=float)

    def _residuals(self):
        """Return the element-wise error ``y_true - y_pred``."""
        return self.y_true - self.y_pred

    def mae(self):
        """Return the mean absolute error."""
        return np.mean(np.abs(self._residuals()))

    def mse(self):
        """Return the mean squared error."""
        return np.mean(self._residuals() ** 2)

    def rmse(self):
        """Return the root mean squared error."""
        return np.sqrt(self.mse())

    def mape(self):
        """Return the mean absolute percentage error, in percent.

        Samples whose true value is zero are excluded, because their
        relative error is undefined and would turn the result into inf/nan.

        Returns:
            The MAPE over the non-zero targets, or NaN if every target is 0.
        """
        nonzero = self.y_true != 0
        if not np.any(nonzero):
            return float("nan")
        relative = self._residuals()[nonzero] / self.y_true[nonzero]
        return np.mean(np.abs(relative)) * 100

    def r2_score(self):
        """Return the coefficient of determination.

        Returns:
            ``1 - SS_res / SS_tot``, or 0 when the targets are constant
            (``SS_tot == 0``).
        """
        ss_res = np.sum(self._residuals() ** 2)
        ss_tot = np.sum((self.y_true - np.mean(self.y_true)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0

    def report(self):
        """Return every metric in a dict keyed by metric name."""
        return {
            'mae': self.mae(),
            'mse': self.mse(),
            'rmse': self.rmse(),
            'mape': self.mape(),
            'r2_score': self.r2_score(),
        }


# 2.3 交叉验证
class CrossValidation:
    """K-fold cross-validation splitter and scorer.

    Args:
        n_folds: Number of folds (at least 2).
        shuffle: Whether to shuffle sample indices before splitting.
        random_state: Optional seed for the shuffle. When None the global
            NumPy random state is used, so ``np.random.seed`` still applies.
    """

    def __init__(self, n_folds=5, shuffle=True, random_state=None):
        self.n_folds = n_folds
        self.shuffle = shuffle
        self.random_state = random_state

    @staticmethod
    def _take(data, rows):
        """Select rows by position from an array-like or a pandas object."""
        # ``data[rows]`` fails on lists and selects columns on a DataFrame,
        # so pandas-like objects go through their positional indexer.
        if hasattr(data, "iloc"):
            return data.iloc[rows]
        return np.asarray(data)[rows]

    def split(self, X, y):
        """Partition sample indices into ``n_folds`` train/validation pairs.

        Fold sizes differ by at most one sample; the remainder is spread
        over the first folds instead of being piled onto the last one.

        Args:
            X: Unused; accepted for a scikit-learn-like signature.
            y: Targets; only ``len(y)`` is read.

        Returns:
            A list of ``(train_indices, val_indices)`` index-array tuples.

        Raises:
            ValueError: If ``n_folds < 2`` or there are fewer samples than
                folds (some validation folds would be empty).
        """
        n_samples = len(y)
        if self.n_folds < 2:
            raise ValueError("n_folds must be at least 2")
        if n_samples < self.n_folds:
            raise ValueError(
                f"cannot split {n_samples} samples into {self.n_folds} folds"
            )

        indices = np.arange(n_samples)
        if self.shuffle:
            if self.random_state is None:
                np.random.shuffle(indices)
            else:
                # Local generator: reproducible without touching global state.
                np.random.RandomState(self.random_state).shuffle(indices)

        chunks = np.array_split(indices, self.n_folds)
        folds = []
        for i, val_indices in enumerate(chunks):
            train_indices = np.concatenate(chunks[:i] + chunks[i + 1:])
            folds.append((train_indices, val_indices))
        return folds

    def evaluate(self, model, X, y, evaluator_func):
        """Fit and score ``model`` on every fold.

        Args:
            model: Object exposing ``fit(X, y)`` and ``predict(X)``.
            X: Feature matrix (array-like or pandas object).
            y: Targets aligned with ``X``.
            evaluator_func: Callable ``(y_val, y_pred) -> score``.

        Returns:
            Dict with the ``mean`` and ``std`` of the fold scores and the
            per-fold ``scores`` list.
        """
        scores = []
        for train_idx, val_idx in self.split(X, y):
            model.fit(self._take(X, train_idx), self._take(y, train_idx))
            y_pred = model.predict(self._take(X, val_idx))
            scores.append(evaluator_func(self._take(y, val_idx), y_pred))

        return {
            'mean': np.mean(scores),
            'std': np.std(scores),
            'scores': scores,
        }


# 2.4 超参数调优
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint, uniform


class HyperparameterTuner:
    """Tune an estimator's hyperparameters by grid, random or Bayesian search.

    Args:
        model: Estimator to tune (scikit-learn style ``fit``/``predict``).
        param_grid: Mapping of parameter name to candidate values (or, for
            random search, distributions).
        method: ``'grid'``, ``'random'`` or ``'bayesian'``; read by ``tune``.

    After a search, ``best_model`` holds the fitted best estimator and
    ``best_params`` the winning parameter values.
    """

    def __init__(self, model, param_grid, method='grid'):
        self.model = model
        self.param_grid = param_grid
        self.method = method
        self.best_model = None
        self.best_params = None

    def grid_search(self, X, y, cv=5, scoring='accuracy'):
        """Search every combination in ``param_grid`` with ``GridSearchCV``.

        Args:
            X: Training features.
            y: Training targets.
            cv: Number of folds or a CV splitter, passed to ``GridSearchCV``.
            scoring: Scoring name or callable, passed to ``GridSearchCV``.

        Returns:
            Dict with ``best_score``, ``best_params`` and ``cv_results``.
        """
        search = GridSearchCV(
            self.model,
            self.param_grid,
            cv=cv,
            scoring=scoring,
            n_jobs=-1,
        )
        search.fit(X, y)

        self.best_model = search.best_estimator_
        self.best_params = search.best_params_
        return {
            'best_score': search.best_score_,
            'best_params': search.best_params_,
            'cv_results': search.cv_results_,
        }

    def random_search(self, X, y, n_iter=100, cv=5, scoring='accuracy'):
        """Sample ``n_iter`` settings with ``RandomizedSearchCV`` (seed 42).

        Args:
            X: Training features.
            y: Training targets.
            n_iter: Number of sampled parameter settings.
            cv: Number of folds or a CV splitter.
            scoring: Scoring name or callable.

        Returns:
            Dict with ``best_score`` and ``best_params``.
        """
        search = RandomizedSearchCV(
            self.model,
            self.param_grid,
            n_iter=n_iter,
            cv=cv,
            scoring=scoring,
            n_jobs=-1,
            random_state=42,
        )
        search.fit(X, y)

        self.best_model = search.best_estimator_
        self.best_params = search.best_params_
        return {
            'best_score': search.best_score_,
            'best_params': search.best_params_,
        }

    def bayesian_optimization(self, X, y, n_iter=50):
        """Maximise 5-fold CV accuracy with the ``bayes_opt`` package.

        Only parameters whose grid entry is a non-empty list/tuple of real
        numbers are optimised, over the interval ``[min, max]`` of that list.
        Other entries (categorical values, distributions) are left at the
        base model's setting. Parameters whose candidates are all integers
        are rounded to ``int`` before being applied to the estimator.

        Args:
            X: Training features.
            y: Training targets.
            n_iter: Number of optimisation steps passed to ``maximize``.

        Returns:
            Dict with ``best_score`` and ``best_params``.

        Raises:
            ValueError: If ``param_grid`` has no numeric list to optimise.
        """
        from bayes_opt import BayesianOptimization
        from sklearn.base import clone

        real_types = (int, float, np.integer, np.floating)
        bounds = {}
        integer_params = set()
        for name, values in self.param_grid.items():
            if not isinstance(values, (list, tuple)) or not values:
                continue
            # bool is a subclass of int but is not a meaningful interval.
            if not all(
                isinstance(v, real_types) and not isinstance(v, bool)
                for v in values
            ):
                continue
            bounds[name] = (min(values), max(values))
            if all(isinstance(v, (int, np.integer)) for v in values):
                integer_params.add(name)

        if not bounds:
            raise ValueError(
                "param_grid contains no numeric list to optimise"
            )

        def to_model_params(raw):
            # The optimiser proposes floats for everything; integer
            # hyperparameters must be cast back before use.
            return {
                name: int(round(value)) if name in integer_params else value
                for name, value in raw.items()
            }

        def objective(**params):
            # clone() keeps the base model's other settings; rebuilding via
            # ``__class__(**params)`` would silently reset them to defaults.
            candidate = clone(self.model).set_params(**to_model_params(params))
            result = CrossValidation(n_folds=5).evaluate(
                candidate,
                X,
                y,
                lambda y_true, y_pred: np.mean(y_true == y_pred),
            )
            return result['mean']

        optimizer = BayesianOptimization(
            f=objective,
            pbounds=bounds,
            random_state=42,
        )
        optimizer.maximize(n_iter=n_iter)

        self.best_params = to_model_params(optimizer.max['params'])
        self.best_model = clone(self.model).set_params(**self.best_params)
        self.best_model.fit(X, y)
        return {
            'best_score': optimizer.max['target'],
            'best_params': self.best_params,
        }

    def tune(self, X, y):
        """Run the search selected by ``self.method``.

        Returns:
            The result dict of the chosen search method.

        Raises:
            ValueError: If ``self.method`` is not a supported method name
                (previously this returned None without any error).
        """
        searches = {
            'grid': self.grid_search,
            'random': self.random_search,
            'bayesian': self.bayesian_optimization,
        }
        try:
            run_search = searches[self.method]
        except KeyError:
            raise ValueError(
                f"unknown tuning method {self.method!r}; "
                f"expected one of {sorted(searches)}"
            ) from None
        return run_search(X, y)


# 3. 性能对比
3.1 调优方法对比
| 方法 | 效率 | 效果 | 复杂度 |
|---|---|---|---|
| 网格搜索 | 低 | 中 | 低 |
| 随机搜索 | 中 | 中 | 低 |
| 贝叶斯优化 | 高 | 高 | 高 |
| 遗传算法 | 中 | 高 | 很高 |
3.2 评估指标对比
| 指标 | 用途 | 优点 | 缺点 |
|---|---|---|---|
| 准确率 | 整体评估 | 简单 | 不平衡数据有偏差 |
| F1分数 | 不平衡数据 | 平衡 | 只关注正类 |
| AUC-ROC | 排序能力 | 全面 | 需要概率输出 |
3.3 交叉验证策略对比
| 方法 | 适用场景 | 稳定性 |
|---|---|---|
| K-fold | 通用 | 高 |
| Stratified K-fold | 不平衡数据 | 高 |
| TimeSeriesSplit | 时间序列 | 中 |
4. 最佳实践
4.1 模型评估流程
def evaluate_model(model, X_train, y_train, X_test, y_test, task_type='classification'):
    """Fit ``model`` and print and return its train/test metric reports.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; ``predict_proba``
            is used when present.
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features.
        y_test: Held-out targets.
        task_type: ``'classification'`` selects ``ClassificationEvaluator``;
            any other value selects ``RegressionEvaluator``.

    Returns:
        Dict ``{'train': report, 'test': report}`` holding the same reports
        that are printed, so callers can use the numbers programmatically.
    """

    def positive_scores(features):
        # Column 1 is the positive-class probability only for a two-column
        # (binary) output; for any other shape no scores are passed on and
        # the report's AUC stays None.
        if not hasattr(model, 'predict_proba'):
            return None
        proba = model.predict_proba(features)
        if getattr(proba, 'ndim', 0) != 2 or proba.shape[1] != 2:
            return None
        return proba[:, 1]

    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    if task_type == 'classification':
        train_eval = ClassificationEvaluator(
            y_train, y_pred_train, positive_scores(X_train)
        )
        test_eval = ClassificationEvaluator(
            y_test, y_pred_test, positive_scores(X_test)
        )
    else:
        train_eval = RegressionEvaluator(y_train, y_pred_train)
        test_eval = RegressionEvaluator(y_test, y_pred_test)

    print("训练集评估:")
    train_report = train_eval.report()
    print(train_report)
    print("\n测试集评估:")
    test_report = test_eval.report()
    print(test_report)

    return {'train': train_report, 'test': test_report}


# 4.2 超参数调优流程
def tune_hyperparameters(model, X, y, param_grid, method='random'):
    """Run a hyperparameter search, print the outcome, return the best model.

    Args:
        model: Estimator handed to ``HyperparameterTuner``.
        X: Training features.
        y: Training targets.
        param_grid: Search space handed to ``HyperparameterTuner``.
        method: Search method name forwarded to the tuner.

    Returns:
        The tuner's ``best_model`` attribute after the search.
    """
    searcher = HyperparameterTuner(model, param_grid, method=method)
    outcome = searcher.tune(X, y)

    # Report the best score first, then one line per winning parameter.
    print(f"最佳分数: {outcome['best_score']:.4f}")
    print("最佳参数:")
    for name, setting in outcome['best_params'].items():
        print(f" {name}: {setting}")

    return searcher.best_model


# 5. 总结
模型评估和调优是机器学习的关键环节:
- 分类评估:准确率、F1、AUC-ROC
- 回归评估:RMSE、MAE、R²
- 交叉验证:K-fold、Stratified K-fold
- 超参数调优:网格搜索、随机搜索、贝叶斯优化
对比数据如下:
- 贝叶斯优化效果最好
- 随机搜索性价比最高
- Stratified K-fold适合不平衡数据
- 推荐先随机搜索再贝叶斯优化
良好的评估和调优可以显著提升模型性能。