"""Evaluation metrics for regression, time-series and ranking tasks."""

import logging
from typing import Any, Dict, Optional

import numpy as np
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    median_absolute_error,
    r2_score,
)

logger = logging.getLogger(__name__)


def _as_float_array(values) -> np.ndarray:
    """Return *values* as a float ndarray so arithmetic also works on lists."""
    return np.asarray(values, dtype=float)


class RegressionMetrics:
    """Evaluation metrics for regression tasks.

    The class is stateless; every method takes the ground truth and the
    predictions and returns a single score.

    Note on units: ``mape`` returns scikit-learn's value, which is a plain
    ratio, whereas ``mape_robust`` and ``smape`` multiply by 100 and
    therefore return percentages.
    """

    def mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean squared error (MSE)."""
        return mean_squared_error(y_true, y_pred)

    def rmse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the root mean squared error (RMSE)."""
        return np.sqrt(mean_squared_error(y_true, y_pred))

    def mae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean absolute error (MAE)."""
        return mean_absolute_error(y_true, y_pred)

    def mape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean absolute percentage error as computed by scikit-learn.

        The value is a ratio (it is not multiplied by 100).
        """
        return mean_absolute_percentage_error(y_true, y_pred)

    def r2_score(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the coefficient of determination (R-squared)."""
        # Inside a method body the bare name resolves to the module-level
        # scikit-learn function, not to this method, so this is not recursive.
        return r2_score(y_true, y_pred)

    def median_ae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the median absolute error."""
        return median_absolute_error(y_true, y_pred)

    def mape_robust(
        self, y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-8
    ) -> float:
        """Return a MAPE variant, in percent, that tolerates zero targets.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            epsilon: Constant added to ``|y_true|`` in the denominator so that
                zero targets do not cause a division by zero.

        Returns:
            Mean of ``|y_true - y_pred| / (|y_true| + epsilon)`` times 100.
        """
        y_true = _as_float_array(y_true)
        y_pred = _as_float_array(y_pred)
        relative_error = np.abs((y_true - y_pred) / (np.abs(y_true) + epsilon))
        return float(np.mean(relative_error) * 100)

    def smape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the symmetric mean absolute percentage error, in percent.

        Each term is ``2 * |y_true - y_pred| / (|y_true| + |y_pred|)``. When
        both the target and the prediction are zero the term is 0/0; it is
        counted as 0 (a perfect prediction) instead of turning the whole
        score into NaN.
        """
        y_true = _as_float_array(y_true)
        y_pred = _as_float_array(y_pred)
        numerator = 2.0 * np.abs(y_true - y_pred)
        denominator = np.abs(y_true) + np.abs(y_pred)
        # `where` skips the 0/0 positions; `out` supplies their value (0).
        terms = np.divide(
            numerator,
            denominator,
            out=np.zeros_like(numerator),
            where=denominator != 0,
        )
        return float(np.mean(terms) * 100)

    def mase(
        self, y_true: np.ndarray, y_pred: np.ndarray, y_naive: np.ndarray
    ) -> float:
        """Return the mean absolute scaled error against a naive forecast.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            y_naive: Predictions of the baseline used for scaling.

        Returns:
            ``MAE(y_true, y_pred) / MAE(y_true, y_naive)``, or 0.0 when the
            baseline error is exactly zero.
        """
        model_error = self.mae(y_true, y_pred)
        naive_error = self.mae(y_true, y_naive)
        if naive_error == 0:
            return 0.0
        return model_error / naive_error

    def compute_all_metrics(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        y_naive: Optional[np.ndarray] = None,
    ) -> Dict[str, float]:
        """Compute every regression metric of this class in one call.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values.
            y_naive: Optional baseline predictions; when given, the result
                also contains the ``'mase'`` entry.

        Returns:
            Mapping from metric name to score.
        """
        metrics = {
            'mse': self.mse(y_true, y_pred),
            'rmse': self.rmse(y_true, y_pred),
            'mae': self.mae(y_true, y_pred),
            'mape': self.mape(y_true, y_pred),
            'r2_score': self.r2_score(y_true, y_pred),
            'median_ae': self.median_ae(y_true, y_pred),
            'mape_robust': self.mape_robust(y_true, y_pred),
            'smape': self.smape(y_true, y_pred),
        }
        if y_naive is not None:
            metrics['mase'] = self.mase(y_true, y_pred, y_naive)
        return metrics


class TimeSeriesMetrics:
    """Evaluation metrics for time-series forecasts.

    The class is stateless. Metrics that compare against the "previous value"
    naive forecast need at least two observations; with fewer they return 0.0,
    the same fallback this module uses for a zero denominator.
    """

    def directional_accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the share of steps whose direction was predicted correctly.

        A step counts as correct when the true change and the predicted change
        have the same sign. Steps where either change is exactly zero count as
        incorrect because the product of the two changes is not positive.

        Returns:
            Fraction in [0, 1]; 0.0 when the series has fewer than two points.
        """
        true_steps = np.diff(_as_float_array(y_true))
        pred_steps = np.diff(_as_float_array(y_pred))
        if true_steps.size == 0:
            return 0.0
        return float(np.mean((true_steps * pred_steps) > 0))

    def theil_u(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return Theil's U statistic.

        Computed as the square root of the forecast MSE divided by the MSE of
        the naive "previous value" forecast.

        Returns:
            The statistic, or 0.0 when the naive MSE is zero or the series has
            fewer than two points.
        """
        y_true = _as_float_array(y_true)
        y_pred = _as_float_array(y_pred)
        naive_steps = y_true[1:] - y_true[:-1]
        if naive_steps.size == 0:
            return 0.0
        naive_mse = np.mean(naive_steps ** 2)
        if naive_mse == 0:
            return 0.0
        forecast_mse = np.mean((y_true - y_pred) ** 2)
        return float(np.sqrt(forecast_mse / naive_mse))

    def mean_absolute_scaled_error(
        self, y_true: np.ndarray, y_pred: np.ndarray
    ) -> float:
        """Return the mean absolute scaled error (MASE).

        The forecast MAE is scaled by the MAE of the naive "previous value"
        forecast computed on ``y_true`` itself.

        Returns:
            The scaled error, or 0.0 when the naive MAE is zero or the series
            has fewer than two points.
        """
        y_true = _as_float_array(y_true)
        y_pred = _as_float_array(y_pred)
        naive_steps = y_true[1:] - y_true[:-1]
        if naive_steps.size == 0:
            return 0.0
        naive_mae = np.mean(np.abs(naive_steps))
        if naive_mae == 0:
            return 0.0
        forecast_mae = np.mean(np.abs(y_true - y_pred))
        return float(forecast_mae / naive_mae)

    def mean_absolute_percentage_error(
        self, y_true: np.ndarray, y_pred: np.ndarray
    ) -> float:
        """Return the mean absolute percentage error, in percent.

        The denominator is ``y_true`` without any smoothing, so a zero target
        makes the result inf or NaN.
        """
        y_true = _as_float_array(y_true)
        y_pred = _as_float_array(y_pred)
        return float(np.mean(np.abs((y_true - y_pred) / y_true)) * 100)


class RankingMetrics:
    """Evaluation metrics for ranking tasks.

    ``y_true`` holds the relevance of each item and ``y_pred`` the predicted
    score; items are ranked by descending score. An item is treated as
    relevant when its ``y_true`` value is greater than zero.
    """

    @staticmethod
    def _dcg(gains: np.ndarray, k: int) -> float:
        """Return the discounted cumulative gain of the first *k* gains."""
        top_gains = gains[:max(k, 0)]
        # Rank i (0-based) is discounted by log2(i + 2).
        discounts = np.log2(np.arange(2, top_gains.size + 2))
        return float(np.sum(top_gains / discounts))

    @staticmethod
    def _relevant_in_top_k(y_true: np.ndarray, y_pred: np.ndarray, k: int) -> int:
        """Count the relevant items among the *k* highest-scored items."""
        if k <= 0:
            return 0
        top_k_indices = np.argsort(np.asarray(y_pred))[::-1][:k]
        return int(np.sum(np.asarray(y_true)[top_k_indices] > 0))

    def ndcg(
        self, y_true: np.ndarray, y_pred: np.ndarray, k: Optional[int] = None
    ) -> float:
        """Return the normalized discounted cumulative gain (NDCG) at *k*.

        Args:
            y_true: Relevance of each item.
            y_pred: Predicted score of each item.
            k: Number of top positions to evaluate; defaults to all items.

        Returns:
            DCG of the predicted order divided by the DCG of the ideal order,
            or 0.0 when the ideal DCG is not positive.
        """
        y_true = _as_float_array(y_true)
        if k is None:
            k = len(y_true)

        predicted_order = np.argsort(np.asarray(y_pred))[::-1]
        dcg = self._dcg(y_true[predicted_order], k)
        # The ideal ranking lists the items by descending true relevance.
        idcg = self._dcg(np.sort(y_true)[::-1], k)
        return dcg / idcg if idcg > 0 else 0.0

    def hit_rate(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
        """Return the hit rate at *k*.

        Computed as the number of relevant items among the top *k* divided by
        *k*, which is the same formula ``precision_at_k`` uses.

        Returns:
            The ratio, or 0.0 when *k* is not positive.
        """
        if k <= 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / k

    def precision_at_k(
        self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10
    ) -> float:
        """Return the precision at *k*.

        The divisor is always *k*, even when fewer than *k* items are
        available.

        Returns:
            Relevant items in the top *k* divided by *k*, or 0.0 when *k* is
            not positive.
        """
        if k <= 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / k

    def recall_at_k(
        self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10
    ) -> float:
        """Return the recall at *k*.

        Returns:
            Relevant items in the top *k* divided by the total number of
            relevant items, or 0.0 when there are no relevant items.
        """
        relevant_total = int(np.sum(np.asarray(y_true) > 0))
        if relevant_total == 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / relevant_total