| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157 |
- """
- 回归任务评估指标
- """
import logging
from typing import Any, Dict, Optional

import numpy as np
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    median_absolute_error,
    r2_score,
)
- logger = logging.getLogger(__name__)
class RegressionMetrics:
    """Evaluation metrics for regression tasks.

    Most methods are thin wrappers around ``sklearn.metrics``; the
    percentage-style metrics (``mape_robust``, ``smape``) are computed
    directly with NumPy.

    Units are not uniform across methods: ``mape`` returns scikit-learn's
    value unchanged (a fraction, not multiplied by 100), whereas
    ``mape_robust`` and ``smape`` return percentages.
    """

    def __init__(self):
        """Create a metrics helper; the instance holds no state."""

    def mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean squared error (MSE)."""
        return mean_squared_error(y_true, y_pred)

    def rmse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the root mean squared error (square root of the MSE)."""
        return np.sqrt(mean_squared_error(y_true, y_pred))

    def mae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean absolute error (MAE)."""
        return mean_absolute_error(y_true, y_pred)

    def mape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return scikit-learn's mean absolute percentage error.

        The value is passed through unchanged, i.e. it is NOT multiplied
        by 100 (unlike ``mape_robust`` and ``smape``).
        """
        return mean_absolute_percentage_error(y_true, y_pred)

    def r2_score(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the coefficient of determination (R-squared)."""
        # Inside a method body the bare name resolves to the module-level
        # sklearn function, not to this method.
        return r2_score(y_true, y_pred)

    def median_ae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the median absolute error."""
        return median_absolute_error(y_true, y_pred)

    def mape_robust(self, y_true: np.ndarray, y_pred: np.ndarray,
                    epsilon: float = 1e-8) -> float:
        """Return a zero-safe mean absolute percentage error, in percent.

        Args:
            y_true: Ground-truth values (any array-like).
            y_pred: Predicted values, broadcastable against ``y_true``.
            epsilon: Small constant added to ``|y_true|`` so that targets
                equal to zero do not cause a division by zero.

        Returns:
            ``mean(|y_true - y_pred| / (|y_true| + epsilon)) * 100``.
        """
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_pred, dtype=float)
        relative_error = np.abs((actual - predicted) / (np.abs(actual) + epsilon))
        return float(np.mean(relative_error) * 100)

    def smape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the symmetric mean absolute percentage error, in percent.

        Computes ``mean(2 * |y_true - y_pred| / (|y_true| + |y_pred|)) * 100``.
        Pairs where both values are zero have a zero denominator; since the
        numerator is then zero as well, they contribute an error of 0
        instead of NaN.
        """
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_pred, dtype=float)
        numerator = 2.0 * np.abs(actual - predicted)
        denominator = np.abs(actual) + np.abs(predicted)
        # Divide only where defined; untouched slots keep the zero from `out`.
        ratio = np.divide(
            numerator,
            denominator,
            out=np.zeros_like(numerator),
            where=denominator != 0,
        )
        return float(np.mean(ratio) * 100)

    def mase(self, y_true: np.ndarray, y_pred: np.ndarray,
             y_naive: np.ndarray) -> float:
        """Return the mean absolute scaled error against a naive forecast.

        Args:
            y_true: Ground-truth values.
            y_pred: Model predictions.
            y_naive: Predictions of the baseline (naive) forecaster.

        Returns:
            ``MAE(y_true, y_pred) / MAE(y_true, y_naive)``, or ``0.0`` when
            the naive MAE is exactly zero (the ratio is undefined there).
        """
        model_mae = self.mae(y_true, y_pred)
        naive_mae = self.mae(y_true, y_naive)
        if naive_mae == 0:
            return 0.0
        return model_mae / naive_mae

    def compute_all_metrics(self, y_true: np.ndarray, y_pred: np.ndarray,
                            y_naive: Optional[np.ndarray] = None) -> Dict[str, float]:
        """Compute every regression metric and return them keyed by name.

        Args:
            y_true: Ground-truth values.
            y_pred: Model predictions.
            y_naive: Optional baseline predictions; when given, the result
                also contains a ``'mase'`` entry.

        Returns:
            Dict with keys ``mse``, ``rmse``, ``mae``, ``mape``,
            ``r2_score``, ``median_ae``, ``mape_robust``, ``smape`` and,
            if ``y_naive`` is provided, ``mase``.
        """
        metrics = {
            'mse': self.mse(y_true, y_pred),
            'rmse': self.rmse(y_true, y_pred),
            'mae': self.mae(y_true, y_pred),
            'mape': self.mape(y_true, y_pred),
            'r2_score': self.r2_score(y_true, y_pred),
            'median_ae': self.median_ae(y_true, y_pred),
            'mape_robust': self.mape_robust(y_true, y_pred),
            'smape': self.smape(y_true, y_pred),
        }

        if y_naive is not None:
            metrics['mase'] = self.mase(y_true, y_pred, y_naive)

        return metrics
class TimeSeriesMetrics:
    """Evaluation metrics for time-series forecasts.

    Inputs may be any array-like (lists, tuples, NumPy arrays); they are
    converted to float arrays before use. Metrics that compare against the
    one-step "previous value" naive forecast need at least two
    observations and return NaN when given fewer.
    """

    def __init__(self):
        """Create a metrics helper; the instance holds no state."""

    def directional_accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the fraction of steps whose direction was predicted correctly.

        A step counts as correct only when the true and predicted
        step-to-step changes have the same non-zero sign (their product is
        strictly positive); steps where either change is zero count as
        incorrect.

        Returns:
            A value in ``[0, 1]``, or NaN when there are fewer than two
            observations (no step exists).
        """
        actual = np.asarray(y_true, dtype=float)
        forecast = np.asarray(y_pred, dtype=float)
        if actual.size < 2:
            return float("nan")
        same_direction = (np.diff(actual) * np.diff(forecast)) > 0
        return float(np.mean(same_direction))

    def theil_u(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return Theil's U statistic relative to the naive forecast.

        Computed as ``sqrt(MSE(model) / MSE(naive))`` where the naive
        forecast for each point is the previous true value.

        Returns:
            The ratio, ``0.0`` when the naive MSE is exactly zero (constant
            series), or NaN when there are fewer than two observations.
        """
        actual = np.asarray(y_true, dtype=float)
        forecast = np.asarray(y_pred, dtype=float)
        if actual.size < 2:
            return float("nan")
        model_mse = np.mean((actual - forecast) ** 2)
        naive_mse = np.mean((actual[1:] - actual[:-1]) ** 2)
        if naive_mse == 0:
            return 0.0
        return float(np.sqrt(model_mse / naive_mse))

    def mean_absolute_scaled_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the MAE scaled by the MAE of the naive forecast.

        The naive forecast for each point is the previous true value.

        Returns:
            ``MAE(model) / MAE(naive)``, ``0.0`` when the naive MAE is
            exactly zero, or NaN when there are fewer than two observations.
        """
        actual = np.asarray(y_true, dtype=float)
        forecast = np.asarray(y_pred, dtype=float)
        if actual.size < 2:
            return float("nan")
        model_mae = np.mean(np.abs(actual - forecast))
        naive_mae = np.mean(np.abs(actual[1:] - actual[:-1]))
        if naive_mae == 0:
            return 0.0
        return float(model_mae / naive_mae)

    def mean_absolute_percentage_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean absolute percentage error, in percent.

        Computes ``mean(|(y_true - y_pred) / y_true|) * 100``. No guard is
        applied for zero targets: any ``y_true`` entry equal to zero makes
        the result inf or NaN.
        """
        actual = np.asarray(y_true, dtype=float)
        forecast = np.asarray(y_pred, dtype=float)
        return float(np.mean(np.abs((actual - forecast) / actual)) * 100)
class RankingMetrics:
    """Evaluation metrics for ranking tasks.

    ``y_true`` holds per-item relevance and ``y_pred`` the model score for
    the same items; both may be any 1-D array-like. An item is treated as
    relevant when its ``y_true`` value is strictly positive. Ratios that
    are undefined (no relevant items, non-positive ``k``) are reported
    as ``0.0``.
    """

    def __init__(self):
        """Create a metrics helper; the instance holds no state."""

    @staticmethod
    def _relevant_in_top_k(y_true, y_pred, k: int) -> int:
        """Count relevant items among the ``k`` highest-scored items."""
        relevance = np.asarray(y_true)
        # Ascending argsort reversed gives indices by descending score.
        top_k = np.argsort(np.asarray(y_pred))[::-1][:k]
        return int(np.sum(relevance[top_k] > 0))

    def ndcg(self, y_true: np.ndarray, y_pred: np.ndarray,
             k: Optional[int] = None) -> float:
        """Return the normalized discounted cumulative gain (NDCG@k).

        Args:
            y_true: Relevance (gain) of each item.
            y_pred: Predicted score of each item; higher ranks first.
            k: Number of top positions to evaluate. ``None`` (default)
                uses all items; values larger than the number of items
                are clipped.

        Returns:
            ``DCG / IDCG`` using the ``gain / log2(rank + 1)`` discount
            (rank starting at 1), or ``0.0`` when the ideal DCG is not
            positive or when ``k`` is not positive.
        """
        gains = np.asarray(y_true, dtype=float)
        scores = np.asarray(y_pred)

        cutoff = len(gains) if k is None else min(k, len(gains))
        if cutoff <= 0:
            return 0.0

        # Position i (0-based) is discounted by log2(i + 2).
        log_discount = np.log2(np.arange(2, cutoff + 2))

        ranked_gains = gains[np.argsort(scores)[::-1]][:cutoff]
        ideal_gains = np.sort(gains)[::-1][:cutoff]

        dcg = float(np.sum(ranked_gains / log_discount))
        idcg = float(np.sum(ideal_gains / log_discount))

        return dcg / idcg if idcg > 0 else 0.0

    def hit_rate(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
        """Return the share of the top-``k`` predictions that are relevant.

        As implemented this is ``(# relevant in top-k) / k``, i.e. the same
        quantity as ``precision_at_k`` (not a 0/1 "any hit" indicator).
        The denominator is always ``k``, even if fewer items exist.
        Returns ``0.0`` when ``k`` is not positive.
        """
        if k <= 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / k

    def precision_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
        """Return precision@k: ``(# relevant in top-k) / k``.

        The denominator is always ``k``, even if fewer than ``k`` items
        exist. Returns ``0.0`` when ``k`` is not positive.
        """
        if k <= 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / k

    def recall_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
        """Return recall@k: ``(# relevant in top-k) / (# relevant overall)``.

        Returns ``0.0`` when there are no relevant items or when ``k`` is
        not positive.
        """
        total_relevant = int(np.sum(np.asarray(y_true) > 0))
        if total_relevant == 0 or k <= 0:
            return 0.0
        return self._relevant_in_top_k(y_true, y_pred, k) / total_relevant
|