regression.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. """
  2. 回归任务评估指标
  3. """
import logging
from typing import Any, Dict, Optional

import numpy as np
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    median_absolute_error,
    r2_score,
)
  11. logger = logging.getLogger(__name__)
  12. class RegressionMetrics:
  13. """回归任务评估指标"""
  14. def __init__(self):
  15. """初始化回归指标"""
  16. pass
  17. def mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  18. """均方误差 (Mean Squared Error)"""
  19. return mean_squared_error(y_true, y_pred)
  20. def rmse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  21. """均方根误差 (Root Mean Squared Error)"""
  22. return np.sqrt(mean_squared_error(y_true, y_pred))
  23. def mae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  24. """平均绝对误差 (Mean Absolute Error)"""
  25. return mean_absolute_error(y_true, y_pred)
  26. def mape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  27. """平均绝对百分比误差 (Mean Absolute Percentage Error)"""
  28. return mean_absolute_percentage_error(y_true, y_pred)
  29. def r2_score(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  30. """决定系数 (R-squared)"""
  31. return r2_score(y_true, y_pred)
  32. def median_ae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  33. """中位数绝对误差 (Median Absolute Error)"""
  34. return median_absolute_error(y_true, y_pred)
  35. def mape_robust(self, y_true: np.ndarray, y_pred: np.ndarray,
  36. epsilon: float = 1e-8) -> float:
  37. """鲁棒的平均绝对百分比误差"""
  38. return np.mean(np.abs((y_true - y_pred) / (np.abs(y_true) + epsilon))) * 100
  39. def smape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  40. """对称平均绝对百分比误差 (Symmetric Mean Absolute Percentage Error)"""
  41. return np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred))) * 100
  42. def mase(self, y_true: np.ndarray, y_pred: np.ndarray,
  43. y_naive: np.ndarray) -> float:
  44. """平均绝对标度误差 (Mean Absolute Scaled Error)"""
  45. mae = self.mae(y_true, y_pred)
  46. mae_naive = self.mae(y_true, y_naive)
  47. return mae / mae_naive if mae_naive != 0 else 0
  48. def compute_all_metrics(self, y_true: np.ndarray, y_pred: np.ndarray,
  49. y_naive: Optional[np.ndarray] = None) -> Dict[str, float]:
  50. """计算所有回归指标"""
  51. metrics = {
  52. 'mse': self.mse(y_true, y_pred),
  53. 'rmse': self.rmse(y_true, y_pred),
  54. 'mae': self.mae(y_true, y_pred),
  55. 'mape': self.mape(y_true, y_pred),
  56. 'r2_score': self.r2_score(y_true, y_pred),
  57. 'median_ae': self.median_ae(y_true, y_pred),
  58. 'mape_robust': self.mape_robust(y_true, y_pred),
  59. 'smape': self.smape(y_true, y_pred)
  60. }
  61. if y_naive is not None:
  62. metrics['mase'] = self.mase(y_true, y_pred, y_naive)
  63. return metrics
  64. class TimeSeriesMetrics:
  65. """时间序列评估指标"""
  66. def __init__(self):
  67. """初始化时间序列指标"""
  68. pass
  69. def directional_accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  70. """方向准确率"""
  71. true_direction = np.diff(y_true)
  72. pred_direction = np.diff(y_pred)
  73. return np.mean((true_direction * pred_direction) > 0)
  74. def theil_u(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  75. """Theil's U统计量"""
  76. mse = np.mean((y_true - y_pred) ** 2)
  77. mse_naive = np.mean((y_true[1:] - y_true[:-1]) ** 2)
  78. return np.sqrt(mse / mse_naive) if mse_naive != 0 else 0
  79. def mean_absolute_scaled_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  80. """平均绝对标度误差"""
  81. mae = np.mean(np.abs(y_true - y_pred))
  82. mae_naive = np.mean(np.abs(y_true[1:] - y_true[:-1]))
  83. return mae / mae_naive if mae_naive != 0 else 0
  84. def mean_absolute_percentage_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
  85. """平均绝对百分比误差"""
  86. return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  87. class RankingMetrics:
  88. """排序任务评估指标"""
  89. def __init__(self):
  90. """初始化排序指标"""
  91. pass
  92. def ndcg(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = None) -> float:
  93. """归一化折扣累积增益 (Normalized Discounted Cumulative Gain)"""
  94. if k is None:
  95. k = len(y_true)
  96. # 按预测分数排序
  97. sorted_indices = np.argsort(y_pred)[::-1]
  98. sorted_true = y_true[sorted_indices]
  99. # 计算DCG
  100. dcg = 0
  101. for i in range(min(k, len(sorted_true))):
  102. dcg += sorted_true[i] / np.log2(i + 2)
  103. # 计算IDCG
  104. sorted_true_ideal = np.sort(y_true)[::-1]
  105. idcg = 0
  106. for i in range(min(k, len(sorted_true_ideal))):
  107. idcg += sorted_true_ideal[i] / np.log2(i + 2)
  108. return dcg / idcg if idcg > 0 else 0
  109. def hit_rate(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
  110. """命中率 (Hit Rate)"""
  111. # 获取前k个预测
  112. top_k_indices = np.argsort(y_pred)[::-1][:k]
  113. return np.sum(y_true[top_k_indices] > 0) / k
  114. def precision_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
  115. """K位置精确率"""
  116. top_k_indices = np.argsort(y_pred)[::-1][:k]
  117. return np.sum(y_true[top_k_indices] > 0) / k
  118. def recall_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
  119. """K位置召回率"""
  120. top_k_indices = np.argsort(y_pred)[::-1][:k]
  121. relevant_items = np.sum(y_true > 0)
  122. return np.sum(y_true[top_k_indices] > 0) / relevant_items if relevant_items > 0 else 0