Expand description
Comprehensive evaluation metrics for ToRSh
This module provides PyTorch-compatible metrics for model evaluation, built on top of SciRS2’s comprehensive metrics library.
Re-exports§
pub use deep_learning::BleuScore;pub use deep_learning::DeepLearningMetrics;pub use deep_learning::RougeMetrics;pub use deep_learning::RougeScore;pub use deep_learning::RougeType;pub use deep_learning::SimilarityType;pub use deep_learning::VectorizedFidScore;pub use deep_learning::VectorizedInceptionScore;pub use deep_learning::VectorizedPerplexity;pub use deep_learning::VectorizedSemanticSimilarity;pub use classification::ConfusionMatrix;pub use classification::MultiClassMetrics;pub use classification::ThresholdMetrics;pub use ranking::IRMetrics;pub use uncertainty::BayesianUncertainty;pub use uncertainty::CalibrationMetrics;pub use uncertainty::EnsembleUncertainty;pub use uncertainty::MCDropoutUncertainty;pub use uncertainty::UncertaintyDecomposition;pub use fairness::FairnessMetrics;pub use statistics::BootstrapResult;pub use statistics::CrossValidationResult;pub use statistics::HypothesisTestResult;pub use gpu::GpuAccuracy;pub use gpu::GpuBatchMetrics;pub use gpu::GpuConfusionMatrix;pub use parallel::ParallelAccuracy;pub use parallel::ParallelConfusionMatrix;pub use parallel::ParallelMetricCollection;pub use reporting::ComparisonReport;pub use reporting::MetricReport;pub use reporting::ReportBuilder;pub use reporting::ReportFormat;pub use memory_efficient::ChunkedEvaluator;pub use memory_efficient::MemoryEfficientAccuracy;pub use memory_efficient::MemoryEfficientMAE;pub use memory_efficient::MemoryEfficientMSE;pub use memory_efficient::OnlineConfusionMatrix;pub use memory_efficient::StreamingMetric;pub use tensorboard::MetricLogger as TensorBoardLogger;pub use tensorboard::TensorBoardWriter;pub use mlflow::ExperimentTracker;pub use mlflow::MLflowClient;pub use mlflow::MLflowRun;pub use visualization::CalibrationCurvePlot;pub use visualization::ConfusionMatrixPlot;pub use visualization::ExportFormat;pub use visualization::FeatureImportancePlot;pub use visualization::InteractiveDashboard;pub use visualization::LatexReportBuilder;pub use 
visualization::LearningCurvePlot;pub use visualization::MetricComparisonPlot;pub use visualization::PRCurvePlot;pub use visualization::ROCCurvePlot;pub use visualization::VisualizationAggregator;pub use advanced_ml::ContinualLearningMetrics;pub use advanced_ml::DomainAdaptationMetrics;pub use advanced_ml::FewShotMetrics;pub use advanced_ml::MetaLearningMetrics;pub use sklearn_compat::SklearnAccuracy;pub use sklearn_compat::SklearnF1Score;pub use sklearn_compat::SklearnMeanAbsoluteError;pub use sklearn_compat::SklearnMeanSquaredError;pub use sklearn_compat::SklearnMetric;pub use sklearn_compat::SklearnPrecision;pub use sklearn_compat::SklearnR2Score;pub use sklearn_compat::SklearnRecall;pub use wandb::LogEntry;pub use wandb::WandbClient;pub use model_selection::AICc;pub use model_selection::CVModelComparison;pub use model_selection::CVModelSelection;pub use model_selection::CVScoreType;pub use model_selection::ModelComparisonReport;pub use model_selection::MultiModelComparison;pub use model_selection::AIC;pub use model_selection::BIC;pub use model_selection::HQIC;pub use statistical_tests::FiveByTwoCVTest;pub use statistical_tests::FriedmanTest;pub use statistical_tests::KruskalWallisTest;pub use statistical_tests::MannWhitneyTest;pub use statistical_tests::McNemarTest;pub use statistical_tests::NemenyiTest;pub use statistical_tests::PairedTTest;pub use statistical_tests::WilcoxonTest;pub use time_series::dtw_distance;pub use time_series::error_autocorrelation;pub use time_series::mape;pub use time_series::mase;pub use time_series::mean_directional_accuracy;pub use time_series::msis;pub use time_series::smape;pub use time_series::theil_u;pub use time_series::tracking_signal;pub use regression_diagnostics::breusch_pagan_test;pub use regression_diagnostics::calculate_leverage;pub use regression_diagnostics::condition_number;pub use regression_diagnostics::cooks_distance;pub use regression_diagnostics::dffits;pub use regression_diagnostics::durbin_watson;pub use 
regression_diagnostics::variance_inflation_factor;pub use regression_diagnostics::RegressionDiagnosticReport;pub use regression_diagnostics::ResidualDiagnostics;pub use explainability::attribution_agreement;pub use explainability::counterfactual_validity;pub use explainability::explanation_completeness;pub use explainability::explanation_faithfulness;pub use explainability::feature_importance_stability;pub use explainability::feature_monotonicity;pub use explainability::interaction_strength;pub use explainability::ExplainabilityMetrics;pub use robustness::adversarial_accuracy;pub use robustness::attack_success_rate;pub use robustness::certified_robustness_radius;pub use robustness::confidence_stability;pub use robustness::corruption_robustness;pub use robustness::gradient_stability;pub use robustness::noise_sensitivity;pub use robustness::ood_detection_score;pub use robustness::robustness_accuracy_tradeoff;pub use robustness::RobustnessReport;
Modules§
- advanced_ml
- Advanced ML Metrics for Meta-Learning, Few-Shot Learning, Domain Adaptation, and Continual Learning
- classification
- Classification metrics
- clustering
- Clustering metrics with comprehensive evaluation algorithms
- deep_learning
- Deep learning specific metrics with high-performance vectorized implementations
- explainability
- Explainability and interpretability metrics
- fairness
- Fairness and bias detection metrics
- gpu
- GPU-accelerated metrics for high-performance evaluation
- memory_efficient
- Memory-efficient large dataset evaluation
- mlflow
- MLflow integration for experiment tracking
- model_selection
- Model selection metrics for choosing optimal models
- parallel
- Parallel metric computation for scalability
- ranking
- Ranking and recommendation metrics with comprehensive implementations
- regression
- Regression metrics
- regression_diagnostics
- Regression diagnostic metrics and tools
- reporting
- Automated metric reporting and visualization
- robustness
- Robustness and reliability metrics
- sklearn_compat
- Scikit-learn compatibility layer for torsh-metrics
- statistical_tests
- Advanced statistical hypothesis testing for model comparison
- statistics
- Statistical validation and bootstrap confidence intervals
- streaming
- Streaming (online) metrics for efficient large-scale evaluation
- tensorboard
- TensorBoard integration for metric logging
- time_series
- Time series forecasting metrics
- uncertainty
- Uncertainty quantification metrics
- utils
- Utility functions for metrics
- visualization
- Metric visualization utilities
- wandb
- Weights & Biases (W&B) integration for experiment tracking
Structs§
- MetricCollection
- Metric collection for evaluating multiple metrics at once
Traits§
- Metric
- Base trait for all metrics