use crate::error::{StatsError, StatsResult as Result};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::validation::*;

use crate::bayesian::{BayesianLinearRegression, BayesianRegressionResult};
use crate::mcmc::{GibbsSampler, MultivariateNormalGibbs};
use crate::multivariate::{FactorAnalysis, FactorAnalysisResult, PCA, PCAResult};
use crate::qmc::{halton, latin_hypercube, sobol};
use crate::survival::{CoxPHModel, KaplanMeierEstimator};

/// Configuration for an end-to-end Bayesian regression analysis.
#[derive(Debug, Clone)]
pub struct BayesianAnalysisWorkflow {
    /// Whether to draw posterior samples via MCMC.
    pub use_mcmc: bool,
    /// Number of MCMC samples to retain after burn-in.
    pub n_mcmc_samples: usize,
    /// Number of burn-in iterations discarded before sampling.
    pub mcmc_burnin: usize,
    /// Optional seed for reproducible results.
    pub random_seed: Option<u64>,
}

impl Default for BayesianAnalysisWorkflow {
    fn default() -> Self {
        Self {
            use_mcmc: true,
            n_mcmc_samples: 1000,
            mcmc_burnin: 100,
            random_seed: None,
        }
    }
}

/// Results from a complete Bayesian regression analysis.
#[derive(Debug, Clone)]
pub struct BayesianAnalysisResult {
    /// Fitted Bayesian linear regression.
    pub regression: BayesianRegressionResult,
    /// Posterior coefficient samples from the Gibbs sampler, if MCMC was used.
    pub mcmc_samples: Option<Array2<f64>>,
    /// Posterior predictive samples (one row per draw, one column per
    /// observation), if MCMC was used.
    pub predictive_samples: Option<Array2<f64>>,
    /// Model comparison metrics.
    pub model_metrics: BayesianModelMetrics,
}

/// Information criteria and related model-quality metrics.
#[derive(Debug, Clone)]
pub struct BayesianModelMetrics {
    /// Log marginal likelihood (model evidence).
    pub log_marginal_likelihood: f64,
    /// Deviance information criterion.
    pub dic: f64,
    /// Widely applicable information criterion.
    pub waic: f64,
    /// Leave-one-out information criterion.
    pub loo_ic: f64,
}

impl BayesianAnalysisWorkflow {
    /// Create a workflow with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Enable MCMC with the given number of retained samples and burn-in
    /// iterations.
    pub fn with_mcmc(mut self, n_samples_: usize, burnin: usize) -> Self {
        self.use_mcmc = true;
        self.n_mcmc_samples = n_samples_;
        self.mcmc_burnin = burnin;
        self
    }

    /// Disable MCMC sampling.
    pub fn without_mcmc(mut self) -> Self {
        self.use_mcmc = false;
        self
    }

    /// Set the random seed for reproducible results.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.random_seed = Some(seed);
        self
    }

    /// Run the full analysis: fit the regression, optionally sample the
    /// posterior and the posterior predictive, and compute model metrics.
    pub fn analyze(
        &self,
        x: ArrayView2<f64>,
        y: ArrayView1<f64>,
    ) -> Result<BayesianAnalysisResult> {
        checkarray_finite(&x, "x")?;
        checkarray_finite(&y, "y")?;

        let (n_samples_, n_features) = x.dim();
        if y.len() != n_samples_ {
            return Err(StatsError::DimensionMismatch(format!(
                "y length ({}) must match x rows ({})",
                y.len(),
                n_samples_
            )));
        }

        let bayesian_reg = BayesianLinearRegression::new(n_features, true)?;
        let regression = bayesian_reg.fit(x, y)?;

        let mcmc_samples = if self.use_mcmc {
            Some(self.perform_mcmc_sampling(&regression, n_features)?)
        } else {
            None
        };

        let predictive_samples = if self.use_mcmc {
            Some(self.generate_predictive_samples(&bayesian_reg, &regression, x)?)
        } else {
            None
        };

        let model_metrics = self.compute_model_metrics(&regression, x, y)?;

        Ok(BayesianAnalysisResult {
            regression,
            mcmc_samples,
            predictive_samples,
            model_metrics,
        })
    }

    /// Draw posterior coefficient samples with a Gibbs sampler initialized at
    /// the posterior mean.
    fn perform_mcmc_sampling(
        &self,
        regression: &BayesianRegressionResult,
        _n_features: usize,
    ) -> Result<Array2<f64>> {
        use scirs2_core::random::{rngs::StdRng, SeedableRng};

        let mut rng = match self.random_seed {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => {
                // Fall back to a time-based seed when none was provided.
                use std::time::{SystemTime, UNIX_EPOCH};
                let seed = SystemTime::now()
                    .duration_since(UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs();
                StdRng::seed_from_u64(seed)
            }
        };

        let gibbs_sampler = MultivariateNormalGibbs::from_precision(
            regression.posterior_mean.clone(),
            regression.posterior_covariance.clone(),
        )?;

        let mut sampler = GibbsSampler::new(gibbs_sampler, regression.posterior_mean.clone())?;

        // Discard burn-in iterations before collecting samples.
        for _ in 0..self.mcmc_burnin {
            sampler.step(&mut rng)?;
        }

        let samples = sampler.sample(self.n_mcmc_samples, &mut rng)?;
        Ok(samples)
    }

    /// Generate approximate posterior predictive samples by adding sampled
    /// observation noise to the posterior predictive mean. Coefficient
    /// uncertainty is not propagated into the mean here.
    fn generate_predictive_samples(
        &self,
        bayesian_reg: &BayesianLinearRegression,
        regression: &BayesianRegressionResult,
        x_test: ArrayView2<f64>,
    ) -> Result<Array2<f64>> {
        use scirs2_core::random::{rngs::StdRng, SeedableRng};
        use scirs2_core::random::{Distribution, Normal};

        let mut rng = match self.random_seed {
            Some(seed) => StdRng::seed_from_u64(seed),
            None => {
                use std::time::{SystemTime, UNIX_EPOCH};
                let seed = SystemTime::now()
                    .duration_since(UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs();
                StdRng::seed_from_u64(seed)
            }
        };

        let n_test = x_test.nrows();
        let mut predictive_samples = Array2::zeros((self.n_mcmc_samples, n_test));

        // The predictive mean does not depend on the draw index, so compute it
        // once outside the sampling loop.
        let pred_result = bayesian_reg.predict(x_test, regression)?;

        // Plug-in noise scale from the posterior precision parameters.
        let noise_std = (regression.posterior_beta / regression.posterior_alpha).sqrt();
        let noise_normal = Normal::new(0.0, noise_std).map_err(|e| {
            StatsError::ComputationError(format!("Failed to create noise normal: {}", e))
        })?;

        for i in 0..self.n_mcmc_samples {
            for j in 0..n_test {
                let noise = noise_normal.sample(&mut rng);
                predictive_samples[[i, j]] = pred_result.mean[j] + noise;
            }
        }

        Ok(predictive_samples)
    }

    /// Compute rough model-comparison metrics. All three criteria use the log
    /// marginal likelihood with the raw parameter count as a plug-in for the
    /// effective number of parameters, so they are approximations rather than
    /// the full pointwise definitions.
    fn compute_model_metrics(
        &self,
        regression: &BayesianRegressionResult,
        x: ArrayView2<f64>,
        _y: ArrayView1<f64>,
    ) -> Result<BayesianModelMetrics> {
        let n_samples_ = x.nrows() as f64;
        let n_params = regression.posterior_mean.len() as f64;

        let log_marginal_likelihood = regression.log_marginal_likelihood;

        // DIC = deviance + 2 * effective number of parameters.
        let deviance = -2.0 * log_marginal_likelihood;
        let effective_params = n_params;
        let dic = deviance + 2.0 * effective_params;

        let waic = -2.0 * log_marginal_likelihood + 2.0 * effective_params;

        // Small-sample corrected penalty, analogous to AICc.
        let loo_ic = -2.0 * log_marginal_likelihood
            + 2.0 * effective_params * n_samples_ / (n_samples_ - n_params - 1.0);

        Ok(BayesianModelMetrics {
            log_marginal_likelihood,
            dic,
            waic,
            loo_ic,
        })
    }
}

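// Illustrative usage sketch (an editorial addition, not part of the original
// module): exercises the Bayesian workflow end-to-end on a small synthetic
// regression problem. The data values are made up; only the APIs defined or
// imported above are assumed.
#[cfg(test)]
mod bayesian_workflow_example {
    use super::*;
    use scirs2_core::ndarray::{Array1, Array2};

    #[test]
    fn analyze_runs_end_to_end() {
        // Six observations of two features, with y roughly 2*x0 - x1.
        let x = Array2::from_shape_vec(
            (6, 2),
            vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.5, 0.5, 2.0, 1.5, 1.5],
        )
        .expect("shape matches data length");
        let y = Array1::from_vec(vec![2.0, -1.0, 1.0, 3.5, -1.0, 1.5]);

        let result = BayesianAnalysisWorkflow::new()
            .with_mcmc(500, 50)
            .with_seed(42)
            .analyze(x.view(), y.view())
            .expect("analysis succeeds on well-formed input");

        // MCMC was enabled, so both sample sets should be present.
        assert!(result.mcmc_samples.is_some());
        assert!(result.predictive_samples.is_some());
        assert!(result.model_metrics.dic.is_finite());
    }
}
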
/// Configuration for combined PCA / factor-analysis dimensionality reduction.
#[derive(Debug, Clone)]
pub struct DimensionalityAnalysisWorkflow {
    /// Number of PCA components; `None` keeps all components.
    pub n_pca_components: Option<usize>,
    /// Number of factors; `None` skips factor analysis.
    pub n_factors: Option<usize>,
    /// Whether to use incremental PCA for large sample counts.
    pub use_incremental_pca: bool,
    /// Sample-count threshold above which incremental PCA is used.
    pub pca_batchsize: usize,
    /// Optional seed for reproducible results.
    pub random_seed: Option<u64>,
}

impl Default for DimensionalityAnalysisWorkflow {
    fn default() -> Self {
        Self {
            n_pca_components: None,
            n_factors: None,
            use_incremental_pca: false,
            pca_batchsize: 1000,
            random_seed: None,
        }
    }
}

/// Results of the dimensionality analysis.
#[derive(Debug, Clone)]
pub struct DimensionalityAnalysisResult {
    /// PCA results, when PCA was performed.
    pub pca: Option<PCAResult>,
    /// Factor analysis results, when requested.
    pub factor_analysis: Option<FactorAnalysisResult>,
    /// Suggested numbers of components and factors.
    pub recommendations: DimensionalityRecommendations,
    /// Diagnostics for comparing reduction methods.
    pub comparison_metrics: DimensionalityMetrics,
}

/// Suggested dimensionality settings derived from the data.
#[derive(Debug, Clone)]
pub struct DimensionalityRecommendations {
    /// Recommended number of PCA components.
    pub optimal_pca_components: usize,
    /// Recommended number of factors.
    pub optimal_factors: usize,
    /// Variance fraction explained by the recommended components.
    pub explained_variance_ratio: f64,
}

/// Diagnostics used to assess the suitability of dimensionality reduction.
#[derive(Debug, Clone)]
pub struct DimensionalityMetrics {
    /// Eigenvalues of the covariance matrix, in descending order.
    pub eigenvalues: Array1<f64>,
    /// Cumulative fraction of total variance explained.
    pub cumulative_variance: Array1<f64>,
    /// Kaiser-Meyer-Olkin measure of sampling adequacy.
    pub kmo_measure: f64,
    /// Bartlett's test of sphericity as (statistic, p-value).
    pub bartlett_test: (f64, f64),
}

impl DimensionalityAnalysisWorkflow {
    /// Create a workflow with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Configure PCA: component count, incremental mode, and batch size.
    pub fn with_pca(
        mut self,
        n_components: Option<usize>,
        incremental: bool,
        batchsize: usize,
    ) -> Self {
        self.n_pca_components = n_components;
        self.use_incremental_pca = incremental;
        self.pca_batchsize = batchsize;
        self
    }

    /// Request factor analysis with the given number of factors.
    pub fn with_factor_analysis(mut self, n_factors: Option<usize>) -> Self {
        self.n_factors = n_factors;
        self
    }

    /// Set the random seed for reproducible results.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.random_seed = Some(seed);
        self
    }

    /// Run PCA (and optionally factor analysis), then derive recommendations
    /// and diagnostics.
    pub fn analyze(&self, data: ArrayView2<f64>) -> Result<DimensionalityAnalysisResult> {
        checkarray_finite(&data, "data")?;
        let (n_samples_, _n_features) = data.dim();

        if n_samples_ < 3 {
            return Err(StatsError::InvalidArgument(
                "Need at least 3 samples for analysis".to_string(),
            ));
        }

        let pca = if self.use_incremental_pca && n_samples_ > self.pca_batchsize {
            Some(self.perform_incremental_pca(data)?)
        } else {
            Some(self.perform_standard_pca(data)?)
        };

        let factor_analysis = if self.n_factors.is_some() {
            Some(self.perform_factor_analysis(data)?)
        } else {
            None
        };

        let recommendations = self.generate_recommendations(data, &pca)?;

        let comparison_metrics = self.compute_metrics(data)?;

        Ok(DimensionalityAnalysisResult {
            pca,
            factor_analysis,
            recommendations,
            comparison_metrics,
        })
    }

    /// Fit a standard (full-batch) PCA.
    fn perform_standard_pca(&self, data: ArrayView2<f64>) -> Result<PCAResult> {
        let n_components = self
            .n_pca_components
            .unwrap_or(data.ncols().min(data.nrows()));

        let pca = PCA::new()
            .with_n_components(n_components)
            .with_center(true)
            .with_scale(false);

        if let Some(seed) = self.random_seed {
            pca.with_random_state(seed).fit(data)
        } else {
            pca.fit(data)
        }
    }

    /// Incremental PCA currently falls back to standard PCA.
    fn perform_incremental_pca(&self, data: ArrayView2<f64>) -> Result<PCAResult> {
        self.perform_standard_pca(data)
    }

    fn perform_factor_analysis(&self, data: ArrayView2<f64>) -> Result<FactorAnalysisResult> {
        use crate::multivariate::RotationType;

        let n_factors = self.n_factors.unwrap_or(2);

        let mut fa = FactorAnalysis::new(n_factors)?
            .with_rotation(RotationType::Varimax)
            .with_max_iter(1000)
            .with_tolerance(1e-6);

        if let Some(seed) = self.random_seed {
            fa = fa.with_random_state(seed);
        }

        fa.fit(data)
    }

    /// Derive recommended component and factor counts via the Kaiser
    /// criterion, maximum-likelihood selection, and parallel analysis.
    fn generate_recommendations(
        &self,
        data: ArrayView2<f64>,
        pca: &Option<PCAResult>,
    ) -> Result<DimensionalityRecommendations> {
        use crate::multivariate::{efa::parallel_analysis, mle_components};

        // Kaiser criterion: keep components whose eigenvalue is at least 1.
        let optimal_pca_components = if let Some(ref pca_result) = pca {
            pca_result
                .explained_variance
                .iter()
                .position(|&ev| ev < 1.0)
                .unwrap_or(pca_result.explained_variance.len())
        } else {
            mle_components(data, None)?
        };

        let optimal_factors = parallel_analysis(data, 100, 95.0, self.random_seed)?;

        let explained_variance_ratio = if let Some(ref pca_result) = pca {
            pca_result
                .explained_variance_ratio
                .slice(scirs2_core::ndarray::s![..optimal_pca_components])
                .sum()
        } else {
            0.0
        };

        Ok(DimensionalityRecommendations {
            optimal_pca_components,
            optimal_factors,
            explained_variance_ratio,
        })
    }

    /// Compute eigenvalue diagnostics, the KMO measure, and Bartlett's test.
    fn compute_metrics(&self, data: ArrayView2<f64>) -> Result<DimensionalityMetrics> {
        use crate::multivariate::efa::{bartlett_test, kmo_test};

        // Center the data and form the sample covariance matrix.
        let mean = data.mean_axis(scirs2_core::ndarray::Axis(0)).unwrap();
        let mut centered = data.to_owned();
        for mut row in centered.rows_mut() {
            row -= &mean;
        }

        let cov = centered.t().dot(&centered) / (data.nrows() - 1) as f64;

        use scirs2_core::ndarray::ndarray_linalg::Eigh;
        let eigenvalues = cov
            .eigh(scirs2_core::ndarray::ndarray_linalg::UPLO::Upper)
            .map_err(|e| {
                StatsError::ComputationError(format!("Eigenvalue decomposition failed: {}", e))
            })?
            .0;

        // Sort eigenvalues in descending order.
        let mut sorted_eigenvalues = eigenvalues.to_vec();
        sorted_eigenvalues.sort_by(|a, b| b.partial_cmp(a).unwrap());
        let eigenvalues = Array1::from_vec(sorted_eigenvalues);

        // Cumulative fraction of total variance explained.
        let total_variance = eigenvalues.sum();
        let mut cumulative_variance = Array1::zeros(eigenvalues.len());
        let mut cumsum = 0.0;
        for i in 0..eigenvalues.len() {
            cumsum += eigenvalues[i];
            cumulative_variance[i] = cumsum / total_variance;
        }

        let kmo_measure = kmo_test(data)?;

        let bartlett_test = bartlett_test(data)?;

        Ok(DimensionalityMetrics {
            eigenvalues,
            cumulative_variance,
            kmo_measure,
            bartlett_test,
        })
    }
}

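// Illustrative usage sketch (an editorial addition): runs the dimensionality
// workflow with explicit PCA settings and a one-factor model on synthetic,
// correlated data. Values are arbitrary; only the APIs defined or imported
// above are assumed.
#[cfg(test)]
mod dimensionality_workflow_example {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn analyze_reports_recommendations() {
        // Eight samples of three correlated features.
        let data = Array2::from_shape_vec(
            (8, 3),
            vec![
                1.0, 2.0, 3.0, 2.0, 4.1, 6.2, 3.0, 5.9, 9.1, 4.0, 8.2, 11.9, 5.0, 9.8, 15.2,
                6.0, 12.1, 18.1, 7.0, 14.2, 20.9, 8.0, 15.9, 24.1,
            ],
        )
        .expect("shape matches data length");

        let result = DimensionalityAnalysisWorkflow::new()
            .with_pca(Some(2), false, 1000)
            .with_factor_analysis(Some(1))
            .with_seed(7)
            .analyze(data.view())
            .expect("analysis succeeds on well-conditioned data");

        assert!(result.pca.is_some());
        assert!(result.factor_analysis.is_some());
        // Explained variance is a fraction, so it cannot exceed one.
        assert!(result.recommendations.explained_variance_ratio <= 1.0);
    }
}
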
/// Configuration for quasi-Monte Carlo sequence generation.
#[derive(Debug, Clone)]
pub struct QMCWorkflow {
    /// Which low-discrepancy sequence family to generate.
    pub sequence_type: QMCSequenceType,
    /// Whether to apply scrambling (where the sequence supports it).
    pub scrambling: bool,
    /// Dimensionality of the generated points.
    pub dimensions: usize,
    /// Number of points to generate.
    pub n_samples_: usize,
    /// Optional seed for reproducible results.
    pub random_seed: Option<u64>,
}

/// Supported quasi-Monte Carlo sequence families.
#[derive(Debug, Clone, Copy)]
pub enum QMCSequenceType {
    /// Sobol sequence.
    Sobol,
    /// Halton sequence.
    Halton,
    /// Latin hypercube sampling.
    LatinHypercube,
}

/// Generated QMC samples together with quality diagnostics.
#[derive(Debug, Clone)]
pub struct QMCResult {
    /// Generated points, one row per sample.
    pub samples: Array2<f64>,
    /// The sequence family that produced the samples.
    pub sequence_type: QMCSequenceType,
    /// Discrepancy and uniformity diagnostics.
    pub quality_metrics: QMCQualityMetrics,
}

/// Diagnostics describing how evenly the samples cover the unit hypercube.
#[derive(Debug, Clone)]
pub struct QMCQualityMetrics {
    /// Star discrepancy of the point set (lower is better).
    pub star_discrepancy: f64,
    /// Inverse coefficient of variation of nearest-neighbor distances
    /// (higher means more even spacing).
    pub uniformity: f64,
    /// Fraction of grid cells containing at least one point.
    pub coverage_efficiency: f64,
}

impl Default for QMCWorkflow {
    fn default() -> Self {
        Self {
            sequence_type: QMCSequenceType::Sobol,
            scrambling: true,
            dimensions: 2,
            n_samples_: 1000,
            random_seed: None,
        }
    }
}

impl QMCWorkflow {
    /// Create a workflow for the given dimensionality and sample count.
    pub fn new(dimensions: usize, n_samples_: usize) -> Self {
        Self {
            dimensions,
            n_samples_,
            ..Default::default()
        }
    }

    /// Select the sequence family.
    pub fn with_sequence_type(mut self, sequence_type: QMCSequenceType) -> Self {
        self.sequence_type = sequence_type;
        self
    }

    /// Enable or disable scrambling.
    pub fn with_scrambling(mut self, scrambling: bool) -> Self {
        self.scrambling = scrambling;
        self
    }

    /// Set the random seed for reproducible results.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.random_seed = Some(seed);
        self
    }

    /// Generate the samples and compute quality metrics.
    pub fn generate(&self) -> Result<QMCResult> {
        check_positive(self.dimensions, "dimensions")?;
        check_positive(self.n_samples_, "n_samples_")?;

        let samples = match self.sequence_type {
            QMCSequenceType::Sobol => sobol(
                self.n_samples_,
                self.dimensions,
                self.scrambling,
                self.random_seed,
            )?,
            QMCSequenceType::Halton => halton(
                self.n_samples_,
                self.dimensions,
                self.scrambling,
                self.random_seed,
            )?,
            QMCSequenceType::LatinHypercube => {
                latin_hypercube(self.n_samples_, self.dimensions, self.random_seed)?
            }
        };

        let quality_metrics = self.compute_quality_metrics(&samples)?;

        Ok(QMCResult {
            samples,
            sequence_type: self.sequence_type,
            quality_metrics,
        })
    }

    /// Compute discrepancy, uniformity, and coverage diagnostics for a point set.
    fn compute_quality_metrics(&self, samples: &Array2<f64>) -> Result<QMCQualityMetrics> {
        use crate::qmc::star_discrepancy;

        // Convert the rows into the owned point-list form expected by
        // `star_discrepancy`.
        let sample_points: Vec<Array1<f64>> = samples
            .rows()
            .into_iter()
            .map(|row| row.to_owned())
            .collect();

        let samples_view = Array1::from_vec(sample_points);
        let star_discrepancy = star_discrepancy(&samples_view.view())?;

        let uniformity = self.compute_uniformity(samples)?;

        let coverage_efficiency = self.compute_coverage_efficiency(samples)?;

        Ok(QMCQualityMetrics {
            star_discrepancy,
            uniformity,
            coverage_efficiency,
        })
    }

    /// Uniformity as the inverse coefficient of variation of nearest-neighbor
    /// distances: evenly spaced points have similar spacing, so a small
    /// relative spread yields a large uniformity score.
    fn compute_uniformity(&self, samples: &Array2<f64>) -> Result<f64> {
        let n_samples_ = samples.nrows();
        let mut min_distances = Array1::zeros(n_samples_);

        // For each point, find the Euclidean distance to its nearest neighbor.
        for i in 0..n_samples_ {
            let mut min_dist = f64::INFINITY;
            for j in 0..n_samples_ {
                if i != j {
                    let mut dist = 0.0;
                    for k in 0..self.dimensions {
                        let diff = samples[[i, k]] - samples[[j, k]];
                        dist += diff * diff;
                    }
                    dist = dist.sqrt();
                    if dist < min_dist {
                        min_dist = dist;
                    }
                }
            }
            min_distances[i] = min_dist;
        }

        let mean_dist = min_distances.mean().unwrap();
        let var_dist = min_distances.var(1.0);
        let uniformity = 1.0 / (var_dist.sqrt() / mean_dist);
        Ok(uniformity)
    }

    /// Coverage efficiency: partition the unit hypercube into roughly
    /// `n_samples_` equal cells and report the fraction of cells containing at
    /// least one point.
    fn compute_coverage_efficiency(&self, samples: &Array2<f64>) -> Result<f64> {
        let n_bins = (self.n_samples_ as f64)
            .powf(1.0 / self.dimensions as f64)
            .ceil() as usize;
        let mut occupied_bins = std::collections::HashSet::new();

        for i in 0..samples.nrows() {
            let mut bin_id = Vec::new();
            for j in 0..self.dimensions {
                let bin = (samples[[i, j]] * n_bins as f64).floor() as usize;
                bin_id.push(bin.min(n_bins - 1));
            }
            occupied_bins.insert(bin_id);
        }

        let total_bins = n_bins.pow(self.dimensions as u32);
        let coverage_efficiency = occupied_bins.len() as f64 / total_bins as f64;

        Ok(coverage_efficiency)
    }
}

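// Illustrative usage sketch (an editorial addition): generates a scrambled
// Sobol point set in three dimensions and checks the shape and diagnostics.
// Only the APIs defined or imported above are assumed.
#[cfg(test)]
mod qmc_workflow_example {
    use super::*;

    #[test]
    fn generate_sobol_points() {
        let result = QMCWorkflow::new(3, 256)
            .with_sequence_type(QMCSequenceType::Sobol)
            .with_scrambling(true)
            .with_seed(1)
            .generate()
            .expect("sequence generation succeeds");

        // One row per point, one column per dimension.
        assert_eq!(result.samples.dim(), (256, 3));
        // Diagnostics should be finite for a proper low-discrepancy set.
        assert!(result.quality_metrics.star_discrepancy.is_finite());
        assert!(result.quality_metrics.coverage_efficiency > 0.0);
    }
}
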
/// Configuration for a survival analysis combining Kaplan-Meier estimation
/// with an optional Cox proportional hazards model.
#[derive(Debug, Clone)]
pub struct SurvivalAnalysisWorkflow {
    /// Confidence level for survival-curve confidence intervals.
    pub confidence_level: f64,
    /// Whether to fit a Cox model when covariates are provided.
    pub fit_cox_model: bool,
    /// Maximum iterations for the Cox model fit.
    pub cox_max_iter: usize,
    /// Convergence tolerance for the Cox model fit.
    pub cox_tolerance: f64,
}

impl Default for SurvivalAnalysisWorkflow {
    fn default() -> Self {
        Self {
            confidence_level: 0.95,
            fit_cox_model: true,
            cox_max_iter: 100,
            cox_tolerance: 1e-6,
        }
    }
}

/// Results of the survival analysis.
#[derive(Debug, Clone)]
pub struct SurvivalAnalysisResult {
    /// Fitted Kaplan-Meier estimator.
    pub kaplan_meier: crate::survival::KaplanMeierEstimator,
    /// Fitted Cox model, when covariates were supplied and fitting was enabled.
    pub cox_model: Option<crate::survival::CoxPHModel>,
    /// Summary statistics for the cohort.
    pub summary_stats: SurvivalSummaryStats,
}

/// Summary statistics derived from the fitted survival curve.
#[derive(Debug, Clone)]
pub struct SurvivalSummaryStats {
    /// Median survival time, if the curve drops to 0.5.
    pub median_survival: Option<f64>,
    /// 25th percentile of survival time, if reached.
    pub q25_survival: Option<f64>,
    /// 75th percentile of survival time, if reached.
    pub q75_survival: Option<f64>,
    /// Fraction of observations that experienced the event.
    pub event_rate: f64,
    /// Fraction of observations that were censored.
    pub censoring_rate: f64,
}

impl SurvivalAnalysisWorkflow {
    /// Create a workflow with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the confidence level for survival-curve intervals.
    pub fn with_confidence_level(mut self, level: f64) -> Self {
        self.confidence_level = level;
        self
    }

    /// Enable Cox model fitting with the given iteration limit and tolerance.
    pub fn with_cox_model(mut self, max_iter: usize, tolerance: f64) -> Self {
        self.fit_cox_model = true;
        self.cox_max_iter = max_iter;
        self.cox_tolerance = tolerance;
        self
    }

    /// Disable Cox model fitting.
    pub fn without_cox_model(mut self) -> Self {
        self.fit_cox_model = false;
        self
    }

    /// Run the analysis: fit the Kaplan-Meier curve, optionally fit a Cox
    /// model, and compute summary statistics.
    pub fn analyze(
        &self,
        durations: ArrayView1<f64>,
        events: ArrayView1<bool>,
        covariates: Option<ArrayView2<f64>>,
    ) -> Result<SurvivalAnalysisResult> {
        checkarray_finite(&durations, "durations")?;

        if durations.len() != events.len() {
            return Err(StatsError::DimensionMismatch(format!(
                "durations length ({}) must match events length ({})",
                durations.len(),
                events.len()
            )));
        }

        let kaplan_meier =
            KaplanMeierEstimator::fit(durations, events, Some(self.confidence_level))?;

        // Fit the Cox model only when covariates are available.
        let cox_model = if self.fit_cox_model {
            if let Some(cov) = covariates {
                Some(CoxPHModel::fit(
                    durations,
                    events,
                    cov,
                    Some(self.cox_max_iter),
                    Some(self.cox_tolerance),
                )?)
            } else {
                None
            }
        } else {
            None
        };

        let summary_stats = self.compute_summary_stats(&durations, &events, &kaplan_meier)?;

        Ok(SurvivalAnalysisResult {
            kaplan_meier,
            cox_model,
            summary_stats,
        })
    }

    /// Compute event/censoring rates and survival-time quartiles.
    fn compute_summary_stats(
        &self,
        _durations: &ArrayView1<f64>,
        events: &ArrayView1<bool>,
        km: &KaplanMeierEstimator,
    ) -> Result<SurvivalSummaryStats> {
        let total_events: usize = events.iter().map(|&e| if e { 1 } else { 0 }).sum();
        let total_observations = events.len();
        let event_rate = total_events as f64 / total_observations as f64;
        let censoring_rate = 1.0 - event_rate;

        let median_survival = km.median_survival_time;

        // The 25th percentile of survival time is where S(t) first drops to
        // 0.75, and the 75th percentile where it first drops to 0.25.
        let q25_survival = self.find_survival_percentile(km, 0.75)?;
        let q75_survival = self.find_survival_percentile(km, 0.25)?;

        Ok(SurvivalSummaryStats {
            median_survival,
            q25_survival,
            q75_survival,
            event_rate,
            censoring_rate,
        })
    }

    /// Return the first event time at which the survival function falls to or
    /// below `target_survival`, or `None` if the curve never reaches it.
    fn find_survival_percentile(
        &self,
        km: &KaplanMeierEstimator,
        target_survival: f64,
    ) -> Result<Option<f64>> {
        for i in 0..km.survival_function.len() {
            if km.survival_function[i] <= target_survival {
                return Ok(Some(km.event_times[i]));
            }
        }
        Ok(None)
    }
}
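
// Illustrative usage sketch (an editorial addition): fits a Kaplan-Meier curve
// and a one-covariate Cox model on a small synthetic cohort. Durations, event
// flags, and the covariate values are made up; only the APIs defined or
// imported above are assumed.
#[cfg(test)]
mod survival_workflow_example {
    use super::*;
    use scirs2_core::ndarray::{Array1, Array2};

    #[test]
    fn analyze_small_cohort() {
        let durations = Array1::from_vec(vec![5.0, 8.0, 12.0, 3.0, 9.0, 15.0]);
        let events = Array1::from_vec(vec![true, true, false, true, false, true]);
        // Single binary covariate, e.g. a treatment indicator.
        let covariates = Array2::from_shape_vec((6, 1), vec![0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
            .expect("shape matches data length");

        let result = SurvivalAnalysisWorkflow::new()
            .with_confidence_level(0.90)
            .analyze(durations.view(), events.view(), Some(covariates.view()))
            .expect("fit succeeds on well-formed input");

        assert!(result.cox_model.is_some());
        // Four of the six subjects experienced the event.
        assert!((result.summary_stats.event_rate - 4.0 / 6.0).abs() < 1e-12);
    }
}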