1use crate::error::{StatsError, StatsResult};
4use crate::regression::utils::*;
5use crate::regression::RegressionResults;
6use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
7use scirs2_core::numeric::Float;
8use scirs2_linalg::{inv, lstsq};
9use std::collections::HashSet;
10
11type PreprocessingResult<F> = (Array2<F>, F, Array1<F>, Array1<F>);
13
14#[allow(clippy::too_many_arguments)]
59#[allow(dead_code)]
60pub fn ridge_regression<F>(
61 x: &ArrayView2<F>,
62 y: &ArrayView1<F>,
63 alpha: Option<F>,
64 fit_intercept: Option<bool>,
65 normalize: Option<bool>,
66 tol: Option<F>,
67 max_iter: Option<usize>,
68 conf_level: Option<F>,
69) -> StatsResult<RegressionResults<F>>
70where
71 F: Float
72 + std::iter::Sum<F>
73 + std::ops::Div<Output = F>
74 + std::fmt::Debug
75 + std::fmt::Display
76 + 'static
77 + scirs2_core::numeric::NumAssign
78 + scirs2_core::numeric::One
79 + scirs2_core::ndarray::ScalarOperand
80 + Send
81 + Sync,
82{
83 if x.nrows() != y.len() {
85 return Err(StatsError::DimensionMismatch(format!(
86 "Input x has {} rows but y has length {}",
87 x.nrows(),
88 y.len()
89 )));
90 }
91
92 let n = x.nrows();
93 let p_features = x.ncols();
94
95 let alpha = alpha.unwrap_or_else(|| F::from(1.0).expect("Failed to convert constant to float"));
97 let fit_intercept = fit_intercept.unwrap_or(true);
98 let normalize = normalize.unwrap_or(false);
99 let tol = tol.unwrap_or_else(|| F::from(1e-4).expect("Failed to convert constant to float"));
100 let max_iter = max_iter.unwrap_or(1000);
101 let conf_level =
102 conf_level.unwrap_or_else(|| F::from(0.95).expect("Failed to convert constant to float"));
103
104 if alpha < F::zero() {
105 return Err(StatsError::InvalidArgument(
106 "alpha must be non-negative".to_string(),
107 ));
108 }
109
110 let (x_processed, y_mean, x_mean, x_std) = preprocessdata(x, y, fit_intercept, normalize)?;
112
113 let p = if fit_intercept {
115 p_features + 1
116 } else {
117 p_features
118 };
119
120 if n < 2 {
122 return Err(StatsError::InvalidArgument(
123 "At least 2 observations required for ridge regression".to_string(),
124 ));
125 }
126
127 let ridgesize = if fit_intercept { p_features } else { p };
132 let mut x_ridge = Array2::zeros((n + ridgesize, p));
133
134 for i in 0..n {
136 for j in 0..p {
137 x_ridge[[i, j]] = x_processed[[i, j]];
138 }
139 }
140
141 let sqrt_alpha = scirs2_core::numeric::Float::sqrt(alpha);
143 for i in 0..ridgesize {
144 let j = if fit_intercept { i + 1 } else { i }; x_ridge[[n + i, j]] = sqrt_alpha;
146 }
147
148 let mut y_ridge = Array1::zeros(n + ridgesize);
150 for i in 0..n {
151 y_ridge[i] = y[i];
152 }
153
154 let coefficients = solve_ridge_system(&x_ridge.view(), &y_ridge.view(), tol, max_iter)?;
156
157 let transformed_coefficients = if normalize || fit_intercept {
159 transform_coefficients(&coefficients, y_mean, &x_mean, &x_std, fit_intercept)
160 } else {
161 coefficients.clone()
162 };
163
164 let x_design = if fit_intercept {
166 add_intercept(x)
167 } else {
168 x.to_owned()
169 };
170
171 let fitted_values = x_design.dot(&transformed_coefficients);
172 let residuals = y.to_owned() - &fitted_values;
173
174 let df_model = p - 1; let df_residuals = n - p;
177
178 let (_y_mean, ss_total, ss_residual, ss_explained) =
180 calculate_sum_of_squares(y, &residuals.view());
181
182 let r_squared = ss_explained / ss_total;
184 let adj_r_squared = F::one()
185 - (F::one() - r_squared) * F::from(n - 1).expect("Failed to convert to float")
186 / F::from(df_residuals).expect("Failed to convert to float");
187
188 let mse = ss_residual / F::from(df_residuals).expect("Failed to convert to float");
190 let residual_std_error = scirs2_core::numeric::Float::sqrt(mse);
191
192 let std_errors = match calculate_ridge_std_errors(
194 &x_design.view(),
195 &residuals.view(),
196 alpha,
197 df_residuals,
198 ) {
199 Ok(se) => se,
200 Err(_) => Array1::<F>::zeros(p),
201 };
202
203 let t_values = calculate_t_values(&transformed_coefficients, &std_errors);
205
206 let p_values = t_values.mapv(|t| {
208 let t_abs = crate::regression::utils::float_abs(t);
209 let df_f = F::from(df_residuals).expect("Failed to convert to float");
210 let ratio = t_abs / crate::regression::utils::float_sqrt(df_f + t_abs * t_abs);
211 let one_minus_ratio = F::one() - ratio;
212 F::from(2.0).expect("Failed to convert constant to float") * one_minus_ratio
213 });
214
215 let mut conf_intervals = Array2::<F>::zeros((p, 2));
217 let z = norm_ppf(
218 F::from(0.5).expect("Failed to convert constant to float") * (F::one() + conf_level),
219 );
220
221 for i in 0..p {
222 let margin = std_errors[i] * z;
223 conf_intervals[[i, 0]] = transformed_coefficients[i] - margin;
224 conf_intervals[[i, 1]] = transformed_coefficients[i] + margin;
225 }
226
227 let f_statistic = if df_model > 0 && df_residuals > 0 {
229 (ss_explained / F::from(df_model).expect("Failed to convert to float"))
230 / (ss_residual / F::from(df_residuals).expect("Failed to convert to float"))
231 } else {
232 F::infinity()
233 };
234
235 let f_p_value = F::zero(); Ok(RegressionResults {
240 coefficients: transformed_coefficients,
241 std_errors,
242 t_values,
243 p_values,
244 conf_intervals,
245 r_squared,
246 adj_r_squared,
247 f_statistic,
248 f_p_value,
249 residual_std_error,
250 df_residuals,
251 residuals,
252 fitted_values,
253 inlier_mask: vec![true; n], })
255}
256
257#[allow(dead_code)]
259fn solve_ridge_system<F>(
260 x_ridge: &ArrayView2<F>,
261 y_ridge: &ArrayView1<F>,
262 _tol: F,
263 _max_iter: usize,
264) -> StatsResult<Array1<F>>
265where
266 F: Float
267 + std::iter::Sum<F>
268 + std::ops::Div<Output = F>
269 + 'static
270 + scirs2_core::numeric::NumAssign
271 + scirs2_core::numeric::One
272 + scirs2_core::ndarray::ScalarOperand
273 + std::fmt::Display
274 + Send
275 + Sync,
276{
277 match lstsq(x_ridge, y_ridge, None) {
278 Ok(result) => Ok(result.x),
279 Err(e) => Err(StatsError::ComputationError(format!(
280 "Least squares computation failed: {:?}",
281 e
282 ))),
283 }
284}
285
286#[allow(dead_code)]
288fn preprocessdata<F>(
289 x: &ArrayView2<F>,
290 y: &ArrayView1<F>,
291 fit_intercept: bool,
292 normalize: bool,
293) -> StatsResult<PreprocessingResult<F>>
294where
295 F: Float + std::iter::Sum<F> + 'static + std::fmt::Display,
296{
297 let n = x.nrows();
298 let p = x.ncols();
299
300 let y_mean = if fit_intercept {
302 y.iter().cloned().sum::<F>() / F::from(n).expect("Failed to convert to float")
303 } else {
304 F::zero()
305 };
306
307 let mut x_mean = Array1::<F>::zeros(p);
309 let mut x_std = Array1::<F>::ones(p);
310
311 if fit_intercept || normalize {
312 for j in 0..p {
313 let col = x.column(j);
314 let mean =
315 col.iter().cloned().sum::<F>() / F::from(n).expect("Failed to convert to float");
316 x_mean[j] = mean;
317
318 if normalize {
319 let mut ss = F::zero();
320 for &val in col {
321 ss = ss + scirs2_core::numeric::Float::powi(val - mean, 2);
322 }
323 let std_dev = scirs2_core::numeric::Float::sqrt(
324 ss / F::from(n).expect("Failed to convert to float"),
325 );
326 x_std[j] = if std_dev > F::epsilon() {
327 std_dev
328 } else {
329 F::one()
330 };
331 }
332 }
333 }
334
335 let mut x_processed = if fit_intercept {
337 Array2::<F>::zeros((n, p + 1))
338 } else {
339 Array2::<F>::zeros((n, p))
340 };
341
342 if fit_intercept {
344 for i in 0..n {
345 x_processed[[i, 0]] = F::one();
346 }
347 }
348
349 let offset = if fit_intercept { 1 } else { 0 };
351 for i in 0..n {
352 for j in 0..p {
353 let val = if normalize || fit_intercept {
354 (x[[i, j]] - x_mean[j]) / x_std[j]
355 } else {
356 x[[i, j]]
357 };
358 x_processed[[i, j + offset]] = val;
359 }
360 }
361
362 Ok((x_processed, y_mean, x_mean, x_std))
363}
364
365#[allow(dead_code)]
367fn transform_coefficients<F>(
368 coefficients: &Array1<F>,
369 y_mean: F,
370 x_mean: &Array1<F>,
371 x_std: &Array1<F>,
372 fit_intercept: bool,
373) -> Array1<F>
374where
375 F: Float + 'static + std::fmt::Display,
376{
377 let _p = coefficients.len();
378 let p_features = x_mean.len();
379
380 let mut transformed = coefficients.clone();
381
382 if fit_intercept {
383 let mut _intercept = coefficients[0];
384
385 for j in 0..p_features {
387 _intercept = _intercept - coefficients[j + 1] * x_mean[j] / x_std[j];
388 }
389
390 _intercept = _intercept + y_mean;
392
393 transformed[0] = _intercept;
394
395 for j in 0..p_features {
397 transformed[j + 1] = coefficients[j + 1] / x_std[j];
398 }
399 } else {
400 for j in 0..p_features {
402 transformed[j] = coefficients[j] / x_std[j];
403 }
404 }
405
406 transformed
407}
408
409#[allow(dead_code)]
411fn calculate_ridge_std_errors<F>(
412 x: &ArrayView2<F>,
413 residuals: &ArrayView1<F>,
414 alpha: F,
415 df: usize,
416) -> StatsResult<Array1<F>>
417where
418 F: Float
419 + std::iter::Sum<F>
420 + std::ops::Div<Output = F>
421 + 'static
422 + scirs2_core::numeric::NumAssign
423 + scirs2_core::numeric::One
424 + scirs2_core::ndarray::ScalarOperand
425 + std::fmt::Display
426 + Send
427 + Sync,
428{
429 let mse = residuals
431 .iter()
432 .map(|&r| scirs2_core::numeric::Float::powi(r, 2))
433 .sum::<F>()
434 / F::from(df).expect("Failed to convert to float");
435
436 let xtx = x.t().dot(x);
438
439 let p = x.ncols();
441 let mut xtx_reg = xtx.clone();
442
443 for i in 0..p {
444 xtx_reg[[i, i]] += alpha;
445 }
446
447 let xtx_reg_inv = match inv(&xtx_reg.view(), None) {
449 Ok(inv_result) => inv_result,
450 Err(_) => {
451 return Ok(Array1::<F>::zeros(p));
453 }
454 };
455
456 let std_errors = (xtx_reg_inv.dot(&xtx).dot(&xtx_reg_inv))
459 .diag()
460 .mapv(|v| scirs2_core::numeric::Float::sqrt(v * mse));
461
462 Ok(std_errors)
463}
464
465#[allow(clippy::too_many_arguments)]
517#[allow(dead_code)]
518pub fn lasso_regression<F>(
519 x: &ArrayView2<F>,
520 y: &ArrayView1<F>,
521 alpha: Option<F>,
522 fit_intercept: Option<bool>,
523 normalize: Option<bool>,
524 tol: Option<F>,
525 max_iter: Option<usize>,
526 conf_level: Option<F>,
527) -> StatsResult<RegressionResults<F>>
528where
529 F: Float
530 + std::iter::Sum<F>
531 + std::ops::Div<Output = F>
532 + std::fmt::Debug
533 + std::fmt::Display
534 + 'static
535 + scirs2_core::numeric::NumAssign
536 + scirs2_core::numeric::One
537 + scirs2_core::ndarray::ScalarOperand
538 + Send
539 + Sync,
540{
541 if x.nrows() != y.len() {
543 return Err(StatsError::DimensionMismatch(format!(
544 "Input x has {} rows but y has length {}",
545 x.nrows(),
546 y.len()
547 )));
548 }
549
550 let n = x.nrows();
551 let p_features = x.ncols();
552
553 let alpha = alpha.unwrap_or_else(|| F::from(1.0).expect("Failed to convert constant to float"));
555 let fit_intercept = fit_intercept.unwrap_or(true);
556 let normalize = normalize.unwrap_or(false);
557 let tol = tol.unwrap_or_else(|| F::from(1e-4).expect("Failed to convert constant to float"));
558 let max_iter = max_iter.unwrap_or(1000);
559 let conf_level =
560 conf_level.unwrap_or_else(|| F::from(0.95).expect("Failed to convert constant to float"));
561
562 if alpha < F::zero() {
563 return Err(StatsError::InvalidArgument(
564 "alpha must be non-negative".to_string(),
565 ));
566 }
567
568 let (x_processed, y_mean, x_mean, x_std) = preprocessdata(x, y, fit_intercept, normalize)?;
570
571 let p = if fit_intercept {
573 p_features + 1
574 } else {
575 p_features
576 };
577
578 if n < 2 {
580 return Err(StatsError::InvalidArgument(
581 "At least 2 observations required for lasso regression".to_string(),
582 ));
583 }
584
585 let mut coefficients = Array1::<F>::zeros(p);
587
588 let xtx = x_processed.t().dot(&x_processed);
590 let xty = x_processed.t().dot(y);
591
592 let mut converged = false;
594 let mut _iter = 0;
595
596 while !converged && _iter < max_iter {
597 converged = true;
598
599 let old_coefs = coefficients.clone();
601
602 for j in 0..p {
604 let r_partial = xty[j]
606 - xtx
607 .row(j)
608 .iter()
609 .zip(coefficients.iter())
610 .enumerate()
611 .filter(|&(i_, _)| i_ != j)
612 .map(|(_, (&xtx_ij, &coef_i))| xtx_ij * coef_i)
613 .sum::<F>();
614
615 let xtx_jj = xtx[[j, j]];
617 if xtx_jj < F::epsilon() {
618 coefficients[j] = F::zero();
619 continue;
620 }
621
622 if j == 0 && fit_intercept {
623 coefficients[j] = r_partial / xtx_jj;
625 } else {
626 if crate::regression::utils::float_abs(r_partial) <= alpha {
628 coefficients[j] = F::zero();
629 } else if r_partial > F::zero() {
630 coefficients[j] = (r_partial - alpha) / xtx_jj;
631 } else {
632 coefficients[j] = (r_partial + alpha) / xtx_jj;
633 }
634 }
635 }
636
637 let coef_diff = (&coefficients - &old_coefs)
639 .mapv(|x| scirs2_core::numeric::Float::abs(x))
640 .sum();
641 let coef_norm = old_coefs
642 .mapv(|x| scirs2_core::numeric::Float::abs(x))
643 .sum()
644 .max(F::epsilon());
645
646 if coef_diff / coef_norm < tol {
647 converged = true;
648 }
649
650 _iter += 1;
651 }
652
653 let transformed_coefficients = if normalize || fit_intercept {
655 transform_coefficients(&coefficients, y_mean, &x_mean, &x_std, fit_intercept)
656 } else {
657 coefficients.clone()
658 };
659
660 let x_design = if fit_intercept {
662 add_intercept(x)
663 } else {
664 x.to_owned()
665 };
666
667 let fitted_values = x_design.dot(&transformed_coefficients);
668 let residuals = y.to_owned() - &fitted_values;
669
670 let nonzero_coefs = transformed_coefficients
673 .iter()
674 .filter(|&&x| crate::regression::utils::float_abs(x) > F::epsilon())
675 .count();
676 let df_model = nonzero_coefs - if fit_intercept { 1 } else { 0 };
677 let df_residuals = n - nonzero_coefs;
678
679 let (_y_mean, ss_total, ss_residual, ss_explained) =
681 calculate_sum_of_squares(y, &residuals.view());
682
683 let r_squared = ss_explained / ss_total;
685 let adj_r_squared = F::one()
686 - (F::one() - r_squared) * F::from(n - 1).expect("Failed to convert to float")
687 / F::from(df_residuals).expect("Failed to convert to float");
688
689 let mse = ss_residual / F::from(df_residuals).expect("Failed to convert to float");
691 let residual_std_error = scirs2_core::numeric::Float::sqrt(mse);
692
693 let std_errors = match calculate_lasso_std_errors(
695 &x_design.view(),
696 &residuals.view(),
697 &transformed_coefficients,
698 df_residuals,
699 ) {
700 Ok(se) => se,
701 Err(_) => Array1::<F>::zeros(p),
702 };
703
704 let t_values = calculate_t_values(&transformed_coefficients, &std_errors);
706
707 let p_values = t_values.mapv(|t| {
709 let t_abs = crate::regression::utils::float_abs(t);
710 let df_f = F::from(df_residuals).expect("Failed to convert to float");
711 let ratio = t_abs / crate::regression::utils::float_sqrt(df_f + t_abs * t_abs);
712 let one_minus_ratio = F::one() - ratio;
713 F::from(2.0).expect("Failed to convert constant to float") * one_minus_ratio
714 });
715
716 let mut conf_intervals = Array2::<F>::zeros((p, 2));
718 let z = norm_ppf(
719 F::from(0.5).expect("Failed to convert constant to float") * (F::one() + conf_level),
720 );
721
722 for i in 0..p {
723 let margin = std_errors[i] * z;
724 conf_intervals[[i, 0]] = transformed_coefficients[i] - margin;
725 conf_intervals[[i, 1]] = transformed_coefficients[i] + margin;
726 }
727
728 let f_statistic = if df_model > 0 && df_residuals > 0 {
730 (ss_explained / F::from(df_model).expect("Failed to convert to float"))
731 / (ss_residual / F::from(df_residuals).expect("Failed to convert to float"))
732 } else {
733 F::infinity()
734 };
735
736 let f_p_value = F::zero(); Ok(RegressionResults {
741 coefficients: transformed_coefficients,
742 std_errors,
743 t_values,
744 p_values,
745 conf_intervals,
746 r_squared,
747 adj_r_squared,
748 f_statistic,
749 f_p_value,
750 residual_std_error,
751 df_residuals,
752 residuals,
753 fitted_values,
754 inlier_mask: vec![true; n], })
756}
757
758#[allow(dead_code)]
760fn calculate_lasso_std_errors<F>(
761 x: &ArrayView2<F>,
762 residuals: &ArrayView1<F>,
763 coefficients: &Array1<F>,
764 df: usize,
765) -> StatsResult<Array1<F>>
766where
767 F: Float
768 + std::iter::Sum<F>
769 + std::ops::Div<Output = F>
770 + 'static
771 + scirs2_core::numeric::NumAssign
772 + scirs2_core::numeric::One
773 + scirs2_core::ndarray::ScalarOperand
774 + std::fmt::Display
775 + Send
776 + Sync,
777{
778 let mse = residuals
780 .iter()
781 .map(|&r| scirs2_core::numeric::Float::powi(r, 2))
782 .sum::<F>()
783 / F::from(df).expect("Failed to convert to float");
784
785 let p = coefficients.len();
787 let mut active_set = Vec::new();
788
789 for j in 0..p {
790 if crate::regression::utils::float_abs(coefficients[j]) > F::epsilon() {
791 active_set.push(j);
792 }
793 }
794
795 if active_set.is_empty() {
797 return Ok(Array1::<F>::zeros(p));
798 }
799
800 let n_active = active_set.len();
802 let mut xtx_active = Array2::<F>::zeros((n_active, n_active));
803
804 for (i, &idx_i) in active_set.iter().enumerate() {
805 for (j, &idx_j) in active_set.iter().enumerate() {
806 let x_i = x.column(idx_i);
807 let x_j = x.column(idx_j);
808
809 xtx_active[[i, j]] = x_i.iter().zip(x_j.iter()).map(|(&xi, &xj)| xi * xj).sum();
810 }
811 }
812
813 let xtx_active_inv = match inv(&xtx_active.view(), None) {
815 Ok(inv_result) => inv_result,
816 Err(_) => {
817 return Ok(Array1::<F>::zeros(p));
819 }
820 };
821
822 let mut std_errors = Array1::<F>::zeros(p);
824
825 for (i, &idx) in active_set.iter().enumerate() {
826 std_errors[idx] = scirs2_core::numeric::Float::sqrt(xtx_active_inv[[i, i]] * mse);
827 }
828
829 Ok(std_errors)
830}
831
832#[allow(clippy::too_many_arguments)]
883#[allow(dead_code)]
884pub fn elastic_net<F>(
885 x: &ArrayView2<F>,
886 y: &ArrayView1<F>,
887 alpha: Option<F>,
888 l1_ratio: Option<F>,
889 fit_intercept: Option<bool>,
890 normalize: Option<bool>,
891 tol: Option<F>,
892 max_iter: Option<usize>,
893 conf_level: Option<F>,
894) -> StatsResult<RegressionResults<F>>
895where
896 F: Float
897 + std::iter::Sum<F>
898 + std::ops::Div<Output = F>
899 + std::fmt::Debug
900 + std::fmt::Display
901 + 'static
902 + scirs2_core::numeric::NumAssign
903 + scirs2_core::numeric::One
904 + scirs2_core::ndarray::ScalarOperand
905 + Send
906 + Sync,
907{
908 if x.nrows() != y.len() {
910 return Err(StatsError::DimensionMismatch(format!(
911 "Input x has {} rows but y has length {}",
912 x.nrows(),
913 y.len()
914 )));
915 }
916
917 let n = x.nrows();
918 let p_features = x.ncols();
919
920 let alpha = alpha.unwrap_or_else(|| F::from(1.0).expect("Failed to convert constant to float"));
922 let l1_ratio =
923 l1_ratio.unwrap_or_else(|| F::from(0.5).expect("Failed to convert constant to float"));
924 let fit_intercept = fit_intercept.unwrap_or(true);
925 let normalize = normalize.unwrap_or(false);
926 let tol = tol.unwrap_or_else(|| F::from(1e-4).expect("Failed to convert constant to float"));
927 let max_iter = max_iter.unwrap_or(1000);
928 let conf_level =
929 conf_level.unwrap_or_else(|| F::from(0.95).expect("Failed to convert constant to float"));
930
931 if alpha < F::zero() {
932 return Err(StatsError::InvalidArgument(
933 "alpha must be non-negative".to_string(),
934 ));
935 }
936
937 if l1_ratio < F::zero() || l1_ratio > F::one() {
938 return Err(StatsError::InvalidArgument(
939 "l1_ratio must be between 0 and 1".to_string(),
940 ));
941 }
942
943 if l1_ratio < F::epsilon() {
945 return ridge_regression(
946 x,
947 y,
948 Some(alpha),
949 Some(fit_intercept),
950 Some(normalize),
951 Some(tol),
952 Some(max_iter),
953 Some(conf_level),
954 );
955 }
956
957 if crate::regression::utils::float_abs(l1_ratio - F::one()) < F::epsilon() {
959 return lasso_regression(
960 x,
961 y,
962 Some(alpha),
963 Some(fit_intercept),
964 Some(normalize),
965 Some(tol),
966 Some(max_iter),
967 Some(conf_level),
968 );
969 }
970
971 let (x_processed, y_mean, x_mean, x_std) = preprocessdata(x, y, fit_intercept, normalize)?;
973
974 let p = if fit_intercept {
976 p_features + 1
977 } else {
978 p_features
979 };
980
981 if n < 2 {
983 return Err(StatsError::InvalidArgument(
984 "At least 2 observations required for elastic net regression".to_string(),
985 ));
986 }
987
988 let mut coefficients = Array1::<F>::zeros(p);
990
991 let xtx = x_processed.t().dot(&x_processed);
993 let xty = x_processed.t().dot(y);
994
995 let alpha_l1 = alpha * l1_ratio;
997 let one_minus_l1_ratio = F::one() - l1_ratio;
998 let alpha_l2 = alpha * one_minus_l1_ratio;
999
1000 let mut converged = false;
1002 let mut _iter = 0;
1003
1004 while !converged && _iter < max_iter {
1005 converged = true;
1006
1007 let old_coefs = coefficients.clone();
1009
1010 for j in 0..p {
1012 let r_partial = xty[j]
1014 - xtx
1015 .row(j)
1016 .iter()
1017 .zip(coefficients.iter())
1018 .enumerate()
1019 .filter(|&(i_, _)| i_ != j)
1020 .map(|(_, (&xtx_ij, &coef_i))| xtx_ij * coef_i)
1021 .sum::<F>();
1022
1023 let xtx_jj = xtx[[j, j]] + alpha_l2;
1025 if xtx_jj < F::epsilon() {
1026 coefficients[j] = F::zero();
1027 continue;
1028 }
1029
1030 if j == 0 && fit_intercept {
1031 coefficients[j] = r_partial / xtx_jj;
1033 } else {
1034 if crate::regression::utils::float_abs(r_partial) <= alpha_l1 {
1036 coefficients[j] = F::zero();
1037 } else if r_partial > F::zero() {
1038 coefficients[j] = (r_partial - alpha_l1) / xtx_jj;
1039 } else {
1040 coefficients[j] = (r_partial + alpha_l1) / xtx_jj;
1041 }
1042 }
1043 }
1044
1045 let coef_diff = (&coefficients - &old_coefs)
1047 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1048 .sum();
1049 let coef_norm = old_coefs
1050 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1051 .sum()
1052 .max(F::epsilon());
1053
1054 if coef_diff / coef_norm < tol {
1055 converged = true;
1056 }
1057
1058 _iter += 1;
1059 }
1060
1061 let transformed_coefficients = if normalize || fit_intercept {
1063 transform_coefficients(&coefficients, y_mean, &x_mean, &x_std, fit_intercept)
1064 } else {
1065 coefficients.clone()
1066 };
1067
1068 let x_design = if fit_intercept {
1070 add_intercept(x)
1071 } else {
1072 x.to_owned()
1073 };
1074
1075 let fitted_values = x_design.dot(&transformed_coefficients);
1076 let residuals = y.to_owned() - &fitted_values;
1077
1078 let nonzero_coefs = transformed_coefficients
1081 .iter()
1082 .filter(|&&x| crate::regression::utils::float_abs(x) > F::epsilon())
1083 .count();
1084 let df_model = nonzero_coefs - if fit_intercept { 1 } else { 0 };
1085 let df_residuals = n - nonzero_coefs;
1086
1087 let (_y_mean, ss_total, ss_residual, ss_explained) =
1089 calculate_sum_of_squares(y, &residuals.view());
1090
1091 let r_squared = ss_explained / ss_total;
1093 let adj_r_squared = F::one()
1094 - (F::one() - r_squared) * F::from(n - 1).expect("Failed to convert to float")
1095 / F::from(df_residuals).expect("Failed to convert to float");
1096
1097 let mse = ss_residual / F::from(df_residuals).expect("Failed to convert to float");
1099 let residual_std_error = scirs2_core::numeric::Float::sqrt(mse);
1100
1101 let std_errors = match calculate_elastic_net_std_errors(
1103 &x_design.view(),
1104 &residuals.view(),
1105 &transformed_coefficients,
1106 alpha_l2,
1107 df_residuals,
1108 ) {
1109 Ok(se) => se,
1110 Err(_) => Array1::<F>::zeros(p),
1111 };
1112
1113 let t_values = calculate_t_values(&transformed_coefficients, &std_errors);
1115
1116 let p_values = t_values.mapv(|t| {
1118 let t_abs = crate::regression::utils::float_abs(t);
1119 let df_f = F::from(df_residuals).expect("Failed to convert to float");
1120 let _ratio = t_abs / crate::regression::utils::float_sqrt(df_f + t_abs * t_abs);
1121 let one_minus_ratio = F::one() - _ratio;
1122 F::from(2.0).expect("Failed to convert constant to float") * one_minus_ratio
1123 });
1124
1125 let mut conf_intervals = Array2::<F>::zeros((p, 2));
1127 let z = norm_ppf(
1128 F::from(0.5).expect("Failed to convert constant to float") * (F::one() + conf_level),
1129 );
1130
1131 for i in 0..p {
1132 let margin = std_errors[i] * z;
1133 conf_intervals[[i, 0]] = transformed_coefficients[i] - margin;
1134 conf_intervals[[i, 1]] = transformed_coefficients[i] + margin;
1135 }
1136
1137 let f_statistic = if df_model > 0 && df_residuals > 0 {
1139 (ss_explained / F::from(df_model).expect("Failed to convert to float"))
1140 / (ss_residual / F::from(df_residuals).expect("Failed to convert to float"))
1141 } else {
1142 F::infinity()
1143 };
1144
1145 let f_p_value = F::zero(); Ok(RegressionResults {
1150 coefficients: transformed_coefficients,
1151 std_errors,
1152 t_values,
1153 p_values,
1154 conf_intervals,
1155 r_squared,
1156 adj_r_squared,
1157 f_statistic,
1158 f_p_value,
1159 residual_std_error,
1160 df_residuals,
1161 residuals,
1162 fitted_values,
1163 inlier_mask: vec![true; n], })
1165}
1166
1167#[allow(dead_code)]
1169fn calculate_elastic_net_std_errors<F>(
1170 x: &ArrayView2<F>,
1171 residuals: &ArrayView1<F>,
1172 coefficients: &Array1<F>,
1173 alpha_l2: F,
1174 df: usize,
1175) -> StatsResult<Array1<F>>
1176where
1177 F: Float
1178 + std::iter::Sum<F>
1179 + std::ops::Div<Output = F>
1180 + 'static
1181 + scirs2_core::numeric::NumAssign
1182 + scirs2_core::numeric::One
1183 + scirs2_core::ndarray::ScalarOperand
1184 + std::fmt::Display
1185 + Send
1186 + Sync,
1187{
1188 let mse = residuals
1190 .iter()
1191 .map(|&r| scirs2_core::numeric::Float::powi(r, 2))
1192 .sum::<F>()
1193 / F::from(df).expect("Failed to convert to float");
1194
1195 let p = coefficients.len();
1197 let mut active_set = Vec::new();
1198
1199 for j in 0..p {
1200 if crate::regression::utils::float_abs(coefficients[j]) > F::epsilon() {
1201 active_set.push(j);
1202 }
1203 }
1204
1205 if active_set.is_empty() {
1207 return Ok(Array1::<F>::zeros(p));
1208 }
1209
1210 let n_active = active_set.len();
1212 let mut xtx_active = Array2::<F>::zeros((n_active, n_active));
1213
1214 for (i, &idx_i) in active_set.iter().enumerate() {
1215 for (j, &idx_j) in active_set.iter().enumerate() {
1216 let x_i = x.column(idx_i);
1217 let x_j = x.column(idx_j);
1218
1219 xtx_active[[i, j]] = x_i.iter().zip(x_j.iter()).map(|(&xi, &xj)| xi * xj).sum();
1220
1221 if i == j {
1223 xtx_active[[i, j]] += alpha_l2;
1224 }
1225 }
1226 }
1227
1228 let xtx_active_inv = match inv(&xtx_active.view(), None) {
1230 Ok(inv_result) => inv_result,
1231 Err(_) => {
1232 return Ok(Array1::<F>::zeros(p));
1234 }
1235 };
1236
1237 let mut std_errors = Array1::<F>::zeros(p);
1239
1240 for (i, &idx) in active_set.iter().enumerate() {
1241 std_errors[idx] = scirs2_core::numeric::Float::sqrt(xtx_active_inv[[i, i]] * mse);
1242 }
1243
1244 Ok(std_errors)
1245}
1246
1247#[allow(clippy::too_many_arguments)]
1303#[allow(dead_code)]
1304pub fn group_lasso<F>(
1305 x: &ArrayView2<F>,
1306 y: &ArrayView1<F>,
1307 groups: &[usize],
1308 alpha: Option<F>,
1309 fit_intercept: Option<bool>,
1310 normalize: Option<bool>,
1311 tol: Option<F>,
1312 max_iter: Option<usize>,
1313 conf_level: Option<F>,
1314) -> StatsResult<RegressionResults<F>>
1315where
1316 F: Float
1317 + std::iter::Sum<F>
1318 + std::ops::Div<Output = F>
1319 + std::fmt::Debug
1320 + std::fmt::Display
1321 + 'static
1322 + scirs2_core::numeric::NumAssign
1323 + scirs2_core::numeric::One
1324 + scirs2_core::ndarray::ScalarOperand
1325 + Send
1326 + Sync,
1327{
1328 if x.nrows() != y.len() {
1330 return Err(StatsError::DimensionMismatch(format!(
1331 "Input x has {} rows but y has length {}",
1332 x.nrows(),
1333 y.len()
1334 )));
1335 }
1336
1337 if x.ncols() != groups.len() {
1338 return Err(StatsError::DimensionMismatch(format!(
1339 "Number of columns in x ({}) must match length of groups ({})",
1340 x.ncols(),
1341 groups.len()
1342 )));
1343 }
1344
1345 let n = x.nrows();
1346 let p_features = x.ncols();
1347
1348 let alpha = alpha.unwrap_or_else(|| F::from(1.0).expect("Failed to convert constant to float"));
1350 let fit_intercept = fit_intercept.unwrap_or(true);
1351 let normalize = normalize.unwrap_or(false);
1352 let tol = tol.unwrap_or_else(|| F::from(1e-4).expect("Failed to convert constant to float"));
1353 let max_iter = max_iter.unwrap_or(1000);
1354 let conf_level =
1355 conf_level.unwrap_or_else(|| F::from(0.95).expect("Failed to convert constant to float"));
1356
1357 if alpha < F::zero() {
1358 return Err(StatsError::InvalidArgument(
1359 "alpha must be non-negative".to_string(),
1360 ));
1361 }
1362
1363 let (x_processed, y_mean, x_mean, x_std) = preprocessdata(x, y, fit_intercept, normalize)?;
1365
1366 let p = if fit_intercept {
1368 p_features + 1
1369 } else {
1370 p_features
1371 };
1372
1373 if n < 2 {
1375 return Err(StatsError::InvalidArgument(
1376 "At least 2 observations required for group lasso regression".to_string(),
1377 ));
1378 }
1379
1380 let mut unique_groups = HashSet::new();
1382 for &g in groups {
1383 unique_groups.insert(g);
1384 }
1385
1386 let mut group_indices = Vec::new();
1387 for &g in &unique_groups {
1388 let mut indices = Vec::new();
1389 for (i, &group) in groups.iter().enumerate() {
1390 if group == g {
1391 indices.push(if fit_intercept { i + 1 } else { i });
1392 }
1393 }
1394 group_indices.push(indices);
1395 }
1396
1397 let mut coefficients = Array1::<F>::zeros(p);
1399
1400 let mut converged = false;
1402 let mut _iter = 0;
1403
1404 while !converged && _iter < max_iter {
1405 converged = true;
1406
1407 let old_coefs = coefficients.clone();
1409
1410 if fit_intercept {
1412 let r = y - &x_processed
1413 .slice(s![.., 1..])
1414 .dot(&coefficients.slice(s![1..]));
1415 let r_sum: F = r.iter().cloned().sum();
1416 coefficients[0] = r_sum / F::from(r.len()).expect("Operation failed");
1417 }
1418
1419 for group in &group_indices {
1421 if group.is_empty() {
1423 continue;
1424 }
1425
1426 let mut r = y.to_owned();
1428
1429 for j in 0..p {
1431 if !group.contains(&j) {
1432 let x_j = x_processed.column(j);
1433 let beta_j = coefficients[j];
1434
1435 for i in 0..n {
1436 r[i] -= x_j[i] * beta_j;
1437 }
1438 }
1439 }
1440
1441 let mut x_group = Array2::<F>::zeros((n, group.len()));
1443 for (i, &idx) in group.iter().enumerate() {
1444 x_group.column_mut(i).assign(&x_processed.column(idx));
1445 }
1446
1447 let xtr = x_group.t().dot(&r);
1449
1450 let xtx = x_group.t().dot(&x_group);
1452
1453 let xtr_norm = scirs2_core::numeric::Float::sqrt(
1455 xtr.iter()
1456 .map(|&x| scirs2_core::numeric::Float::powi(x, 2))
1457 .sum::<F>(),
1458 );
1459
1460 if xtr_norm < alpha {
1462 for &idx in group {
1463 coefficients[idx] = F::zero();
1464 }
1465 continue;
1466 }
1467
1468 let mut beta_group = match solve_group(xtr, xtx, alpha, tol, max_iter) {
1470 Ok(beta) => beta,
1471 Err(_) => Array1::<F>::zeros(group.len()),
1472 };
1473
1474 let beta_norm = scirs2_core::numeric::Float::sqrt(
1476 beta_group
1477 .iter()
1478 .map(|&x| scirs2_core::numeric::Float::powi(x, 2))
1479 .sum::<F>(),
1480 );
1481 if beta_norm > F::epsilon() {
1482 let shrinkage = F::one().max((beta_norm - alpha) / beta_norm);
1483 beta_group = beta_group.mapv(|x| x * shrinkage);
1484 } else {
1485 beta_group.fill(F::zero());
1486 }
1487
1488 for (i, &idx) in group.iter().enumerate() {
1490 coefficients[idx] = beta_group[i];
1491 }
1492 }
1493
1494 let coef_diff = (&coefficients - &old_coefs)
1496 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1497 .sum();
1498 let coef_norm = old_coefs
1499 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1500 .sum()
1501 .max(F::epsilon());
1502
1503 if coef_diff / coef_norm < tol {
1504 converged = true;
1505 }
1506
1507 _iter += 1;
1508 }
1509
1510 let transformed_coefficients = if normalize || fit_intercept {
1512 transform_coefficients(&coefficients, y_mean, &x_mean, &x_std, fit_intercept)
1513 } else {
1514 coefficients.clone()
1515 };
1516
1517 let x_design = if fit_intercept {
1519 add_intercept(x)
1520 } else {
1521 x.to_owned()
1522 };
1523
1524 let fitted_values = x_design.dot(&transformed_coefficients);
1525 let residuals = y.to_owned() - &fitted_values;
1526
1527 let mut nonzero_coefs = 0;
1530 let mut nonzero_groups = HashSet::new();
1531
1532 for (i, &g) in groups.iter().enumerate() {
1533 let idx = if fit_intercept { i + 1 } else { i };
1534 if crate::regression::utils::float_abs(transformed_coefficients[idx]) > F::epsilon() {
1535 nonzero_groups.insert(g);
1536 }
1537 }
1538
1539 for &g in &nonzero_groups {
1540 let groupsize = groups.iter().filter(|&&group| group == g).count();
1541 nonzero_coefs += groupsize;
1542 }
1543
1544 if fit_intercept
1545 && crate::regression::utils::float_abs(transformed_coefficients[0]) > F::epsilon()
1546 {
1547 nonzero_coefs += 1;
1548 }
1549
1550 let df_model = nonzero_coefs - if fit_intercept { 1 } else { 0 };
1551 let df_residuals = n - nonzero_coefs;
1552
1553 let (_y_mean, ss_total, ss_residual, ss_explained) =
1555 calculate_sum_of_squares(y, &residuals.view());
1556
1557 let r_squared = ss_explained / ss_total;
1559 let adj_r_squared = F::one()
1560 - (F::one() - r_squared) * F::from(n - 1).expect("Failed to convert to float")
1561 / F::from(df_residuals).expect("Failed to convert to float");
1562
1563 let mse = ss_residual / F::from(df_residuals).expect("Failed to convert to float");
1565 let residual_std_error = scirs2_core::numeric::Float::sqrt(mse);
1566
1567 let std_errors = match calculate_group_lasso_std_errors(
1569 &x_design.view(),
1570 &residuals.view(),
1571 &transformed_coefficients,
1572 groups,
1573 fit_intercept,
1574 df_residuals,
1575 ) {
1576 Ok(se) => se,
1577 Err(_) => Array1::<F>::zeros(p),
1578 };
1579
1580 let t_values = calculate_t_values(&transformed_coefficients, &std_errors);
1582
1583 let p_values = t_values.mapv(|t| {
1585 let t_abs = crate::regression::utils::float_abs(t);
1586 let df_f = F::from(df_residuals).expect("Failed to convert to float");
1587 let ratio = t_abs / crate::regression::utils::float_sqrt(df_f + t_abs * t_abs);
1588 let one_minus_ratio = F::one() - ratio;
1589 F::from(2.0).expect("Failed to convert constant to float") * one_minus_ratio
1590 });
1591
1592 let mut conf_intervals = Array2::<F>::zeros((p, 2));
1594 let z = norm_ppf(
1595 F::from(0.5).expect("Failed to convert constant to float") * (F::one() + conf_level),
1596 );
1597
1598 for i in 0..p {
1599 let margin = std_errors[i] * z;
1600 conf_intervals[[i, 0]] = transformed_coefficients[i] - margin;
1601 conf_intervals[[i, 1]] = transformed_coefficients[i] + margin;
1602 }
1603
1604 let f_statistic = if df_model > 0 && df_residuals > 0 {
1606 (ss_explained / F::from(df_model).expect("Failed to convert to float"))
1607 / (ss_residual / F::from(df_residuals).expect("Failed to convert to float"))
1608 } else {
1609 F::infinity()
1610 };
1611
1612 let f_p_value = F::zero(); Ok(RegressionResults {
1617 coefficients: transformed_coefficients,
1618 std_errors,
1619 t_values,
1620 p_values,
1621 conf_intervals,
1622 r_squared,
1623 adj_r_squared,
1624 f_statistic,
1625 f_p_value,
1626 residual_std_error,
1627 df_residuals,
1628 residuals,
1629 fitted_values,
1630 inlier_mask: vec![true; n], })
1632}
1633
1634#[allow(dead_code)]
1636fn solve_group<F>(
1637 xtr: Array1<F>,
1638 xtx: Array2<F>,
1639 _alpha: F,
1640 tol: F,
1641 max_iter: usize,
1642) -> StatsResult<Array1<F>>
1643where
1644 F: Float
1645 + std::iter::Sum<F>
1646 + std::ops::Div<Output = F>
1647 + 'static
1648 + scirs2_core::numeric::NumAssign
1649 + scirs2_core::numeric::One
1650 + scirs2_core::ndarray::ScalarOperand
1651 + std::fmt::Display
1652 + Send
1653 + Sync,
1654{
1655 let p = xtr.len();
1656
1657 let mut beta = Array1::<F>::zeros(p);
1659
1660 match inv(&xtx.view(), None) {
1662 Ok(xtx_inv) => {
1663 beta = xtx_inv.dot(&xtr);
1664 return Ok(beta);
1665 }
1666 Err(_) => {
1667 }
1669 }
1670
1671 let mut _iter = 0;
1673 let mut converged = false;
1674
1675 let lr = F::from(0.01).expect("Failed to convert constant to float");
1677
1678 while !converged && _iter < max_iter {
1679 let old_beta = beta.clone();
1680
1681 let xtx_beta = xtx.dot(&beta);
1683 let grad = &xtx_beta - &xtr;
1684
1685 let lr_grad = grad.mapv(|g| g * lr);
1687 beta = &beta - &lr_grad;
1688
1689 let beta_diff = (&beta - &old_beta)
1691 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1692 .sum();
1693 let beta_norm = old_beta
1694 .mapv(|x| scirs2_core::numeric::Float::abs(x))
1695 .sum()
1696 .max(F::epsilon());
1697
1698 if beta_diff / beta_norm < tol {
1699 converged = true;
1700 }
1701
1702 _iter += 1;
1703 }
1704
1705 Ok(beta)
1706}
1707
1708#[allow(dead_code)]
1710fn calculate_group_lasso_std_errors<F>(
1711 x: &ArrayView2<F>,
1712 residuals: &ArrayView1<F>,
1713 coefficients: &Array1<F>,
1714 groups: &[usize],
1715 fit_intercept: bool,
1716 df: usize,
1717) -> StatsResult<Array1<F>>
1718where
1719 F: Float
1720 + std::iter::Sum<F>
1721 + std::ops::Div<Output = F>
1722 + 'static
1723 + scirs2_core::numeric::NumAssign
1724 + scirs2_core::numeric::One
1725 + scirs2_core::ndarray::ScalarOperand
1726 + std::fmt::Display
1727 + Send
1728 + Sync,
1729{
1730 let mse = residuals
1732 .iter()
1733 .map(|&r| scirs2_core::numeric::Float::powi(r, 2))
1734 .sum::<F>()
1735 / F::from(df).expect("Failed to convert to float");
1736
1737 let p = coefficients.len();
1739 let mut active_groups = HashSet::new();
1740
1741 for (i, &g) in groups.iter().enumerate() {
1742 let idx = if fit_intercept { i + 1 } else { i };
1743 if crate::regression::utils::float_abs(coefficients[idx]) > F::epsilon() {
1744 active_groups.insert(g);
1745 }
1746 }
1747
1748 let mut active_set = Vec::new();
1750
1751 if fit_intercept && crate::regression::utils::float_abs(coefficients[0]) > F::epsilon() {
1752 active_set.push(0);
1753 }
1754
1755 for (i, &g) in groups.iter().enumerate() {
1756 if active_groups.contains(&g) {
1757 let idx = if fit_intercept { i + 1 } else { i };
1758 active_set.push(idx);
1759 }
1760 }
1761
1762 if active_set.is_empty() {
1764 return Ok(Array1::<F>::zeros(p));
1765 }
1766
1767 let n_active = active_set.len();
1769 let mut xtx_active = Array2::<F>::zeros((n_active, n_active));
1770
1771 for (i, &idx_i) in active_set.iter().enumerate() {
1772 for (j, &idx_j) in active_set.iter().enumerate() {
1773 let x_i = x.column(idx_i);
1774 let x_j = x.column(idx_j);
1775
1776 xtx_active[[i, j]] = x_i.iter().zip(x_j.iter()).map(|(&xi, &xj)| xi * xj).sum();
1777 }
1778 }
1779
1780 let xtx_active_inv = match inv(&xtx_active.view(), None) {
1782 Ok(inv_result) => inv_result,
1783 Err(_) => {
1784 return Ok(Array1::<F>::zeros(p));
1786 }
1787 };
1788
1789 let mut std_errors = Array1::<F>::zeros(p);
1791
1792 for (i, &idx) in active_set.iter().enumerate() {
1793 std_errors[idx] = scirs2_core::numeric::Float::sqrt(xtx_active_inv[[i, i]] * mse);
1794 }
1795
1796 Ok(std_errors)
1797}