use crate::error::{StatsError, StatsResult as Result};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::validation::*;
/// Kaplan-Meier product-limit estimator of a survival function.
#[derive(Debug, Clone)]
pub struct KaplanMeierEstimator {
// Distinct times at which at least one event occurred (sorted ascending).
pub event_times: Array1<f64>,
// Estimated S(t) immediately after each event time.
pub survival_function: Array1<f64>,
// Optional pointwise (lower, upper) confidence bounds, parallel to `event_times`.
pub confidence_intervals: Option<(Array1<f64>, Array1<f64>)>,
// Number of subjects at risk just before each event time.
pub at_risk: Array1<usize>,
// Number of events at each event time.
pub events: Array1<usize>,
// First event time at which S(t) <= 0.5, if the curve gets that low.
pub median_survival_time: Option<f64>,
}
impl KaplanMeierEstimator {
/// Fit a Kaplan-Meier (product-limit) survival curve.
///
/// `durations[i]` is the follow-up time of subject `i`; `event_observed[i]`
/// is `true` if the event occurred at that time and `false` if the subject
/// was right-censored. When `confidence_level` is given it must be a
/// probability and pointwise confidence intervals are computed.
///
/// Errors on non-finite durations, mismatched lengths, or empty input.
pub fn fit(
durations: ArrayView1<f64>,
event_observed: ArrayView1<bool>,
confidence_level: Option<f64>,
) -> Result<Self> {
checkarray_finite(&durations, "durations")?;
if durations.len() != event_observed.len() {
return Err(StatsError::DimensionMismatch(format!(
"durations length ({durations_len}) must match event_observed length ({events_len})",
durations_len = durations.len(),
events_len = event_observed.len()
)));
}
if durations.is_empty() {
return Err(StatsError::InvalidArgument(
"Input arrays cannot be empty".to_string(),
));
}
if let Some(conf) = confidence_level {
check_probability(conf, "confidence_level")?;
}
// Sort (time, event) pairs by time so tied observations can be grouped
// in a single pass.
let mut time_event_pairs: Vec<(f64, bool)> = durations
.iter()
.zip(event_observed.iter())
.map(|(&t, &e)| (t, e))
.collect();
time_event_pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
let mut unique_times = Vec::new();
let mut at_risk_counts = Vec::new();
let mut event_counts = Vec::new();
let mut survival_probs = Vec::new();
let n = time_event_pairs.len();
let mut current_survival = 1.0;
let mut current_at_risk = n;
let mut i = 0;
while i < time_event_pairs.len() {
let current_time = time_event_pairs[i].0;
let mut events_at_time = 0;
let mut censored_at_time = 0;
// Tally events and censorings tied at the current time.
while i < time_event_pairs.len() && time_event_pairs[i].0 == current_time {
if time_event_pairs[i].1 {
events_at_time += 1;
} else {
censored_at_time += 1;
}
i += 1;
}
// The curve only steps at times with at least one event; censoring-only
// times merely shrink the risk set below.
if events_at_time > 0 {
let survival_this_time = 1.0 - (events_at_time as f64) / (current_at_risk as f64);
current_survival *= survival_this_time;
unique_times.push(current_time);
at_risk_counts.push(current_at_risk);
event_counts.push(events_at_time);
survival_probs.push(current_survival);
}
// Everyone observed at this time (event or censored) leaves the risk set.
current_at_risk -= events_at_time + censored_at_time;
}
let event_times = Array1::from_vec(unique_times);
let survival_function = Array1::from_vec(survival_probs);
let at_risk = Array1::from_vec(at_risk_counts);
let events = Array1::from_vec(event_counts);
let confidence_intervals = if let Some(conf_level) = confidence_level {
Some(Self::calculate_confidence_intervals(
&survival_function,
&at_risk,
&events,
conf_level,
)?)
} else {
None
};
let median_survival_time =
Self::calculate_median_survival(&event_times, &survival_function);
Ok(Self {
event_times,
survival_function,
confidence_intervals,
at_risk,
events,
median_survival_time,
})
}
/// Pointwise confidence intervals for a Kaplan-Meier curve using
/// Greenwood's variance and the log(-log S) transform (keeps bounds in
/// (0, 1)).
///
/// Fixes two defects in the previous version: the back-transform negated
/// `exp(-exp(x))` (forcing the lower bound to 0 and the upper bound
/// negative), and `confidence_level` was ignored in favour of a hard-coded
/// z = 1.96.
fn calculate_confidence_intervals(
    survival_function: &Array1<f64>,
    at_risk: &Array1<usize>,
    events: &Array1<usize>,
    confidence_level: f64,
) -> Result<(Array1<f64>, Array1<f64>)> {
    // Two-sided normal quantile for the requested level
    // (Abramowitz & Stegun 26.2.23 rational approximation, |err| < 4.5e-4).
    let tail = ((1.0 - confidence_level) / 2.0).clamp(1e-12, 0.5);
    let t = (-2.0 * tail.ln()).sqrt();
    let z_score = t
        - (2.515517 + 0.802853 * t + 0.010328 * t * t)
            / (1.0 + 1.432788 * t + 0.189269 * t * t + 0.001308 * t * t * t);
    let mut lower_bounds = Array1::zeros(survival_function.len());
    let mut upper_bounds = Array1::zeros(survival_function.len());
    // Greenwood's formula: accumulate d_i / (n_i (n_i - d_i)) over event times.
    let mut cumulative_variance = 0.0;
    for i in 0..survival_function.len() {
        let n_i = at_risk[i] as f64;
        let d_i = events[i] as f64;
        if n_i > d_i && n_i > 0.0 {
            cumulative_variance += d_i / (n_i * (n_i - d_i));
        }
        let s_t = survival_function[i];
        if s_t > 0.0 && s_t < 1.0 {
            // theta = log(-log S); delta-method SE of theta is
            // sqrt(V) / |log S| (the original's se / (S |log S|) simplified).
            let log_log_s = (-(s_t.ln())).ln();
            let se_log_log = cumulative_variance.sqrt() / s_t.ln().abs();
            // theta is decreasing in S, so the +z end of the theta interval
            // maps to the LOWER survival bound.
            lower_bounds[i] = (-(log_log_s + z_score * se_log_log).exp()).exp().max(0.0);
            upper_bounds[i] = (-(log_log_s - z_score * se_log_log).exp()).exp().min(1.0);
        } else if s_t >= 1.0 {
            // No events yet: the estimate is exactly 1 with no spread.
            lower_bounds[i] = 1.0;
            upper_bounds[i] = 1.0;
        } else {
            // S == 0: the transform is undefined; report a degenerate interval.
            lower_bounds[i] = 0.0;
            upper_bounds[i] = 0.0;
        }
    }
    Ok((lower_bounds, upper_bounds))
}
/// First event time at which the survival curve drops to 0.5 or below;
/// `None` when the curve never reaches the median.
fn calculate_median_survival(
    event_times: &Array1<f64>,
    survival_function: &Array1<f64>,
) -> Option<f64> {
    survival_function
        .iter()
        .position(|&s| s <= 0.5)
        .map(|idx| event_times[idx])
}
pub fn predict(&self, times: ArrayView1<f64>) -> Result<Array1<f64>> {
checkarray_finite(×, "times")?;
let mut predictions = Array1::zeros(times.len());
for (i, &t) in times.iter().enumerate() {
if t < 0.0 {
return Err(StatsError::InvalidArgument(
"Times must be non-negative".to_string(),
));
}
let mut survival_prob = 1.0;
for j in 0..self.event_times.len() {
if self.event_times[j] <= t {
survival_prob = self.survival_function[j];
} else {
break;
}
}
predictions[i] = survival_prob;
}
Ok(predictions)
}
}
/// Two-sample log-rank test for comparing survival curves.
pub struct LogRankTest;
impl LogRankTest {
/// Two-sample log-rank test.
///
/// Returns `(chi-square statistic, p-value)` for the null hypothesis that
/// both groups share the same survival function; the statistic has one
/// degree of freedom. Errors on non-finite durations or mismatched
/// duration/event lengths.
pub fn compare_two_groups(
durations1: ArrayView1<f64>,
events1: ArrayView1<bool>,
durations2: ArrayView1<f64>,
events2: ArrayView1<bool>,
) -> Result<(f64, f64)> {
checkarray_finite(&durations1, "durations1")?;
checkarray_finite(&durations2, "durations2")?;
if durations1.len() != events1.len() || durations2.len() != events2.len() {
return Err(StatsError::DimensionMismatch(
"Durations and events arrays must have same length".to_string(),
));
}
// Pool both samples, tagging each observation with its group (0 or 1).
let mut combineddata = Vec::new();
for i in 0..durations1.len() {
combineddata.push((durations1[i], events1[i], 0)); }
for i in 0..durations2.len() {
combineddata.push((durations2[i], events2[i], 1)); }
combineddata.sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
let mut observed_group1 = 0.0;
let mut expected_group1 = 0.0;
let mut variance = 0.0;
let n1 = durations1.len() as f64;
let n2 = durations2.len() as f64;
let mut at_risk1 = n1;
let mut at_risk2 = n2;
let mut i = 0;
while i < combineddata.len() {
let current_time = combineddata[i].0;
let mut events_group1 = 0.0;
let mut events_group2 = 0.0;
let mut censored_group1 = 0.0;
let mut censored_group2 = 0.0;
// Tally events and censorings for all observations tied at this time.
while i < combineddata.len() && combineddata[i].0 == current_time {
let (_, is_event, group) = combineddata[i];
if group == 0 {
if is_event {
events_group1 += 1.0;
} else {
censored_group1 += 1.0;
}
} else if is_event {
events_group2 += 1.0;
} else {
censored_group2 += 1.0;
}
i += 1;
}
let total_events = events_group1 + events_group2;
let total_at_risk = at_risk1 + at_risk2;
if total_events > 0.0 && total_at_risk > 0.0 {
// Under H0 group 1 contributes events in proportion to its share of
// the risk set; variance follows the hypergeometric formula.
let expected_events1 = (at_risk1 / total_at_risk) * total_events;
let var_contrib =
(at_risk1 * at_risk2 * total_events * (total_at_risk - total_events))
/ (total_at_risk.powi(2) * (total_at_risk - 1.0).max(1.0));
observed_group1 += events_group1;
expected_group1 += expected_events1;
variance += var_contrib;
}
// Remove everyone seen at this time from their group's risk set.
at_risk1 -= events_group1 + censored_group1;
at_risk2 -= events_group2 + censored_group2;
}
// Zero variance means there were no comparable event times: null result.
if variance <= 0.0 {
return Ok((0.0, 1.0)); }
let test_statistic = (observed_group1 - expected_group1).powi(2) / variance;
let p_value = Self::chi_square_survival(test_statistic, 1.0);
Ok((test_statistic, p_value))
}
fn chi_square_survival(x: f64, df: f64) -> f64 {
if x <= 0.0 {
return 1.0;
}
let mean = df;
let var = 2.0 * df;
let std = var.sqrt();
if df > 30.0 {
let z = (x - mean) / std;
return 0.5 * (1.0 - Self::erf(z / std::f64::consts::SQRT_2));
}
(-x / mean).exp()
}
/// Error function via the Abramowitz & Stegun 7.1.26 rational
/// approximation (maximum absolute error about 1.5e-7).
fn erf(x: f64) -> f64 {
    const A1: f64 = 0.254829592;
    const A2: f64 = -0.284496736;
    const A3: f64 = 1.421413741;
    const A4: f64 = -1.453152027;
    const A5: f64 = 1.061405429;
    const P: f64 = 0.3275911;
    // erf is odd: evaluate on |x| and restore the sign at the end.
    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
    let ax = x.abs();
    let t = 1.0 / (1.0 + P * ax);
    let poly = ((((A5 * t + A4) * t + A3) * t + A2) * t + A1) * t;
    sign * (1.0 - poly * (-ax * ax).exp())
}
}
/// Fitted Cox proportional-hazards model.
#[derive(Debug, Clone)]
pub struct CoxPHModel {
// Regression coefficients (log hazard ratios).
pub coefficients: Array1<f64>,
// Inverse of the negated Hessian at the optimum (Wald covariance).
pub covariance_matrix: Array2<f64>,
// Log partial likelihood at convergence.
pub log_likelihood: f64,
// Cumulative baseline hazard evaluated at `baseline_times`.
pub baseline_cumulative_hazard: Array1<f64>,
pub baseline_times: Array1<f64>,
// Newton-Raphson iterations performed before convergence.
pub n_iter: usize,
}
impl CoxPHModel {
pub fn fit(
durations: ArrayView1<f64>,
events: ArrayView1<bool>,
covariates: ArrayView2<f64>,
max_iter: Option<usize>,
tol: Option<f64>,
) -> Result<Self> {
checkarray_finite(&durations, "durations")?;
checkarray_finite(&covariates, "covariates")?;
let (n_samples_, n_features) = covariates.dim();
let max_iter = max_iter.unwrap_or(100);
let tol = tol.unwrap_or(1e-6);
if durations.len() != n_samples_ || events.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(
"All input arrays must have the same number of samples".to_string(),
));
}
let mut beta = Array1::zeros(n_features);
let mut prev_log_likelihood = f64::NEG_INFINITY;
for iteration in 0..max_iter {
let (log_likelihood, gradient, hessian) =
Self::partial_likelihood_derivatives(&durations, &events, &covariates, &beta)?;
if (log_likelihood - prev_log_likelihood).abs() < tol {
let covariance_matrix = Self::invert_hessian(&hessian)?;
let (baseline_times, baseline_cumulative_hazard) =
Self::calculatebaseline_hazard(&durations, &events, &covariates, &beta)?;
return Ok(Self {
coefficients: beta,
covariance_matrix,
log_likelihood,
baseline_cumulative_hazard,
baseline_times,
n_iter: iteration + 1,
});
}
let hessian_inv = Self::invert_hessian(&hessian)?;
let delta = hessian_inv.dot(&gradient);
beta = &beta + δ
prev_log_likelihood = log_likelihood;
}
Err(StatsError::ConvergenceError(format!(
"Cox model failed to converge after {max_iter} iterations"
)))
}
/// Log partial likelihood, gradient and Hessian at `beta`.
///
/// Each event is processed against the full risk set (subjects with
/// duration >= the event time). The returned Hessian is the second
/// derivative of the log-likelihood (negative semi-definite term per
/// event); `invert_hessian` negates it to obtain the information matrix.
///
/// NOTE(review): the risk set is rebuilt per event, so this is O(n^2) per
/// Newton iteration; fine for small samples, slow for large ones.
fn partial_likelihood_derivatives(
durations: &ArrayView1<f64>,
events: &ArrayView1<bool>,
covariates: &ArrayView2<f64>,
beta: &Array1<f64>,
) -> Result<(f64, Array1<f64>, Array2<f64>)> {
let n_samples_ = durations.len();
let n_features = beta.len();
// Visit subjects in increasing order of time.
let mut indices: Vec<usize> = (0..n_samples_).collect();
indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let mut log_likelihood = 0.0;
let mut gradient = Array1::zeros(n_features);
let mut hessian = Array2::zeros((n_features, n_features));
for &i in &indices {
// Only uncensored observations contribute likelihood terms.
if !events[i] {
continue; }
let t_i = durations[i];
let x_i = covariates.row(i);
// Risk set: everyone still under observation at t_i.
let mut risk_set = Vec::new();
for &j in &indices {
if durations[j] >= t_i {
risk_set.push(j);
}
}
if risk_set.is_empty() {
continue;
}
let mut exp_beta_x = Array1::zeros(risk_set.len());
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
exp_beta_x[k] = x_j.dot(beta).exp();
}
let sum_exp = exp_beta_x.sum();
if sum_exp <= 0.0 {
continue;
}
log_likelihood += x_i.dot(beta) - sum_exp.ln();
// Risk-set-weighted mean covariate vector.
let mut weighted_x = Array1::<f64>::zeros(n_features);
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
let weight = exp_beta_x[k] / sum_exp;
weighted_x = &weighted_x + &(weight * &x_j.to_owned());
}
// Score: observed covariates minus their risk-set expectation.
gradient = &gradient + &(&x_i.to_owned() - &weighted_x);
// Hessian term: minus the weighted covariance of covariates in the risk set.
for p in 0..n_features {
for q in 0..n_features {
let mut weighted_sum = 0.0;
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
let weight = exp_beta_x[k] / sum_exp;
weighted_sum += weight * x_j[p] * x_j[q];
}
hessian[[p, q]] -= weighted_sum - (weighted_x[p] * weighted_x[q]);
}
}
}
Ok((log_likelihood, gradient, hessian))
}
/// Invert the negated Hessian (the observed information matrix) to obtain
/// the coefficient covariance matrix.
fn invert_hessian(hessian: &Array2<f64>) -> Result<Array2<f64>> {
    let information = -hessian;
    match scirs2_linalg::inv(&information.view(), None) {
        Ok(inverse) => Ok(inverse),
        Err(e) => Err(StatsError::ComputationError(format!(
            "Failed to invert Hessian: {e}"
        ))),
    }
}
/// Breslow-type estimate of the cumulative baseline hazard at the fitted
/// `beta`: each event contributes 1 / sum(exp(x_j . beta)) over its risk
/// set. Returns (event times, cumulative hazard), sorted by time.
///
/// NOTE(review): tied events each add their own increment rather than a
/// single pooled one — confirm this matches the intended tie handling.
fn calculatebaseline_hazard(
durations: &ArrayView1<f64>,
events: &ArrayView1<bool>,
covariates: &ArrayView2<f64>,
beta: &Array1<f64>,
) -> Result<(Array1<f64>, Array1<f64>)> {
let n_samples_ = durations.len();
// Visit subjects in increasing order of time.
let mut indices: Vec<usize> = (0..n_samples_).collect();
indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let mut times = Vec::new();
let mut cumulative_hazard = Vec::new();
let mut current_cumhaz = 0.0;
for &i in &indices {
if !events[i] {
continue;
}
let t_i = durations[i];
// Denominator: total relative risk of everyone still at risk at t_i.
let mut risk_sum = 0.0;
for &j in &indices {
if durations[j] >= t_i {
let x_j = covariates.row(j);
risk_sum += x_j.dot(beta).exp();
}
}
if risk_sum > 0.0 {
current_cumhaz += 1.0 / risk_sum; times.push(t_i);
cumulative_hazard.push(current_cumhaz);
}
}
Ok((Array1::from_vec(times), Array1::from_vec(cumulative_hazard)))
}
/// Hazard ratio exp(x . beta) for each covariate row, relative to the
/// baseline hazard. Errors on non-finite input or a feature-count mismatch.
pub fn predict_hazard_ratio(&self, covariates: ArrayView2<f64>) -> Result<Array1<f64>> {
    checkarray_finite(&covariates, "covariates")?;
    if covariates.ncols() != self.coefficients.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "covariates has {features} features, expected {expected}",
            features = covariates.ncols(),
            expected = self.coefficients.len()
        )));
    }
    let ratios: Vec<f64> = covariates
        .outer_iter()
        .map(|row| row.dot(&self.coefficients).exp())
        .collect();
    Ok(Array1::from_vec(ratios))
}
}
/// Fitted accelerated failure time (AFT) regression model.
#[derive(Debug, Clone)]
pub struct AFTModel {
// Coefficients on the log-time scale.
pub coefficients: Array1<f64>,
// Residual scale estimated from uncensored observations (see `fit`).
pub scale: f64,
// Assumed error distribution; recorded but not branched on during fitting.
pub distribution: AFTDistribution,
}
/// Error distribution families supported by [`AFTModel`].
#[derive(Debug, Clone, Copy)]
pub enum AFTDistribution {
Weibull,
Lognormal,
Exponential,
}
impl AFTModel {
/// Fit an accelerated failure time model by weighted least squares on
/// log-durations.
///
/// NOTE(review): censored observations are handled with a fixed 0.5
/// down-weight rather than by maximum likelihood, and `distribution` only
/// labels the fitted model — the fitting procedure below does not branch
/// on it. The scale is the RMS residual over uncensored observations.
pub fn fit(
durations: ArrayView1<f64>,
events: ArrayView1<bool>,
covariates: ArrayView2<f64>,
distribution: AFTDistribution,
) -> Result<Self> {
checkarray_finite(&durations, "durations")?;
checkarray_finite(&covariates, "covariates")?;
let (n_samples_, n_features) = covariates.dim();
if durations.len() != n_samples_ || events.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(
"All input arrays must have the same number of samples".to_string(),
));
}
// Response is log-time; censored rows get half weight.
let mut y = Array1::zeros(n_samples_);
let mut weights = Array1::zeros(n_samples_);
for i in 0..n_samples_ {
y[i] = durations[i].ln();
weights[i] = if events[i] { 1.0 } else { 0.5 }; }
// Accumulate the weighted normal equations X'WX beta = X'Wy.
let mut xtx = Array2::zeros((n_features, n_features));
let mut xty = Array1::zeros(n_features);
for i in 0..n_samples_ {
let x_i = covariates.row(i);
let w = weights[i];
for j in 0..n_features {
xty[j] += w * x_i[j] * y[i];
for k in 0..n_features {
xtx[[j, k]] += w * x_i[j] * x_i[k];
}
}
}
let coefficients = scirs2_linalg::solve(&xtx.view(), &xty.view(), None).map_err(|e| {
StatsError::ComputationError(format!("Failed to solve regression: {e}"))
})?;
// Scale: RMS of log-time residuals, uncensored observations only.
let mut residual_sum = 0.0;
let mut count = 0;
for i in 0..n_samples_ {
if events[i] {
let x_i = covariates.row(i);
let predicted = x_i.dot(&coefficients);
let residual = y[i] - predicted;
residual_sum += residual * residual;
count += 1;
}
}
let scale = if count > 0 {
(residual_sum / count as f64).sqrt()
} else {
1.0
};
Ok(Self {
coefficients,
scale,
distribution,
})
}
/// Predicted event times on the original time scale: exp(x . beta).
/// Errors on non-finite input or a feature-count mismatch.
pub fn predict(&self, covariates: ArrayView2<f64>) -> Result<Array1<f64>> {
    checkarray_finite(&covariates, "covariates")?;
    if covariates.ncols() != self.coefficients.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "covariates has {features} features, expected {expected}",
            features = covariates.ncols(),
            expected = self.coefficients.len()
        )));
    }
    let predicted: Vec<f64> = covariates
        .outer_iter()
        .map(|row| row.dot(&self.coefficients).exp())
        .collect();
    Ok(Array1::from_vec(predicted))
}
}
/// Cox proportional-hazards model with optional stratification.
#[derive(Debug, Clone)]
pub struct ExtendedCoxModel {
// Regression coefficients, shared across strata.
pub coefficients: Array1<f64>,
// Inverse of the negated Hessian at the optimum (Wald covariance).
pub covariance_matrix: Array2<f64>,
pub log_likelihood: f64,
// Per-stratum (event times, cumulative baseline hazard); strata kept from the fit call.
pub stratumbaseline_hazards: Vec<(Array1<f64>, Array1<f64>)>, pub strata: Option<Array1<usize>>,
pub n_strata: usize,
// Covariate indices declared time-varying; stored by `fit_stratified` but
// not consulted by the likelihood code in this file.
pub time_varying_indices: Vec<usize>,
// Newton-Raphson iterations performed before convergence.
pub n_iter: usize,
}
impl ExtendedCoxModel {
pub fn fit_stratified(
durations: ArrayView1<f64>,
events: ArrayView1<bool>,
covariates: ArrayView2<f64>,
strata: Option<ArrayView1<usize>>,
time_varying_indices: Option<Vec<usize>>,
max_iter: Option<usize>,
tol: Option<f64>,
) -> Result<Self> {
checkarray_finite(&durations, "durations")?;
checkarray_finite(&covariates, "covariates")?;
let (n_samples_, n_features) = covariates.dim();
let max_iter = max_iter.unwrap_or(100);
let tol = tol.unwrap_or(1e-6);
if durations.len() != n_samples_ || events.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(
"All input arrays must have the same number of samples".to_string(),
));
}
let (strata_array, n_strata) = if let Some(strata_input) = strata {
if strata_input.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(
"Strata length must match number of samples".to_string(),
));
}
let max_stratum = strata_input.iter().cloned().max().unwrap_or(0);
(Some(strata_input.to_owned()), max_stratum + 1)
} else {
(None, 1)
};
let time_varying_indices = time_varying_indices.unwrap_or_default();
let mut beta = Array1::zeros(n_features);
let mut prev_log_likelihood = f64::NEG_INFINITY;
for iteration in 0..max_iter {
let (log_likelihood, gradient, hessian) =
Self::stratified_partial_likelihood_derivatives(
&durations,
&events,
&covariates,
&beta,
&strata_array,
n_strata,
)?;
if (log_likelihood - prev_log_likelihood).abs() < tol {
let covariance_matrix = Self::invert_hessian(&hessian)?;
let baseline_hazards = Self::calculate_stratifiedbaseline_hazards(
&durations,
&events,
&covariates,
&beta,
&strata_array,
n_strata,
)?;
return Ok(Self {
coefficients: beta,
covariance_matrix,
log_likelihood,
stratumbaseline_hazards: baseline_hazards,
strata: strata_array,
n_strata,
time_varying_indices,
n_iter: iteration + 1,
});
}
let hessian_inv = Self::invert_hessian(&hessian)?;
let delta = hessian_inv.dot(&gradient);
beta = &beta + δ
prev_log_likelihood = log_likelihood;
}
Err(StatsError::ConvergenceError(format!(
"Extended Cox model failed to converge after {max_iter} iterations"
)))
}
/// Sum the per-stratum partial-likelihood contributions: the log
/// likelihood, gradient and Hessian are additive across strata because
/// each stratum has its own risk sets.
fn stratified_partial_likelihood_derivatives(
durations: &ArrayView1<f64>,
events: &ArrayView1<bool>,
covariates: &ArrayView2<f64>,
beta: &Array1<f64>,
strata: &Option<Array1<usize>>,
n_strata: usize,
) -> Result<(f64, Array1<f64>, Array2<f64>)> {
let n_samples_ = durations.len();
let n_features = beta.len();
let mut total_log_likelihood = 0.0;
let mut total_gradient = Array1::zeros(n_features);
let mut total_hessian = Array2::zeros((n_features, n_features));
for stratum in 0..n_strata {
// With no strata array, the single stratum is the whole sample.
let stratum_indices: Vec<usize> = if let Some(ref strata_array) = strata {
(0..n_samples_)
.filter(|&i| strata_array[i] == stratum)
.collect()
} else {
(0..n_samples_).collect()
};
if stratum_indices.is_empty() {
continue;
}
// Sort within the stratum so risk sets can be formed by time.
let mut sorted_indices = stratum_indices.clone();
sorted_indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let (stratum_ll, stratum_grad, stratum_hess) = Self::stratum_partial_likelihood(
durations,
events,
covariates,
beta,
&sorted_indices,
)?;
total_log_likelihood += stratum_ll;
total_gradient = &total_gradient + &stratum_grad;
total_hessian = &total_hessian + &stratum_hess;
}
Ok((total_log_likelihood, total_gradient, total_hessian))
}
/// Partial-likelihood log value, gradient and Hessian restricted to one
/// stratum (`sorted_indices`, ascending by duration).
///
/// NOTE(review): this duplicates `CoxPHModel::partial_likelihood_derivatives`
/// with the sample restricted to the stratum; consider sharing the code.
/// Risk sets are rebuilt per event, so this is O(m^2) in stratum size.
fn stratum_partial_likelihood(
durations: &ArrayView1<f64>,
events: &ArrayView1<bool>,
covariates: &ArrayView2<f64>,
beta: &Array1<f64>,
sorted_indices: &[usize],
) -> Result<(f64, Array1<f64>, Array2<f64>)> {
let n_features = beta.len();
let mut log_likelihood = 0.0;
let mut gradient = Array1::zeros(n_features);
let mut hessian = Array2::zeros((n_features, n_features));
for &i in sorted_indices {
// Only uncensored observations contribute likelihood terms.
if !events[i] {
continue; }
let t_i = durations[i];
let x_i = covariates.row(i);
// Risk set within the stratum: still under observation at t_i.
let mut risk_set = Vec::new();
for &j in sorted_indices {
if durations[j] >= t_i {
risk_set.push(j);
}
}
if risk_set.is_empty() {
continue;
}
let mut exp_beta_x = Array1::zeros(risk_set.len());
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
exp_beta_x[k] = x_j.dot(beta).exp();
}
let sum_exp = exp_beta_x.sum();
if sum_exp <= 0.0 {
continue;
}
log_likelihood += x_i.dot(beta) - sum_exp.ln();
// Risk-set-weighted mean covariate vector.
let mut weighted_x = Array1::<f64>::zeros(n_features);
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
let weight = exp_beta_x[k] / sum_exp;
weighted_x = &weighted_x + &(weight * &x_j.to_owned());
}
gradient = &gradient + &(&x_i.to_owned() - &weighted_x);
// Hessian term: minus the weighted covariance of covariates in the risk set.
for p in 0..n_features {
for q in 0..n_features {
let mut weighted_sum = 0.0;
for (k, &j) in risk_set.iter().enumerate() {
let x_j = covariates.row(j);
let weight = exp_beta_x[k] / sum_exp;
weighted_sum += weight * x_j[p] * x_j[q];
}
hessian[[p, q]] -= weighted_sum - (weighted_x[p] * weighted_x[q]);
}
}
}
Ok((log_likelihood, gradient, hessian))
}
/// Breslow-type cumulative baseline hazard computed separately per
/// stratum; empty strata yield empty (times, hazard) arrays so the output
/// Vec always has `n_strata` entries indexed by stratum label.
fn calculate_stratifiedbaseline_hazards(
durations: &ArrayView1<f64>,
events: &ArrayView1<bool>,
covariates: &ArrayView2<f64>,
beta: &Array1<f64>,
strata: &Option<Array1<usize>>,
n_strata: usize,
) -> Result<Vec<(Array1<f64>, Array1<f64>)>> {
let n_samples_ = durations.len();
let mut baseline_hazards = Vec::new();
for stratum in 0..n_strata {
// With no strata array, the single stratum is the whole sample.
let stratum_indices: Vec<usize> = if let Some(ref strata_array) = strata {
(0..n_samples_)
.filter(|&i| strata_array[i] == stratum)
.collect()
} else {
(0..n_samples_).collect()
};
if stratum_indices.is_empty() {
baseline_hazards.push((Array1::zeros(0), Array1::zeros(0)));
continue;
}
let mut sorted_indices = stratum_indices.clone();
sorted_indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let mut times = Vec::new();
let mut cumulative_hazard = Vec::new();
let mut current_cumhaz = 0.0;
for &i in &sorted_indices {
if !events[i] {
continue;
}
let t_i = durations[i];
// Denominator: total relative risk of the stratum's risk set at t_i.
let mut risk_sum = 0.0;
for &j in &sorted_indices {
if durations[j] >= t_i {
let x_j = covariates.row(j);
risk_sum += x_j.dot(beta).exp();
}
}
if risk_sum > 0.0 {
current_cumhaz += 1.0 / risk_sum; times.push(t_i);
cumulative_hazard.push(current_cumhaz);
}
}
baseline_hazards.push((Array1::from_vec(times), Array1::from_vec(cumulative_hazard)));
}
Ok(baseline_hazards)
}
/// Invert the negated Hessian (the observed information matrix) to obtain
/// the coefficient covariance matrix.
fn invert_hessian(hessian: &Array2<f64>) -> Result<Array2<f64>> {
    let information = -hessian;
    match scirs2_linalg::inv(&information.view(), None) {
        Ok(inverse) => Ok(inverse),
        Err(e) => Err(StatsError::ComputationError(format!(
            "Failed to invert Hessian: {e}"
        ))),
    }
}
/// Relative hazard exp(x . beta) per covariate row. Strata only shift the
/// baseline hazard, so the ratio itself does not depend on the stratum;
/// `strata` is validated for length only.
pub fn predict_hazard_ratio_stratified(
    &self,
    covariates: ArrayView2<f64>,
    strata: Option<ArrayView1<usize>>,
) -> Result<Array1<f64>> {
    checkarray_finite(&covariates, "covariates")?;
    if covariates.ncols() != self.coefficients.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "covariates has {features} features, expected {expected}",
            features = covariates.ncols(),
            expected = self.coefficients.len()
        )));
    }
    if let Some(ref strata_input) = strata {
        if strata_input.len() != covariates.nrows() {
            return Err(StatsError::DimensionMismatch(
                "Strata length must match number of predictions".to_string(),
            ));
        }
    }
    let ratios: Vec<f64> = covariates
        .outer_iter()
        .map(|row| row.dot(&self.coefficients).exp())
        .collect();
    Ok(Array1::from_vec(ratios))
}
/// Wald confidence intervals for the fitted coefficients: column 0 is the
/// lower bound, column 1 the upper, beta_i +/- z * SE(beta_i) with SE taken
/// from the covariance-matrix diagonal.
///
/// Fixes a defect: the z quantile was hard-coded to 1.96, silently
/// ignoring `confidencelevel`.
pub fn coefficient_confidence_intervals(&self, confidencelevel: f64) -> Result<Array2<f64>> {
    check_probability(confidencelevel, "confidence_level")?;
    let n_features = self.coefficients.len();
    let mut intervals = Array2::zeros((n_features, 2));
    // Two-sided normal quantile (Abramowitz & Stegun 26.2.23, |err| < 4.5e-4).
    let tail = ((1.0 - confidencelevel) / 2.0).clamp(1e-12, 0.5);
    let t = (-2.0 * tail.ln()).sqrt();
    let z_critical = t
        - (2.515517 + 0.802853 * t + 0.010328 * t * t)
            / (1.0 + 1.432788 * t + 0.189269 * t * t + 0.001308 * t * t * t);
    for i in 0..n_features {
        let coeff = self.coefficients[i];
        let se = self.covariance_matrix[[i, i]].sqrt();
        intervals[[i, 0]] = coeff - z_critical * se;
        intervals[[i, 1]] = coeff + z_critical * se;
    }
    Ok(intervals)
}
}
/// Fine-Gray competing-risks regression results, one slot per risk
/// (0-based internally; `fit` takes a 1-based `target_risk`).
#[derive(Debug, Clone)]
pub struct CompetingRisksModel {
// Subdistribution-hazard coefficients per risk; only the fitted risk is populated.
pub coefficients: Vec<Array1<f64>>,
// Wald covariance matrices per risk.
pub covariance_matrices: Vec<Array2<f64>>,
// Per-risk (event times, baseline cumulative-incidence values).
pub baseline_cifs: Vec<(Array1<f64>, Array1<f64>)>, pub n_risks: usize,
// Weighted log partial likelihood of the fitted risk at convergence.
pub log_likelihood: f64,
}
impl CompetingRisksModel {
pub fn fit(
durations: ArrayView1<f64>,
events: ArrayView1<usize>,
covariates: ArrayView2<f64>,
n_risks: usize,
target_risk: usize,
max_iter: Option<usize>,
tol: Option<f64>,
) -> Result<Self> {
checkarray_finite(&durations, "durations")?;
checkarray_finite(&covariates, "covariates")?;
check_positive(n_risks, "n_risks")?;
let (n_samples_, n_features) = covariates.dim();
let max_iter = max_iter.unwrap_or(100);
let tol = tol.unwrap_or(1e-6);
if durations.len() != n_samples_ || events.len() != n_samples_ {
return Err(StatsError::DimensionMismatch(
"All input arrays must have the same number of samples".to_string(),
));
}
if target_risk == 0 || target_risk > n_risks {
return Err(StatsError::InvalidArgument(
"target_risk must be between 1 and n_risks".to_string(),
));
}
let mut coefficients = vec![Array1::zeros(n_features); n_risks];
let mut covariance_matrices = vec![Array2::zeros((n_features, n_features)); n_risks];
let mut baseline_cifs = vec![(Array1::zeros(0), Array1::zeros(0)); n_risks];
let (modified_durations, modified_events, modified_weights) =
Self::prepare_fine_gray_data(&durations, &events, target_risk)?;
let mut beta = Array1::zeros(n_features);
let mut prev_log_likelihood = f64::NEG_INFINITY;
for _iteration in 0..max_iter {
let (log_likelihood, gradient, hessian) = Self::subdistribution_partial_likelihood(
&modified_durations,
&modified_events,
&covariates,
&modified_weights,
&beta,
)?;
if (log_likelihood - prev_log_likelihood).abs() < tol {
coefficients[target_risk - 1] = beta.clone();
covariance_matrices[target_risk - 1] = Self::invert_hessian(&hessian)?;
let (times, cif) = Self::calculatebaseline_cif(
&modified_durations,
&modified_events,
&covariates,
&modified_weights,
&beta,
)?;
baseline_cifs[target_risk - 1] = (times, cif);
return Ok(Self {
coefficients,
covariance_matrices,
baseline_cifs,
n_risks,
log_likelihood,
});
}
let hessian_inv = Self::invert_hessian(&hessian)?;
let delta = hessian_inv.dot(&gradient);
beta = &beta + δ
prev_log_likelihood = log_likelihood;
}
Err(StatsError::ConvergenceError(format!(
"Competing _risks model failed to converge after {max_iter} iterations"
)))
}
/// Recode the data for a Fine-Gray fit of `target_risk`.
///
/// Subjects with the target event become events with weight 1; censored
/// subjects (event code 0) stay censored with weight 1; subjects with a
/// competing event stay "at risk" and receive an
/// inverse-probability-of-censoring weight 1/G(t) from the censoring
/// Kaplan-Meier curve (0 when G(t) is 0).
fn prepare_fine_gray_data(
durations: &ArrayView1<f64>,
events: &ArrayView1<usize>,
target_risk: usize,
) -> Result<(Array1<f64>, Array1<bool>, Array1<f64>)> {
let n_samples_ = durations.len();
let modified_durations = durations.to_owned();
let mut modified_events = Array1::from_elem(n_samples_, false);
let mut weights = Array1::ones(n_samples_);
// KM curve of the censoring distribution, for the IPCW weights below.
let censoring_km = Self::kaplan_meier_censoring(durations, events)?;
for i in 0..n_samples_ {
if events[i] == target_risk {
modified_events[i] = true;
weights[i] = 1.0;
} else if events[i] == 0 {
modified_events[i] = false;
weights[i] = 1.0;
} else {
// Competing event: kept in the risk set with an IPCW weight.
modified_events[i] = false;
let km_prob = Self::interpolate_km_probability(
&censoring_km.0,
&censoring_km.1,
durations[i],
);
weights[i] = if km_prob > 0.0 { 1.0 / km_prob } else { 0.0 };
}
}
Ok((modified_durations, modified_events, weights))
}
/// Kaplan-Meier curve of the censoring distribution: censoring (event code
/// 0) is treated as the "event" and all actual events as censored.
/// Returns (times with at least one censoring, survival probabilities).
fn kaplan_meier_censoring(
durations: &ArrayView1<f64>,
events: &ArrayView1<usize>,
) -> Result<(Array1<f64>, Array1<f64>)> {
// Censoring indicator: true where the subject was censored.
let censoring_events: Array1<bool> = events.mapv(|e| e == 0);
let mut time_event_pairs: Vec<(f64, bool)> = durations
.iter()
.zip(censoring_events.iter())
.map(|(&t, &e)| (t, e))
.collect();
time_event_pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
let mut times = Vec::new();
let mut survival_probs = Vec::new();
let mut current_survival = 1.0;
let mut current_at_risk = time_event_pairs.len();
let mut i = 0;
while i < time_event_pairs.len() {
let current_time = time_event_pairs[i].0;
let mut events_at_time = 0;
let mut total_at_time = 0;
// Group all observations tied at the current time.
while i < time_event_pairs.len() && time_event_pairs[i].0 == current_time {
if time_event_pairs[i].1 {
events_at_time += 1;
}
total_at_time += 1;
i += 1;
}
// The curve only steps at times with at least one censoring.
if events_at_time > 0 {
let survival_this_time = 1.0 - (events_at_time as f64) / (current_at_risk as f64);
current_survival *= survival_this_time;
times.push(current_time);
survival_probs.push(current_survival);
}
current_at_risk -= total_at_time;
}
Ok((Array1::from_vec(times), Array1::from_vec(survival_probs)))
}
/// Value of the (right-continuous) censoring KM step function at `t`:
/// 1.0 before the first step time, otherwise the probability at the LAST
/// step time <= t. `times` is sorted ascending by construction.
///
/// Fixes a step-function bug: the previous lookup returned the value at
/// the FIRST time >= t, i.e. the post-drop probability for times strictly
/// before a step — inconsistent with `KaplanMeierEstimator::predict`.
fn interpolate_km_probability(times: &Array1<f64>, probs: &Array1<f64>, t: f64) -> f64 {
    let mut value = 1.0;
    for i in 0..times.len() {
        if times[i] <= t {
            value = probs[i];
        } else {
            break;
        }
    }
    value
}
/// Weighted (IPCW) partial likelihood, gradient and Hessian for the
/// Fine-Gray subdistribution hazard at `beta`.
///
/// Structure mirrors the Cox partial likelihood, with every risk-set term
/// scaled by the subject weight `w_j` and every event term by `w_i`.
/// NOTE(review): the risk set is rescanned per event, so this is O(n^2)
/// per Newton iteration.
fn subdistribution_partial_likelihood(
durations: &Array1<f64>,
events: &Array1<bool>,
covariates: &ArrayView2<f64>,
weights: &Array1<f64>,
beta: &Array1<f64>,
) -> Result<(f64, Array1<f64>, Array2<f64>)> {
let n_samples_ = durations.len();
let n_features = beta.len();
// Visit subjects in increasing order of time.
let mut indices: Vec<usize> = (0..n_samples_).collect();
indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let mut log_likelihood = 0.0;
let mut gradient = Array1::zeros(n_features);
let mut hessian = Array2::zeros((n_features, n_features));
for &i in &indices {
// Only (recoded) target-risk events contribute terms.
if !events[i] {
continue; }
let t_i = durations[i];
let x_i = covariates.row(i);
let w_i = weights[i];
// Accumulate weighted sums over the risk set: sum w e^{xb},
// sum w e^{xb} x, and sum w e^{xb} x x'.
let mut weighted_exp_sum = 0.0;
let mut weighted_x_sum = Array1::zeros(n_features);
let mut weighted_xx_sum = Array2::zeros((n_features, n_features));
for &j in &indices {
if durations[j] >= t_i {
let x_j = covariates.row(j);
let w_j = weights[j];
let exp_beta_x = x_j.dot(beta).exp();
let weighted_exp = w_j * exp_beta_x;
weighted_exp_sum += weighted_exp;
weighted_x_sum = &weighted_x_sum + &(weighted_exp * &x_j.to_owned());
for p in 0..n_features {
for q in 0..n_features {
weighted_xx_sum[[p, q]] += weighted_exp * x_j[p] * x_j[q];
}
}
}
}
if weighted_exp_sum <= 0.0 {
continue;
}
let weighted_mean_x = &weighted_x_sum / weighted_exp_sum;
log_likelihood += w_i * (x_i.dot(beta) - weighted_exp_sum.ln());
// Score: weighted residual of covariates around the risk-set mean.
gradient = &gradient + &(w_i * (&x_i.to_owned() - &weighted_mean_x));
// Hessian: minus the weighted covariance (E[xx'] - E[x]E[x]') term.
let weighted_mean_xx = &weighted_xx_sum / weighted_exp_sum;
let outer_product = outer_product_array(&weighted_mean_x);
hessian = &hessian - &(w_i * (&weighted_mean_xx - &outer_product));
}
Ok((log_likelihood, gradient, hessian))
}
/// Weighted Breslow-type baseline accumulation for the subdistribution
/// hazard: each event adds w_i / sum_j(w_j exp(x_j . beta)) over the
/// weighted risk set. Returns (event times, cumulative values) sorted by
/// time.
///
/// NOTE(review): the accumulated quantity is the cumulative
/// subdistribution hazard; `predict_cumulative_incidence` treats it
/// directly as a baseline CIF — confirm the intended scale.
fn calculatebaseline_cif(
durations: &Array1<f64>,
events: &Array1<bool>,
covariates: &ArrayView2<f64>,
weights: &Array1<f64>,
beta: &Array1<f64>,
) -> Result<(Array1<f64>, Array1<f64>)> {
let n_samples_ = durations.len();
// Visit subjects in increasing order of time.
let mut indices: Vec<usize> = (0..n_samples_).collect();
indices.sort_by(|&i, &j| {
durations[i]
.partial_cmp(&durations[j])
.expect("Operation failed")
});
let mut times = Vec::new();
let mut cumulative_incidence = Vec::new();
let mut current_cif = 0.0;
for &i in &indices {
if !events[i] {
continue;
}
let t_i = durations[i];
let w_i = weights[i];
// Denominator: total weighted relative risk at t_i.
let mut weighted_risk_sum = 0.0;
for &j in &indices {
if durations[j] >= t_i {
let x_j = covariates.row(j);
let w_j = weights[j];
weighted_risk_sum += w_j * x_j.dot(beta).exp();
}
}
if weighted_risk_sum > 0.0 {
current_cif += w_i / weighted_risk_sum;
times.push(t_i);
cumulative_incidence.push(current_cif);
}
}
Ok((
Array1::from_vec(times),
Array1::from_vec(cumulative_incidence),
))
}
/// Invert the negated Hessian (the observed information matrix) to obtain
/// the coefficient covariance matrix.
fn invert_hessian(hessian: &Array2<f64>) -> Result<Array2<f64>> {
    let information = -hessian;
    match scirs2_linalg::inv(&information.view(), None) {
        Ok(inverse) => Ok(inverse),
        Err(e) => Err(StatsError::ComputationError(format!(
            "Failed to invert Hessian: {e}"
        ))),
    }
}
/// Predicted cumulative incidence for `target_risk` (1-based) for each
/// covariate row at each requested time, using
/// CIF(t | x) = 1 - (1 - F0(t))^exp(x . beta).
///
/// Fixes a mojibake compile error (`&times` corrupted to `×`) and adds
/// the feature-count check the sibling predict methods already perform.
pub fn predict_cumulative_incidence(
    &self,
    covariates: ArrayView2<f64>,
    target_risk: usize,
    times: ArrayView1<f64>,
) -> Result<Array2<f64>> {
    checkarray_finite(&covariates, "covariates")?;
    checkarray_finite(&times, "times")?;
    if target_risk == 0 || target_risk > self.n_risks {
        return Err(StatsError::InvalidArgument(
            "target_risk must be between 1 and n_risks".to_string(),
        ));
    }
    let risk_idx = target_risk - 1;
    if covariates.ncols() != self.coefficients[risk_idx].len() {
        return Err(StatsError::DimensionMismatch(format!(
            "covariates has {features} features, expected {expected}",
            features = covariates.ncols(),
            expected = self.coefficients[risk_idx].len()
        )));
    }
    let n_samples_ = covariates.nrows();
    let n_times = times.len();
    let mut predictions = Array2::zeros((n_samples_, n_times));
    let beta = &self.coefficients[risk_idx];
    let (baseline_times, baseline_cif) = &self.baseline_cifs[risk_idx];
    for i in 0..n_samples_ {
        let x_i = covariates.row(i);
        let hazard_ratio = x_i.dot(beta).exp();
        for (j, &t) in times.iter().enumerate() {
            let baseline_value = Self::interpolatebaseline_cif(baseline_times, baseline_cif, t);
            predictions[[i, j]] = 1.0 - (1.0 - baseline_value).powf(hazard_ratio);
        }
    }
    Ok(predictions)
}
/// Value of the right-continuous baseline CIF step function at `t`:
/// 0.0 before the first event time, otherwise the value at the LAST event
/// time <= t. `times` is sorted ascending by construction.
///
/// Fixes a step-function bug: the previous lookup returned the value at
/// the first time >= t, i.e. the post-jump value for times strictly before
/// an event time.
fn interpolatebaseline_cif(times: &Array1<f64>, cif: &Array1<f64>, t: f64) -> f64 {
    let mut value = 0.0;
    for i in 0..times.len() {
        if times[i] <= t {
            value = cif[i];
        } else {
            break;
        }
    }
    value
}
}
/// Dense outer product v v^T.
#[allow(dead_code)]
fn outer_product_array(v: &Array1<f64>) -> Array2<f64> {
    let n = v.len();
    Array2::from_shape_fn((n, n), |(i, j)| v[i] * v[j])
}