1use crate::error::TimeSeriesError;
11use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::validation::checkarray_finite;
13use std::collections::HashMap;
14
15use super::{fisher_yates_shuffle, CausalityResult};
16
/// Outcome of a single transfer-entropy estimation, together with the
/// parameters that produced it and optional significance statistics.
#[derive(Debug, Clone)]
pub struct TransferEntropyResult {
    /// Estimated transfer entropy (in nats; estimators use natural log).
    pub transfer_entropy: f64,
    /// One-sided permutation-test p-value, when bootstrapping was requested.
    pub p_value: Option<f64>,
    /// Number of discretization bins used (also reported by estimators that
    /// do not bin, for traceability of the configuration).
    pub bins: usize,
    /// Embedding dimension (number of lagged values per state vector).
    pub embedding_dim: usize,
    /// Lag between consecutive embedding components.
    pub time_delay: usize,
    /// Estimator that produced `transfer_entropy`.
    pub estimator: TransferEntropyEstimator,
    /// Spread of the surrogate TE distribution, when bootstrapping was
    /// requested (sample standard deviation of the surrogate values).
    pub std_error: Option<f64>,
}
35
/// Configuration for Shannon transfer-entropy estimation.
#[derive(Debug, Clone)]
pub struct TransferEntropyConfig {
    /// Number of equal-width bins for discretization (binning estimator).
    pub bins: usize,
    /// Embedding dimension (number of lagged values per state vector).
    pub embedding_dim: usize,
    /// Lag between consecutive embedding components.
    pub time_delay: usize,
    /// Number of surrogate shuffles for the permutation test; `None`
    /// disables significance testing.
    pub bootstrap_samples: Option<usize>,
    /// Which density/probability estimator to use.
    pub estimator: TransferEntropyEstimator,
    /// Base RNG seed for reproducible shuffles; `None` for nondeterministic.
    pub seed: Option<u64>,
}
52
impl Default for TransferEntropyConfig {
    /// Moderate defaults: 10 bins, dimension-3 embedding at lag 1, binning
    /// estimator, and a 100-shuffle permutation test.
    fn default() -> Self {
        Self {
            bins: 10,
            embedding_dim: 3,
            time_delay: 1,
            bootstrap_samples: Some(100),
            estimator: TransferEntropyEstimator::Binning,
            seed: None,
        }
    }
}
65
/// Available estimators for Shannon transfer entropy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferEntropyEstimator {
    /// Histogram (equal-width binning) plug-in estimator.
    Binning,
    /// Gaussian kernel-density estimator.
    KDE,
    /// k-nearest-neighbor estimator (KSG-style, k = 4).
    KNN,
}
76
/// Configuration for Rényi transfer-entropy estimation.
#[derive(Debug, Clone)]
pub struct RenyiTransferEntropyConfig {
    /// Rényi order; must be positive. Values near 1 fall back to Shannon TE.
    pub alpha: f64,
    /// Number of equal-width bins for discretization.
    pub bins: usize,
    /// Embedding dimension (number of lagged values per state vector).
    pub embedding_dim: usize,
    /// Lag between consecutive embedding components.
    pub time_delay: usize,
    /// Number of surrogate shuffles for the permutation test; `None`
    /// disables significance testing.
    pub bootstrap_samples: Option<usize>,
    /// Base RNG seed for reproducible shuffles.
    pub seed: Option<u64>,
}
93
impl Default for RenyiTransferEntropyConfig {
    /// Defaults to order alpha = 2 (collision entropy) with the same
    /// embedding/bootstrap settings as the Shannon configuration.
    fn default() -> Self {
        Self {
            alpha: 2.0,
            bins: 10,
            embedding_dim: 3,
            time_delay: 1,
            bootstrap_samples: Some(100),
            seed: None,
        }
    }
}
106
/// Configuration for conditional transfer entropy TE(x -> y | z).
#[derive(Debug, Clone)]
pub struct ConditionalTransferEntropyConfig {
    /// Number of equal-width bins for discretization.
    pub bins: usize,
    /// Embedding dimension for the source and target series.
    pub embedding_dim: usize,
    /// Embedding dimension for each conditioning series (columns of z).
    pub cond_embedding_dim: usize,
    /// Lag between consecutive embedding components.
    pub time_delay: usize,
    /// Number of surrogate shuffles for the permutation test; `None`
    /// disables significance testing.
    pub bootstrap_samples: Option<usize>,
    /// Base RNG seed for reproducible shuffles.
    pub seed: Option<u64>,
}
123
impl Default for ConditionalTransferEntropyConfig {
    /// Smaller defaults than the unconditional case (8 bins, dimension-2
    /// embeddings), since conditioning enlarges the joint state space.
    fn default() -> Self {
        Self {
            bins: 8,
            embedding_dim: 2,
            cond_embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: Some(100),
            seed: None,
        }
    }
}
136
/// Configuration for effective (surrogate-corrected) transfer entropy.
#[derive(Debug, Clone)]
pub struct EffectiveTransferEntropyConfig {
    /// Number of equal-width bins for discretization.
    pub bins: usize,
    /// Embedding dimension (number of lagged values per state vector).
    pub embedding_dim: usize,
    /// Lag between consecutive embedding components.
    pub time_delay: usize,
    /// Number of shuffled-source surrogates used to estimate the bias.
    pub n_surrogates: usize,
    /// Base RNG seed for reproducible shuffles.
    pub seed: Option<u64>,
}
151
impl Default for EffectiveTransferEntropyConfig {
    /// Mirrors the Shannon defaults with 100 surrogates for bias estimation.
    fn default() -> Self {
        Self {
            bins: 10,
            embedding_dim: 3,
            time_delay: 1,
            n_surrogates: 100,
            seed: None,
        }
    }
}
163
/// Result of an effective transfer-entropy computation.
#[derive(Debug, Clone)]
pub struct EffectiveTransferEntropyResult {
    /// Bias-corrected TE: `raw_te - surrogate_mean` (may be negative).
    pub effective_te: f64,
    /// Uncorrected TE of the original series (binning estimator).
    pub raw_te: f64,
    /// Mean TE over the shuffled-source surrogates (the bias estimate).
    pub surrogate_mean: f64,
    /// Sample standard deviation of the surrogate TE distribution.
    pub surrogate_std: f64,
    /// `(raw_te - surrogate_mean) / surrogate_std`; 0 when the std is ~0.
    pub z_score: f64,
    /// One-sided p-value: fraction of surrogates with TE >= `raw_te`.
    pub p_value: f64,
    /// Number of surrogates actually used.
    pub n_surrogates: usize,
}
182
183pub fn shannon_transfer_entropy(
201 x: &Array1<f64>,
202 y: &Array1<f64>,
203 config: &TransferEntropyConfig,
204) -> CausalityResult<TransferEntropyResult> {
205 checkarray_finite(x, "x")?;
206 checkarray_finite(y, "y")?;
207
208 if x.len() != y.len() {
209 return Err(TimeSeriesError::InvalidInput(
210 "Time series must have the same length".to_string(),
211 ));
212 }
213
214 let required_length = config.embedding_dim * config.time_delay + 1;
215 if x.len() < required_length {
216 return Err(TimeSeriesError::InvalidInput(format!(
217 "Time series too short (len={}) for embedding (need {})",
218 x.len(),
219 required_length
220 )));
221 }
222
223 let te = match config.estimator {
224 TransferEntropyEstimator::Binning => {
225 compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?
226 }
227 TransferEntropyEstimator::KDE => {
228 compute_te_kde(x, y, config.bins, config.embedding_dim, config.time_delay)?
229 }
230 TransferEntropyEstimator::KNN => {
231 compute_te_knn(x, y, config.embedding_dim, config.time_delay)?
232 }
233 };
234
235 let (p_value, std_error) = if let Some(n_bootstrap) = config.bootstrap_samples {
237 let (p, se) = bootstrap_te_significance(x, y, config, te, n_bootstrap, config.seed)?;
238 (Some(p), Some(se))
239 } else {
240 (None, None)
241 };
242
243 Ok(TransferEntropyResult {
244 transfer_entropy: te,
245 p_value,
246 bins: config.bins,
247 embedding_dim: config.embedding_dim,
248 time_delay: config.time_delay,
249 estimator: config.estimator,
250 std_error,
251 })
252}
253
254pub fn renyi_transfer_entropy(
265 x: &Array1<f64>,
266 y: &Array1<f64>,
267 config: &RenyiTransferEntropyConfig,
268) -> CausalityResult<TransferEntropyResult> {
269 checkarray_finite(x, "x")?;
270 checkarray_finite(y, "y")?;
271
272 if x.len() != y.len() {
273 return Err(TimeSeriesError::InvalidInput(
274 "Time series must have the same length".to_string(),
275 ));
276 }
277
278 if config.alpha <= 0.0 {
279 return Err(TimeSeriesError::InvalidInput(
280 "Renyi order alpha must be positive".to_string(),
281 ));
282 }
283
284 let required_length = config.embedding_dim * config.time_delay + 1;
285 if x.len() < required_length {
286 return Err(TimeSeriesError::InvalidInput(
287 "Time series too short for the specified embedding parameters".to_string(),
288 ));
289 }
290
291 let te = if (config.alpha - 1.0).abs() < 1e-6 {
293 compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?
294 } else {
295 compute_renyi_te(x, y, config)?
296 };
297
298 let p_value = if let Some(n_bootstrap) = config.bootstrap_samples {
299 let shannon_config = TransferEntropyConfig {
300 bins: config.bins,
301 embedding_dim: config.embedding_dim,
302 time_delay: config.time_delay,
303 bootstrap_samples: None,
304 estimator: TransferEntropyEstimator::Binning,
305 seed: config.seed,
306 };
307 let (p, _) =
308 bootstrap_te_significance(x, y, &shannon_config, te, n_bootstrap, config.seed)?;
309 Some(p)
310 } else {
311 None
312 };
313
314 Ok(TransferEntropyResult {
315 transfer_entropy: te,
316 p_value,
317 bins: config.bins,
318 embedding_dim: config.embedding_dim,
319 time_delay: config.time_delay,
320 estimator: TransferEntropyEstimator::Binning,
321 std_error: None,
322 })
323}
324
/// Conditional transfer entropy TE(x -> y | z): information flow from `x`
/// to `y` that is not explained by the conditioning series in the columns
/// of `z`. Uses the binning plug-in estimator.
///
/// `z` has one conditioning series per column; it must have as many rows as
/// `x` and `y` have elements.
///
/// NOTE(review): only `x` and `y` are checked for finiteness here — `z` is
/// not passed through `checkarray_finite`. Confirm whether that is
/// intentional or an oversight.
pub fn conditional_transfer_entropy(
    x: &Array1<f64>,
    y: &Array1<f64>,
    z: &Array2<f64>,
    config: &ConditionalTransferEntropyConfig,
) -> CausalityResult<TransferEntropyResult> {
    checkarray_finite(x, "x")?;
    checkarray_finite(y, "y")?;

    if x.len() != y.len() || x.len() != z.nrows() {
        return Err(TimeSeriesError::InvalidInput(
            "All time series must have the same length".to_string(),
        ));
    }

    // The embedding window is governed by the larger of the two embedding
    // dimensions, since both x/y and z states are built from the same window.
    let max_embed = config.embedding_dim.max(config.cond_embedding_dim);
    let required_length = max_embed * config.time_delay + 1;
    if x.len() < required_length {
        return Err(TimeSeriesError::InvalidInput(
            "Time series too short for the specified embedding parameters".to_string(),
        ));
    }

    let te = compute_conditional_te(x, y, z, config)?;

    // Optional permutation test: shuffle x, recompute conditional TE.
    let p_value = if let Some(n_bootstrap) = config.bootstrap_samples {
        let p = bootstrap_conditional_te_pvalue(x, y, z, config, te, n_bootstrap)?;
        Some(p)
    } else {
        None
    };

    Ok(TransferEntropyResult {
        transfer_entropy: te,
        p_value,
        bins: config.bins,
        embedding_dim: config.embedding_dim,
        time_delay: config.time_delay,
        estimator: TransferEntropyEstimator::Binning,
        std_error: None,
    })
}
378
379pub fn effective_transfer_entropy(
392 x: &Array1<f64>,
393 y: &Array1<f64>,
394 config: &EffectiveTransferEntropyConfig,
395) -> CausalityResult<EffectiveTransferEntropyResult> {
396 checkarray_finite(x, "x")?;
397 checkarray_finite(y, "y")?;
398
399 if x.len() != y.len() {
400 return Err(TimeSeriesError::InvalidInput(
401 "Time series must have the same length".to_string(),
402 ));
403 }
404
405 let required_length = config.embedding_dim * config.time_delay + 1;
406 if x.len() < required_length {
407 return Err(TimeSeriesError::InvalidInput(
408 "Time series too short for the specified embedding parameters".to_string(),
409 ));
410 }
411
412 let raw_te = compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?;
414
415 let mut surrogate_tes = Vec::with_capacity(config.n_surrogates);
417 for s in 0..config.n_surrogates {
418 let mut x_shuffled = x.clone();
419 let seed_val = config.seed.map(|base| base.wrapping_add(s as u64));
420 fisher_yates_shuffle(&mut x_shuffled, seed_val);
421
422 let surr_te = compute_te_binning(
423 &x_shuffled,
424 y,
425 config.bins,
426 config.embedding_dim,
427 config.time_delay,
428 )?;
429 surrogate_tes.push(surr_te);
430 }
431
432 let n_surr = surrogate_tes.len() as f64;
433 let surrogate_mean = surrogate_tes.iter().sum::<f64>() / n_surr;
434 let surrogate_var = surrogate_tes
435 .iter()
436 .map(|&v| (v - surrogate_mean).powi(2))
437 .sum::<f64>()
438 / (n_surr - 1.0).max(1.0);
439 let surrogate_std = surrogate_var.sqrt();
440
441 let effective_te = raw_te - surrogate_mean;
442
443 let z_score = if surrogate_std > 1e-15 {
444 (raw_te - surrogate_mean) / surrogate_std
445 } else {
446 0.0
447 };
448
449 let count_above = surrogate_tes.iter().filter(|&&v| v >= raw_te).count();
450 let p_value = count_above as f64 / config.n_surrogates as f64;
451
452 Ok(EffectiveTransferEntropyResult {
453 effective_te,
454 raw_te,
455 surrogate_mean,
456 surrogate_std,
457 z_score,
458 p_value,
459 n_surrogates: config.n_surrogates,
460 })
461}
462
463fn compute_te_binning(
467 x: &Array1<f64>,
468 y: &Array1<f64>,
469 bins: usize,
470 embedding_dim: usize,
471 time_delay: usize,
472) -> CausalityResult<f64> {
473 let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;
474
475 let x_discrete = discretize_matrix(&x_embed, bins)?;
476 let y_discrete = discretize_matrix(&y_embed, bins)?;
477 let y_future_discrete = discretize_vector(&y_future, bins)?;
478
479 compute_te_from_discrete(&x_discrete, &y_discrete, &y_future_discrete)
480}
481
482fn compute_te_kde(
484 x: &Array1<f64>,
485 y: &Array1<f64>,
486 bins: usize,
487 embedding_dim: usize,
488 time_delay: usize,
489) -> CausalityResult<f64> {
490 let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;
491
492 let n = x_embed.nrows();
493 if n < 5 {
494 return Err(TimeSeriesError::InvalidInput(
495 "Too few embedded points for KDE estimation".to_string(),
496 ));
497 }
498
499 let total_dim = (2 * embedding_dim + 1) as f64;
501 let bandwidth = (4.0 / ((total_dim + 2.0) * n as f64)).powf(1.0 / (total_dim + 4.0));
502
503 let mut te_sum = 0.0;
506
507 for i in 0..n {
508 let mut log_joint_cond = 0.0;
510 let mut log_marginal_cond = 0.0;
511
512 let mut joint_density = 0.0;
513 let mut yx_density = 0.0;
514 let mut y_density = 0.0;
515 let mut y_only_density = 0.0;
516
517 for j in 0..n {
518 if i == j {
519 continue;
520 }
521
522 let y_fut_dist = (y_future[i] - y_future[j]).powi(2);
524
525 let mut y_past_dist = 0.0;
526 for d in 0..embedding_dim {
527 y_past_dist += (y_embed[[i, d]] - y_embed[[j, d]]).powi(2);
528 }
529
530 let mut x_past_dist = 0.0;
531 for d in 0..embedding_dim {
532 x_past_dist += (x_embed[[i, d]] - x_embed[[j, d]]).powi(2);
533 }
534
535 let bw2 = bandwidth * bandwidth;
536 let k_yfut = (-y_fut_dist / (2.0 * bw2)).exp();
537 let k_ypast = (-y_past_dist / (2.0 * bw2)).exp();
538 let k_xpast = (-x_past_dist / (2.0 * bw2)).exp();
539
540 joint_density += k_yfut * k_ypast * k_xpast;
542 yx_density += k_ypast * k_xpast;
544 y_density += k_yfut * k_ypast;
546 y_only_density += k_ypast;
548 }
549
550 if joint_density > 1e-15
553 && y_only_density > 1e-15
554 && y_density > 1e-15
555 && yx_density > 1e-15
556 {
557 te_sum += (joint_density * y_only_density / (y_density * yx_density)).ln();
558 }
559 }
560
561 Ok(te_sum / n as f64)
562}
563
/// k-nearest-neighbor estimator for Shannon TE with fixed k = 4, using the
/// Chebyshev (max-coordinate) metric in the joint space.
///
/// For each sample, the distance to the k-th neighbor in the full joint
/// space sets a radius `epsilon`; neighbor counts within that radius in the
/// marginal subspaces feed digamma terms, KSG-style. The result is clamped
/// at zero since the raw estimator can go slightly negative.
///
/// NOTE(review): this follows the KSG neighbor-counting scheme, but the
/// exact digamma combination used here should be checked against the
/// intended reference (KSG / Frenzel–Pompe) before relying on absolute
/// values rather than comparisons.
fn compute_te_knn(
    x: &Array1<f64>,
    y: &Array1<f64>,
    embedding_dim: usize,
    time_delay: usize,
) -> CausalityResult<f64> {
    let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;

    let n = x_embed.nrows();
    // k is capped by the number of available neighbors (n - 1).
    let k = 4.min(n.saturating_sub(1)); if k == 0 {
        return Err(TimeSeriesError::InvalidInput(
            "Too few embedded points for kNN estimation".to_string(),
        ));
    }

    let mut te_sum = 0.0;

    for i in 0..n {
        // Chebyshev distances from sample i to every other sample in the
        // full joint space (y future, y past, x past).
        let mut distances = Vec::with_capacity(n);
        for j in 0..n {
            if i == j {
                continue;
            }

            let mut max_dist = (y_future[i] - y_future[j]).abs();

            for d in 0..embedding_dim {
                max_dist = max_dist.max((y_embed[[i, d]] - y_embed[[j, d]]).abs());
                max_dist = max_dist.max((x_embed[[i, d]] - x_embed[[j, d]]).abs());
            }

            distances.push((max_dist, j));
        }

        // NaNs (if any) compare Equal, keeping the sort total.
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));

        if distances.len() < k {
            continue;
        }

        // Radius of the k-th nearest neighbor in the joint space.
        let epsilon = distances[k - 1].0;

        // Count neighbors within epsilon in each marginal subspace:
        // n_yz: (y future, y past), n_y: (y past), n_yx: (y past, x past).
        let mut n_yz = 0; let mut n_y = 0; let mut n_yx = 0; for j in 0..n {
            if i == j {
                continue;
            }

            let mut y_past_dist: f64 = 0.0;
            for d in 0..embedding_dim {
                y_past_dist = y_past_dist.max((y_embed[[i, d]] - y_embed[[j, d]]).abs());
            }

            let mut x_past_dist: f64 = 0.0;
            for d in 0..embedding_dim {
                x_past_dist = x_past_dist.max((x_embed[[i, d]] - x_embed[[j, d]]).abs());
            }

            let y_fut_dist = (y_future[i] - y_future[j]).abs();

            if y_fut_dist <= epsilon && y_past_dist <= epsilon {
                n_yz += 1;
            }
            if y_past_dist <= epsilon {
                n_y += 1;
            }
            if y_past_dist <= epsilon && x_past_dist <= epsilon {
                n_yx += 1;
            }
        }

        // Per-sample digamma contribution; +1 accounts for the center point.
        te_sum +=
            digamma(n_y as f64 + 1.0) - digamma(n_yz as f64 + 1.0) - digamma(n_yx as f64 + 1.0);
    }

    let te = digamma(k as f64) + te_sum / n as f64;

    // The raw estimator can dip below zero from sampling noise; clamp.
    Ok(te.max(0.0))
}
658
/// Rényi TE core (alpha != 1) via the binning plug-in estimator.
///
/// Counts joint and marginal state frequencies, forms the ratio of the two
/// conditional distributions p(y_fut | y_past, x_past) / p(y_fut | y_past),
/// and accumulates `p_joint * ratio^(alpha - 1)`; the final value is
/// `ln(sum) / (alpha - 1)`, clamped at zero.
fn compute_renyi_te(
    x: &Array1<f64>,
    y: &Array1<f64>,
    config: &RenyiTransferEntropyConfig,
) -> CausalityResult<f64> {
    let (x_embed, y_embed, y_future) =
        create_embeddings(x, y, config.embedding_dim, config.time_delay)?;

    let x_discrete = discretize_matrix(&x_embed, config.bins)?;
    let y_discrete = discretize_matrix(&y_embed, config.bins)?;
    let y_future_discrete = discretize_vector(&y_future, config.bins)?;

    let n_samples = x_discrete.nrows();
    let alpha = config.alpha;

    // Frequency tables keyed by discretized states:
    // (y_fut, y_past, x_past), (y_fut, y_past), (y_past, x_past), (y_past).
    let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut marginal_yfut_ypast: HashMap<(usize, Vec<usize>), usize> = HashMap::new();
    let mut cond_ypast_xpast: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut y_only_counts: HashMap<Vec<usize>, usize> = HashMap::new();

    for i in 0..n_samples {
        let x_state = x_discrete.row(i).to_vec();
        let y_state = y_discrete.row(i).to_vec();
        let y_fut = y_future_discrete[i];

        *joint_counts
            .entry((y_fut, y_state.clone(), x_state.clone()))
            .or_insert(0) += 1;
        *marginal_yfut_ypast
            .entry((y_fut, y_state.clone()))
            .or_insert(0) += 1;
        *cond_ypast_xpast
            .entry((y_state.clone(), x_state))
            .or_insert(0) += 1;
        *y_only_counts.entry(y_state).or_insert(0) += 1;
    }

    let n_f = n_samples as f64;
    let mut renyi_sum = 0.0;

    for ((y_fut, y_state, x_state), &count) in &joint_counts {
        let p_joint = count as f64 / n_f;
        if p_joint <= 0.0 {
            continue;
        }

        let p_yfut_ypast = marginal_yfut_ypast
            .get(&(*y_fut, y_state.clone()))
            .copied()
            .unwrap_or(0) as f64
            / n_f;
        let p_ypast_xpast = cond_ypast_xpast
            .get(&(y_state.clone(), x_state.clone()))
            .copied()
            .unwrap_or(0) as f64
            / n_f;
        let p_ypast = y_only_counts.get(y_state).copied().unwrap_or(0) as f64 / n_f;

        if p_yfut_ypast > 0.0 && p_ypast_xpast > 0.0 && p_ypast > 0.0 {
            // Conditional probabilities with and without the source's past.
            let cond_full = p_joint / p_ypast_xpast;
            let cond_restricted = p_yfut_ypast / p_ypast;

            if cond_full > 0.0 && cond_restricted > 0.0 {
                let ratio = cond_full / cond_restricted;
                renyi_sum += p_joint * ratio.powf(alpha - 1.0);
            }
        }
    }

    // Guard both the alpha -> 1 singularity and a non-positive sum (which
    // would make ln undefined); either degenerates to zero TE.
    let te = if (alpha - 1.0).abs() > 1e-10 && renyi_sum > 0.0 {
        renyi_sum.ln() / (alpha - 1.0)
    } else {
        0.0
    };

    Ok(te.max(0.0))
}
742
743fn compute_conditional_te(
745 x: &Array1<f64>,
746 y: &Array1<f64>,
747 z: &Array2<f64>,
748 config: &ConditionalTransferEntropyConfig,
749) -> CausalityResult<f64> {
750 let max_embed = config.embedding_dim.max(config.cond_embedding_dim);
751 let embed_length = max_embed * config.time_delay;
752 let n_points = x.len() - embed_length;
753
754 if n_points < 5 {
755 return Err(TimeSeriesError::InvalidInput(
756 "Too few embedded points for conditional TE".to_string(),
757 ));
758 }
759
760 let n_cond = z.ncols();
761
762 let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>, Vec<usize>), usize> =
764 HashMap::new();
765 let mut marginal_yz: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
766 let mut cond_yxz: HashMap<(Vec<usize>, Vec<usize>, Vec<usize>), usize> = HashMap::new();
767 let mut yz_only: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
768
769 for i in 0..n_points {
770 let row_idx = embed_length + i;
771 let y_fut = discretize_single(y[row_idx], y, config.bins);
772
773 let mut y_state = Vec::with_capacity(config.embedding_dim);
774 let mut x_state = Vec::with_capacity(config.embedding_dim);
775 let mut z_state = Vec::with_capacity(config.cond_embedding_dim * n_cond);
776
777 for d in 0..config.embedding_dim {
778 let idx = i + d * config.time_delay;
779 y_state.push(discretize_single(y[idx], y, config.bins));
780 x_state.push(discretize_single(x[idx], x, config.bins));
781 }
782
783 for c in 0..n_cond {
784 let z_col: Array1<f64> = z.column(c).to_owned();
785 for d in 0..config.cond_embedding_dim {
786 let idx = i + d * config.time_delay;
787 z_state.push(discretize_single(z[[idx, c]], &z_col, config.bins));
788 }
789 }
790
791 *joint_counts
792 .entry((y_fut, y_state.clone(), x_state.clone(), z_state.clone()))
793 .or_insert(0) += 1;
794 *marginal_yz
795 .entry((y_fut, y_state.clone(), z_state.clone()))
796 .or_insert(0) += 1;
797 *cond_yxz
798 .entry((y_state.clone(), x_state, z_state.clone()))
799 .or_insert(0) += 1;
800 *yz_only.entry((y_state, z_state)).or_insert(0) += 1;
801 }
802
803 let n_f = n_points as f64;
804 let mut te = 0.0;
805
806 for ((y_fut, y_state, x_state, z_state), &count) in &joint_counts {
807 let p_joint = count as f64 / n_f;
808 if p_joint <= 0.0 {
809 continue;
810 }
811
812 let p_yz = marginal_yz
813 .get(&(*y_fut, y_state.clone(), z_state.clone()))
814 .copied()
815 .unwrap_or(0) as f64
816 / n_f;
817 let p_yxz = cond_yxz
818 .get(&(y_state.clone(), x_state.clone(), z_state.clone()))
819 .copied()
820 .unwrap_or(0) as f64
821 / n_f;
822 let p_yz_only = yz_only
823 .get(&(y_state.clone(), z_state.clone()))
824 .copied()
825 .unwrap_or(0) as f64
826 / n_f;
827
828 if p_yz > 0.0 && p_yxz > 0.0 && p_yz_only > 0.0 {
829 let ratio = (p_joint * p_yz_only) / (p_yz * p_yxz);
830 if ratio > 0.0 {
831 te += p_joint * ratio.ln();
832 }
833 }
834 }
835
836 Ok(te)
837}
838
839fn create_embeddings(
842 x: &Array1<f64>,
843 y: &Array1<f64>,
844 embedding_dim: usize,
845 time_delay: usize,
846) -> CausalityResult<(Array2<f64>, Array2<f64>, Array1<f64>)> {
847 let embed_length = embedding_dim * time_delay;
848 let n_points = x.len() - embed_length;
849
850 let mut x_embed = Array2::zeros((n_points, embedding_dim));
851 let mut y_embed = Array2::zeros((n_points, embedding_dim));
852 let mut y_future = Array1::zeros(n_points);
853
854 for i in 0..n_points {
855 y_future[i] = y[i + embed_length];
856
857 for j in 0..embedding_dim {
858 let idx = i + j * time_delay;
859 x_embed[[i, j]] = x[idx];
860 y_embed[[i, j]] = y[idx];
861 }
862 }
863
864 Ok((x_embed, y_embed, y_future))
865}
866
867fn discretize_matrix(data: &Array2<f64>, bins: usize) -> CausalityResult<Array2<usize>> {
868 let mut discrete = Array2::zeros((data.nrows(), data.ncols()));
869
870 for col in 0..data.ncols() {
871 let column = data.column(col);
872 let min_val = column.iter().cloned().fold(f64::INFINITY, f64::min);
873 let max_val = column.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
874
875 if (max_val - min_val).abs() < f64::EPSILON {
876 continue;
877 }
878
879 let bin_width = (max_val - min_val) / bins as f64;
880
881 for row in 0..data.nrows() {
882 let val = data[[row, col]];
883 let bin = ((val - min_val) / bin_width).floor() as usize;
884 discrete[[row, col]] = bin.min(bins - 1);
885 }
886 }
887
888 Ok(discrete)
889}
890
891fn discretize_vector(data: &Array1<f64>, bins: usize) -> CausalityResult<Array1<usize>> {
892 let min_val = data.iter().cloned().fold(f64::INFINITY, f64::min);
893 let max_val = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
894
895 if (max_val - min_val).abs() < f64::EPSILON {
896 return Ok(Array1::zeros(data.len()));
897 }
898
899 let bin_width = (max_val - min_val) / bins as f64;
900 let discrete = data.mapv(|val| {
901 let bin = ((val - min_val) / bin_width).floor() as usize;
902 bin.min(bins - 1)
903 });
904
905 Ok(discrete)
906}
907
908fn discretize_single(value: f64, series: &Array1<f64>, bins: usize) -> usize {
909 let min_val = series.iter().cloned().fold(f64::INFINITY, f64::min);
910 let max_val = series.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
911
912 if (max_val - min_val).abs() < f64::EPSILON {
913 return 0;
914 }
915
916 let bin_width = (max_val - min_val) / bins as f64;
917 let bin = ((value - min_val) / bin_width).floor() as usize;
918 bin.min(bins - 1)
919}
920
/// Plug-in Shannon TE from already-discretized embeddings.
///
/// Accumulates, over every observed joint state,
/// `p(y_fut, y_past, x_past) * ln[ p(joint) p(y_past) /
/// (p(y_fut, y_past) p(y_past, x_past)) ]`.
fn compute_te_from_discrete(
    x_discrete: &Array2<usize>,
    y_discrete: &Array2<usize>,
    y_future_discrete: &Array1<usize>,
) -> CausalityResult<f64> {
    // Frequency tables: full joint, (y_fut, y_past), (y_past, x_past), (y_past).
    let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut marginal_y_counts: HashMap<(usize, Vec<usize>), usize> = HashMap::new();
    let mut conditional_counts: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut y_only_counts: HashMap<Vec<usize>, usize> = HashMap::new();
    let n_samples = x_discrete.nrows();

    for i in 0..n_samples {
        let x_state = x_discrete.row(i).to_vec();
        let y_state = y_discrete.row(i).to_vec();
        let y_fut = y_future_discrete[i];

        *joint_counts
            .entry((y_fut, y_state.clone(), x_state.clone()))
            .or_insert(0) += 1;
        *marginal_y_counts
            .entry((y_fut, y_state.clone()))
            .or_insert(0) += 1;
        *conditional_counts
            .entry((y_state.clone(), x_state))
            .or_insert(0) += 1;
        *y_only_counts.entry(y_state).or_insert(0) += 1;
    }

    let mut te = 0.0;

    for (joint_key, &joint_count) in &joint_counts {
        let prob_joint = joint_count as f64 / n_samples as f64;

        if prob_joint > 0.0 {
            let (y_fut, y_state, x_state) = joint_key;

            let marginal_count = marginal_y_counts
                .get(&(*y_fut, y_state.clone()))
                .copied()
                .unwrap_or(0);
            let cond_count = conditional_counts
                .get(&(y_state.clone(), x_state.clone()))
                .copied()
                .unwrap_or(0);
            let y_only_count = y_only_counts.get(y_state).copied().unwrap_or(0);

            // All marginals of an observed joint state are necessarily > 0;
            // the guards keep the arithmetic safe regardless.
            if marginal_count > 0 && cond_count > 0 && y_only_count > 0 {
                let prob_marginal = marginal_count as f64 / n_samples as f64;
                let prob_cond = cond_count as f64 / n_samples as f64;
                let prob_y_only = y_only_count as f64 / n_samples as f64;

                let ratio = (prob_joint * prob_y_only) / (prob_marginal * prob_cond);
                if ratio > 0.0 {
                    te += prob_joint * ratio.ln();
                }
            }
        }
    }

    Ok(te)
}
982
983fn bootstrap_te_significance(
986 x: &Array1<f64>,
987 y: &Array1<f64>,
988 config: &TransferEntropyConfig,
989 observed_te: f64,
990 n_bootstrap: usize,
991 seed: Option<u64>,
992) -> CausalityResult<(f64, f64)> {
993 let mut te_values = Vec::with_capacity(n_bootstrap);
994
995 for s in 0..n_bootstrap {
996 let mut x_shuffled = x.clone();
997 let seed_val = seed.map(|base| base.wrapping_add(s as u64));
998 fisher_yates_shuffle(&mut x_shuffled, seed_val);
999
1000 let surr_te = compute_te_binning(
1001 &x_shuffled,
1002 y,
1003 config.bins,
1004 config.embedding_dim,
1005 config.time_delay,
1006 )?;
1007 te_values.push(surr_te);
1008 }
1009
1010 let count = te_values.iter().filter(|&&te| te >= observed_te).count();
1011 let p_value = count as f64 / n_bootstrap as f64;
1012
1013 let mean_te = te_values.iter().sum::<f64>() / te_values.len() as f64;
1014 let var_te = te_values
1015 .iter()
1016 .map(|&v| (v - mean_te).powi(2))
1017 .sum::<f64>()
1018 / (te_values.len() as f64 - 1.0).max(1.0);
1019 let std_error = var_te.sqrt();
1020
1021 Ok((p_value, std_error))
1022}
1023
1024fn bootstrap_conditional_te_pvalue(
1025 x: &Array1<f64>,
1026 y: &Array1<f64>,
1027 z: &Array2<f64>,
1028 config: &ConditionalTransferEntropyConfig,
1029 observed_te: f64,
1030 n_bootstrap: usize,
1031) -> CausalityResult<f64> {
1032 let mut count = 0;
1033
1034 for s in 0..n_bootstrap {
1035 let mut x_shuffled = x.clone();
1036 fisher_yates_shuffle(
1037 &mut x_shuffled,
1038 config.seed.map(|b| b.wrapping_add(s as u64)),
1039 );
1040
1041 let surr_te = compute_conditional_te(&x_shuffled, y, z, config)?;
1042 if surr_te >= observed_te {
1043 count += 1;
1044 }
1045 }
1046
1047 Ok(count as f64 / n_bootstrap as f64)
1048}
1049
/// Digamma function ψ(x) for positive arguments.
///
/// Uses the upward recurrence ψ(x) = ψ(x + 1) − 1/x to push the argument to
/// at least 6, then evaluates the asymptotic expansion
/// ψ(t) ≈ ln t − 1/(2t) − 1/(12t²) + 1/(120t⁴) − 1/(252t⁶).
/// Non-positive inputs return negative infinity.
fn digamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::NEG_INFINITY;
    }

    // Accumulate the recurrence corrections while shifting the argument up.
    let mut acc = 0.0;
    let mut t = x;
    while t < 6.0 {
        acc -= t.recip();
        t += 1.0;
    }

    // Asymptotic series at the shifted argument.
    let t2 = t * t;
    acc + t.ln() - 0.5 / t - 1.0 / (12.0 * t2) + 1.0 / (120.0 * t2 * t2)
        - 1.0 / (252.0 * t2 * t2 * t2)
}
1074
#[cfg(test)]
mod tests {
    // Unit tests exercise each public entry point on short synthetic
    // sine/cosine series. They assert well-formedness of the outputs
    // (non-negativity, finiteness, p-value range, echoed configuration)
    // rather than exact TE values, which depend on the discretization.
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Binning estimator on a lagged pair; TE must be non-negative and the
    // result must echo the configuration it was computed with.
    #[test]
    fn test_shannon_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: Some(42),
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("Shannon TE failed");

        assert!(result.transfer_entropy >= 0.0);
        assert_eq!(result.bins, 5);
        assert_eq!(result.embedding_dim, 2);
    }

    // With bootstrapping enabled, a p-value must be present and lie in [0, 1].
    #[test]
    fn test_shannon_te_with_bootstrap() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: Some(20),
            estimator: TransferEntropyEstimator::Binning,
            seed: Some(42),
        };

        let result =
            shannon_transfer_entropy(&x, &y, &config).expect("Shannon TE with bootstrap failed");

        assert!(result.transfer_entropy >= 0.0);
        assert!(result.p_value.is_some());
        let p = result.p_value.expect("Should have p-value");
        assert!(p >= 0.0 && p <= 1.0);
    }

    // The KDE estimator can return small negative values, so only finiteness
    // is asserted here.
    #[test]
    fn test_kde_estimator() {
        let n = 40;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.2).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 2.0) * 0.2).cos()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::KDE,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("KDE TE failed");

        assert!(result.transfer_entropy.is_finite());
    }

    // The kNN estimator clamps its output at zero, so non-negativity holds.
    #[test]
    fn test_knn_estimator() {
        let n = 40;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.2).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 2.0) * 0.2).cos()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::KNN,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("kNN TE failed");

        assert!(result.transfer_entropy >= 0.0);
    }

    // Rényi TE of order 2 on the same lagged pair; clamped non-negative.
    #[test]
    fn test_renyi_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = RenyiTransferEntropyConfig {
            alpha: 2.0,
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: Some(42),
        };

        let result = renyi_transfer_entropy(&x, &y, &config).expect("Renyi TE failed");

        assert!(result.transfer_entropy >= 0.0);
    }

    // alpha = 1 triggers the Shannon fallback path, so both entries should
    // agree (identical estimators, loose tolerance guards float noise).
    #[test]
    fn test_renyi_alpha_one_equals_shannon() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let shannon_config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: None,
        };

        let renyi_config = RenyiTransferEntropyConfig {
            alpha: 1.0, bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: None,
        };

        let shannon_result =
            shannon_transfer_entropy(&x, &y, &shannon_config).expect("Shannon failed");
        let renyi_result = renyi_transfer_entropy(&x, &y, &renyi_config).expect("Renyi failed");

        assert!(
            (shannon_result.transfer_entropy - renyi_result.transfer_entropy).abs() < 0.1,
            "Shannon ({}) and Renyi(alpha=1) ({}) should be close",
            shannon_result.transfer_entropy,
            renyi_result.transfer_entropy
        );
    }

    // Conditional TE with a single conditioning column; only finiteness is
    // asserted since the plug-in sum is not clamped.
    #[test]
    fn test_conditional_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());
        let z = Array2::from_shape_vec((n, 1), (0..n).map(|i| (i as f64 * 0.05).cos()).collect())
            .expect("Shape creation failed");

        let config = ConditionalTransferEntropyConfig {
            bins: 4,
            embedding_dim: 2,
            cond_embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: None,
        };

        let result =
            conditional_transfer_entropy(&x, &y, &z, &config).expect("Conditional TE failed");

        assert!(result.transfer_entropy.is_finite());
    }

    // Effective TE: sanity-checks every reported surrogate statistic. Note
    // effective_te itself may legitimately be negative, hence only finiteness.
    #[test]
    fn test_effective_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = EffectiveTransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            n_surrogates: 20,
            seed: Some(42),
        };

        let result = effective_transfer_entropy(&x, &y, &config).expect("Effective TE failed");

        assert!(result.raw_te >= 0.0);
        assert!(result.surrogate_mean >= 0.0);
        assert!(result.surrogate_std >= 0.0);
        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
        assert_eq!(result.n_surrogates, 20);
        assert!(result.effective_te.is_finite());
    }

    // Length mismatch must be rejected before any estimation runs.
    #[test]
    fn test_te_mismatched_lengths() {
        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let y = Array1::from_vec(vec![1.0, 2.0]);

        let config = TransferEntropyConfig::default();
        let result = shannon_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // Series shorter than the embedding window must be rejected.
    #[test]
    fn test_te_series_too_short() {
        let x = Array1::from_vec(vec![1.0, 2.0]);
        let y = Array1::from_vec(vec![1.0, 2.0]);

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 3,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // Non-positive Rényi order is invalid input.
    #[test]
    fn test_renyi_invalid_alpha() {
        let x = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let y = Array1::from_vec(vec![2.0, 3.0, 4.0, 5.0, 6.0]);

        let config = RenyiTransferEntropyConfig {
            alpha: -1.0,
            ..Default::default()
        };

        let result = renyi_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // psi(1) = -gamma and psi(2) = 1 - gamma (Euler–Mascheroni constant).
    #[test]
    fn test_digamma_function() {
        let d1 = digamma(1.0);
        assert!((d1 - (-0.5772156649)).abs() < 0.01);

        let d2 = digamma(2.0);
        assert!((d2 - (1.0 - 0.5772156649)).abs() < 0.01);
    }
}