Skip to main content

scirs2_series/causality/
transfer_entropy.rs

1//! Transfer entropy estimation for time series
2//!
3//! This module provides comprehensive transfer entropy analysis:
4//! - **Shannon transfer entropy**: Standard information-theoretic measure
5//! - **Renyi transfer entropy**: Generalized with order parameter alpha
6//! - **Conditional transfer entropy**: Controlling for confounders
7//! - **Effective transfer entropy**: Surrogate-corrected for bias removal
8//! - **Estimators**: Binning-based, KDE-based, and kNN-based
9
10use crate::error::TimeSeriesError;
11use scirs2_core::ndarray::{Array1, Array2};
12use scirs2_core::validation::checkarray_finite;
13use std::collections::HashMap;
14
15use super::{fisher_yates_shuffle, CausalityResult};
16
/// Result of a transfer entropy estimation.
///
/// Returned by the Shannon, Renyi, and conditional transfer entropy entry points.
#[derive(Debug, Clone)]
pub struct TransferEntropyResult {
    /// Estimated transfer entropy value (in nats)
    pub transfer_entropy: f64,
    /// P-value from the bootstrap significance test (`None` when testing was skipped)
    pub p_value: Option<f64>,
    /// Number of bins used for histogram-based entropy calculation
    pub bins: usize,
    /// Embedding dimension used (number of past values per series)
    pub embedding_dim: usize,
    /// Time delay between successive embedded samples
    pub time_delay: usize,
    /// Estimator variant that produced this result
    pub estimator: TransferEntropyEstimator,
    /// Standard error estimate from the bootstrap (`None` when unavailable)
    pub std_error: Option<f64>,
}
35
/// Configuration for Shannon transfer entropy calculation
#[derive(Debug, Clone)]
pub struct TransferEntropyConfig {
    /// Number of bins for histogram-based estimation (ignored by the kNN estimator)
    pub bins: usize,
    /// Embedding dimension (number of past values to consider)
    pub embedding_dim: usize,
    /// Time delay between successive samples in the embedding
    pub time_delay: usize,
    /// Number of bootstrap samples for significance testing (`None` = skip the test)
    pub bootstrap_samples: Option<usize>,
    /// Estimator to use (binning, KDE, or kNN)
    pub estimator: TransferEntropyEstimator,
    /// Random seed for reproducible bootstrap/surrogate generation
    pub seed: Option<u64>,
}
52
53impl Default for TransferEntropyConfig {
54    fn default() -> Self {
55        Self {
56            bins: 10,
57            embedding_dim: 3,
58            time_delay: 1,
59            bootstrap_samples: Some(100),
60            estimator: TransferEntropyEstimator::Binning,
61            seed: None,
62        }
63    }
64}
65
/// Transfer entropy estimator type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferEntropyEstimator {
    /// Histogram/binning based plug-in estimator
    Binning,
    /// Kernel density estimation based estimator (Gaussian kernels)
    KDE,
    /// k-nearest neighbor estimator (Kraskov-Stogbauer-Grassberger style)
    KNN,
}
76
/// Configuration for Renyi transfer entropy
#[derive(Debug, Clone)]
pub struct RenyiTransferEntropyConfig {
    /// Renyi order parameter (alpha). Must be positive; alpha = 1 reduces to Shannon
    pub alpha: f64,
    /// Number of bins for histogram estimation
    pub bins: usize,
    /// Embedding dimension (number of past values per series)
    pub embedding_dim: usize,
    /// Time delay between successive embedded samples
    pub time_delay: usize,
    /// Bootstrap samples for significance testing (`None` = skip)
    pub bootstrap_samples: Option<usize>,
    /// Random seed for reproducibility
    pub seed: Option<u64>,
}
93
94impl Default for RenyiTransferEntropyConfig {
95    fn default() -> Self {
96        Self {
97            alpha: 2.0,
98            bins: 10,
99            embedding_dim: 3,
100            time_delay: 1,
101            bootstrap_samples: Some(100),
102            seed: None,
103        }
104    }
105}
106
/// Configuration for conditional transfer entropy
#[derive(Debug, Clone)]
pub struct ConditionalTransferEntropyConfig {
    /// Number of bins for discretization
    pub bins: usize,
    /// Embedding dimension for the source and target series
    pub embedding_dim: usize,
    /// Embedding dimension for the conditioning variables
    pub cond_embedding_dim: usize,
    /// Time delay between successive embedded samples
    pub time_delay: usize,
    /// Bootstrap samples for significance testing (`None` = skip)
    pub bootstrap_samples: Option<usize>,
    /// Random seed for reproducibility
    pub seed: Option<u64>,
}
123
124impl Default for ConditionalTransferEntropyConfig {
125    fn default() -> Self {
126        Self {
127            bins: 8,
128            embedding_dim: 2,
129            cond_embedding_dim: 2,
130            time_delay: 1,
131            bootstrap_samples: Some(100),
132            seed: None,
133        }
134    }
135}
136
/// Configuration for effective transfer entropy (surrogate-corrected)
#[derive(Debug, Clone)]
pub struct EffectiveTransferEntropyConfig {
    /// Number of bins for discretization
    pub bins: usize,
    /// Embedding dimension (number of past values per series)
    pub embedding_dim: usize,
    /// Time delay between successive embedded samples
    pub time_delay: usize,
    /// Number of shuffled-source surrogates used for bias estimation
    pub n_surrogates: usize,
    /// Random seed (each surrogate derives its own seed from this base)
    pub seed: Option<u64>,
}
151
152impl Default for EffectiveTransferEntropyConfig {
153    fn default() -> Self {
154        Self {
155            bins: 10,
156            embedding_dim: 3,
157            time_delay: 1,
158            n_surrogates: 100,
159            seed: None,
160        }
161    }
162}
163
/// Result of effective transfer entropy
#[derive(Debug, Clone)]
pub struct EffectiveTransferEntropyResult {
    /// Effective (bias-corrected) transfer entropy: `raw_te - surrogate_mean`
    pub effective_te: f64,
    /// Raw (uncorrected) transfer entropy of the original series
    pub raw_te: f64,
    /// Mean transfer entropy over the shuffled-source surrogates (bias estimate)
    pub surrogate_mean: f64,
    /// Sample standard deviation of the surrogate transfer entropies
    pub surrogate_std: f64,
    /// Z-score of the raw TE relative to the surrogate distribution
    pub z_score: f64,
    /// One-sided empirical p-value from the surrogate distribution
    pub p_value: f64,
    /// Number of surrogates used
    pub n_surrogates: usize,
}
182
183/// Compute Shannon transfer entropy from source x to target y
184///
185/// Transfer entropy TE(X -> Y) measures the reduction in uncertainty about
186/// the future of Y when past values of X are known, beyond what is already
187/// known from past values of Y alone.
188///
189/// TE(X -> Y) = H(Y_future | Y_past) - H(Y_future | Y_past, X_past)
190///
191/// # Arguments
192///
193/// * `x` - Source time series
194/// * `y` - Target time series
195/// * `config` - Configuration
196///
197/// # Returns
198///
199/// `TransferEntropyResult` with the transfer entropy value and statistics
200pub fn shannon_transfer_entropy(
201    x: &Array1<f64>,
202    y: &Array1<f64>,
203    config: &TransferEntropyConfig,
204) -> CausalityResult<TransferEntropyResult> {
205    checkarray_finite(x, "x")?;
206    checkarray_finite(y, "y")?;
207
208    if x.len() != y.len() {
209        return Err(TimeSeriesError::InvalidInput(
210            "Time series must have the same length".to_string(),
211        ));
212    }
213
214    let required_length = config.embedding_dim * config.time_delay + 1;
215    if x.len() < required_length {
216        return Err(TimeSeriesError::InvalidInput(format!(
217            "Time series too short (len={}) for embedding (need {})",
218            x.len(),
219            required_length
220        )));
221    }
222
223    let te = match config.estimator {
224        TransferEntropyEstimator::Binning => {
225            compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?
226        }
227        TransferEntropyEstimator::KDE => {
228            compute_te_kde(x, y, config.bins, config.embedding_dim, config.time_delay)?
229        }
230        TransferEntropyEstimator::KNN => {
231            compute_te_knn(x, y, config.embedding_dim, config.time_delay)?
232        }
233    };
234
235    // Bootstrap for p-value
236    let (p_value, std_error) = if let Some(n_bootstrap) = config.bootstrap_samples {
237        let (p, se) = bootstrap_te_significance(x, y, config, te, n_bootstrap, config.seed)?;
238        (Some(p), Some(se))
239    } else {
240        (None, None)
241    };
242
243    Ok(TransferEntropyResult {
244        transfer_entropy: te,
245        p_value,
246        bins: config.bins,
247        embedding_dim: config.embedding_dim,
248        time_delay: config.time_delay,
249        estimator: config.estimator,
250        std_error,
251    })
252}
253
/// Compute Renyi transfer entropy
///
/// Generalization of Shannon transfer entropy using Renyi divergence of order alpha.
/// When alpha -> 1, this converges to Shannon transfer entropy.
///
/// # Arguments
///
/// * `x` - Source time series
/// * `y` - Target time series
/// * `config` - Configuration with Renyi order alpha
///
/// # Errors
///
/// Returns `InvalidInput` when the series contain non-finite values, differ
/// in length, `alpha <= 0`, or the series are too short for the embedding.
pub fn renyi_transfer_entropy(
    x: &Array1<f64>,
    y: &Array1<f64>,
    config: &RenyiTransferEntropyConfig,
) -> CausalityResult<TransferEntropyResult> {
    checkarray_finite(x, "x")?;
    checkarray_finite(y, "y")?;

    if x.len() != y.len() {
        return Err(TimeSeriesError::InvalidInput(
            "Time series must have the same length".to_string(),
        ));
    }

    if config.alpha <= 0.0 {
        return Err(TimeSeriesError::InvalidInput(
            "Renyi order alpha must be positive".to_string(),
        ));
    }

    let required_length = config.embedding_dim * config.time_delay + 1;
    if x.len() < required_length {
        return Err(TimeSeriesError::InvalidInput(
            "Time series too short for the specified embedding parameters".to_string(),
        ));
    }

    // If alpha is close to 1, the Renyi TE degenerates to Shannon TE; use the
    // Shannon binning estimator directly to avoid dividing by (alpha - 1).
    let te = if (config.alpha - 1.0).abs() < 1e-6 {
        compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?
    } else {
        compute_renyi_te(x, y, config)?
    };

    // Significance is assessed with the Shannon binning estimator as the
    // bootstrap statistic; bootstrap_samples is set to None in the forwarded
    // config so the helper does not trigger a nested bootstrap.
    let p_value = if let Some(n_bootstrap) = config.bootstrap_samples {
        let shannon_config = TransferEntropyConfig {
            bins: config.bins,
            embedding_dim: config.embedding_dim,
            time_delay: config.time_delay,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: config.seed,
        };
        let (p, _) =
            bootstrap_te_significance(x, y, &shannon_config, te, n_bootstrap, config.seed)?;
        Some(p)
    } else {
        None
    };

    // The result reports the binning estimator, which backs both branches above.
    Ok(TransferEntropyResult {
        transfer_entropy: te,
        p_value,
        bins: config.bins,
        embedding_dim: config.embedding_dim,
        time_delay: config.time_delay,
        estimator: TransferEntropyEstimator::Binning,
        std_error: None,
    })
}
324
325/// Compute conditional transfer entropy: TE(X -> Y | Z)
326///
327/// Measures the information transfer from X to Y while controlling for
328/// the influence of conditioning variables Z.
329///
330/// # Arguments
331///
332/// * `x` - Source time series
333/// * `y` - Target time series
334/// * `z` - Conditioning variables (2D: rows = time, cols = variables)
335/// * `config` - Configuration
336pub fn conditional_transfer_entropy(
337    x: &Array1<f64>,
338    y: &Array1<f64>,
339    z: &Array2<f64>,
340    config: &ConditionalTransferEntropyConfig,
341) -> CausalityResult<TransferEntropyResult> {
342    checkarray_finite(x, "x")?;
343    checkarray_finite(y, "y")?;
344
345    if x.len() != y.len() || x.len() != z.nrows() {
346        return Err(TimeSeriesError::InvalidInput(
347            "All time series must have the same length".to_string(),
348        ));
349    }
350
351    let max_embed = config.embedding_dim.max(config.cond_embedding_dim);
352    let required_length = max_embed * config.time_delay + 1;
353    if x.len() < required_length {
354        return Err(TimeSeriesError::InvalidInput(
355            "Time series too short for the specified embedding parameters".to_string(),
356        ));
357    }
358
359    let te = compute_conditional_te(x, y, z, config)?;
360
361    let p_value = if let Some(n_bootstrap) = config.bootstrap_samples {
362        let p = bootstrap_conditional_te_pvalue(x, y, z, config, te, n_bootstrap)?;
363        Some(p)
364    } else {
365        None
366    };
367
368    Ok(TransferEntropyResult {
369        transfer_entropy: te,
370        p_value,
371        bins: config.bins,
372        embedding_dim: config.embedding_dim,
373        time_delay: config.time_delay,
374        estimator: TransferEntropyEstimator::Binning,
375        std_error: None,
376    })
377}
378
379/// Compute effective transfer entropy with surrogate correction
380///
381/// Subtracts the average transfer entropy from surrogates (shuffled source)
382/// to remove estimation bias, yielding a more accurate TE estimate.
383///
384/// TE_effective = TE_raw - mean(TE_surrogates)
385///
386/// # Arguments
387///
388/// * `x` - Source time series
389/// * `y` - Target time series
390/// * `config` - Configuration with surrogate parameters
391pub fn effective_transfer_entropy(
392    x: &Array1<f64>,
393    y: &Array1<f64>,
394    config: &EffectiveTransferEntropyConfig,
395) -> CausalityResult<EffectiveTransferEntropyResult> {
396    checkarray_finite(x, "x")?;
397    checkarray_finite(y, "y")?;
398
399    if x.len() != y.len() {
400        return Err(TimeSeriesError::InvalidInput(
401            "Time series must have the same length".to_string(),
402        ));
403    }
404
405    let required_length = config.embedding_dim * config.time_delay + 1;
406    if x.len() < required_length {
407        return Err(TimeSeriesError::InvalidInput(
408            "Time series too short for the specified embedding parameters".to_string(),
409        ));
410    }
411
412    // Compute raw transfer entropy
413    let raw_te = compute_te_binning(x, y, config.bins, config.embedding_dim, config.time_delay)?;
414
415    // Compute surrogate distribution
416    let mut surrogate_tes = Vec::with_capacity(config.n_surrogates);
417    for s in 0..config.n_surrogates {
418        let mut x_shuffled = x.clone();
419        let seed_val = config.seed.map(|base| base.wrapping_add(s as u64));
420        fisher_yates_shuffle(&mut x_shuffled, seed_val);
421
422        let surr_te = compute_te_binning(
423            &x_shuffled,
424            y,
425            config.bins,
426            config.embedding_dim,
427            config.time_delay,
428        )?;
429        surrogate_tes.push(surr_te);
430    }
431
432    let n_surr = surrogate_tes.len() as f64;
433    let surrogate_mean = surrogate_tes.iter().sum::<f64>() / n_surr;
434    let surrogate_var = surrogate_tes
435        .iter()
436        .map(|&v| (v - surrogate_mean).powi(2))
437        .sum::<f64>()
438        / (n_surr - 1.0).max(1.0);
439    let surrogate_std = surrogate_var.sqrt();
440
441    let effective_te = raw_te - surrogate_mean;
442
443    let z_score = if surrogate_std > 1e-15 {
444        (raw_te - surrogate_mean) / surrogate_std
445    } else {
446        0.0
447    };
448
449    let count_above = surrogate_tes.iter().filter(|&&v| v >= raw_te).count();
450    let p_value = count_above as f64 / config.n_surrogates as f64;
451
452    Ok(EffectiveTransferEntropyResult {
453        effective_te,
454        raw_te,
455        surrogate_mean,
456        surrogate_std,
457        z_score,
458        p_value,
459        n_surrogates: config.n_surrogates,
460    })
461}
462
463// ---- Internal estimator implementations ----
464
465/// Binning-based transfer entropy estimation
466fn compute_te_binning(
467    x: &Array1<f64>,
468    y: &Array1<f64>,
469    bins: usize,
470    embedding_dim: usize,
471    time_delay: usize,
472) -> CausalityResult<f64> {
473    let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;
474
475    let x_discrete = discretize_matrix(&x_embed, bins)?;
476    let y_discrete = discretize_matrix(&y_embed, bins)?;
477    let y_future_discrete = discretize_vector(&y_future, bins)?;
478
479    compute_te_from_discrete(&x_discrete, &y_discrete, &y_future_discrete)
480}
481
482/// KDE-based transfer entropy estimation
483fn compute_te_kde(
484    x: &Array1<f64>,
485    y: &Array1<f64>,
486    bins: usize,
487    embedding_dim: usize,
488    time_delay: usize,
489) -> CausalityResult<f64> {
490    let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;
491
492    let n = x_embed.nrows();
493    if n < 5 {
494        return Err(TimeSeriesError::InvalidInput(
495            "Too few embedded points for KDE estimation".to_string(),
496        ));
497    }
498
499    // Use Silverman's rule for bandwidth
500    let total_dim = (2 * embedding_dim + 1) as f64;
501    let bandwidth = (4.0 / ((total_dim + 2.0) * n as f64)).powf(1.0 / (total_dim + 4.0));
502
503    // Estimate TE via KDE ratio
504    // TE = E[ ln( p(y_t | y_past, x_past) / p(y_t | y_past) ) ]
505    let mut te_sum = 0.0;
506
507    for i in 0..n {
508        // Joint density p(y_t, y_past, x_past) and marginal p(y_past, x_past)
509        let mut log_joint_cond = 0.0;
510        let mut log_marginal_cond = 0.0;
511
512        let mut joint_density = 0.0;
513        let mut yx_density = 0.0;
514        let mut y_density = 0.0;
515        let mut y_only_density = 0.0;
516
517        for j in 0..n {
518            if i == j {
519                continue;
520            }
521
522            // Compute kernel distances
523            let y_fut_dist = (y_future[i] - y_future[j]).powi(2);
524
525            let mut y_past_dist = 0.0;
526            for d in 0..embedding_dim {
527                y_past_dist += (y_embed[[i, d]] - y_embed[[j, d]]).powi(2);
528            }
529
530            let mut x_past_dist = 0.0;
531            for d in 0..embedding_dim {
532                x_past_dist += (x_embed[[i, d]] - x_embed[[j, d]]).powi(2);
533            }
534
535            let bw2 = bandwidth * bandwidth;
536            let k_yfut = (-y_fut_dist / (2.0 * bw2)).exp();
537            let k_ypast = (-y_past_dist / (2.0 * bw2)).exp();
538            let k_xpast = (-x_past_dist / (2.0 * bw2)).exp();
539
540            // p(y_future, y_past, x_past)
541            joint_density += k_yfut * k_ypast * k_xpast;
542            // p(y_past, x_past)
543            yx_density += k_ypast * k_xpast;
544            // p(y_future, y_past)
545            y_density += k_yfut * k_ypast;
546            // p(y_past)
547            y_only_density += k_ypast;
548        }
549
550        // TE contribution: ln(p(y_t|y_past,x_past) / p(y_t|y_past))
551        // = ln(p(y_t,y_past,x_past) * p(y_past)) - ln(p(y_t,y_past) * p(y_past,x_past))
552        if joint_density > 1e-15
553            && y_only_density > 1e-15
554            && y_density > 1e-15
555            && yx_density > 1e-15
556        {
557            te_sum += (joint_density * y_only_density / (y_density * yx_density)).ln();
558        }
559    }
560
561    Ok(te_sum / n as f64)
562}
563
/// k-nearest neighbor based transfer entropy estimation (KSG-style)
///
/// For each embedded point, the k-th nearest neighbor distance is found in
/// the full joint space (y_future, y_past, x_past) under the Chebyshev
/// (max-coordinate) metric; neighbors within that radius are then counted
/// in the marginal subspaces and combined through digamma terms:
///
///   TE ~= psi(k) + < psi(n_y + 1) - psi(n_yz + 1) - psi(n_yx + 1) >
///
/// Negative estimates (possible for finite samples) are clamped to 0.
fn compute_te_knn(
    x: &Array1<f64>,
    y: &Array1<f64>,
    embedding_dim: usize,
    time_delay: usize,
) -> CausalityResult<f64> {
    let (x_embed, y_embed, y_future) = create_embeddings(x, y, embedding_dim, time_delay)?;

    let n = x_embed.nrows();
    // Fixed small k (standard KSG choice), capped by the available points.
    let k = 4.min(n.saturating_sub(1)); // k nearest neighbors

    if k == 0 {
        return Err(TimeSeriesError::InvalidInput(
            "Too few embedded points for kNN estimation".to_string(),
        ));
    }

    // For each point, find the k-th nearest neighbor distance in the full joint space.
    // Then count neighbors within that distance in each marginal subspace.
    // TE = digamma(k) - <digamma(n_yz+1) - digamma(n_y+1) + digamma(n_yx+1)>

    let mut te_sum = 0.0;

    for i in 0..n {
        // Chebyshev distances from point i to every other point in the full
        // joint space (y_future, y_past, x_past).
        let mut distances = Vec::with_capacity(n);
        for j in 0..n {
            if i == j {
                continue;
            }

            let mut max_dist = (y_future[i] - y_future[j]).abs();

            for d in 0..embedding_dim {
                max_dist = max_dist.max((y_embed[[i, d]] - y_embed[[j, d]]).abs());
                max_dist = max_dist.max((x_embed[[i, d]] - x_embed[[j, d]]).abs());
            }

            distances.push((max_dist, j));
        }

        // NaNs are excluded upstream by finiteness checks, so the comparison
        // fallback to Equal should not reorder real data.
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));

        if distances.len() < k {
            continue;
        }

        // Radius of the k-th nearest neighbor in the joint space.
        let epsilon = distances[k - 1].0;

        // Count neighbors within epsilon in each subspace
        let mut n_yz = 0; // (y_future, y_past) subspace
        let mut n_y = 0; // (y_past) subspace
        let mut n_yx = 0; // (y_past, x_past) subspace

        for j in 0..n {
            if i == j {
                continue;
            }

            // y_past distance (Chebyshev over the embedding coordinates)
            let mut y_past_dist: f64 = 0.0;
            for d in 0..embedding_dim {
                y_past_dist = y_past_dist.max((y_embed[[i, d]] - y_embed[[j, d]]).abs());
            }

            // x_past distance
            let mut x_past_dist: f64 = 0.0;
            for d in 0..embedding_dim {
                x_past_dist = x_past_dist.max((x_embed[[i, d]] - x_embed[[j, d]]).abs());
            }

            let y_fut_dist = (y_future[i] - y_future[j]).abs();

            if y_fut_dist <= epsilon && y_past_dist <= epsilon {
                n_yz += 1;
            }
            if y_past_dist <= epsilon {
                n_y += 1;
            }
            if y_past_dist <= epsilon && x_past_dist <= epsilon {
                n_yx += 1;
            }
        }

        // Digamma contributions
        te_sum +=
            digamma(n_y as f64 + 1.0) - digamma(n_yz as f64 + 1.0) - digamma(n_yx as f64 + 1.0);
    }

    let te = digamma(k as f64) + te_sum / n as f64;

    // Finite-sample estimates can dip below zero; clamp to the valid range.
    Ok(te.max(0.0))
}
658
/// Renyi transfer entropy computation
///
/// Discrete plug-in estimator of the order-`alpha` Renyi transfer entropy:
///
///   TE_alpha = 1/(alpha-1) * ln( sum p(y_fut, y_past, x_past)
///              * [ p(y_fut | y_past, x_past) / p(y_fut | y_past) ]^(alpha-1) )
///
/// States are obtained by delay-embedding and equal-width binning; the
/// result is clamped to be non-negative.
fn compute_renyi_te(
    x: &Array1<f64>,
    y: &Array1<f64>,
    config: &RenyiTransferEntropyConfig,
) -> CausalityResult<f64> {
    let (x_embed, y_embed, y_future) =
        create_embeddings(x, y, config.embedding_dim, config.time_delay)?;

    let x_discrete = discretize_matrix(&x_embed, config.bins)?;
    let y_discrete = discretize_matrix(&y_embed, config.bins)?;
    let y_future_discrete = discretize_vector(&y_future, config.bins)?;

    let n_samples = x_discrete.nrows();
    let alpha = config.alpha;

    // Count tables over discrete states: full joint (y_fut, y_past, x_past),
    // marginal (y_fut, y_past), marginal (y_past, x_past), and y_past alone.
    let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut marginal_yfut_ypast: HashMap<(usize, Vec<usize>), usize> = HashMap::new();
    let mut cond_ypast_xpast: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
    let mut y_only_counts: HashMap<Vec<usize>, usize> = HashMap::new();

    for i in 0..n_samples {
        let x_state = x_discrete.row(i).to_vec();
        let y_state = y_discrete.row(i).to_vec();
        let y_fut = y_future_discrete[i];

        *joint_counts
            .entry((y_fut, y_state.clone(), x_state.clone()))
            .or_insert(0) += 1;
        *marginal_yfut_ypast
            .entry((y_fut, y_state.clone()))
            .or_insert(0) += 1;
        *cond_ypast_xpast
            .entry((y_state.clone(), x_state))
            .or_insert(0) += 1;
        *y_only_counts.entry(y_state).or_insert(0) += 1;
    }

    // Renyi TE = 1/(alpha-1) * ln( sum_states p(y_fut, y_past, x_past) *
    //            [p(y_fut|y_past,x_past) / p(y_fut|y_past)]^(alpha-1) )
    let n_f = n_samples as f64;
    let mut renyi_sum = 0.0;

    for ((y_fut, y_state, x_state), &count) in &joint_counts {
        let p_joint = count as f64 / n_f;
        if p_joint <= 0.0 {
            continue;
        }

        // Empirical marginal probabilities from the count tables.
        let p_yfut_ypast = marginal_yfut_ypast
            .get(&(*y_fut, y_state.clone()))
            .copied()
            .unwrap_or(0) as f64
            / n_f;
        let p_ypast_xpast = cond_ypast_xpast
            .get(&(y_state.clone(), x_state.clone()))
            .copied()
            .unwrap_or(0) as f64
            / n_f;
        let p_ypast = y_only_counts.get(y_state).copied().unwrap_or(0) as f64 / n_f;

        if p_yfut_ypast > 0.0 && p_ypast_xpast > 0.0 && p_ypast > 0.0 {
            // p(y_fut | y_past, x_past) = p_joint / p_ypast_xpast
            // p(y_fut | y_past) = p_yfut_ypast / p_ypast
            let cond_full = p_joint / p_ypast_xpast;
            let cond_restricted = p_yfut_ypast / p_ypast;

            if cond_full > 0.0 && cond_restricted > 0.0 {
                let ratio = cond_full / cond_restricted;
                renyi_sum += p_joint * ratio.powf(alpha - 1.0);
            }
        }
    }

    // Guard the degenerate cases: alpha ~ 1 (handled upstream by the Shannon
    // path) and a non-positive sum, where ln would be undefined.
    let te = if (alpha - 1.0).abs() > 1e-10 && renyi_sum > 0.0 {
        renyi_sum.ln() / (alpha - 1.0)
    } else {
        0.0
    };

    Ok(te.max(0.0))
}
742
743/// Conditional transfer entropy computation
744fn compute_conditional_te(
745    x: &Array1<f64>,
746    y: &Array1<f64>,
747    z: &Array2<f64>,
748    config: &ConditionalTransferEntropyConfig,
749) -> CausalityResult<f64> {
750    let max_embed = config.embedding_dim.max(config.cond_embedding_dim);
751    let embed_length = max_embed * config.time_delay;
752    let n_points = x.len() - embed_length;
753
754    if n_points < 5 {
755        return Err(TimeSeriesError::InvalidInput(
756            "Too few embedded points for conditional TE".to_string(),
757        ));
758    }
759
760    let n_cond = z.ncols();
761
762    // Build joint state representations
763    let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>, Vec<usize>), usize> =
764        HashMap::new();
765    let mut marginal_yz: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
766    let mut cond_yxz: HashMap<(Vec<usize>, Vec<usize>, Vec<usize>), usize> = HashMap::new();
767    let mut yz_only: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
768
769    for i in 0..n_points {
770        let row_idx = embed_length + i;
771        let y_fut = discretize_single(y[row_idx], y, config.bins);
772
773        let mut y_state = Vec::with_capacity(config.embedding_dim);
774        let mut x_state = Vec::with_capacity(config.embedding_dim);
775        let mut z_state = Vec::with_capacity(config.cond_embedding_dim * n_cond);
776
777        for d in 0..config.embedding_dim {
778            let idx = i + d * config.time_delay;
779            y_state.push(discretize_single(y[idx], y, config.bins));
780            x_state.push(discretize_single(x[idx], x, config.bins));
781        }
782
783        for c in 0..n_cond {
784            let z_col: Array1<f64> = z.column(c).to_owned();
785            for d in 0..config.cond_embedding_dim {
786                let idx = i + d * config.time_delay;
787                z_state.push(discretize_single(z[[idx, c]], &z_col, config.bins));
788            }
789        }
790
791        *joint_counts
792            .entry((y_fut, y_state.clone(), x_state.clone(), z_state.clone()))
793            .or_insert(0) += 1;
794        *marginal_yz
795            .entry((y_fut, y_state.clone(), z_state.clone()))
796            .or_insert(0) += 1;
797        *cond_yxz
798            .entry((y_state.clone(), x_state, z_state.clone()))
799            .or_insert(0) += 1;
800        *yz_only.entry((y_state, z_state)).or_insert(0) += 1;
801    }
802
803    let n_f = n_points as f64;
804    let mut te = 0.0;
805
806    for ((y_fut, y_state, x_state, z_state), &count) in &joint_counts {
807        let p_joint = count as f64 / n_f;
808        if p_joint <= 0.0 {
809            continue;
810        }
811
812        let p_yz = marginal_yz
813            .get(&(*y_fut, y_state.clone(), z_state.clone()))
814            .copied()
815            .unwrap_or(0) as f64
816            / n_f;
817        let p_yxz = cond_yxz
818            .get(&(y_state.clone(), x_state.clone(), z_state.clone()))
819            .copied()
820            .unwrap_or(0) as f64
821            / n_f;
822        let p_yz_only = yz_only
823            .get(&(y_state.clone(), z_state.clone()))
824            .copied()
825            .unwrap_or(0) as f64
826            / n_f;
827
828        if p_yz > 0.0 && p_yxz > 0.0 && p_yz_only > 0.0 {
829            let ratio = (p_joint * p_yz_only) / (p_yz * p_yxz);
830            if ratio > 0.0 {
831                te += p_joint * ratio.ln();
832            }
833        }
834    }
835
836    Ok(te)
837}
838
839// ---- Embedding and discretization helpers ----
840
841fn create_embeddings(
842    x: &Array1<f64>,
843    y: &Array1<f64>,
844    embedding_dim: usize,
845    time_delay: usize,
846) -> CausalityResult<(Array2<f64>, Array2<f64>, Array1<f64>)> {
847    let embed_length = embedding_dim * time_delay;
848    let n_points = x.len() - embed_length;
849
850    let mut x_embed = Array2::zeros((n_points, embedding_dim));
851    let mut y_embed = Array2::zeros((n_points, embedding_dim));
852    let mut y_future = Array1::zeros(n_points);
853
854    for i in 0..n_points {
855        y_future[i] = y[i + embed_length];
856
857        for j in 0..embedding_dim {
858            let idx = i + j * time_delay;
859            x_embed[[i, j]] = x[idx];
860            y_embed[[i, j]] = y[idx];
861        }
862    }
863
864    Ok((x_embed, y_embed, y_future))
865}
866
867fn discretize_matrix(data: &Array2<f64>, bins: usize) -> CausalityResult<Array2<usize>> {
868    let mut discrete = Array2::zeros((data.nrows(), data.ncols()));
869
870    for col in 0..data.ncols() {
871        let column = data.column(col);
872        let min_val = column.iter().cloned().fold(f64::INFINITY, f64::min);
873        let max_val = column.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
874
875        if (max_val - min_val).abs() < f64::EPSILON {
876            continue;
877        }
878
879        let bin_width = (max_val - min_val) / bins as f64;
880
881        for row in 0..data.nrows() {
882            let val = data[[row, col]];
883            let bin = ((val - min_val) / bin_width).floor() as usize;
884            discrete[[row, col]] = bin.min(bins - 1);
885        }
886    }
887
888    Ok(discrete)
889}
890
891fn discretize_vector(data: &Array1<f64>, bins: usize) -> CausalityResult<Array1<usize>> {
892    let min_val = data.iter().cloned().fold(f64::INFINITY, f64::min);
893    let max_val = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
894
895    if (max_val - min_val).abs() < f64::EPSILON {
896        return Ok(Array1::zeros(data.len()));
897    }
898
899    let bin_width = (max_val - min_val) / bins as f64;
900    let discrete = data.mapv(|val| {
901        let bin = ((val - min_val) / bin_width).floor() as usize;
902        bin.min(bins - 1)
903    });
904
905    Ok(discrete)
906}
907
908fn discretize_single(value: f64, series: &Array1<f64>, bins: usize) -> usize {
909    let min_val = series.iter().cloned().fold(f64::INFINITY, f64::min);
910    let max_val = series.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
911
912    if (max_val - min_val).abs() < f64::EPSILON {
913        return 0;
914    }
915
916    let bin_width = (max_val - min_val) / bins as f64;
917    let bin = ((value - min_val) / bin_width).floor() as usize;
918    bin.min(bins - 1)
919}
920
921fn compute_te_from_discrete(
922    x_discrete: &Array2<usize>,
923    y_discrete: &Array2<usize>,
924    y_future_discrete: &Array1<usize>,
925) -> CausalityResult<f64> {
926    let mut joint_counts: HashMap<(usize, Vec<usize>, Vec<usize>), usize> = HashMap::new();
927    let mut marginal_y_counts: HashMap<(usize, Vec<usize>), usize> = HashMap::new();
928    let mut conditional_counts: HashMap<(Vec<usize>, Vec<usize>), usize> = HashMap::new();
929    let mut y_only_counts: HashMap<Vec<usize>, usize> = HashMap::new();
930    let n_samples = x_discrete.nrows();
931
932    for i in 0..n_samples {
933        let x_state = x_discrete.row(i).to_vec();
934        let y_state = y_discrete.row(i).to_vec();
935        let y_fut = y_future_discrete[i];
936
937        *joint_counts
938            .entry((y_fut, y_state.clone(), x_state.clone()))
939            .or_insert(0) += 1;
940        *marginal_y_counts
941            .entry((y_fut, y_state.clone()))
942            .or_insert(0) += 1;
943        *conditional_counts
944            .entry((y_state.clone(), x_state))
945            .or_insert(0) += 1;
946        *y_only_counts.entry(y_state).or_insert(0) += 1;
947    }
948
949    let mut te = 0.0;
950
951    for (joint_key, &joint_count) in &joint_counts {
952        let prob_joint = joint_count as f64 / n_samples as f64;
953
954        if prob_joint > 0.0 {
955            let (y_fut, y_state, x_state) = joint_key;
956
957            let marginal_count = marginal_y_counts
958                .get(&(*y_fut, y_state.clone()))
959                .copied()
960                .unwrap_or(0);
961            let cond_count = conditional_counts
962                .get(&(y_state.clone(), x_state.clone()))
963                .copied()
964                .unwrap_or(0);
965            let y_only_count = y_only_counts.get(y_state).copied().unwrap_or(0);
966
967            if marginal_count > 0 && cond_count > 0 && y_only_count > 0 {
968                let prob_marginal = marginal_count as f64 / n_samples as f64;
969                let prob_cond = cond_count as f64 / n_samples as f64;
970                let prob_y_only = y_only_count as f64 / n_samples as f64;
971
972                let ratio = (prob_joint * prob_y_only) / (prob_marginal * prob_cond);
973                if ratio > 0.0 {
974                    te += prob_joint * ratio.ln();
975                }
976            }
977        }
978    }
979
980    Ok(te)
981}
982
983// ---- Bootstrap helpers ----
984
985fn bootstrap_te_significance(
986    x: &Array1<f64>,
987    y: &Array1<f64>,
988    config: &TransferEntropyConfig,
989    observed_te: f64,
990    n_bootstrap: usize,
991    seed: Option<u64>,
992) -> CausalityResult<(f64, f64)> {
993    let mut te_values = Vec::with_capacity(n_bootstrap);
994
995    for s in 0..n_bootstrap {
996        let mut x_shuffled = x.clone();
997        let seed_val = seed.map(|base| base.wrapping_add(s as u64));
998        fisher_yates_shuffle(&mut x_shuffled, seed_val);
999
1000        let surr_te = compute_te_binning(
1001            &x_shuffled,
1002            y,
1003            config.bins,
1004            config.embedding_dim,
1005            config.time_delay,
1006        )?;
1007        te_values.push(surr_te);
1008    }
1009
1010    let count = te_values.iter().filter(|&&te| te >= observed_te).count();
1011    let p_value = count as f64 / n_bootstrap as f64;
1012
1013    let mean_te = te_values.iter().sum::<f64>() / te_values.len() as f64;
1014    let var_te = te_values
1015        .iter()
1016        .map(|&v| (v - mean_te).powi(2))
1017        .sum::<f64>()
1018        / (te_values.len() as f64 - 1.0).max(1.0);
1019    let std_error = var_te.sqrt();
1020
1021    Ok((p_value, std_error))
1022}
1023
1024fn bootstrap_conditional_te_pvalue(
1025    x: &Array1<f64>,
1026    y: &Array1<f64>,
1027    z: &Array2<f64>,
1028    config: &ConditionalTransferEntropyConfig,
1029    observed_te: f64,
1030    n_bootstrap: usize,
1031) -> CausalityResult<f64> {
1032    let mut count = 0;
1033
1034    for s in 0..n_bootstrap {
1035        let mut x_shuffled = x.clone();
1036        fisher_yates_shuffle(
1037            &mut x_shuffled,
1038            config.seed.map(|b| b.wrapping_add(s as u64)),
1039        );
1040
1041        let surr_te = compute_conditional_te(&x_shuffled, y, z, config)?;
1042        if surr_te >= observed_te {
1043            count += 1;
1044        }
1045    }
1046
1047    Ok(count as f64 / n_bootstrap as f64)
1048}
1049
/// Digamma (psi) function via upward recurrence plus an asymptotic series.
///
/// The argument is shifted into the region `x >= 6` using the identity
/// `psi(x) = psi(x + 1) - 1/x`, after which the standard asymptotic
/// expansion `ln x - 1/(2x) - 1/(12x^2) + 1/(120x^4) - 1/(252x^6)` is
/// applied. Non-positive inputs return negative infinity (the function has
/// poles at non-positive integers; this approximation does not cover the
/// negative real axis).
fn digamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::NEG_INFINITY;
    }

    // Shift the argument upward until the asymptotic series is accurate,
    // accumulating the -1/x correction terms along the way.
    let mut acc = 0.0;
    let mut t = x;
    while t < 6.0 {
        acc -= 1.0 / t;
        t += 1.0;
    }

    // Asymptotic expansion in inverse even powers of the shifted argument.
    acc += t.ln() - 1.0 / (2.0 * t);
    let t2 = t * t;
    acc -= 1.0 / (12.0 * t2);
    acc += 1.0 / (120.0 * t2 * t2);
    acc -= 1.0 / (252.0 * t2 * t2 * t2);

    acc
}
1074
// Unit tests: smoke-test each estimator and config path on short synthetic
// series (lag-coupled sinusoids), plus error-path and digamma sanity checks.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Binning estimator on lag-coupled sinusoids: TE is non-negative and the
    // configured bins/embedding are echoed back in the result.
    #[test]
    fn test_shannon_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: Some(42),
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("Shannon TE failed");

        assert!(result.transfer_entropy >= 0.0);
        assert_eq!(result.bins, 5);
        assert_eq!(result.embedding_dim, 2);
    }

    // Enabling bootstrap_samples must produce a p-value in [0, 1].
    #[test]
    fn test_shannon_te_with_bootstrap() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: Some(20),
            estimator: TransferEntropyEstimator::Binning,
            seed: Some(42),
        };

        let result =
            shannon_transfer_entropy(&x, &y, &config).expect("Shannon TE with bootstrap failed");

        assert!(result.transfer_entropy >= 0.0);
        assert!(result.p_value.is_some());
        let p = result.p_value.expect("Should have p-value");
        assert!(p >= 0.0 && p <= 1.0);
    }

    // KDE estimator smoke test: only finiteness is asserted (KDE TE can be
    // slightly negative due to estimation bias).
    #[test]
    fn test_kde_estimator() {
        let n = 40;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.2).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 2.0) * 0.2).cos()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::KDE,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("KDE TE failed");

        assert!(result.transfer_entropy.is_finite());
    }

    // kNN estimator smoke test on the same synthetic pair.
    #[test]
    fn test_knn_estimator() {
        let n = 40;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.2).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 2.0) * 0.2).cos()).collect());

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::KNN,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config).expect("kNN TE failed");

        assert!(result.transfer_entropy >= 0.0);
    }

    // Renyi TE with alpha = 2 (collision entropy) returns a non-negative value.
    #[test]
    fn test_renyi_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = RenyiTransferEntropyConfig {
            alpha: 2.0,
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: Some(42),
        };

        let result = renyi_transfer_entropy(&x, &y, &config).expect("Renyi TE failed");

        assert!(result.transfer_entropy >= 0.0);
    }

    // Renyi TE must reduce to Shannon TE in the limit alpha -> 1.
    #[test]
    fn test_renyi_alpha_one_equals_shannon() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let shannon_config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: None,
        };

        let renyi_config = RenyiTransferEntropyConfig {
            alpha: 1.0, // Should converge to Shannon
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: None,
        };

        let shannon_result =
            shannon_transfer_entropy(&x, &y, &shannon_config).expect("Shannon failed");
        let renyi_result = renyi_transfer_entropy(&x, &y, &renyi_config).expect("Renyi failed");

        // Should be approximately equal
        assert!(
            (shannon_result.transfer_entropy - renyi_result.transfer_entropy).abs() < 0.1,
            "Shannon ({}) and Renyi(alpha=1) ({}) should be close",
            shannon_result.transfer_entropy,
            renyi_result.transfer_entropy
        );
    }

    // Conditional TE with one conditioning column; only finiteness is checked.
    #[test]
    fn test_conditional_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());
        let z = Array2::from_shape_vec((n, 1), (0..n).map(|i| (i as f64 * 0.05).cos()).collect())
            .expect("Shape creation failed");

        let config = ConditionalTransferEntropyConfig {
            bins: 4,
            embedding_dim: 2,
            cond_embedding_dim: 2,
            time_delay: 1,
            bootstrap_samples: None,
            seed: None,
        };

        let result =
            conditional_transfer_entropy(&x, &y, &z, &config).expect("Conditional TE failed");

        assert!(result.transfer_entropy.is_finite());
    }

    // Surrogate-corrected (effective) TE: all summary fields must be sane.
    #[test]
    fn test_effective_transfer_entropy() {
        let n = 50;
        let x = Array1::from_vec((0..n).map(|i| (i as f64 * 0.1).sin()).collect());
        let y = Array1::from_vec((0..n).map(|i| ((i as f64 + 1.0) * 0.1).sin()).collect());

        let config = EffectiveTransferEntropyConfig {
            bins: 5,
            embedding_dim: 2,
            time_delay: 1,
            n_surrogates: 20,
            seed: Some(42),
        };

        let result = effective_transfer_entropy(&x, &y, &config).expect("Effective TE failed");

        assert!(result.raw_te >= 0.0);
        assert!(result.surrogate_mean >= 0.0);
        assert!(result.surrogate_std >= 0.0);
        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
        assert_eq!(result.n_surrogates, 20);
        // Effective TE should be smaller than raw (bias removed)
        // or can be negative if there's no real signal
        assert!(result.effective_te.is_finite());
    }

    // Mismatched series lengths must be rejected with an error.
    #[test]
    fn test_te_mismatched_lengths() {
        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let y = Array1::from_vec(vec![1.0, 2.0]);

        let config = TransferEntropyConfig::default();
        let result = shannon_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // Series shorter than the embedding window must be rejected.
    #[test]
    fn test_te_series_too_short() {
        let x = Array1::from_vec(vec![1.0, 2.0]);
        let y = Array1::from_vec(vec![1.0, 2.0]);

        let config = TransferEntropyConfig {
            bins: 5,
            embedding_dim: 3,
            time_delay: 1,
            bootstrap_samples: None,
            estimator: TransferEntropyEstimator::Binning,
            seed: None,
        };

        let result = shannon_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // A negative Renyi order is invalid and must produce an error.
    #[test]
    fn test_renyi_invalid_alpha() {
        let x = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let y = Array1::from_vec(vec![2.0, 3.0, 4.0, 5.0, 6.0]);

        let config = RenyiTransferEntropyConfig {
            alpha: -1.0,
            ..Default::default()
        };

        let result = renyi_transfer_entropy(&x, &y, &config);
        assert!(result.is_err());
    }

    // Spot-check the digamma approximation against known values.
    #[test]
    fn test_digamma_function() {
        // digamma(1) = -gamma (Euler-Mascheroni constant)
        let d1 = digamma(1.0);
        assert!((d1 - (-0.5772156649)).abs() < 0.01);

        // digamma(2) = 1 - gamma
        let d2 = digamma(2.0);
        assert!((d2 - (1.0 - 0.5772156649)).abs() < 0.01);
    }
}