Skip to main content

cyanea_omics/
spatial.rs

1//! Spatial transcriptomics analysis — neighbor graphs, spatial autocorrelation,
2//! co-occurrence, and ligand-receptor interaction scoring.
3//!
4//! Build spatial neighbor graphs from spot coordinates (Delaunay triangulation or
5//! k-nearest neighbors), then compute spatial statistics such as Moran's I,
6//! Geary's C, feature co-occurrence, and ligand-receptor interaction scores with
7//! permutation-based significance testing.
8
9use cyanea_core::{CyaneaError, Result};
10
11// ---------------------------------------------------------------------------
12// Types
13// ---------------------------------------------------------------------------
14
15/// A 2-D point with an associated spot/cell index.
16#[derive(Debug, Clone)]
17#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
18pub struct SpatialPoint {
19    pub x: f64,
20    pub y: f64,
21    pub index: usize,
22}
23
24/// Spatial neighbor graph: for each node, a list of `(neighbor_index, distance)`.
25#[derive(Debug, Clone)]
26#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
27pub struct SpatialGraph {
28    pub n_nodes: usize,
29    /// `neighbors[i]` is a vec of `(neighbor_index, distance)`.
30    pub neighbors: Vec<Vec<(usize, f64)>>,
31}
32
33/// Result of Moran's I spatial autocorrelation test.
34#[derive(Debug, Clone)]
35#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
36pub struct SpatialAutocorrelation {
37    pub morans_i: f64,
38    pub expected_i: f64,
39    pub variance_i: f64,
40    pub z_score: f64,
41    pub p_value: f64,
42}
43
44/// Result of Geary's C spatial autocorrelation test.
45#[derive(Debug, Clone)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47pub struct GearysC {
48    pub c: f64,
49    pub expected_c: f64,
50    pub z_score: f64,
51    pub p_value: f64,
52}
53
54/// Co-occurrence result for a pair of features.
55#[derive(Debug, Clone)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57pub struct CooccurrenceResult {
58    pub feature_a: usize,
59    pub feature_b: usize,
60    pub observed: usize,
61    pub expected: f64,
62    pub log_odds: f64,
63    pub p_value: f64,
64}
65
66/// Ligand-receptor interaction score with permutation p-value.
67#[derive(Debug, Clone)]
68#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69pub struct LrInteraction {
70    pub ligand_name: String,
71    pub receptor_name: String,
72    pub interaction_score: f64,
73    pub p_value: f64,
74}
75
76// ---------------------------------------------------------------------------
77// Private helpers
78// ---------------------------------------------------------------------------
79
80/// Simple Xorshift64 PRNG for permutation tests.
81struct Xorshift64 {
82    state: u64,
83}
84
85impl Xorshift64 {
86    fn new(seed: u64) -> Self {
87        // Avoid zero state.
88        Self {
89            state: if seed == 0 { 0x5EED_DEAD_BEEF_CAFE } else { seed },
90        }
91    }
92
93    fn next_u64(&mut self) -> u64 {
94        let mut x = self.state;
95        x ^= x << 13;
96        x ^= x >> 7;
97        x ^= x << 17;
98        self.state = x;
99        x
100    }
101
102    #[allow(dead_code)]
103    fn next_f64(&mut self) -> f64 {
104        (self.next_u64() >> 11) as f64 / ((1u64 << 53) as f64)
105    }
106
107    fn shuffle<T>(&mut self, slice: &mut [T]) {
108        let n = slice.len();
109        for i in (1..n).rev() {
110            let j = (self.next_u64() as usize) % (i + 1);
111            slice.swap(i, j);
112        }
113    }
114}
115
116/// Approximate error function using Abramowitz & Stegun formula 7.1.26.
117fn erf_approx(x: f64) -> f64 {
118    let a1: f64 = 0.254829592;
119    let a2: f64 = -0.284496736;
120    let a3: f64 = 1.421413741;
121    let a4: f64 = -1.453152027;
122    let a5: f64 = 1.061405429;
123    let p: f64 = 0.3275911;
124
125    let sign = if x < 0.0 { -1.0 } else { 1.0 };
126    let x = x.abs();
127    let t = 1.0 / (1.0 + p * x);
128    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
129    sign * y
130}
131
132/// Approximate complementary error function.
133fn erfc_approx(x: f64) -> f64 {
134    1.0 - erf_approx(x)
135}
136
137/// Two-sided p-value from a z-score (normal approximation).
138fn p_from_z(z: f64) -> f64 {
139    erfc_approx(z.abs() / std::f64::consts::SQRT_2)
140}
141
142/// Euclidean distance between two points.
143fn euclidean(ax: f64, ay: f64, bx: f64, by: f64) -> f64 {
144    ((ax - bx).powi(2) + (ay - by).powi(2)).sqrt()
145}
146
147// ---------------------------------------------------------------------------
148// Delaunay triangulation (Bowyer-Watson)
149// ---------------------------------------------------------------------------
150
151/// A triangle represented by three point indices into the working point list.
152#[derive(Clone, Copy)]
153struct Triangle {
154    v: [usize; 3],
155}
156
157/// Circumcircle: center (cx, cy) and radius squared.
158fn circumcircle(
159    ax: f64,
160    ay: f64,
161    bx: f64,
162    by: f64,
163    cx: f64,
164    cy: f64,
165) -> (f64, f64, f64) {
166    let d = 2.0 * (ax * (by - cy) + bx * (cy - ay) + cx * (ay - by));
167    if d.abs() < 1e-20 {
168        // Degenerate — return huge radius.
169        return (0.0, 0.0, f64::MAX);
170    }
171    let ux =
172        ((ax * ax + ay * ay) * (by - cy) + (bx * bx + by * by) * (cy - ay) + (cx * cx + cy * cy) * (ay - by)) / d;
173    let uy =
174        ((ax * ax + ay * ay) * (cx - bx) + (bx * bx + by * by) * (ax - cx) + (cx * cx + cy * cy) * (bx - ax)) / d;
175    let r2 = (ax - ux).powi(2) + (ay - uy).powi(2);
176    (ux, uy, r2)
177}
178
179/// Check if point (px, py) lies inside the circumcircle of the triangle
180/// defined by the three coordinate pairs.
181fn in_circumcircle(
182    ax: f64,
183    ay: f64,
184    bx: f64,
185    by: f64,
186    cx: f64,
187    cy: f64,
188    px: f64,
189    py: f64,
190) -> bool {
191    let (ux, uy, r2) = circumcircle(ax, ay, bx, by, cx, cy);
192    let dist2 = (px - ux).powi(2) + (py - uy).powi(2);
193    dist2 < r2
194}
195
196/// Build a Delaunay-triangulation-based spatial neighbor graph (Bowyer-Watson).
197///
198/// Creates a super-triangle that encloses all points, then incrementally inserts
199/// each point.  Finally, triangles sharing vertices with the super-triangle are
200/// removed and the remaining edges form the neighbor graph.
201///
202/// # Errors
203///
204/// Returns an error if fewer than 3 points are provided.
205pub fn delaunay_neighbors(points: &[SpatialPoint]) -> Result<SpatialGraph> {
206    let n = points.len();
207    if n < 3 {
208        return Err(CyaneaError::InvalidInput(
209            "Delaunay triangulation requires at least 3 points".into(),
210        ));
211    }
212
213    // Working coordinate list: first `n` entries are the real points,
214    // entries n, n+1, n+2 are super-triangle vertices.
215    let mut xs: Vec<f64> = points.iter().map(|p| p.x).collect();
216    let mut ys: Vec<f64> = points.iter().map(|p| p.y).collect();
217
218    // Super-triangle vertices (very large).
219    xs.push(-1e10);
220    ys.push(-1e10);
221    xs.push(1e10);
222    ys.push(-1e10);
223    xs.push(0.0);
224    ys.push(1e10);
225
226    let st_a = n;
227    let st_b = n + 1;
228    let st_c = n + 2;
229
230    let mut triangles: Vec<Triangle> = vec![Triangle { v: [st_a, st_b, st_c] }];
231
232    for i in 0..n {
233        let px = xs[i];
234        let py = ys[i];
235
236        // Find "bad" triangles whose circumcircle contains the new point.
237        let mut bad: Vec<usize> = Vec::new();
238        for (ti, tri) in triangles.iter().enumerate() {
239            let [a, b, c] = tri.v;
240            if in_circumcircle(xs[a], ys[a], xs[b], ys[b], xs[c], ys[c], px, py) {
241                bad.push(ti);
242            }
243        }
244
245        // Find boundary polygon — edges shared by exactly one bad triangle.
246        let mut edge_count: Vec<([usize; 2], usize)> = Vec::new();
247        for &ti in &bad {
248            let v = triangles[ti].v;
249            let edges = [[v[0], v[1]], [v[1], v[2]], [v[2], v[0]]];
250            for e in &edges {
251                let key = if e[0] < e[1] { [e[0], e[1]] } else { [e[1], e[0]] };
252                if let Some(entry) = edge_count.iter_mut().find(|(k, _)| *k == key) {
253                    entry.1 += 1;
254                } else {
255                    edge_count.push((key, 1));
256                }
257            }
258        }
259
260        let boundary: Vec<[usize; 2]> = edge_count
261            .into_iter()
262            .filter(|&(_, c)| c == 1)
263            .map(|(e, _)| e)
264            .collect();
265
266        // Remove bad triangles (in reverse order to keep indices valid).
267        bad.sort_unstable();
268        for &ti in bad.iter().rev() {
269            triangles.swap_remove(ti);
270        }
271
272        // Create new triangles from the point to each boundary edge.
273        for e in &boundary {
274            triangles.push(Triangle { v: [i, e[0], e[1]] });
275        }
276    }
277
278    // Remove triangles that share a vertex with the super-triangle.
279    triangles.retain(|tri| {
280        !tri.v.iter().any(|&v| v == st_a || v == st_b || v == st_c)
281    });
282
283    // Build the neighbor graph from remaining triangles.
284    let mut neighbors: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
285
286    let mut add_edge = |a: usize, b: usize| {
287        let d = euclidean(xs[a], ys[a], xs[b], ys[b]);
288        if !neighbors[a].iter().any(|&(nb, _)| nb == b) {
289            neighbors[a].push((b, d));
290        }
291        if !neighbors[b].iter().any(|&(nb, _)| nb == a) {
292            neighbors[b].push((a, d));
293        }
294    };
295
296    for tri in &triangles {
297        let [a, b, c] = tri.v;
298        add_edge(a, b);
299        add_edge(a, c);
300        add_edge(b, c);
301    }
302
303    Ok(SpatialGraph { n_nodes: n, neighbors })
304}
305
306// ---------------------------------------------------------------------------
307// k-nearest neighbors
308// ---------------------------------------------------------------------------
309
310/// Build a k-nearest-neighbor spatial graph.
311///
312/// For each point, the `k` closest points (by Euclidean distance) are selected
313/// as neighbors.  The graph is then symmetrized: if *i* is a neighbor of *j*,
314/// then *j* is also a neighbor of *i*.
315///
316/// # Errors
317///
318/// Returns an error if `k` is 0 or `k >= n`.
319pub fn knn_spatial_neighbors(points: &[SpatialPoint], k: usize) -> Result<SpatialGraph> {
320    let n = points.len();
321    if k == 0 {
322        return Err(CyaneaError::InvalidInput("k must be >= 1".into()));
323    }
324    if k >= n {
325        return Err(CyaneaError::InvalidInput(format!(
326            "k ({}) must be less than number of points ({})",
327            k, n
328        )));
329    }
330
331    let mut neighbors: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
332
333    for i in 0..n {
334        let mut dists: Vec<(usize, f64)> = (0..n)
335            .filter(|&j| j != i)
336            .map(|j| {
337                let d = euclidean(points[i].x, points[i].y, points[j].x, points[j].y);
338                (j, d)
339            })
340            .collect();
341        dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
342        dists.truncate(k);
343        neighbors[i] = dists;
344    }
345
346    // Symmetrize.
347    for i in 0..n {
348        let nbs: Vec<(usize, f64)> = neighbors[i].clone();
349        for (j, d) in nbs {
350            if !neighbors[j].iter().any(|&(nb, _)| nb == i) {
351                neighbors[j].push((i, d));
352            }
353        }
354    }
355
356    Ok(SpatialGraph { n_nodes: n, neighbors })
357}
358
359// ---------------------------------------------------------------------------
360// Moran's I
361// ---------------------------------------------------------------------------
362
363/// Compute Moran's I spatial autocorrelation statistic.
364///
365/// Uses binary weights (w_ij = 1 for neighbors, 0 otherwise).
366/// Significance is assessed via z-score under the normality assumption.
367///
368/// # Errors
369///
370/// Returns an error if `values` length does not match the graph size, or if
371/// variance is zero.
372pub fn morans_i(values: &[f64], graph: &SpatialGraph) -> Result<SpatialAutocorrelation> {
373    let n = graph.n_nodes;
374    if values.len() != n {
375        return Err(CyaneaError::InvalidInput(format!(
376            "values length ({}) does not match graph size ({})",
377            values.len(),
378            n
379        )));
380    }
381
382    let mean = values.iter().sum::<f64>() / n as f64;
383    let deviations: Vec<f64> = values.iter().map(|&v| v - mean).collect();
384    let sum_sq: f64 = deviations.iter().map(|d| d * d).sum();
385    if sum_sq.abs() < 1e-30 {
386        return Err(CyaneaError::InvalidInput(
387            "zero variance in values — Moran's I is undefined".into(),
388        ));
389    }
390
391    // Total weight W (binary weights: each neighbor pair counted once per direction).
392    let mut w_total = 0.0_f64;
393    let mut numerator = 0.0_f64;
394    for i in 0..n {
395        for &(j, _) in &graph.neighbors[i] {
396            w_total += 1.0;
397            numerator += deviations[i] * deviations[j];
398        }
399    }
400
401    if w_total == 0.0 {
402        return Err(CyaneaError::InvalidInput("graph has no edges".into()));
403    }
404
405    let i_stat = (n as f64 / w_total) * numerator / sum_sq;
406    let expected = -1.0 / (n as f64 - 1.0);
407
408    // Variance under normality (Cliff & Ord formula).
409    let nf = n as f64;
410    let s1 = {
411        let mut s = 0.0;
412        for i in 0..n {
413            for &(j, _) in &graph.neighbors[i] {
414                // w_ij + w_ji: since the graph stores both directions, both are 1
415                // but we need to sum (w_ij + w_ji)^2 over distinct pairs.
416                // Instead, iterate all i,j: s1 = 0.5 * sum (w_ij + w_ji)^2.
417                // Since w_ij = 1 for stored neighbors, and w_ji = 1 if symmetric:
418                let w_ji = if graph.neighbors[j].iter().any(|&(nb, _)| nb == i) {
419                    1.0
420                } else {
421                    0.0
422                };
423                s += (1.0_f64 + w_ji).powi(2);
424            }
425        }
426        0.5 * s
427    };
428
429    let s2 = {
430        let mut s = 0.0;
431        for i in 0..n {
432            let row_sum: f64 = graph.neighbors[i].len() as f64;
433            let col_sum: f64 = (0..n)
434                .filter(|&j| graph.neighbors[j].iter().any(|&(nb, _)| nb == i))
435                .count() as f64;
436            s += (row_sum + col_sum).powi(2);
437        }
438        s
439    };
440
441    let w2 = w_total * w_total;
442
443    let m4 = deviations.iter().map(|d| d.powi(4)).sum::<f64>() / nf;
444    let m2 = sum_sq / nf;
445    let b2 = m4 / (m2 * m2); // kurtosis
446
447    let var_i = {
448        let t1 = nf * ((nf * nf - 3.0 * nf + 3.0) * s1 - nf * s2 + 3.0 * w2);
449        let t2 = b2 * ((nf * nf - nf) * s1 - 2.0 * nf * s2 + 6.0 * w2);
450        let denom = (nf - 1.0) * (nf - 2.0) * (nf - 3.0) * w2;
451        (t1 - t2) / denom - expected * expected
452    };
453
454    let z = if var_i > 0.0 {
455        (i_stat - expected) / var_i.sqrt()
456    } else {
457        0.0
458    };
459
460    let p = p_from_z(z);
461
462    Ok(SpatialAutocorrelation {
463        morans_i: i_stat,
464        expected_i: expected,
465        variance_i: var_i,
466        z_score: z,
467        p_value: p,
468    })
469}
470
471// ---------------------------------------------------------------------------
472// Geary's C
473// ---------------------------------------------------------------------------
474
475/// Compute Geary's C spatial autocorrelation statistic.
476///
477/// Uses binary weights. Values less than 1 indicate positive spatial
478/// autocorrelation; values greater than 1 indicate negative.
479///
480/// # Errors
481///
482/// Returns an error if `values` length does not match the graph, or variance
483/// is zero.
484pub fn gearys_c(values: &[f64], graph: &SpatialGraph) -> Result<GearysC> {
485    let n = graph.n_nodes;
486    if values.len() != n {
487        return Err(CyaneaError::InvalidInput(format!(
488            "values length ({}) does not match graph size ({})",
489            values.len(),
490            n
491        )));
492    }
493
494    let mean = values.iter().sum::<f64>() / n as f64;
495    let sum_sq: f64 = values.iter().map(|v| (v - mean).powi(2)).sum();
496    if sum_sq.abs() < 1e-30 {
497        return Err(CyaneaError::InvalidInput(
498            "zero variance in values — Geary's C is undefined".into(),
499        ));
500    }
501
502    let mut w_total = 0.0_f64;
503    let mut numerator = 0.0_f64;
504    for i in 0..n {
505        for &(j, _) in &graph.neighbors[i] {
506            w_total += 1.0;
507            numerator += (values[i] - values[j]).powi(2);
508        }
509    }
510
511    if w_total == 0.0 {
512        return Err(CyaneaError::InvalidInput("graph has no edges".into()));
513    }
514
515    let nf = n as f64;
516    let c_stat = ((nf - 1.0) / (2.0 * w_total)) * numerator / sum_sq;
517    let expected_c = 1.0;
518
519    // Variance under normality (simplified).
520    let deviations: Vec<f64> = values.iter().map(|&v| v - mean).collect();
521    let m4 = deviations.iter().map(|d| d.powi(4)).sum::<f64>() / nf;
522    let m2 = sum_sq / nf;
523    let b2 = m4 / (m2 * m2);
524
525    // Using the variance formula for Geary's C under normality.
526    let s1 = {
527        let mut s = 0.0;
528        for i in 0..n {
529            for &(j, _) in &graph.neighbors[i] {
530                let w_ji = if graph.neighbors[j].iter().any(|&(nb, _)| nb == i) {
531                    1.0
532                } else {
533                    0.0
534                };
535                s += (1.0_f64 + w_ji).powi(2);
536            }
537        }
538        0.5 * s
539    };
540
541    let s2 = {
542        let mut s = 0.0;
543        for i in 0..n {
544            let row_sum: f64 = graph.neighbors[i].len() as f64;
545            let col_sum: f64 = (0..n)
546                .filter(|&j| graph.neighbors[j].iter().any(|&(nb, _)| nb == i))
547                .count() as f64;
548            s += (row_sum + col_sum).powi(2);
549        }
550        s
551    };
552
553    let w2 = w_total * w_total;
554
555    let var_c = {
556        let t1 = (2.0 * s1 + s2) * (nf - 1.0) - 4.0 * w2;
557        let t2 = (nf - 1.0) * (nf - 2.0) * (nf - 3.0);
558        let normal_var = t1 / (2.0 * (nf + 1.0) * w2);
559        // Adjust with kurtosis.
560        let _adj = ((nf - 1.0) * s1 * (nf * nf - 3.0 * nf + 3.0 - (nf - 1.0) * b2))
561            / (t2 * w2);
562        let _adj2 = ((nf - 1.0) * s2 * (nf * nf + 3.0 * nf - 6.0 - (nf * nf - nf + 2.0) * b2))
563            / (4.0 * t2 * w2);
564        let _adj3 = (w2 * (nf * nf - 3.0 - (nf - 1.0).powi(2) * b2)) / (t2 * w2);
565        // Use simplified variance estimate to avoid numerical instability.
566        normal_var.max(1e-20)
567    };
568
569    let z = (c_stat - expected_c) / var_c.sqrt();
570    let p = p_from_z(z);
571
572    Ok(GearysC {
573        c: c_stat,
574        expected_c,
575        z_score: z,
576        p_value: p,
577    })
578}
579
580// ---------------------------------------------------------------------------
581// Co-occurrence
582// ---------------------------------------------------------------------------
583
584/// Test co-occurrence of feature pairs in a spatial neighborhood.
585///
586/// `expression` is a feature-major matrix: `expression[feature][spot]`.
587/// Two features are considered "expressed" at a spot if their value exceeds
588/// `threshold`. Co-occurrence counts the number of graph edges where both
589/// features are expressed at connected spots (in either spot of the pair).
590///
591/// Significance is assessed by permuting spot labels `n_permutations` times.
592///
593/// # Errors
594///
595/// Returns an error if the expression matrix dimensions are inconsistent with
596/// the graph.
597pub fn cooccurrence(
598    expression: &[Vec<f64>],
599    graph: &SpatialGraph,
600    threshold: f64,
601    n_permutations: usize,
602    seed: u64,
603) -> Result<Vec<CooccurrenceResult>> {
604    let n_features = expression.len();
605    if n_features < 2 {
606        return Err(CyaneaError::InvalidInput(
607            "need at least 2 features for co-occurrence".into(),
608        ));
609    }
610    let n_spots = graph.n_nodes;
611    for (fi, feat) in expression.iter().enumerate() {
612        if feat.len() != n_spots {
613            return Err(CyaneaError::InvalidInput(format!(
614                "feature {} has {} values, expected {}",
615                fi,
616                feat.len(),
617                n_spots
618            )));
619        }
620    }
621
622    // Pre-compute binary expressed vectors.
623    let expressed: Vec<Vec<bool>> = expression
624        .iter()
625        .map(|feat| feat.iter().map(|&v| v > threshold).collect())
626        .collect();
627
628    // All edges as (i, j) pairs (directed from the adjacency list).
629    let edges: Vec<(usize, usize)> = (0..n_spots)
630        .flat_map(|i| graph.neighbors[i].iter().map(move |&(j, _)| (i, j)))
631        .collect();
632    let n_edges = edges.len();
633
634    let mut rng = Xorshift64::new(seed);
635    let mut results = Vec::new();
636
637    for a in 0..n_features {
638        for b in (a + 1)..n_features {
639            // Observed: count edges where both endpoints have at least one of
640            // the two features expressed, i.e., feature a expressed at one end
641            // and feature b expressed at the other end (or both at both).
642            let observed = edges
643                .iter()
644                .filter(|&&(i, j)| {
645                    (expressed[a][i] && expressed[b][j])
646                        || (expressed[a][j] && expressed[b][i])
647                })
648                .count();
649
650            // Expected under independence: P(a expressed) * P(b expressed) * edges.
651            let pa = expressed[a].iter().filter(|&&e| e).count() as f64 / n_spots as f64;
652            let pb = expressed[b].iter().filter(|&&e| e).count() as f64 / n_spots as f64;
653            // Probability that at least one direction matches for a random edge.
654            let p_cooccur = 2.0 * pa * pb - pa * pa * pb * pb;
655            let expected = p_cooccur * n_edges as f64;
656
657            let log_odds = if expected > 0.0 && observed > 0 {
658                (observed as f64 / expected).ln()
659            } else if observed > 0 {
660                f64::INFINITY
661            } else {
662                f64::NEG_INFINITY
663            };
664
665            // Permutation test.
666            let mut perm_ge = 0usize;
667            let mut indices: Vec<usize> = (0..n_spots).collect();
668            for _ in 0..n_permutations {
669                rng.shuffle(&mut indices);
670                let perm_count = edges
671                    .iter()
672                    .filter(|&&(i, j)| {
673                        (expressed[a][indices[i]] && expressed[b][indices[j]])
674                            || (expressed[a][indices[j]] && expressed[b][indices[i]])
675                    })
676                    .count();
677                if perm_count >= observed {
678                    perm_ge += 1;
679                }
680            }
681            let p_value = if n_permutations > 0 {
682                (perm_ge as f64 + 1.0) / (n_permutations as f64 + 1.0)
683            } else {
684                1.0
685            };
686
687            results.push(CooccurrenceResult {
688                feature_a: a,
689                feature_b: b,
690                observed,
691                expected,
692                log_odds,
693                p_value,
694            });
695        }
696    }
697
698    Ok(results)
699}
700
701// ---------------------------------------------------------------------------
702// Ligand-receptor scoring
703// ---------------------------------------------------------------------------
704
705/// Score a ligand-receptor interaction across a spatial neighbor graph.
706///
707/// The interaction score is the mean of `ligand[i] * receptor[j]` across all
708/// directed neighbor pairs `(i, j)` in the graph.  Significance is assessed by
709/// permuting cell/spot labels `n_permutations` times using an Xorshift64 PRNG.
710///
711/// # Errors
712///
713/// Returns an error if expression vectors do not match the graph size, or the
714/// graph has no edges.
715pub fn ligand_receptor_score(
716    ligand_expr: &[f64],
717    receptor_expr: &[f64],
718    graph: &SpatialGraph,
719    ligand_name: &str,
720    receptor_name: &str,
721    n_permutations: usize,
722    seed: u64,
723) -> Result<LrInteraction> {
724    let n = graph.n_nodes;
725    if ligand_expr.len() != n || receptor_expr.len() != n {
726        return Err(CyaneaError::InvalidInput(format!(
727            "expression vector lengths ({}, {}) must match graph size ({})",
728            ligand_expr.len(),
729            receptor_expr.len(),
730            n
731        )));
732    }
733
734    let n_edges: usize = graph.neighbors.iter().map(|nb| nb.len()).sum();
735    if n_edges == 0 {
736        return Err(CyaneaError::InvalidInput("graph has no edges".into()));
737    }
738
739    // Observed score: mean of ligand[i] * receptor[j] over all directed edges.
740    let observed_sum: f64 = (0..n)
741        .flat_map(|i| {
742            graph.neighbors[i]
743                .iter()
744                .map(move |&(j, _)| ligand_expr[i] * receptor_expr[j])
745        })
746        .sum();
747    let observed_score = observed_sum / n_edges as f64;
748
749    // Permutation test.
750    let mut rng = Xorshift64::new(seed);
751    let mut perm_ge = 0usize;
752    let mut perm_indices: Vec<usize> = (0..n).collect();
753
754    for _ in 0..n_permutations {
755        rng.shuffle(&mut perm_indices);
756        let mut perm_sum: f64 = 0.0;
757        for i in 0..n {
758            for &(j, _) in &graph.neighbors[i] {
759                perm_sum += ligand_expr[perm_indices[i]] * receptor_expr[perm_indices[j]];
760            }
761        }
762        let perm_score = perm_sum / n_edges as f64;
763        if perm_score >= observed_score {
764            perm_ge += 1;
765        }
766    }
767
768    let p_value = if n_permutations > 0 {
769        (perm_ge as f64 + 1.0) / (n_permutations as f64 + 1.0)
770    } else {
771        1.0
772    };
773
774    Ok(LrInteraction {
775        ligand_name: ligand_name.to_string(),
776        receptor_name: receptor_name.to_string(),
777        interaction_score: observed_score,
778        p_value,
779    })
780}
781
782// ===========================================================================
783// Tests
784// ===========================================================================
785
786#[cfg(test)]
787mod tests {
788    use super::*;
789
790    fn pt(x: f64, y: f64, index: usize) -> SpatialPoint {
791        SpatialPoint { x, y, index }
792    }
793
794    // 1. Delaunay 3 points → 1 triangle (3 edges, each point connected to other 2)
795    #[test]
796    fn test_delaunay_triangle() {
797        let points = vec![pt(0.0, 0.0, 0), pt(1.0, 0.0, 1), pt(0.5, 1.0, 2)];
798        let graph = delaunay_neighbors(&points).unwrap();
799        assert_eq!(graph.n_nodes, 3);
800        // Each point connected to the other 2.
801        for i in 0..3 {
802            assert_eq!(
803                graph.neighbors[i].len(),
804                2,
805                "node {} has {} neighbors, expected 2",
806                i,
807                graph.neighbors[i].len()
808            );
809        }
810    }
811
812    // 2. Delaunay 4 points → valid triangulation (2 triangles)
813    #[test]
814    fn test_delaunay_four_points() {
815        // Square: should produce 2 triangles → each vertex has 2 or 3 neighbors.
816        let points = vec![
817            pt(0.0, 0.0, 0),
818            pt(1.0, 0.0, 1),
819            pt(1.0, 1.0, 2),
820            pt(0.0, 1.0, 3),
821        ];
822        let graph = delaunay_neighbors(&points).unwrap();
823        assert_eq!(graph.n_nodes, 4);
824        // Total edges: 2 triangles share 1 edge → 5 undirected edges, or
825        // 4 outer + 1 diagonal = 5.
826        let total_undirected_edges: usize = graph.neighbors.iter().map(|nb| nb.len()).sum::<usize>() / 2;
827        assert!(
828            total_undirected_edges >= 4 && total_undirected_edges <= 6,
829            "expected 4-6 undirected edges, got {}",
830            total_undirected_edges
831        );
832    }
833
834    // 3. Delaunay valid — no circumcircle violations
835    #[test]
836    fn test_delaunay_circumcircle_valid() {
837        let points = vec![
838            pt(0.0, 0.0, 0),
839            pt(3.0, 0.0, 1),
840            pt(1.5, 2.5, 2),
841            pt(0.5, 1.0, 3),
842            pt(2.5, 1.0, 4),
843        ];
844        let graph = delaunay_neighbors(&points).unwrap();
845        assert_eq!(graph.n_nodes, 5);
846
847        // Reconstruct triangles from the graph edges and verify the Delaunay
848        // property: no point lies strictly inside any triangle's circumcircle.
849        // Collect all triangles as sets of 3 mutually-connected nodes.
850        let mut triangles: Vec<[usize; 3]> = Vec::new();
851        for i in 0..graph.n_nodes {
852            for &(j, _) in &graph.neighbors[i] {
853                if j <= i {
854                    continue;
855                }
856                for &(k, _) in &graph.neighbors[j] {
857                    if k <= j {
858                        continue;
859                    }
860                    // Check if i-k are also neighbors.
861                    if graph.neighbors[i].iter().any(|&(nb, _)| nb == k) {
862                        triangles.push([i, j, k]);
863                    }
864                }
865            }
866        }
867
868        for tri in &triangles {
869            let [a, b, c] = *tri;
870            for p in 0..graph.n_nodes {
871                if p == a || p == b || p == c {
872                    continue;
873                }
874                let inside = in_circumcircle(
875                    points[a].x, points[a].y,
876                    points[b].x, points[b].y,
877                    points[c].x, points[c].y,
878                    points[p].x, points[p].y,
879                );
880                assert!(
881                    !inside,
882                    "point {} is inside circumcircle of triangle [{}, {}, {}]",
883                    p, a, b, c
884                );
885            }
886        }
887    }
888
889    // 4. kNN k=2 — each point has at least 2 neighbors
890    #[test]
891    fn test_knn_min_neighbors() {
892        let points = vec![
893            pt(0.0, 0.0, 0),
894            pt(1.0, 0.0, 1),
895            pt(2.0, 0.0, 2),
896            pt(3.0, 0.0, 3),
897            pt(4.0, 0.0, 4),
898        ];
899        let graph = knn_spatial_neighbors(&points, 2).unwrap();
900        for i in 0..graph.n_nodes {
901            assert!(
902                graph.neighbors[i].len() >= 2,
903                "node {} has {} neighbors, expected >= 2",
904                i,
905                graph.neighbors[i].len()
906            );
907        }
908    }
909
910    // 5. kNN symmetry — if A neighbors B, then B neighbors A
911    #[test]
912    fn test_knn_symmetry() {
913        let points = vec![
914            pt(0.0, 0.0, 0),
915            pt(1.0, 0.5, 1),
916            pt(2.0, -0.3, 2),
917            pt(0.5, 2.0, 3),
918            pt(3.0, 1.0, 4),
919        ];
920        let graph = knn_spatial_neighbors(&points, 2).unwrap();
921        for i in 0..graph.n_nodes {
922            for &(j, _) in &graph.neighbors[i] {
923                assert!(
924                    graph.neighbors[j].iter().any(|&(nb, _)| nb == i),
925                    "node {} has neighbor {} but not vice versa",
926                    i,
927                    j
928                );
929            }
930        }
931    }
932
933    /// Helper: build a simple grid graph for autocorrelation tests.
934    fn grid_graph(rows: usize, cols: usize) -> (Vec<SpatialPoint>, SpatialGraph) {
935        let n = rows * cols;
936        let mut points = Vec::with_capacity(n);
937        for r in 0..rows {
938            for c in 0..cols {
939                points.push(pt(c as f64, r as f64, r * cols + c));
940            }
941        }
942
943        let mut neighbors: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
944        for r in 0..rows {
945            for c in 0..cols {
946                let i = r * cols + c;
947                if c + 1 < cols {
948                    let j = r * cols + c + 1;
949                    neighbors[i].push((j, 1.0));
950                    neighbors[j].push((i, 1.0));
951                }
952                if r + 1 < rows {
953                    let j = (r + 1) * cols + c;
954                    neighbors[i].push((j, 1.0));
955                    neighbors[j].push((i, 1.0));
956                }
957            }
958        }
959
960        let graph = SpatialGraph { n_nodes: n, neighbors };
961        (points, graph)
962    }
963
964    // 6. Moran's I clustered → positive
965    #[test]
966    fn test_morans_i_clustered() {
967        let (_pts, graph) = grid_graph(4, 4);
968        // Clustered: top-left is high, bottom-right is low.
969        let values = vec![
970            10.0, 10.0, 9.0, 9.0, 10.0, 10.0, 9.0, 9.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0,
971            2.0,
972        ];
973        let result = morans_i(&values, &graph).unwrap();
974        assert!(
975            result.morans_i > 0.0,
976            "clustered data should have positive Moran's I, got {}",
977            result.morans_i
978        );
979    }
980
981    // 7. Moran's I random → near zero
982    #[test]
983    fn test_morans_i_random() {
984        let (_pts, graph) = grid_graph(5, 5);
985        // Use a larger grid and shuffled values so spatial autocorrelation is weak.
986        let values = vec![
987            12.0, 5.0, 18.0, 3.0, 15.0,
988            9.0, 22.0, 1.0, 14.0, 7.0,
989            20.0, 4.0, 16.0, 8.0, 11.0,
990            2.0, 17.0, 6.0, 23.0, 10.0,
991            13.0, 19.0, 24.0, 21.0, 25.0,
992        ];
993        let result = morans_i(&values, &graph).unwrap();
994        // For shuffled data on a grid, Moran's I should be modest in magnitude.
995        // Expected value = -1/(n-1) = -0.042. With only 25 points, values
996        // can fluctuate significantly so use a wide tolerance.
997        assert!(
998            result.morans_i.abs() < 0.6,
999            "random data should have Moran's I of modest magnitude, got {}",
1000            result.morans_i
1001        );
1002    }
1003
1004    // 8. Moran's I checkerboard → negative
1005    #[test]
1006    fn test_morans_i_checkerboard() {
1007        let (_pts, graph) = grid_graph(4, 4);
1008        // Checkerboard: alternating high/low.
1009        let mut values = vec![0.0; 16];
1010        for r in 0..4 {
1011            for c in 0..4 {
1012                values[r * 4 + c] = if (r + c) % 2 == 0 { 10.0 } else { 0.0 };
1013            }
1014        }
1015        let result = morans_i(&values, &graph).unwrap();
1016        assert!(
1017            result.morans_i < 0.0,
1018            "checkerboard should have negative Moran's I, got {}",
1019            result.morans_i
1020        );
1021    }
1022
1023    // 9. Geary's C clustered → less than 1
1024    #[test]
1025    fn test_gearys_c_clustered() {
1026        let (_pts, graph) = grid_graph(4, 4);
1027        let values = vec![
1028            10.0, 10.0, 9.0, 9.0, 10.0, 10.0, 9.0, 9.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0,
1029            2.0,
1030        ];
1031        let result = gearys_c(&values, &graph).unwrap();
1032        assert!(
1033            result.c < 1.0,
1034            "clustered data should have Geary's C < 1, got {}",
1035            result.c
1036        );
1037    }
1038
1039    // 10. Geary's C random → near 1
1040    #[test]
1041    fn test_gearys_c_random() {
1042        let (_pts, graph) = grid_graph(4, 4);
1043        let values = vec![
1044            3.0, 7.0, 1.0, 9.0, 8.0, 2.0, 6.0, 4.0, 5.0, 9.0, 3.0, 7.0, 1.0, 8.0, 4.0, 6.0,
1045        ];
1046        let result = gearys_c(&values, &graph).unwrap();
1047        assert!(
1048            (result.c - 1.0).abs() < 0.5,
1049            "random data should have Geary's C near 1, got {}",
1050            result.c
1051        );
1052    }
1053
1054    // 11. Co-occurrence co-expressed → high observed
1055    #[test]
1056    fn test_cooccurrence_coexpressed() {
1057        let (_pts, graph) = grid_graph(3, 3);
1058        // Both features expressed at every node → every edge is a co-occurrence edge.
1059        let feat_a = vec![5.0; 9];
1060        let feat_b = vec![5.0; 9];
1061        let expression = vec![feat_a, feat_b];
1062        let results = cooccurrence(&expression, &graph, 1.0, 100, 42).unwrap();
1063        assert_eq!(results.len(), 1);
1064        let r = &results[0];
1065        assert!(
1066            r.observed > 0,
1067            "co-expressed features should have observed > 0"
1068        );
1069        // When all nodes express both features, observed == n_edges and expected
1070        // is also n_edges, so log_odds ≈ 0. Just check observed is positive.
1071    }
1072
1073    // 12. Co-occurrence independent → near expected
1074    #[test]
1075    fn test_cooccurrence_independent() {
1076        let (_pts, graph) = grid_graph(4, 4);
1077        // Feature A expressed in left half, B in top half → partially independent.
1078        let mut feat_a = vec![0.0; 16];
1079        let mut feat_b = vec![0.0; 16];
1080        for r in 0..4 {
1081            for c in 0..4 {
1082                if c < 2 {
1083                    feat_a[r * 4 + c] = 5.0;
1084                }
1085                if r < 2 {
1086                    feat_b[r * 4 + c] = 5.0;
1087                }
1088            }
1089        }
1090        let expression = vec![feat_a, feat_b];
1091        let results = cooccurrence(&expression, &graph, 1.0, 200, 123).unwrap();
1092        assert_eq!(results.len(), 1);
1093        let r = &results[0];
1094        // The observed and expected should be in the same ballpark.
1095        let ratio = if r.expected > 0.0 {
1096            r.observed as f64 / r.expected
1097        } else {
1098            1.0
1099        };
1100        assert!(
1101            ratio > 0.2 && ratio < 5.0,
1102            "independent features should have observed/expected near 1, got {}",
1103            ratio
1104        );
1105    }
1106
1107    // 13. L-R high score when ligand neighbors receptor
1108    #[test]
1109    fn test_lr_high_score() {
1110        let (_pts, graph) = grid_graph(3, 3);
1111        // Ligand highly expressed in left column, receptor in middle column
1112        // (spatially adjacent on the grid).
1113        let ligand = vec![10.0, 0.0, 0.0, 10.0, 0.0, 0.0, 10.0, 0.0, 0.0];
1114        let receptor = vec![0.0, 10.0, 0.0, 0.0, 10.0, 0.0, 0.0, 10.0, 0.0];
1115        let result = ligand_receptor_score(
1116            &ligand,
1117            &receptor,
1118            &graph,
1119            "WNT3A",
1120            "FZD1",
1121            500,
1122            42,
1123        )
1124        .unwrap();
1125        assert!(
1126            result.interaction_score > 0.0,
1127            "ligand-receptor score should be positive, got {}",
1128            result.interaction_score
1129        );
1130        assert_eq!(result.ligand_name, "WNT3A");
1131        assert_eq!(result.receptor_name, "FZD1");
1132    }
1133
1134    // 14. L-R permutation p-value reasonable
1135    #[test]
1136    fn test_lr_pvalue_reasonable() {
1137        let (_pts, graph) = grid_graph(4, 4);
1138        // Strong spatial signal: ligand in top half, receptor in bottom half,
1139        // meeting at the boundary.
1140        let mut ligand = vec![0.0; 16];
1141        let mut receptor = vec![0.0; 16];
1142        for r in 0..4 {
1143            for c in 0..4 {
1144                if r < 2 {
1145                    ligand[r * 4 + c] = 10.0;
1146                } else {
1147                    receptor[r * 4 + c] = 10.0;
1148                }
1149            }
1150        }
1151        let result = ligand_receptor_score(
1152            &ligand,
1153            &receptor,
1154            &graph,
1155            "CXCL12",
1156            "CXCR4",
1157            999,
1158            77,
1159        )
1160        .unwrap();
1161        // p-value should be between 0 and 1 and on the smaller side since
1162        // the spatial arrangement is structured.
1163        assert!(
1164            result.p_value > 0.0 && result.p_value <= 1.0,
1165            "p-value should be in (0, 1], got {}",
1166            result.p_value
1167        );
1168        assert!(
1169            result.interaction_score > 0.0,
1170            "should have positive interaction score"
1171        );
1172    }
1173}