Skip to main content

cyanea_omics/
genome_arithmetic.rs

1//! BEDTools-style genome arithmetic on genomic intervals.
2//!
3//! All functions operate on slices of [`GenomicInterval`] for flexibility —
4//! works with raw vecs, BED parse output, or [`IntervalSet::intervals()`].
5//! Coordinates are 0-based, half-open `[start, end)`.
6
7use std::collections::BTreeMap;
8
9use cyanea_core::{CyaneaError, Result};
10
11use crate::genomic::{GenomicInterval, Strand};
12
13/// Chromosome name → size mapping.
14pub type GenomeInfo = BTreeMap<String, u64>;
15
16/// How to handle strand when comparing intervals.
17#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub enum StrandMode {
19    /// Ignore strand (default BEDTools behavior).
20    Ignore,
21    /// Only match intervals on the same strand (`-s`).
22    Same,
23    /// Only match intervals on the opposite strand (`-S`).
24    Opposite,
25}
26
27/// Result of a closest-interval query.
28#[derive(Debug, Clone, PartialEq)]
29pub struct ClosestResult {
30    /// The query interval.
31    pub query: GenomicInterval,
32    /// The closest interval in the target set, if any.
33    pub closest: Option<GenomicInterval>,
34    /// Distance to the closest interval (0 if overlapping, `None` if no target on chrom).
35    pub distance: Option<u64>,
36}
37
38/// Extended Jaccard similarity statistics.
39#[derive(Debug, Clone, PartialEq)]
40pub struct JaccardStats {
41    /// Total base pairs in the intersection.
42    pub intersection_bp: u64,
43    /// Total base pairs in the union.
44    pub union_bp: u64,
45    /// Jaccard index (intersection_bp / union_bp).
46    pub jaccard: f64,
47    /// Number of intersecting interval pairs.
48    pub n_intersections: u64,
49}
50
51/// Convenience constructor for [`GenomeInfo`].
52pub fn genome_info(chroms: &[(&str, u64)]) -> GenomeInfo {
53    chroms.iter().map(|(c, s)| (c.to_string(), *s)).collect()
54}
55
56// ---------------------------------------------------------------------------
57// Internal helpers
58// ---------------------------------------------------------------------------
59
60/// Group intervals by chromosome, sorting each group by start position.
61fn group_by_chrom(intervals: &[GenomicInterval]) -> BTreeMap<String, Vec<&GenomicInterval>> {
62    let mut groups: BTreeMap<String, Vec<&GenomicInterval>> = BTreeMap::new();
63    for iv in intervals {
64        groups.entry(iv.chrom.clone()).or_default().push(iv);
65    }
66    for group in groups.values_mut() {
67        group.sort_by_key(|iv| (iv.start, iv.end));
68    }
69    groups
70}
71
72/// Check whether two intervals match under the given strand mode.
73fn strands_match(a: &GenomicInterval, b: &GenomicInterval, mode: StrandMode) -> bool {
74    match mode {
75        StrandMode::Ignore => true,
76        StrandMode::Same => a.strand == b.strand,
77        StrandMode::Opposite => {
78            matches!(
79                (&a.strand, &b.strand),
80                (Strand::Forward, Strand::Reverse) | (Strand::Reverse, Strand::Forward)
81            )
82        }
83    }
84}
85
86/// Merge a slice of intervals that are already sorted by start on a single chromosome.
87/// Abutting intervals (end == next.start) are merged.
88fn merge_sorted(sorted: &[&GenomicInterval]) -> Vec<GenomicInterval> {
89    if sorted.is_empty() {
90        return Vec::new();
91    }
92    let mut merged = Vec::new();
93    let mut cur_start = sorted[0].start;
94    let mut cur_end = sorted[0].end;
95    let chrom = &sorted[0].chrom;
96    let strand = sorted[0].strand;
97
98    for iv in &sorted[1..] {
99        if iv.start <= cur_end {
100            cur_end = cur_end.max(iv.end);
101        } else {
102            merged.push(GenomicInterval {
103                chrom: chrom.clone(),
104                start: cur_start,
105                end: cur_end,
106                strand,
107            });
108            cur_start = iv.start;
109            cur_end = iv.end;
110        }
111    }
112    merged.push(GenomicInterval {
113        chrom: chrom.clone(),
114        start: cur_start,
115        end: cur_end,
116        strand,
117    });
118    merged
119}
120
121// ---------------------------------------------------------------------------
122// Public API
123// ---------------------------------------------------------------------------
124
125/// Merge overlapping and abutting intervals.
126///
127/// When `strand_mode` is [`StrandMode::Same`], intervals are grouped by
128/// (chromosome, strand) before merging. Otherwise strand is ignored.
129pub fn merge(intervals: &[GenomicInterval], strand_mode: StrandMode) -> Vec<GenomicInterval> {
130    if intervals.is_empty() {
131        return Vec::new();
132    }
133
134    if strand_mode == StrandMode::Same {
135        // Group by (chrom, strand)
136        let mut groups: BTreeMap<(String, Strand), Vec<&GenomicInterval>> = BTreeMap::new();
137        for iv in intervals {
138            groups
139                .entry((iv.chrom.clone(), iv.strand))
140                .or_default()
141                .push(iv);
142        }
143        let mut result = Vec::new();
144        for group in groups.values_mut() {
145            group.sort_by_key(|iv| (iv.start, iv.end));
146            result.extend(merge_sorted(group));
147        }
148        result
149    } else {
150        let groups = group_by_chrom(intervals);
151        let mut result = Vec::new();
152        for group in groups.values() {
153            result.extend(merge_sorted(group));
154        }
155        result
156    }
157}
158
159/// Union of two interval sets (concatenate + merge).
160pub fn union(
161    a: &[GenomicInterval],
162    b: &[GenomicInterval],
163    strand_mode: StrandMode,
164) -> Vec<GenomicInterval> {
165    let mut combined: Vec<GenomicInterval> = Vec::with_capacity(a.len() + b.len());
166    combined.extend_from_slice(a);
167    combined.extend_from_slice(b);
168    merge(&combined, strand_mode)
169}
170
171/// Intersect two interval sets, returning the overlapping sub-regions.
172///
173/// For each pair of overlapping intervals (one from `a`, one from `b`),
174/// emits `max(a.start, b.start)..min(a.end, b.end)`.
175pub fn intersect(
176    a: &[GenomicInterval],
177    b: &[GenomicInterval],
178    strand_mode: StrandMode,
179) -> Result<Vec<GenomicInterval>> {
180    let groups_a = group_by_chrom(a);
181    let groups_b = group_by_chrom(b);
182    let mut result = Vec::new();
183
184    for (chrom, a_ivs) in &groups_a {
185        let b_ivs = match groups_b.get(chrom) {
186            Some(v) => v,
187            None => continue,
188        };
189
190        for a_iv in a_ivs {
191            for b_iv in b_ivs {
192                if !strands_match(a_iv, b_iv, strand_mode) {
193                    continue;
194                }
195                if a_iv.start < b_iv.end && b_iv.start < a_iv.end {
196                    let start = a_iv.start.max(b_iv.start);
197                    let end = a_iv.end.min(b_iv.end);
198                    result.push(GenomicInterval {
199                        chrom: chrom.clone(),
200                        start,
201                        end,
202                        strand: a_iv.strand,
203                    });
204                }
205            }
206        }
207    }
208
209    Ok(result)
210}
211
212/// Return intervals from `a` that have any overlap with `b` (BEDTools `-u`).
213pub fn intersect_report_a(
214    a: &[GenomicInterval],
215    b: &[GenomicInterval],
216    strand_mode: StrandMode,
217) -> Vec<GenomicInterval> {
218    let groups_b = group_by_chrom(b);
219    let mut result = Vec::new();
220
221    for a_iv in a {
222        let b_ivs = match groups_b.get(&a_iv.chrom) {
223            Some(v) => v,
224            None => continue,
225        };
226        let has_overlap = b_ivs.iter().any(|b_iv| {
227            strands_match(a_iv, b_iv, strand_mode)
228                && a_iv.start < b_iv.end
229                && b_iv.start < a_iv.end
230        });
231        if has_overlap {
232            result.push(a_iv.clone());
233        }
234    }
235
236    result
237}
238
239/// Subtract intervals in `b` from intervals in `a`.
240///
241/// For each interval in `a`, removes any bases covered by (merged) `b`.
242pub fn subtract(
243    a: &[GenomicInterval],
244    b: &[GenomicInterval],
245    strand_mode: StrandMode,
246) -> Result<Vec<GenomicInterval>> {
247    if b.is_empty() {
248        return Ok(a.to_vec());
249    }
250
251    let merged_b = merge(b, StrandMode::Ignore);
252    let groups_b = group_by_chrom(&merged_b);
253    let mut result = Vec::new();
254
255    for a_iv in a {
256        let b_ivs = match groups_b.get(&a_iv.chrom) {
257            Some(v) => v,
258            None => {
259                result.push(a_iv.clone());
260                continue;
261            }
262        };
263
264        // Filter b intervals by strand
265        let matching_b: Vec<&&GenomicInterval> = b_ivs
266            .iter()
267            .filter(|b_iv| strands_match(a_iv, b_iv, strand_mode))
268            .collect();
269
270        if matching_b.is_empty() {
271            result.push(a_iv.clone());
272            continue;
273        }
274
275        // Walk through matching b intervals, emitting uncovered regions of a
276        let mut pos = a_iv.start;
277
278        for b_iv in &matching_b {
279            if b_iv.start >= a_iv.end {
280                break;
281            }
282            if b_iv.end <= pos {
283                continue;
284            }
285            // Emit gap before this b interval
286            if b_iv.start > pos {
287                let end = b_iv.start.min(a_iv.end);
288                if end > pos {
289                    result.push(GenomicInterval {
290                        chrom: a_iv.chrom.clone(),
291                        start: pos,
292                        end,
293                        strand: a_iv.strand,
294                    });
295                }
296            }
297            pos = pos.max(b_iv.end);
298        }
299
300        // Emit trailing region after last b overlap
301        if pos < a_iv.end {
302            result.push(GenomicInterval {
303                chrom: a_iv.chrom.clone(),
304                start: pos,
305                end: a_iv.end,
306                strand: a_iv.strand,
307            });
308        }
309    }
310
311    Ok(result)
312}
313
314/// Complement of intervals with respect to a genome.
315///
316/// Returns the gaps between (merged) intervals and chromosome boundaries `[0, size)`.
317pub fn complement(
318    intervals: &[GenomicInterval],
319    genome: &GenomeInfo,
320) -> Result<Vec<GenomicInterval>> {
321    // Validate all intervals reference known chroms and are within bounds
322    for iv in intervals {
323        match genome.get(&iv.chrom) {
324            None => {
325                return Err(CyaneaError::InvalidInput(format!(
326                    "chromosome '{}' not found in genome info",
327                    iv.chrom
328                )));
329            }
330            Some(&size) => {
331                if iv.end > size {
332                    return Err(CyaneaError::InvalidInput(format!(
333                        "interval {}:{}-{} exceeds chromosome size {}",
334                        iv.chrom, iv.start, iv.end, size
335                    )));
336                }
337            }
338        }
339    }
340
341    let merged = merge(intervals, StrandMode::Ignore);
342    let groups = group_by_chrom(&merged);
343    let mut result = Vec::new();
344
345    for (chrom, size) in genome {
346        let mut pos = 0u64;
347        if let Some(ivs) = groups.get(chrom) {
348            for iv in ivs {
349                if iv.start > pos {
350                    result.push(GenomicInterval {
351                        chrom: chrom.clone(),
352                        start: pos,
353                        end: iv.start,
354                        strand: Strand::Unknown,
355                    });
356                }
357                pos = iv.end;
358            }
359        }
360        if pos < *size {
361            result.push(GenomicInterval {
362                chrom: chrom.clone(),
363                start: pos,
364                end: *size,
365                strand: Strand::Unknown,
366            });
367        }
368    }
369
370    Ok(result)
371}
372
373/// Find the closest interval in `b` for each interval in `a`.
374pub fn closest(
375    a: &[GenomicInterval],
376    b: &[GenomicInterval],
377    strand_mode: StrandMode,
378) -> Vec<ClosestResult> {
379    let groups_b = group_by_chrom(b);
380    let mut results = Vec::with_capacity(a.len());
381
382    for a_iv in a {
383        let b_ivs = match groups_b.get(&a_iv.chrom) {
384            Some(v) => v,
385            None => {
386                results.push(ClosestResult {
387                    query: a_iv.clone(),
388                    closest: None,
389                    distance: None,
390                });
391                continue;
392            }
393        };
394
395        // Filter by strand
396        let matching: Vec<&GenomicInterval> = b_ivs
397            .iter()
398            .filter(|b_iv| strands_match(a_iv, b_iv, strand_mode))
399            .copied()
400            .collect();
401
402        if matching.is_empty() {
403            results.push(ClosestResult {
404                query: a_iv.clone(),
405                closest: None,
406                distance: None,
407            });
408            continue;
409        }
410
411        // Binary search for the position where a_iv.start would insert
412        let idx = matching.partition_point(|iv| iv.end <= a_iv.start);
413
414        let mut best: Option<(&GenomicInterval, u64)> = None;
415
416        // Check candidates around the insertion point
417        for &candidate_idx in &[
418            idx.wrapping_sub(1),
419            idx,
420            idx + 1,
421        ] {
422            if candidate_idx >= matching.len() {
423                continue;
424            }
425            let b_iv = matching[candidate_idx];
426            let dist = if a_iv.start < b_iv.end && b_iv.start < a_iv.end {
427                0 // overlapping
428            } else if a_iv.end <= b_iv.start {
429                b_iv.start - a_iv.end
430            } else {
431                a_iv.start - b_iv.end
432            };
433
434            if best.is_none() || dist < best.unwrap().1 {
435                best = Some((b_iv, dist));
436            }
437        }
438
439        match best {
440            Some((b_iv, dist)) => {
441                results.push(ClosestResult {
442                    query: a_iv.clone(),
443                    closest: Some(b_iv.clone()),
444                    distance: Some(dist),
445                });
446            }
447            None => {
448                results.push(ClosestResult {
449                    query: a_iv.clone(),
450                    closest: None,
451                    distance: None,
452                });
453            }
454        }
455    }
456
457    results
458}
459
460/// Generate non-overlapping tiling windows across a genome.
461pub fn make_windows(genome: &GenomeInfo, window_size: u64) -> Result<Vec<GenomicInterval>> {
462    if window_size == 0 {
463        return Err(CyaneaError::InvalidInput(
464            "window size must be > 0".into(),
465        ));
466    }
467
468    let mut result = Vec::new();
469    for (chrom, &size) in genome {
470        let mut start = 0u64;
471        while start < size {
472            let end = (start + window_size).min(size);
473            result.push(GenomicInterval {
474                chrom: chrom.clone(),
475                start,
476                end,
477                strand: Strand::Unknown,
478            });
479            start += window_size;
480        }
481    }
482
483    Ok(result)
484}
485
486/// Generate sliding windows across a genome.
487pub fn make_sliding_windows(
488    genome: &GenomeInfo,
489    window_size: u64,
490    step: u64,
491) -> Result<Vec<GenomicInterval>> {
492    if window_size == 0 {
493        return Err(CyaneaError::InvalidInput(
494            "window size must be > 0".into(),
495        ));
496    }
497    if step == 0 {
498        return Err(CyaneaError::InvalidInput("step must be > 0".into()));
499    }
500
501    let mut result = Vec::new();
502    for (chrom, &size) in genome {
503        let mut start = 0u64;
504        while start < size {
505            let end = (start + window_size).min(size);
506            result.push(GenomicInterval {
507                chrom: chrom.clone(),
508                start,
509                end,
510                strand: Strand::Unknown,
511            });
512            start += step;
513        }
514    }
515
516    Ok(result)
517}
518
519/// Generate windows centered on each interval's midpoint, clipped to chromosome bounds.
520pub fn windows_around(
521    intervals: &[GenomicInterval],
522    window_size: u64,
523    genome: &GenomeInfo,
524) -> Result<Vec<GenomicInterval>> {
525    if window_size == 0 {
526        return Err(CyaneaError::InvalidInput(
527            "window size must be > 0".into(),
528        ));
529    }
530
531    let half = window_size / 2;
532    let mut result = Vec::with_capacity(intervals.len());
533
534    for iv in intervals {
535        let chrom_size = genome.get(&iv.chrom).ok_or_else(|| {
536            CyaneaError::InvalidInput(format!(
537                "chromosome '{}' not found in genome info",
538                iv.chrom
539            ))
540        })?;
541
542        let mid = iv.midpoint();
543        let start = mid.saturating_sub(half);
544        let end = (mid + half).min(*chrom_size);
545
546        if start < end {
547            result.push(GenomicInterval {
548                chrom: iv.chrom.clone(),
549                start,
550                end,
551                strand: iv.strand,
552            });
553        }
554    }
555
556    Ok(result)
557}
558
559/// Jaccard similarity between two interval sets.
560///
561/// Returns `intersection_bp / union_bp`, or 0.0 if both sets are empty.
562pub fn jaccard(a: &[GenomicInterval], b: &[GenomicInterval]) -> f64 {
563    jaccard_stats(a, b).jaccard
564}
565
566/// Extended Jaccard similarity statistics.
567pub fn jaccard_stats(a: &[GenomicInterval], b: &[GenomicInterval]) -> JaccardStats {
568    let merged_a = merge(a, StrandMode::Ignore);
569    let merged_b = merge(b, StrandMode::Ignore);
570
571    // Compute intersection bp
572    let isect = intersect(&merged_a, &merged_b, StrandMode::Ignore).unwrap_or_default();
573    let intersection_bp: u64 = isect.iter().map(|iv| iv.len()).sum();
574    let n_intersections = isect.len() as u64;
575
576    // Compute union bp
577    let all_union = union(&merged_a, &merged_b, StrandMode::Ignore);
578    let union_bp: u64 = all_union.iter().map(|iv| iv.len()).sum();
579
580    let jaccard = if union_bp == 0 {
581        0.0
582    } else {
583        intersection_bp as f64 / union_bp as f64
584    };
585
586    JaccardStats {
587        intersection_bp,
588        union_bp,
589        jaccard,
590        n_intersections,
591    }
592}
593
594#[cfg(test)]
595mod tests {
596    use super::*;
597
598    fn iv(chrom: &str, start: u64, end: u64) -> GenomicInterval {
599        GenomicInterval::new(chrom, start, end).unwrap()
600    }
601
602    fn iv_strand(chrom: &str, start: u64, end: u64, strand: Strand) -> GenomicInterval {
603        GenomicInterval::with_strand(chrom, start, end, strand).unwrap()
604    }
605
606    // -----------------------------------------------------------------------
607    // merge
608    // -----------------------------------------------------------------------
609
610    #[test]
611    fn test_merge_overlapping() {
612        let intervals = vec![iv("chr1", 100, 200), iv("chr1", 150, 300)];
613        let merged = merge(&intervals, StrandMode::Ignore);
614        assert_eq!(merged.len(), 1);
615        assert_eq!(merged[0].start, 100);
616        assert_eq!(merged[0].end, 300);
617    }
618
619    #[test]
620    fn test_merge_abutting() {
621        let intervals = vec![iv("chr1", 100, 200), iv("chr1", 200, 300)];
622        let merged = merge(&intervals, StrandMode::Ignore);
623        assert_eq!(merged.len(), 1);
624        assert_eq!(merged[0].start, 100);
625        assert_eq!(merged[0].end, 300);
626    }
627
628    #[test]
629    fn test_merge_disjoint() {
630        let intervals = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
631        let merged = merge(&intervals, StrandMode::Ignore);
632        assert_eq!(merged.len(), 2);
633    }
634
635    #[test]
636    fn test_merge_empty() {
637        let merged = merge(&[], StrandMode::Ignore);
638        assert!(merged.is_empty());
639    }
640
641    #[test]
642    fn test_merge_multi_chrom() {
643        let intervals = vec![
644            iv("chr2", 100, 200),
645            iv("chr1", 100, 200),
646            iv("chr1", 150, 300),
647        ];
648        let merged = merge(&intervals, StrandMode::Ignore);
649        assert_eq!(merged.len(), 2);
650        assert_eq!(merged[0].chrom, "chr1");
651        assert_eq!(merged[0].end, 300);
652        assert_eq!(merged[1].chrom, "chr2");
653    }
654
655    #[test]
656    fn test_merge_strand_aware() {
657        let intervals = vec![
658            iv_strand("chr1", 100, 200, Strand::Forward),
659            iv_strand("chr1", 150, 300, Strand::Reverse),
660        ];
661        let merged = merge(&intervals, StrandMode::Same);
662        assert_eq!(merged.len(), 2); // different strands, not merged
663    }
664
665    #[test]
666    fn test_merge_strand_aware_same() {
667        let intervals = vec![
668            iv_strand("chr1", 100, 200, Strand::Forward),
669            iv_strand("chr1", 150, 300, Strand::Forward),
670        ];
671        let merged = merge(&intervals, StrandMode::Same);
672        assert_eq!(merged.len(), 1);
673        assert_eq!(merged[0].end, 300);
674    }
675
676    // -----------------------------------------------------------------------
677    // union
678    // -----------------------------------------------------------------------
679
680    #[test]
681    fn test_union_basic() {
682        let a = vec![iv("chr1", 100, 200)];
683        let b = vec![iv("chr1", 150, 300)];
684        let u = union(&a, &b, StrandMode::Ignore);
685        assert_eq!(u.len(), 1);
686        assert_eq!(u[0].start, 100);
687        assert_eq!(u[0].end, 300);
688    }
689
690    #[test]
691    fn test_union_disjoint() {
692        let a = vec![iv("chr1", 100, 200)];
693        let b = vec![iv("chr1", 400, 500)];
694        let u = union(&a, &b, StrandMode::Ignore);
695        assert_eq!(u.len(), 2);
696    }
697
698    // -----------------------------------------------------------------------
699    // intersect
700    // -----------------------------------------------------------------------
701
702    #[test]
703    fn test_intersect_basic() {
704        let a = vec![iv("chr1", 100, 300)];
705        let b = vec![iv("chr1", 200, 400)];
706        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
707        assert_eq!(r.len(), 1);
708        assert_eq!(r[0].start, 200);
709        assert_eq!(r[0].end, 300);
710    }
711
712    #[test]
713    fn test_intersect_no_overlap() {
714        let a = vec![iv("chr1", 100, 200)];
715        let b = vec![iv("chr1", 300, 400)];
716        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
717        assert!(r.is_empty());
718    }
719
720    #[test]
721    fn test_intersect_multiple_overlaps() {
722        let a = vec![iv("chr1", 100, 500)];
723        let b = vec![iv("chr1", 150, 200), iv("chr1", 300, 400)];
724        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
725        assert_eq!(r.len(), 2);
726        assert_eq!(r[0].start, 150);
727        assert_eq!(r[0].end, 200);
728        assert_eq!(r[1].start, 300);
729        assert_eq!(r[1].end, 400);
730    }
731
732    #[test]
733    fn test_intersect_multi_chrom() {
734        let a = vec![iv("chr1", 100, 200), iv("chr2", 100, 200)];
735        let b = vec![iv("chr1", 150, 250)];
736        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
737        assert_eq!(r.len(), 1);
738        assert_eq!(r[0].chrom, "chr1");
739    }
740
741    #[test]
742    fn test_intersect_empty_input() {
743        let a: Vec<GenomicInterval> = vec![];
744        let b = vec![iv("chr1", 100, 200)];
745        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
746        assert!(r.is_empty());
747    }
748
749    #[test]
750    fn test_intersect_identical() {
751        let a = vec![iv("chr1", 100, 200)];
752        let b = vec![iv("chr1", 100, 200)];
753        let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
754        assert_eq!(r.len(), 1);
755        assert_eq!(r[0].start, 100);
756        assert_eq!(r[0].end, 200);
757    }
758
759    #[test]
760    fn test_intersect_strand_same() {
761        let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
762        let b = vec![iv_strand("chr1", 200, 400, Strand::Reverse)];
763        let r = intersect(&a, &b, StrandMode::Same).unwrap();
764        assert!(r.is_empty()); // different strands
765    }
766
767    #[test]
768    fn test_intersect_strand_opposite() {
769        let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
770        let b = vec![iv_strand("chr1", 200, 400, Strand::Reverse)];
771        let r = intersect(&a, &b, StrandMode::Opposite).unwrap();
772        assert_eq!(r.len(), 1);
773    }
774
775    // -----------------------------------------------------------------------
776    // intersect_report_a
777    // -----------------------------------------------------------------------
778
779    #[test]
780    fn test_intersect_report_a_basic() {
781        let a = vec![iv("chr1", 100, 200), iv("chr1", 400, 500)];
782        let b = vec![iv("chr1", 150, 250)];
783        let r = intersect_report_a(&a, &b, StrandMode::Ignore);
784        assert_eq!(r.len(), 1);
785        assert_eq!(r[0].start, 100);
786        assert_eq!(r[0].end, 200);
787    }
788
789    // -----------------------------------------------------------------------
790    // subtract
791    // -----------------------------------------------------------------------
792
793    #[test]
794    fn test_subtract_no_overlap() {
795        let a = vec![iv("chr1", 100, 200)];
796        let b = vec![iv("chr1", 300, 400)];
797        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
798        assert_eq!(r.len(), 1);
799        assert_eq!(r[0].start, 100);
800        assert_eq!(r[0].end, 200);
801    }
802
803    #[test]
804    fn test_subtract_complete_removal() {
805        let a = vec![iv("chr1", 100, 200)];
806        let b = vec![iv("chr1", 50, 250)];
807        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
808        assert!(r.is_empty());
809    }
810
811    #[test]
812    fn test_subtract_left_trim() {
813        let a = vec![iv("chr1", 100, 300)];
814        let b = vec![iv("chr1", 50, 200)];
815        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
816        assert_eq!(r.len(), 1);
817        assert_eq!(r[0].start, 200);
818        assert_eq!(r[0].end, 300);
819    }
820
821    #[test]
822    fn test_subtract_right_trim() {
823        let a = vec![iv("chr1", 100, 300)];
824        let b = vec![iv("chr1", 200, 400)];
825        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
826        assert_eq!(r.len(), 1);
827        assert_eq!(r[0].start, 100);
828        assert_eq!(r[0].end, 200);
829    }
830
831    #[test]
832    fn test_subtract_split_middle() {
833        let a = vec![iv("chr1", 100, 400)];
834        let b = vec![iv("chr1", 200, 300)];
835        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
836        assert_eq!(r.len(), 2);
837        assert_eq!(r[0].start, 100);
838        assert_eq!(r[0].end, 200);
839        assert_eq!(r[1].start, 300);
840        assert_eq!(r[1].end, 400);
841    }
842
843    #[test]
844    fn test_subtract_multiple_b() {
845        let a = vec![iv("chr1", 100, 500)];
846        let b = vec![iv("chr1", 150, 200), iv("chr1", 300, 350)];
847        let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
848        assert_eq!(r.len(), 3);
849        assert_eq!((r[0].start, r[0].end), (100, 150));
850        assert_eq!((r[1].start, r[1].end), (200, 300));
851        assert_eq!((r[2].start, r[2].end), (350, 500));
852    }
853
854    #[test]
855    fn test_subtract_empty_b() {
856        let a = vec![iv("chr1", 100, 200)];
857        let r = subtract(&a, &[], StrandMode::Ignore).unwrap();
858        assert_eq!(r.len(), 1);
859    }
860
861    #[test]
862    fn test_subtract_strand_mode() {
863        let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
864        let b = vec![iv_strand("chr1", 150, 250, Strand::Reverse)];
865        // Same strand mode: b doesn't match, a passes through
866        let r = subtract(&a, &b, StrandMode::Same).unwrap();
867        assert_eq!(r.len(), 1);
868        assert_eq!(r[0].start, 100);
869        assert_eq!(r[0].end, 300);
870    }
871
872    // -----------------------------------------------------------------------
873    // complement
874    // -----------------------------------------------------------------------
875
876    #[test]
877    fn test_complement_basic() {
878        let intervals = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
879        let genome = genome_info(&[("chr1", 500)]);
880        let r = complement(&intervals, &genome).unwrap();
881        assert_eq!(r.len(), 3);
882        assert_eq!((r[0].start, r[0].end), (0, 100));
883        assert_eq!((r[1].start, r[1].end), (200, 300));
884        assert_eq!((r[2].start, r[2].end), (400, 500));
885    }
886
887    #[test]
888    fn test_complement_full_coverage() {
889        let intervals = vec![iv("chr1", 0, 500)];
890        let genome = genome_info(&[("chr1", 500)]);
891        let r = complement(&intervals, &genome).unwrap();
892        assert!(r.is_empty());
893    }
894
895    #[test]
896    fn test_complement_empty_input() {
897        let genome = genome_info(&[("chr1", 1000)]);
898        let r = complement(&[], &genome).unwrap();
899        assert_eq!(r.len(), 1);
900        assert_eq!((r[0].start, r[0].end), (0, 1000));
901    }
902
903    #[test]
904    fn test_complement_chrom_not_in_genome() {
905        let intervals = vec![iv("chrX", 100, 200)];
906        let genome = genome_info(&[("chr1", 1000)]);
907        assert!(complement(&intervals, &genome).is_err());
908    }
909
910    #[test]
911    fn test_complement_interval_exceeds_chrom() {
912        let intervals = vec![iv("chr1", 900, 1100)];
913        let genome = genome_info(&[("chr1", 1000)]);
914        assert!(complement(&intervals, &genome).is_err());
915    }
916
917    #[test]
918    fn test_complement_chrom_with_no_intervals() {
919        let intervals = vec![iv("chr1", 100, 200)];
920        let genome = genome_info(&[("chr1", 500), ("chr2", 300)]);
921        let r = complement(&intervals, &genome).unwrap();
922        // chr1: [0,100), [200,500); chr2: [0,300)
923        assert_eq!(r.len(), 3);
924        let chr2: Vec<_> = r.iter().filter(|i| i.chrom == "chr2").collect();
925        assert_eq!(chr2.len(), 1);
926        assert_eq!((chr2[0].start, chr2[0].end), (0, 300));
927    }
928
929    // -----------------------------------------------------------------------
930    // closest
931    // -----------------------------------------------------------------------
932
933    #[test]
934    fn test_closest_upstream() {
935        let a = vec![iv("chr1", 300, 400)];
936        let b = vec![iv("chr1", 100, 200)];
937        let r = closest(&a, &b, StrandMode::Ignore);
938        assert_eq!(r.len(), 1);
939        assert_eq!(r[0].distance, Some(100));
940    }
941
942    #[test]
943    fn test_closest_downstream() {
944        let a = vec![iv("chr1", 100, 200)];
945        let b = vec![iv("chr1", 300, 400)];
946        let r = closest(&a, &b, StrandMode::Ignore);
947        assert_eq!(r.len(), 1);
948        assert_eq!(r[0].distance, Some(100));
949    }
950
951    #[test]
952    fn test_closest_overlapping() {
953        let a = vec![iv("chr1", 100, 300)];
954        let b = vec![iv("chr1", 200, 400)];
955        let r = closest(&a, &b, StrandMode::Ignore);
956        assert_eq!(r.len(), 1);
957        assert_eq!(r[0].distance, Some(0));
958    }
959
960    #[test]
961    fn test_closest_no_b_on_chrom() {
962        let a = vec![iv("chr1", 100, 200)];
963        let b = vec![iv("chr2", 100, 200)];
964        let r = closest(&a, &b, StrandMode::Ignore);
965        assert_eq!(r.len(), 1);
966        assert!(r[0].closest.is_none());
967        assert!(r[0].distance.is_none());
968    }
969
970    #[test]
971    fn test_closest_single_b() {
972        let a = vec![iv("chr1", 500, 600)];
973        let b = vec![iv("chr1", 100, 200)];
974        let r = closest(&a, &b, StrandMode::Ignore);
975        assert_eq!(r[0].distance, Some(300));
976    }
977
978    #[test]
979    fn test_closest_equidistant() {
980        let a = vec![iv("chr1", 200, 300)];
981        let b = vec![iv("chr1", 100, 150), iv("chr1", 350, 400)];
982        let r = closest(&a, &b, StrandMode::Ignore);
983        assert_eq!(r[0].distance, Some(50));
984    }
985
986    // -----------------------------------------------------------------------
987    // windows
988    // -----------------------------------------------------------------------
989
990    #[test]
991    fn test_make_windows_basic() {
992        let genome = genome_info(&[("chr1", 100)]);
993        let r = make_windows(&genome, 30).unwrap();
994        assert_eq!(r.len(), 4); // [0,30), [30,60), [60,90), [90,100)
995        assert_eq!((r[3].start, r[3].end), (90, 100));
996    }
997
998    #[test]
999    fn test_make_windows_exact() {
1000        let genome = genome_info(&[("chr1", 100)]);
1001        let r = make_windows(&genome, 50).unwrap();
1002        assert_eq!(r.len(), 2);
1003        assert_eq!((r[0].start, r[0].end), (0, 50));
1004        assert_eq!((r[1].start, r[1].end), (50, 100));
1005    }
1006
1007    #[test]
1008    fn test_make_windows_zero_size() {
1009        let genome = genome_info(&[("chr1", 100)]);
1010        assert!(make_windows(&genome, 0).is_err());
1011    }
1012
1013    #[test]
1014    fn test_make_windows_larger_than_chrom() {
1015        let genome = genome_info(&[("chr1", 50)]);
1016        let r = make_windows(&genome, 100).unwrap();
1017        assert_eq!(r.len(), 1);
1018        assert_eq!((r[0].start, r[0].end), (0, 50));
1019    }
1020
1021    #[test]
1022    fn test_make_sliding_windows() {
1023        let genome = genome_info(&[("chr1", 100)]);
1024        let r = make_sliding_windows(&genome, 50, 25).unwrap();
1025        // [0,50), [25,75), [50,100), [75,100)
1026        assert_eq!(r.len(), 4);
1027        assert_eq!((r[1].start, r[1].end), (25, 75));
1028    }
1029
1030    #[test]
1031    fn test_make_sliding_windows_zero_step() {
1032        let genome = genome_info(&[("chr1", 100)]);
1033        assert!(make_sliding_windows(&genome, 50, 0).is_err());
1034    }
1035
1036    #[test]
1037    fn test_windows_around() {
1038        let intervals = vec![iv("chr1", 100, 200)];
1039        let genome = genome_info(&[("chr1", 1000)]);
1040        let r = windows_around(&intervals, 50, &genome).unwrap();
1041        assert_eq!(r.len(), 1);
1042        // midpoint = 150, half = 25, so [125, 175)
1043        assert_eq!((r[0].start, r[0].end), (125, 175));
1044    }
1045
1046    #[test]
1047    fn test_windows_around_clipping() {
1048        let intervals = vec![iv("chr1", 0, 10)];
1049        let genome = genome_info(&[("chr1", 100)]);
1050        let r = windows_around(&intervals, 40, &genome).unwrap();
1051        assert_eq!(r.len(), 1);
1052        // midpoint = 5, half = 20, start = 5-20 saturates to 0, end = 5+20 = 25
1053        assert_eq!(r[0].start, 0);
1054        assert_eq!(r[0].end, 25);
1055    }
1056
1057    // -----------------------------------------------------------------------
1058    // jaccard
1059    // -----------------------------------------------------------------------
1060
1061    #[test]
1062    fn test_jaccard_identical() {
1063        let a = vec![iv("chr1", 100, 200)];
1064        let j = jaccard(&a, &a);
1065        assert!((j - 1.0).abs() < 1e-10);
1066    }
1067
1068    #[test]
1069    fn test_jaccard_no_overlap() {
1070        let a = vec![iv("chr1", 100, 200)];
1071        let b = vec![iv("chr1", 300, 400)];
1072        let j = jaccard(&a, &b);
1073        assert!((j - 0.0).abs() < 1e-10);
1074    }
1075
1076    #[test]
1077    fn test_jaccard_partial_overlap() {
1078        let a = vec![iv("chr1", 100, 300)];
1079        let b = vec![iv("chr1", 200, 400)];
1080        // intersection = [200,300) = 100bp, union = [100,400) = 300bp
1081        let j = jaccard(&a, &b);
1082        assert!((j - 100.0 / 300.0).abs() < 1e-10);
1083    }
1084
1085    #[test]
1086    fn test_jaccard_both_empty() {
1087        let a: Vec<GenomicInterval> = vec![];
1088        let b: Vec<GenomicInterval> = vec![];
1089        assert!((jaccard(&a, &b) - 0.0).abs() < 1e-10);
1090    }
1091
1092    #[test]
1093    fn test_jaccard_stats_bp_counts() {
1094        let a = vec![iv("chr1", 100, 300)];
1095        let b = vec![iv("chr1", 200, 400)];
1096        let stats = jaccard_stats(&a, &b);
1097        assert_eq!(stats.intersection_bp, 100);
1098        assert_eq!(stats.union_bp, 300);
1099        assert_eq!(stats.n_intersections, 1);
1100    }
1101
1102    // -----------------------------------------------------------------------
1103    // property-style tests
1104    // -----------------------------------------------------------------------
1105
1106    #[test]
1107    fn test_subtract_plus_intersect_equals_a() {
1108        // Property holds on non-overlapping a (merged first)
1109        let a = merge(
1110            &[iv("chr1", 100, 300), iv("chr1", 250, 400)],
1111            StrandMode::Ignore,
1112        );
1113        let b = vec![iv("chr1", 200, 350)];
1114
1115        let sub = subtract(&a, &b, StrandMode::Ignore).unwrap();
1116        let isect = intersect(&a, &b, StrandMode::Ignore).unwrap();
1117
1118        let sub_bp: u64 = sub.iter().map(|i| i.len()).sum();
1119        let isect_bp: u64 = isect.iter().map(|i| i.len()).sum();
1120        let a_bp: u64 = a.iter().map(|i| i.len()).sum();
1121
1122        assert_eq!(sub_bp + isect_bp, a_bp);
1123    }
1124
1125    #[test]
1126    fn test_jaccard_symmetry() {
1127        let a = vec![iv("chr1", 100, 300)];
1128        let b = vec![iv("chr1", 200, 500)];
1129        let j1 = jaccard(&a, &b);
1130        let j2 = jaccard(&b, &a);
1131        assert!((j1 - j2).abs() < 1e-10);
1132    }
1133
1134    #[test]
1135    fn test_jaccard_bounds() {
1136        let a = vec![iv("chr1", 100, 300)];
1137        let b = vec![iv("chr1", 200, 500)];
1138        let j = jaccard(&a, &b);
1139        assert!(j >= 0.0 && j <= 1.0);
1140    }
1141
1142    #[test]
1143    fn test_complement_of_complement_equals_merge() {
1144        let a = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
1145        let genome = genome_info(&[("chr1", 500)]);
1146
1147        let comp = complement(&a, &genome).unwrap();
1148        let comp2 = complement(&comp, &genome).unwrap();
1149
1150        let merged_a = merge(&a, StrandMode::Ignore);
1151        assert_eq!(comp2.len(), merged_a.len());
1152        for (c, m) in comp2.iter().zip(merged_a.iter()) {
1153            assert_eq!(c.start, m.start);
1154            assert_eq!(c.end, m.end);
1155        }
1156    }
1157}