1use std::collections::BTreeMap;
8
9use cyanea_core::{CyaneaError, Result};
10
11use crate::genomic::{GenomicInterval, Strand};
12
13pub type GenomeInfo = BTreeMap<String, u64>;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub enum StrandMode {
19 Ignore,
21 Same,
23 Opposite,
25}
26
27#[derive(Debug, Clone, PartialEq)]
29pub struct ClosestResult {
30 pub query: GenomicInterval,
32 pub closest: Option<GenomicInterval>,
34 pub distance: Option<u64>,
36}
37
38#[derive(Debug, Clone, PartialEq)]
40pub struct JaccardStats {
41 pub intersection_bp: u64,
43 pub union_bp: u64,
45 pub jaccard: f64,
47 pub n_intersections: u64,
49}
50
51pub fn genome_info(chroms: &[(&str, u64)]) -> GenomeInfo {
53 chroms.iter().map(|(c, s)| (c.to_string(), *s)).collect()
54}
55
56fn group_by_chrom(intervals: &[GenomicInterval]) -> BTreeMap<String, Vec<&GenomicInterval>> {
62 let mut groups: BTreeMap<String, Vec<&GenomicInterval>> = BTreeMap::new();
63 for iv in intervals {
64 groups.entry(iv.chrom.clone()).or_default().push(iv);
65 }
66 for group in groups.values_mut() {
67 group.sort_by_key(|iv| (iv.start, iv.end));
68 }
69 groups
70}
71
72fn strands_match(a: &GenomicInterval, b: &GenomicInterval, mode: StrandMode) -> bool {
74 match mode {
75 StrandMode::Ignore => true,
76 StrandMode::Same => a.strand == b.strand,
77 StrandMode::Opposite => {
78 matches!(
79 (&a.strand, &b.strand),
80 (Strand::Forward, Strand::Reverse) | (Strand::Reverse, Strand::Forward)
81 )
82 }
83 }
84}
85
86fn merge_sorted(sorted: &[&GenomicInterval]) -> Vec<GenomicInterval> {
89 if sorted.is_empty() {
90 return Vec::new();
91 }
92 let mut merged = Vec::new();
93 let mut cur_start = sorted[0].start;
94 let mut cur_end = sorted[0].end;
95 let chrom = &sorted[0].chrom;
96 let strand = sorted[0].strand;
97
98 for iv in &sorted[1..] {
99 if iv.start <= cur_end {
100 cur_end = cur_end.max(iv.end);
101 } else {
102 merged.push(GenomicInterval {
103 chrom: chrom.clone(),
104 start: cur_start,
105 end: cur_end,
106 strand,
107 });
108 cur_start = iv.start;
109 cur_end = iv.end;
110 }
111 }
112 merged.push(GenomicInterval {
113 chrom: chrom.clone(),
114 start: cur_start,
115 end: cur_end,
116 strand,
117 });
118 merged
119}
120
121pub fn merge(intervals: &[GenomicInterval], strand_mode: StrandMode) -> Vec<GenomicInterval> {
130 if intervals.is_empty() {
131 return Vec::new();
132 }
133
134 if strand_mode == StrandMode::Same {
135 let mut groups: BTreeMap<(String, Strand), Vec<&GenomicInterval>> = BTreeMap::new();
137 for iv in intervals {
138 groups
139 .entry((iv.chrom.clone(), iv.strand))
140 .or_default()
141 .push(iv);
142 }
143 let mut result = Vec::new();
144 for group in groups.values_mut() {
145 group.sort_by_key(|iv| (iv.start, iv.end));
146 result.extend(merge_sorted(group));
147 }
148 result
149 } else {
150 let groups = group_by_chrom(intervals);
151 let mut result = Vec::new();
152 for group in groups.values() {
153 result.extend(merge_sorted(group));
154 }
155 result
156 }
157}
158
159pub fn union(
161 a: &[GenomicInterval],
162 b: &[GenomicInterval],
163 strand_mode: StrandMode,
164) -> Vec<GenomicInterval> {
165 let mut combined: Vec<GenomicInterval> = Vec::with_capacity(a.len() + b.len());
166 combined.extend_from_slice(a);
167 combined.extend_from_slice(b);
168 merge(&combined, strand_mode)
169}
170
171pub fn intersect(
176 a: &[GenomicInterval],
177 b: &[GenomicInterval],
178 strand_mode: StrandMode,
179) -> Result<Vec<GenomicInterval>> {
180 let groups_a = group_by_chrom(a);
181 let groups_b = group_by_chrom(b);
182 let mut result = Vec::new();
183
184 for (chrom, a_ivs) in &groups_a {
185 let b_ivs = match groups_b.get(chrom) {
186 Some(v) => v,
187 None => continue,
188 };
189
190 for a_iv in a_ivs {
191 for b_iv in b_ivs {
192 if !strands_match(a_iv, b_iv, strand_mode) {
193 continue;
194 }
195 if a_iv.start < b_iv.end && b_iv.start < a_iv.end {
196 let start = a_iv.start.max(b_iv.start);
197 let end = a_iv.end.min(b_iv.end);
198 result.push(GenomicInterval {
199 chrom: chrom.clone(),
200 start,
201 end,
202 strand: a_iv.strand,
203 });
204 }
205 }
206 }
207 }
208
209 Ok(result)
210}
211
212pub fn intersect_report_a(
214 a: &[GenomicInterval],
215 b: &[GenomicInterval],
216 strand_mode: StrandMode,
217) -> Vec<GenomicInterval> {
218 let groups_b = group_by_chrom(b);
219 let mut result = Vec::new();
220
221 for a_iv in a {
222 let b_ivs = match groups_b.get(&a_iv.chrom) {
223 Some(v) => v,
224 None => continue,
225 };
226 let has_overlap = b_ivs.iter().any(|b_iv| {
227 strands_match(a_iv, b_iv, strand_mode)
228 && a_iv.start < b_iv.end
229 && b_iv.start < a_iv.end
230 });
231 if has_overlap {
232 result.push(a_iv.clone());
233 }
234 }
235
236 result
237}
238
239pub fn subtract(
243 a: &[GenomicInterval],
244 b: &[GenomicInterval],
245 strand_mode: StrandMode,
246) -> Result<Vec<GenomicInterval>> {
247 if b.is_empty() {
248 return Ok(a.to_vec());
249 }
250
251 let merged_b = merge(b, StrandMode::Ignore);
252 let groups_b = group_by_chrom(&merged_b);
253 let mut result = Vec::new();
254
255 for a_iv in a {
256 let b_ivs = match groups_b.get(&a_iv.chrom) {
257 Some(v) => v,
258 None => {
259 result.push(a_iv.clone());
260 continue;
261 }
262 };
263
264 let matching_b: Vec<&&GenomicInterval> = b_ivs
266 .iter()
267 .filter(|b_iv| strands_match(a_iv, b_iv, strand_mode))
268 .collect();
269
270 if matching_b.is_empty() {
271 result.push(a_iv.clone());
272 continue;
273 }
274
275 let mut pos = a_iv.start;
277
278 for b_iv in &matching_b {
279 if b_iv.start >= a_iv.end {
280 break;
281 }
282 if b_iv.end <= pos {
283 continue;
284 }
285 if b_iv.start > pos {
287 let end = b_iv.start.min(a_iv.end);
288 if end > pos {
289 result.push(GenomicInterval {
290 chrom: a_iv.chrom.clone(),
291 start: pos,
292 end,
293 strand: a_iv.strand,
294 });
295 }
296 }
297 pos = pos.max(b_iv.end);
298 }
299
300 if pos < a_iv.end {
302 result.push(GenomicInterval {
303 chrom: a_iv.chrom.clone(),
304 start: pos,
305 end: a_iv.end,
306 strand: a_iv.strand,
307 });
308 }
309 }
310
311 Ok(result)
312}
313
314pub fn complement(
318 intervals: &[GenomicInterval],
319 genome: &GenomeInfo,
320) -> Result<Vec<GenomicInterval>> {
321 for iv in intervals {
323 match genome.get(&iv.chrom) {
324 None => {
325 return Err(CyaneaError::InvalidInput(format!(
326 "chromosome '{}' not found in genome info",
327 iv.chrom
328 )));
329 }
330 Some(&size) => {
331 if iv.end > size {
332 return Err(CyaneaError::InvalidInput(format!(
333 "interval {}:{}-{} exceeds chromosome size {}",
334 iv.chrom, iv.start, iv.end, size
335 )));
336 }
337 }
338 }
339 }
340
341 let merged = merge(intervals, StrandMode::Ignore);
342 let groups = group_by_chrom(&merged);
343 let mut result = Vec::new();
344
345 for (chrom, size) in genome {
346 let mut pos = 0u64;
347 if let Some(ivs) = groups.get(chrom) {
348 for iv in ivs {
349 if iv.start > pos {
350 result.push(GenomicInterval {
351 chrom: chrom.clone(),
352 start: pos,
353 end: iv.start,
354 strand: Strand::Unknown,
355 });
356 }
357 pos = iv.end;
358 }
359 }
360 if pos < *size {
361 result.push(GenomicInterval {
362 chrom: chrom.clone(),
363 start: pos,
364 end: *size,
365 strand: Strand::Unknown,
366 });
367 }
368 }
369
370 Ok(result)
371}
372
373pub fn closest(
375 a: &[GenomicInterval],
376 b: &[GenomicInterval],
377 strand_mode: StrandMode,
378) -> Vec<ClosestResult> {
379 let groups_b = group_by_chrom(b);
380 let mut results = Vec::with_capacity(a.len());
381
382 for a_iv in a {
383 let b_ivs = match groups_b.get(&a_iv.chrom) {
384 Some(v) => v,
385 None => {
386 results.push(ClosestResult {
387 query: a_iv.clone(),
388 closest: None,
389 distance: None,
390 });
391 continue;
392 }
393 };
394
395 let matching: Vec<&GenomicInterval> = b_ivs
397 .iter()
398 .filter(|b_iv| strands_match(a_iv, b_iv, strand_mode))
399 .copied()
400 .collect();
401
402 if matching.is_empty() {
403 results.push(ClosestResult {
404 query: a_iv.clone(),
405 closest: None,
406 distance: None,
407 });
408 continue;
409 }
410
411 let idx = matching.partition_point(|iv| iv.end <= a_iv.start);
413
414 let mut best: Option<(&GenomicInterval, u64)> = None;
415
416 for &candidate_idx in &[
418 idx.wrapping_sub(1),
419 idx,
420 idx + 1,
421 ] {
422 if candidate_idx >= matching.len() {
423 continue;
424 }
425 let b_iv = matching[candidate_idx];
426 let dist = if a_iv.start < b_iv.end && b_iv.start < a_iv.end {
427 0 } else if a_iv.end <= b_iv.start {
429 b_iv.start - a_iv.end
430 } else {
431 a_iv.start - b_iv.end
432 };
433
434 if best.is_none() || dist < best.unwrap().1 {
435 best = Some((b_iv, dist));
436 }
437 }
438
439 match best {
440 Some((b_iv, dist)) => {
441 results.push(ClosestResult {
442 query: a_iv.clone(),
443 closest: Some(b_iv.clone()),
444 distance: Some(dist),
445 });
446 }
447 None => {
448 results.push(ClosestResult {
449 query: a_iv.clone(),
450 closest: None,
451 distance: None,
452 });
453 }
454 }
455 }
456
457 results
458}
459
460pub fn make_windows(genome: &GenomeInfo, window_size: u64) -> Result<Vec<GenomicInterval>> {
462 if window_size == 0 {
463 return Err(CyaneaError::InvalidInput(
464 "window size must be > 0".into(),
465 ));
466 }
467
468 let mut result = Vec::new();
469 for (chrom, &size) in genome {
470 let mut start = 0u64;
471 while start < size {
472 let end = (start + window_size).min(size);
473 result.push(GenomicInterval {
474 chrom: chrom.clone(),
475 start,
476 end,
477 strand: Strand::Unknown,
478 });
479 start += window_size;
480 }
481 }
482
483 Ok(result)
484}
485
486pub fn make_sliding_windows(
488 genome: &GenomeInfo,
489 window_size: u64,
490 step: u64,
491) -> Result<Vec<GenomicInterval>> {
492 if window_size == 0 {
493 return Err(CyaneaError::InvalidInput(
494 "window size must be > 0".into(),
495 ));
496 }
497 if step == 0 {
498 return Err(CyaneaError::InvalidInput("step must be > 0".into()));
499 }
500
501 let mut result = Vec::new();
502 for (chrom, &size) in genome {
503 let mut start = 0u64;
504 while start < size {
505 let end = (start + window_size).min(size);
506 result.push(GenomicInterval {
507 chrom: chrom.clone(),
508 start,
509 end,
510 strand: Strand::Unknown,
511 });
512 start += step;
513 }
514 }
515
516 Ok(result)
517}
518
519pub fn windows_around(
521 intervals: &[GenomicInterval],
522 window_size: u64,
523 genome: &GenomeInfo,
524) -> Result<Vec<GenomicInterval>> {
525 if window_size == 0 {
526 return Err(CyaneaError::InvalidInput(
527 "window size must be > 0".into(),
528 ));
529 }
530
531 let half = window_size / 2;
532 let mut result = Vec::with_capacity(intervals.len());
533
534 for iv in intervals {
535 let chrom_size = genome.get(&iv.chrom).ok_or_else(|| {
536 CyaneaError::InvalidInput(format!(
537 "chromosome '{}' not found in genome info",
538 iv.chrom
539 ))
540 })?;
541
542 let mid = iv.midpoint();
543 let start = mid.saturating_sub(half);
544 let end = (mid + half).min(*chrom_size);
545
546 if start < end {
547 result.push(GenomicInterval {
548 chrom: iv.chrom.clone(),
549 start,
550 end,
551 strand: iv.strand,
552 });
553 }
554 }
555
556 Ok(result)
557}
558
559pub fn jaccard(a: &[GenomicInterval], b: &[GenomicInterval]) -> f64 {
563 jaccard_stats(a, b).jaccard
564}
565
566pub fn jaccard_stats(a: &[GenomicInterval], b: &[GenomicInterval]) -> JaccardStats {
568 let merged_a = merge(a, StrandMode::Ignore);
569 let merged_b = merge(b, StrandMode::Ignore);
570
571 let isect = intersect(&merged_a, &merged_b, StrandMode::Ignore).unwrap_or_default();
573 let intersection_bp: u64 = isect.iter().map(|iv| iv.len()).sum();
574 let n_intersections = isect.len() as u64;
575
576 let all_union = union(&merged_a, &merged_b, StrandMode::Ignore);
578 let union_bp: u64 = all_union.iter().map(|iv| iv.len()).sum();
579
580 let jaccard = if union_bp == 0 {
581 0.0
582 } else {
583 intersection_bp as f64 / union_bp as f64
584 };
585
586 JaccardStats {
587 intersection_bp,
588 union_bp,
589 jaccard,
590 n_intersections,
591 }
592}
593
594#[cfg(test)]
595mod tests {
596 use super::*;
597
598 fn iv(chrom: &str, start: u64, end: u64) -> GenomicInterval {
599 GenomicInterval::new(chrom, start, end).unwrap()
600 }
601
602 fn iv_strand(chrom: &str, start: u64, end: u64, strand: Strand) -> GenomicInterval {
603 GenomicInterval::with_strand(chrom, start, end, strand).unwrap()
604 }
605
606 #[test]
611 fn test_merge_overlapping() {
612 let intervals = vec![iv("chr1", 100, 200), iv("chr1", 150, 300)];
613 let merged = merge(&intervals, StrandMode::Ignore);
614 assert_eq!(merged.len(), 1);
615 assert_eq!(merged[0].start, 100);
616 assert_eq!(merged[0].end, 300);
617 }
618
619 #[test]
620 fn test_merge_abutting() {
621 let intervals = vec![iv("chr1", 100, 200), iv("chr1", 200, 300)];
622 let merged = merge(&intervals, StrandMode::Ignore);
623 assert_eq!(merged.len(), 1);
624 assert_eq!(merged[0].start, 100);
625 assert_eq!(merged[0].end, 300);
626 }
627
628 #[test]
629 fn test_merge_disjoint() {
630 let intervals = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
631 let merged = merge(&intervals, StrandMode::Ignore);
632 assert_eq!(merged.len(), 2);
633 }
634
635 #[test]
636 fn test_merge_empty() {
637 let merged = merge(&[], StrandMode::Ignore);
638 assert!(merged.is_empty());
639 }
640
641 #[test]
642 fn test_merge_multi_chrom() {
643 let intervals = vec![
644 iv("chr2", 100, 200),
645 iv("chr1", 100, 200),
646 iv("chr1", 150, 300),
647 ];
648 let merged = merge(&intervals, StrandMode::Ignore);
649 assert_eq!(merged.len(), 2);
650 assert_eq!(merged[0].chrom, "chr1");
651 assert_eq!(merged[0].end, 300);
652 assert_eq!(merged[1].chrom, "chr2");
653 }
654
655 #[test]
656 fn test_merge_strand_aware() {
657 let intervals = vec![
658 iv_strand("chr1", 100, 200, Strand::Forward),
659 iv_strand("chr1", 150, 300, Strand::Reverse),
660 ];
661 let merged = merge(&intervals, StrandMode::Same);
662 assert_eq!(merged.len(), 2); }
664
665 #[test]
666 fn test_merge_strand_aware_same() {
667 let intervals = vec![
668 iv_strand("chr1", 100, 200, Strand::Forward),
669 iv_strand("chr1", 150, 300, Strand::Forward),
670 ];
671 let merged = merge(&intervals, StrandMode::Same);
672 assert_eq!(merged.len(), 1);
673 assert_eq!(merged[0].end, 300);
674 }
675
676 #[test]
681 fn test_union_basic() {
682 let a = vec![iv("chr1", 100, 200)];
683 let b = vec![iv("chr1", 150, 300)];
684 let u = union(&a, &b, StrandMode::Ignore);
685 assert_eq!(u.len(), 1);
686 assert_eq!(u[0].start, 100);
687 assert_eq!(u[0].end, 300);
688 }
689
690 #[test]
691 fn test_union_disjoint() {
692 let a = vec![iv("chr1", 100, 200)];
693 let b = vec![iv("chr1", 400, 500)];
694 let u = union(&a, &b, StrandMode::Ignore);
695 assert_eq!(u.len(), 2);
696 }
697
698 #[test]
703 fn test_intersect_basic() {
704 let a = vec![iv("chr1", 100, 300)];
705 let b = vec![iv("chr1", 200, 400)];
706 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
707 assert_eq!(r.len(), 1);
708 assert_eq!(r[0].start, 200);
709 assert_eq!(r[0].end, 300);
710 }
711
712 #[test]
713 fn test_intersect_no_overlap() {
714 let a = vec![iv("chr1", 100, 200)];
715 let b = vec![iv("chr1", 300, 400)];
716 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
717 assert!(r.is_empty());
718 }
719
720 #[test]
721 fn test_intersect_multiple_overlaps() {
722 let a = vec![iv("chr1", 100, 500)];
723 let b = vec![iv("chr1", 150, 200), iv("chr1", 300, 400)];
724 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
725 assert_eq!(r.len(), 2);
726 assert_eq!(r[0].start, 150);
727 assert_eq!(r[0].end, 200);
728 assert_eq!(r[1].start, 300);
729 assert_eq!(r[1].end, 400);
730 }
731
732 #[test]
733 fn test_intersect_multi_chrom() {
734 let a = vec![iv("chr1", 100, 200), iv("chr2", 100, 200)];
735 let b = vec![iv("chr1", 150, 250)];
736 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
737 assert_eq!(r.len(), 1);
738 assert_eq!(r[0].chrom, "chr1");
739 }
740
741 #[test]
742 fn test_intersect_empty_input() {
743 let a: Vec<GenomicInterval> = vec![];
744 let b = vec![iv("chr1", 100, 200)];
745 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
746 assert!(r.is_empty());
747 }
748
749 #[test]
750 fn test_intersect_identical() {
751 let a = vec![iv("chr1", 100, 200)];
752 let b = vec![iv("chr1", 100, 200)];
753 let r = intersect(&a, &b, StrandMode::Ignore).unwrap();
754 assert_eq!(r.len(), 1);
755 assert_eq!(r[0].start, 100);
756 assert_eq!(r[0].end, 200);
757 }
758
759 #[test]
760 fn test_intersect_strand_same() {
761 let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
762 let b = vec![iv_strand("chr1", 200, 400, Strand::Reverse)];
763 let r = intersect(&a, &b, StrandMode::Same).unwrap();
764 assert!(r.is_empty()); }
766
767 #[test]
768 fn test_intersect_strand_opposite() {
769 let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
770 let b = vec![iv_strand("chr1", 200, 400, Strand::Reverse)];
771 let r = intersect(&a, &b, StrandMode::Opposite).unwrap();
772 assert_eq!(r.len(), 1);
773 }
774
775 #[test]
780 fn test_intersect_report_a_basic() {
781 let a = vec![iv("chr1", 100, 200), iv("chr1", 400, 500)];
782 let b = vec![iv("chr1", 150, 250)];
783 let r = intersect_report_a(&a, &b, StrandMode::Ignore);
784 assert_eq!(r.len(), 1);
785 assert_eq!(r[0].start, 100);
786 assert_eq!(r[0].end, 200);
787 }
788
789 #[test]
794 fn test_subtract_no_overlap() {
795 let a = vec![iv("chr1", 100, 200)];
796 let b = vec![iv("chr1", 300, 400)];
797 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
798 assert_eq!(r.len(), 1);
799 assert_eq!(r[0].start, 100);
800 assert_eq!(r[0].end, 200);
801 }
802
803 #[test]
804 fn test_subtract_complete_removal() {
805 let a = vec![iv("chr1", 100, 200)];
806 let b = vec![iv("chr1", 50, 250)];
807 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
808 assert!(r.is_empty());
809 }
810
811 #[test]
812 fn test_subtract_left_trim() {
813 let a = vec![iv("chr1", 100, 300)];
814 let b = vec![iv("chr1", 50, 200)];
815 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
816 assert_eq!(r.len(), 1);
817 assert_eq!(r[0].start, 200);
818 assert_eq!(r[0].end, 300);
819 }
820
821 #[test]
822 fn test_subtract_right_trim() {
823 let a = vec![iv("chr1", 100, 300)];
824 let b = vec![iv("chr1", 200, 400)];
825 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
826 assert_eq!(r.len(), 1);
827 assert_eq!(r[0].start, 100);
828 assert_eq!(r[0].end, 200);
829 }
830
831 #[test]
832 fn test_subtract_split_middle() {
833 let a = vec![iv("chr1", 100, 400)];
834 let b = vec![iv("chr1", 200, 300)];
835 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
836 assert_eq!(r.len(), 2);
837 assert_eq!(r[0].start, 100);
838 assert_eq!(r[0].end, 200);
839 assert_eq!(r[1].start, 300);
840 assert_eq!(r[1].end, 400);
841 }
842
843 #[test]
844 fn test_subtract_multiple_b() {
845 let a = vec![iv("chr1", 100, 500)];
846 let b = vec![iv("chr1", 150, 200), iv("chr1", 300, 350)];
847 let r = subtract(&a, &b, StrandMode::Ignore).unwrap();
848 assert_eq!(r.len(), 3);
849 assert_eq!((r[0].start, r[0].end), (100, 150));
850 assert_eq!((r[1].start, r[1].end), (200, 300));
851 assert_eq!((r[2].start, r[2].end), (350, 500));
852 }
853
854 #[test]
855 fn test_subtract_empty_b() {
856 let a = vec![iv("chr1", 100, 200)];
857 let r = subtract(&a, &[], StrandMode::Ignore).unwrap();
858 assert_eq!(r.len(), 1);
859 }
860
861 #[test]
862 fn test_subtract_strand_mode() {
863 let a = vec![iv_strand("chr1", 100, 300, Strand::Forward)];
864 let b = vec![iv_strand("chr1", 150, 250, Strand::Reverse)];
865 let r = subtract(&a, &b, StrandMode::Same).unwrap();
867 assert_eq!(r.len(), 1);
868 assert_eq!(r[0].start, 100);
869 assert_eq!(r[0].end, 300);
870 }
871
872 #[test]
877 fn test_complement_basic() {
878 let intervals = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
879 let genome = genome_info(&[("chr1", 500)]);
880 let r = complement(&intervals, &genome).unwrap();
881 assert_eq!(r.len(), 3);
882 assert_eq!((r[0].start, r[0].end), (0, 100));
883 assert_eq!((r[1].start, r[1].end), (200, 300));
884 assert_eq!((r[2].start, r[2].end), (400, 500));
885 }
886
887 #[test]
888 fn test_complement_full_coverage() {
889 let intervals = vec![iv("chr1", 0, 500)];
890 let genome = genome_info(&[("chr1", 500)]);
891 let r = complement(&intervals, &genome).unwrap();
892 assert!(r.is_empty());
893 }
894
895 #[test]
896 fn test_complement_empty_input() {
897 let genome = genome_info(&[("chr1", 1000)]);
898 let r = complement(&[], &genome).unwrap();
899 assert_eq!(r.len(), 1);
900 assert_eq!((r[0].start, r[0].end), (0, 1000));
901 }
902
903 #[test]
904 fn test_complement_chrom_not_in_genome() {
905 let intervals = vec![iv("chrX", 100, 200)];
906 let genome = genome_info(&[("chr1", 1000)]);
907 assert!(complement(&intervals, &genome).is_err());
908 }
909
910 #[test]
911 fn test_complement_interval_exceeds_chrom() {
912 let intervals = vec![iv("chr1", 900, 1100)];
913 let genome = genome_info(&[("chr1", 1000)]);
914 assert!(complement(&intervals, &genome).is_err());
915 }
916
917 #[test]
918 fn test_complement_chrom_with_no_intervals() {
919 let intervals = vec![iv("chr1", 100, 200)];
920 let genome = genome_info(&[("chr1", 500), ("chr2", 300)]);
921 let r = complement(&intervals, &genome).unwrap();
922 assert_eq!(r.len(), 3);
924 let chr2: Vec<_> = r.iter().filter(|i| i.chrom == "chr2").collect();
925 assert_eq!(chr2.len(), 1);
926 assert_eq!((chr2[0].start, chr2[0].end), (0, 300));
927 }
928
929 #[test]
934 fn test_closest_upstream() {
935 let a = vec![iv("chr1", 300, 400)];
936 let b = vec![iv("chr1", 100, 200)];
937 let r = closest(&a, &b, StrandMode::Ignore);
938 assert_eq!(r.len(), 1);
939 assert_eq!(r[0].distance, Some(100));
940 }
941
942 #[test]
943 fn test_closest_downstream() {
944 let a = vec![iv("chr1", 100, 200)];
945 let b = vec![iv("chr1", 300, 400)];
946 let r = closest(&a, &b, StrandMode::Ignore);
947 assert_eq!(r.len(), 1);
948 assert_eq!(r[0].distance, Some(100));
949 }
950
951 #[test]
952 fn test_closest_overlapping() {
953 let a = vec![iv("chr1", 100, 300)];
954 let b = vec![iv("chr1", 200, 400)];
955 let r = closest(&a, &b, StrandMode::Ignore);
956 assert_eq!(r.len(), 1);
957 assert_eq!(r[0].distance, Some(0));
958 }
959
960 #[test]
961 fn test_closest_no_b_on_chrom() {
962 let a = vec![iv("chr1", 100, 200)];
963 let b = vec![iv("chr2", 100, 200)];
964 let r = closest(&a, &b, StrandMode::Ignore);
965 assert_eq!(r.len(), 1);
966 assert!(r[0].closest.is_none());
967 assert!(r[0].distance.is_none());
968 }
969
970 #[test]
971 fn test_closest_single_b() {
972 let a = vec![iv("chr1", 500, 600)];
973 let b = vec![iv("chr1", 100, 200)];
974 let r = closest(&a, &b, StrandMode::Ignore);
975 assert_eq!(r[0].distance, Some(300));
976 }
977
978 #[test]
979 fn test_closest_equidistant() {
980 let a = vec![iv("chr1", 200, 300)];
981 let b = vec![iv("chr1", 100, 150), iv("chr1", 350, 400)];
982 let r = closest(&a, &b, StrandMode::Ignore);
983 assert_eq!(r[0].distance, Some(50));
984 }
985
986 #[test]
991 fn test_make_windows_basic() {
992 let genome = genome_info(&[("chr1", 100)]);
993 let r = make_windows(&genome, 30).unwrap();
994 assert_eq!(r.len(), 4); assert_eq!((r[3].start, r[3].end), (90, 100));
996 }
997
998 #[test]
999 fn test_make_windows_exact() {
1000 let genome = genome_info(&[("chr1", 100)]);
1001 let r = make_windows(&genome, 50).unwrap();
1002 assert_eq!(r.len(), 2);
1003 assert_eq!((r[0].start, r[0].end), (0, 50));
1004 assert_eq!((r[1].start, r[1].end), (50, 100));
1005 }
1006
1007 #[test]
1008 fn test_make_windows_zero_size() {
1009 let genome = genome_info(&[("chr1", 100)]);
1010 assert!(make_windows(&genome, 0).is_err());
1011 }
1012
1013 #[test]
1014 fn test_make_windows_larger_than_chrom() {
1015 let genome = genome_info(&[("chr1", 50)]);
1016 let r = make_windows(&genome, 100).unwrap();
1017 assert_eq!(r.len(), 1);
1018 assert_eq!((r[0].start, r[0].end), (0, 50));
1019 }
1020
1021 #[test]
1022 fn test_make_sliding_windows() {
1023 let genome = genome_info(&[("chr1", 100)]);
1024 let r = make_sliding_windows(&genome, 50, 25).unwrap();
1025 assert_eq!(r.len(), 4);
1027 assert_eq!((r[1].start, r[1].end), (25, 75));
1028 }
1029
1030 #[test]
1031 fn test_make_sliding_windows_zero_step() {
1032 let genome = genome_info(&[("chr1", 100)]);
1033 assert!(make_sliding_windows(&genome, 50, 0).is_err());
1034 }
1035
1036 #[test]
1037 fn test_windows_around() {
1038 let intervals = vec![iv("chr1", 100, 200)];
1039 let genome = genome_info(&[("chr1", 1000)]);
1040 let r = windows_around(&intervals, 50, &genome).unwrap();
1041 assert_eq!(r.len(), 1);
1042 assert_eq!((r[0].start, r[0].end), (125, 175));
1044 }
1045
1046 #[test]
1047 fn test_windows_around_clipping() {
1048 let intervals = vec![iv("chr1", 0, 10)];
1049 let genome = genome_info(&[("chr1", 100)]);
1050 let r = windows_around(&intervals, 40, &genome).unwrap();
1051 assert_eq!(r.len(), 1);
1052 assert_eq!(r[0].start, 0);
1054 assert_eq!(r[0].end, 25);
1055 }
1056
1057 #[test]
1062 fn test_jaccard_identical() {
1063 let a = vec![iv("chr1", 100, 200)];
1064 let j = jaccard(&a, &a);
1065 assert!((j - 1.0).abs() < 1e-10);
1066 }
1067
1068 #[test]
1069 fn test_jaccard_no_overlap() {
1070 let a = vec![iv("chr1", 100, 200)];
1071 let b = vec![iv("chr1", 300, 400)];
1072 let j = jaccard(&a, &b);
1073 assert!((j - 0.0).abs() < 1e-10);
1074 }
1075
1076 #[test]
1077 fn test_jaccard_partial_overlap() {
1078 let a = vec![iv("chr1", 100, 300)];
1079 let b = vec![iv("chr1", 200, 400)];
1080 let j = jaccard(&a, &b);
1082 assert!((j - 100.0 / 300.0).abs() < 1e-10);
1083 }
1084
1085 #[test]
1086 fn test_jaccard_both_empty() {
1087 let a: Vec<GenomicInterval> = vec![];
1088 let b: Vec<GenomicInterval> = vec![];
1089 assert!((jaccard(&a, &b) - 0.0).abs() < 1e-10);
1090 }
1091
1092 #[test]
1093 fn test_jaccard_stats_bp_counts() {
1094 let a = vec![iv("chr1", 100, 300)];
1095 let b = vec![iv("chr1", 200, 400)];
1096 let stats = jaccard_stats(&a, &b);
1097 assert_eq!(stats.intersection_bp, 100);
1098 assert_eq!(stats.union_bp, 300);
1099 assert_eq!(stats.n_intersections, 1);
1100 }
1101
1102 #[test]
1107 fn test_subtract_plus_intersect_equals_a() {
1108 let a = merge(
1110 &[iv("chr1", 100, 300), iv("chr1", 250, 400)],
1111 StrandMode::Ignore,
1112 );
1113 let b = vec![iv("chr1", 200, 350)];
1114
1115 let sub = subtract(&a, &b, StrandMode::Ignore).unwrap();
1116 let isect = intersect(&a, &b, StrandMode::Ignore).unwrap();
1117
1118 let sub_bp: u64 = sub.iter().map(|i| i.len()).sum();
1119 let isect_bp: u64 = isect.iter().map(|i| i.len()).sum();
1120 let a_bp: u64 = a.iter().map(|i| i.len()).sum();
1121
1122 assert_eq!(sub_bp + isect_bp, a_bp);
1123 }
1124
1125 #[test]
1126 fn test_jaccard_symmetry() {
1127 let a = vec![iv("chr1", 100, 300)];
1128 let b = vec![iv("chr1", 200, 500)];
1129 let j1 = jaccard(&a, &b);
1130 let j2 = jaccard(&b, &a);
1131 assert!((j1 - j2).abs() < 1e-10);
1132 }
1133
1134 #[test]
1135 fn test_jaccard_bounds() {
1136 let a = vec![iv("chr1", 100, 300)];
1137 let b = vec![iv("chr1", 200, 500)];
1138 let j = jaccard(&a, &b);
1139 assert!(j >= 0.0 && j <= 1.0);
1140 }
1141
1142 #[test]
1143 fn test_complement_of_complement_equals_merge() {
1144 let a = vec![iv("chr1", 100, 200), iv("chr1", 300, 400)];
1145 let genome = genome_info(&[("chr1", 500)]);
1146
1147 let comp = complement(&a, &genome).unwrap();
1148 let comp2 = complement(&comp, &genome).unwrap();
1149
1150 let merged_a = merge(&a, StrandMode::Ignore);
1151 assert_eq!(comp2.len(), merged_a.len());
1152 for (c, m) in comp2.iter().zip(merged_a.iter()) {
1153 assert_eq!(c.start, m.start);
1154 assert_eq!(c.end, m.end);
1155 }
1156 }
1157}