#![allow(
private_interfaces,
reason = "ResolvedRange is intentionally crate-private; the IntoSegmentTarget trait is sealed"
)]
use crate::bam::BamHeader;
use seqair_types::{Pos0, RegionString, SmolStr};
use std::num::NonZeroU32;
use super::{
ReaderError,
resolve::{ResolveTid, Tid},
};
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum SegmentInvariantError {
#[error("segment start {start} > end {end}")]
StartAfterEnd { start: u64, end: u64 },
#[error("segment end {end} exceeds contig_last_pos {contig_last_pos}")]
EndPastContig { end: u64, contig_last_pos: u64 },
#[error(
"overlap_start ({overlap_start}) + overlap_end ({overlap_end}) \
must be < tile length ({len})"
)]
OverlapTooLarge { overlap_start: u32, overlap_end: u32, len: u32 },
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Segment {
tid: Tid,
contig: SmolStr,
start: Pos0,
end: Pos0,
len: u32,
overlap_start: u32,
overlap_end: u32,
contig_last_pos: Pos0,
}
#[allow(
clippy::len_without_is_empty,
reason = "Segment is never empty by construction; an is_empty() method would be noise"
)]
impl Segment {
pub(crate) fn new(
tid: Tid,
contig: SmolStr,
start: Pos0,
end: Pos0,
overlap_start: u32,
overlap_end: u32,
contig_last_pos: Pos0,
) -> Result<Self, SegmentInvariantError> {
if start > end {
return Err(SegmentInvariantError::StartAfterEnd {
start: start.as_u64(),
end: end.as_u64(),
});
}
if end > contig_last_pos {
return Err(SegmentInvariantError::EndPastContig {
end: end.as_u64(),
contig_last_pos: contig_last_pos.as_u64(),
});
}
let span_u64 =
end.as_u64().checked_sub(start.as_u64()).and_then(|d| d.checked_add(1)).ok_or(
SegmentInvariantError::StartAfterEnd { start: start.as_u64(), end: end.as_u64() },
)?;
let len = u32::try_from(span_u64).map_err(|_| SegmentInvariantError::EndPastContig {
end: end.as_u64(),
contig_last_pos: contig_last_pos.as_u64(),
})?;
let overlap_total = u64::from(overlap_start).saturating_add(u64::from(overlap_end));
if overlap_total >= u64::from(len) {
return Err(SegmentInvariantError::OverlapTooLarge { overlap_start, overlap_end, len });
}
Ok(Self { tid, contig, start, end, len, overlap_start, overlap_end, contig_last_pos })
}
#[must_use]
pub fn tid(&self) -> Tid {
self.tid
}
#[must_use]
pub fn contig(&self) -> &SmolStr {
&self.contig
}
#[must_use]
pub fn start(&self) -> Pos0 {
self.start
}
#[must_use]
pub fn end(&self) -> Pos0 {
self.end
}
#[must_use]
pub fn len(&self) -> u32 {
self.len
}
#[must_use]
pub fn overlap_start(&self) -> u32 {
self.overlap_start
}
#[must_use]
pub fn overlap_end(&self) -> u32 {
self.overlap_end
}
#[must_use]
pub fn contig_last_pos(&self) -> Pos0 {
self.contig_last_pos
}
#[must_use]
pub fn starts_at_contig_start(&self) -> bool {
self.start == Pos0::ZERO
}
#[must_use]
pub fn ends_at_contig_end(&self) -> bool {
self.end == self.contig_last_pos
}
#[must_use]
pub fn core_range(&self) -> std::ops::RangeInclusive<Pos0> {
let core_start = self
.start
.checked_add_offset(seqair_types::Offset::new(i64::from(self.overlap_start)))
.expect("constructor invariant: start + overlap_start <= end <= i32::MAX");
let core_end = self
.end
.checked_sub_offset(seqair_types::Offset::new(i64::from(self.overlap_end)))
.expect("constructor invariant: end - overlap_end >= start >= 0");
debug_assert!(
core_start <= core_end,
"constructor invariant: overlap_start + overlap_end < len => core_start <= core_end"
);
core_start..=core_end
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SegmentOptions {
max_len: NonZeroU32,
overlap: u32,
}
impl Default for SegmentOptions {
fn default() -> Self {
let max_len = NonZeroU32::new(10_000).expect("non-zero literal");
Self { max_len, overlap: 0 }
}
}
impl SegmentOptions {
#[must_use]
pub const fn new(max_len: NonZeroU32) -> Self {
Self { max_len, overlap: 0 }
}
pub const fn with_overlap(self, overlap: u32) -> Result<Self, SegmentOptionsError> {
if overlap >= self.max_len.get() {
return Err(SegmentOptionsError::OverlapTooLarge {
max_len: self.max_len.get(),
overlap,
});
}
Ok(Self { max_len: self.max_len, overlap })
}
#[must_use]
pub const fn max_len(&self) -> NonZeroU32 {
self.max_len
}
#[must_use]
pub const fn overlap(&self) -> u32 {
self.overlap
}
}
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum SegmentOptionsError {
#[error("segment overlap {overlap} must be < max_len {max_len}")]
OverlapTooLarge { max_len: u32, overlap: u32 },
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ResolvedRange {
pub tid: Tid,
pub contig: SmolStr,
pub start: Pos0,
pub end: Pos0,
pub contig_last_pos: Pos0,
}
mod private {
use super::{BamHeader, ReaderError, ResolvedRange};
#[allow(
private_interfaces,
reason = "method signature uses crate-private ResolvedRange; trait is sealed"
)]
pub trait IntoSegmentTargetSealed {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError>;
}
}
pub trait IntoSegmentTarget: private::IntoSegmentTargetSealed {}
impl<T: private::IntoSegmentTargetSealed> IntoSegmentTarget for T {}
pub(crate) fn resolve_target<T: IntoSegmentTarget>(
target: T,
header: &BamHeader,
) -> Result<Vec<ResolvedRange>, ReaderError> {
private::IntoSegmentTargetSealed::resolve_target(target, header)
}
fn target_count_u32(header: &BamHeader) -> Result<u32, ReaderError> {
let count = header.target_count();
u32::try_from(count).map_err(|_| ReaderError::HeaderTargetCountOverflow { count })
}
fn whole_contig_with_name(
header: &BamHeader,
tid: Tid,
name: SmolStr,
) -> Result<ResolvedRange, ReaderError> {
let Some(len) = header.target_len(tid.as_u32()) else {
let n_targets = target_count_u32(header)?;
return Err(super::resolve::TidError::TidOutOfRange { tid: tid.as_u32(), n_targets }.into());
};
if len == 0 {
return Err(ReaderError::EmptyContig { name });
}
let last = len.checked_sub(1).expect("len > 0 checked above");
let last_u32 = u32::try_from(last).map_err(|_| ReaderError::RegionEndTooLarge { end: last })?;
let contig_last_pos =
Pos0::new(last_u32).ok_or(ReaderError::RegionEndTooLarge { end: last })?;
Ok(ResolvedRange {
tid,
contig: name,
start: Pos0::ZERO,
end: contig_last_pos,
contig_last_pos,
})
}
fn whole_contig(header: &BamHeader, tid: Tid) -> Result<ResolvedRange, ReaderError> {
let name = match header.target_name(tid.as_u32()) {
Some(n) => n.to_owned(),
None => {
let n_targets = target_count_u32(header)?;
return Err(
super::resolve::TidError::TidOutOfRange { tid: tid.as_u32(), n_targets }.into()
);
}
};
whole_contig_with_name(header, tid, name.into())
}
impl private::IntoSegmentTargetSealed for Tid {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
Ok(vec![whole_contig(header, self)?])
}
}
impl private::IntoSegmentTargetSealed for u32 {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let tid = self.resolve_tid(header)?;
Ok(vec![whole_contig(header, tid)?])
}
}
impl private::IntoSegmentTargetSealed for &str {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let tid = self.resolve_tid(header)?;
Ok(vec![whole_contig(header, tid)?])
}
}
impl private::IntoSegmentTargetSealed for &String {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
private::IntoSegmentTargetSealed::resolve_target(self.as_str(), header)
}
}
impl private::IntoSegmentTargetSealed for &SmolStr {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let tid = self.as_str().resolve_tid(header)?;
Ok(vec![whole_contig_with_name(header, tid, self.clone())?])
}
}
impl private::IntoSegmentTargetSealed for &RegionString {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let tid = self.chromosome.as_str().resolve_tid(header)?;
let mut range = whole_contig_with_name(header, tid, self.chromosome.clone())?;
if let Some(p1) = self.start {
range.start = p1.to_zero_based();
}
if let Some(p1) = self.end {
let end0 = p1.to_zero_based();
if end0 > range.contig_last_pos {
return Err(ReaderError::RegionEndTooLarge { end: end0.as_u64() });
}
range.end = end0;
}
if range.start > range.end {
return Err(ReaderError::RegionStartAfterEnd {
contig: range.contig.clone(),
start: range.start.as_u64(),
end: range.end.as_u64(),
});
}
Ok(vec![range])
}
}
impl<R: ResolveTid> private::IntoSegmentTargetSealed for (R, Pos0, Pos0) {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let (resolver, start, end) = self;
let tid = resolver.resolve_tid(header)?;
let mut range = whole_contig(header, tid)?;
if start > end {
return Err(ReaderError::RegionStartAfterEnd {
contig: range.contig.clone(),
start: start.as_u64(),
end: end.as_u64(),
});
}
if end > range.contig_last_pos {
return Err(ReaderError::RegionEndTooLarge { end: end.as_u64() });
}
range.start = start;
range.end = end;
Ok(vec![range])
}
}
impl private::IntoSegmentTargetSealed for () {
fn resolve_target(self, header: &BamHeader) -> Result<Vec<ResolvedRange>, ReaderError> {
let n = target_count_u32(header)?;
let mut out = Vec::with_capacity(header.target_count());
for tid_u32 in 0..n {
let tid = tid_u32.resolve_tid(header)?;
match whole_contig(header, tid) {
Ok(r) => out.push(r),
Err(ReaderError::EmptyContig { .. }) => continue,
Err(e) => return Err(e),
}
}
Ok(out)
}
}
pub struct Segments {
ranges: std::vec::IntoIter<ResolvedRange>,
current: Option<ResolvedRange>,
next_core_start: Pos0,
opts: SegmentOptions,
}
impl Segments {
pub(crate) fn new(ranges: Vec<ResolvedRange>, opts: SegmentOptions) -> Self {
let mut iter = ranges.into_iter();
let current = iter.next();
let next_core_start = current.as_ref().map_or(Pos0::ZERO, |r| r.start);
Self { ranges: iter, current, next_core_start, opts }
}
fn advance_to_next_range(&mut self) {
self.current = self.ranges.next();
if let Some(r) = self.current.as_ref() {
self.next_core_start = r.start;
}
}
}
impl Iterator for Segments {
type Item = Segment;
fn next(&mut self) -> Option<Segment> {
loop {
let range = self.current.as_ref()?;
if self.next_core_start > range.end {
self.advance_to_next_range();
continue;
}
let max_len = self.opts.max_len.get();
let overlap = self.opts.overlap;
let range_start_u64 = range.start.as_u64();
let range_end_u64 = range.end.as_u64();
let core_start_u64 = self.next_core_start.as_u64();
let core_end_u64 = core_start_u64
.saturating_add(u64::from(max_len))
.saturating_sub(1)
.min(range_end_u64);
let tile_start_u64 =
core_start_u64.saturating_sub(u64::from(overlap)).max(range_start_u64);
let tile_end_u64 = core_end_u64.saturating_add(u64::from(overlap)).min(range_end_u64);
let overlap_start =
u32::try_from(core_start_u64.saturating_sub(tile_start_u64)).unwrap_or(u32::MAX);
let overlap_end =
u32::try_from(tile_end_u64.saturating_sub(core_end_u64)).unwrap_or(u32::MAX);
let tile_start = pos0_from_u64(tile_start_u64).unwrap_or(range.start);
let tile_end = pos0_from_u64(tile_end_u64).unwrap_or(range.end);
let seg = Segment::new(
range.tid,
range.contig.clone(),
tile_start,
tile_end,
overlap_start,
overlap_end,
range.contig_last_pos,
)
.expect("iterator arithmetic produces valid Segment invariants");
if core_end_u64 >= range_end_u64 {
self.advance_to_next_range();
} else {
let next = core_end_u64.saturating_add(1);
match pos0_from_u64(next) {
Some(p) => self.next_core_start = p,
None => {
self.current = None;
self.ranges = Vec::new().into_iter();
}
}
}
return Some(seg);
}
}
}
fn pos0_from_u64(v: u64) -> Option<Pos0> {
let v32 = u32::try_from(v).ok()?;
Pos0::new(v32)
}
#[cfg(test)]
#[allow(
clippy::arithmetic_side_effects,
clippy::cast_possible_truncation,
clippy::format_push_string,
clippy::comparison_chain,
clippy::comparison_to_empty,
reason = "test code with bounded values"
)]
mod tests {
use super::private::IntoSegmentTargetSealed as _;
use super::*;
use proptest::prelude::*;
fn p(n: u32) -> Pos0 {
Pos0::new(n).expect("test position")
}
fn tid(n: u32) -> Tid {
let mut header_text = String::from("@HD\tVN:1.6\n");
for i in 0..=n {
header_text.push_str(&format!("@SQ\tSN:c{i}\tLN:1000\n"));
}
let header = crate::bam::BamHeader::from_sam_text(&header_text).unwrap();
n.resolve_tid(&header).unwrap()
}
fn header_with_contigs(contigs: &[(&str, u32)]) -> crate::bam::BamHeader {
let mut text = String::from("@HD\tVN:1.6\n");
for (name, len) in contigs {
text.push_str(&format!("@SQ\tSN:{name}\tLN:{len}\n"));
}
crate::bam::BamHeader::from_sam_text(&text).unwrap()
}
fn oracle_tiles(
range_start: u32,
range_end: u32,
max_len: u32,
overlap: u32,
) -> Vec<(u32, u32, u32, u32)> {
assert!(range_start <= range_end);
assert!(max_len >= 1);
assert!(overlap < max_len);
let mut cores: Vec<(u32, u32)> = Vec::new();
let mut s = range_start;
loop {
let len_minus_one = max_len - 1;
let e = s.saturating_add(len_minus_one).min(range_end);
cores.push((s, e));
if e == range_end {
break;
}
s = e + 1;
}
let mut out = Vec::with_capacity(cores.len());
for (cs, ce) in cores {
let want_left = u64::from(cs).saturating_sub(u64::from(overlap));
let want_right = u64::from(ce).saturating_add(u64::from(overlap));
let tile_start =
u32::try_from(want_left.max(u64::from(range_start))).expect("range fits in u32");
let tile_end =
u32::try_from(want_right.min(u64::from(range_end))).expect("range fits in u32");
let overlap_start = cs - tile_start;
let overlap_end = tile_end - ce;
out.push((tile_start, tile_end, overlap_start, overlap_end));
}
out
}
#[test]
fn segment_options_rejects_too_large_overlap() {
let opts = SegmentOptions::new(NonZeroU32::new(100).unwrap());
assert!(opts.with_overlap(99).is_ok());
assert!(matches!(
opts.with_overlap(100),
Err(SegmentOptionsError::OverlapTooLarge { max_len: 100, overlap: 100 })
));
assert!(matches!(opts.with_overlap(101), Err(SegmentOptionsError::OverlapTooLarge { .. })));
}
#[test]
fn segment_core_range_excludes_overlap() {
let seg = Segment::new(tid(0), "c0".into(), p(100), p(199), 10, 5, p(999)).unwrap();
assert_eq!(seg.len(), 100);
let core = seg.core_range();
assert_eq!(*core.start(), p(110));
assert_eq!(*core.end(), p(194));
}
#[test]
fn segment_new_rejects_start_after_end() {
let err =
Segment::new(tid(0), "c0".into(), p(200), p(100), 0, 0, p(999)).expect_err("invalid");
assert!(matches!(err, SegmentInvariantError::StartAfterEnd { start: 200, end: 100 }));
}
#[test]
fn segment_new_rejects_end_past_contig() {
let err =
Segment::new(tid(0), "c0".into(), p(0), p(500), 0, 0, p(100)).expect_err("invalid");
assert!(matches!(
err,
SegmentInvariantError::EndPastContig { end: 500, contig_last_pos: 100 }
));
}
#[test]
fn segment_new_rejects_overlap_total_ge_len() {
let err =
Segment::new(tid(0), "c0".into(), p(0), p(99), 50, 50, p(999)).expect_err("invalid");
assert!(matches!(
err,
SegmentInvariantError::OverlapTooLarge { overlap_start: 50, overlap_end: 50, len: 100 }
));
let err2 =
Segment::new(tid(0), "c0".into(), p(0), p(99), 60, 50, p(999)).expect_err("invalid");
assert!(matches!(err2, SegmentInvariantError::OverlapTooLarge { .. }));
let ok = Segment::new(tid(0), "c0".into(), p(0), p(99), 50, 49, p(999)).unwrap();
assert_eq!(ok.len(), 100);
assert_eq!(*ok.core_range().start(), p(50));
assert_eq!(*ok.core_range().end(), p(50));
}
#[test]
fn segment_len_matches_inclusive_span() {
for (s, e, expected) in
[(0u32, 0u32, 1u32), (0, 99, 100), (5, 14, 10), (1_000_000, 1_000_499, 500)]
{
let seg = Segment::new(tid(0), "c0".into(), p(s), p(e), 0, 0, p(2_000_000)).unwrap();
assert_eq!(seg.len(), expected, "span [{s}..={e}]");
}
}
#[test]
fn segments_single_tile_when_range_fits() {
let header = header_with_contigs(&[("chr1", 500)]);
let opts = SegmentOptions::new(NonZeroU32::new(1000).unwrap());
let ranges = ("chr1", p(0), p(99)).resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
assert_eq!(segs.len(), 1);
let s = &segs[0];
assert_eq!(s.start(), p(0));
assert_eq!(s.end(), p(99));
assert_eq!(s.overlap_start(), 0);
assert_eq!(s.overlap_end(), 0);
assert!(s.starts_at_contig_start());
assert_eq!(s.contig_last_pos(), p(499));
}
#[test]
fn segments_many_tiles_no_overlap() {
let header = header_with_contigs(&[("chr1", 1_000_000)]);
let opts = SegmentOptions::new(NonZeroU32::new(100).unwrap());
let ranges = ("chr1", p(0), p(249)).resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
assert_eq!(segs.len(), 3);
assert_eq!((segs[0].start(), segs[0].end()), (p(0), p(99)));
assert_eq!((segs[1].start(), segs[1].end()), (p(100), p(199)));
assert_eq!((segs[2].start(), segs[2].end()), (p(200), p(249)));
for s in &segs {
assert_eq!(s.overlap_start(), 0);
assert_eq!(s.overlap_end(), 0);
}
}
#[test]
fn segments_with_overlap() {
let header = header_with_contigs(&[("chr1", 1000)]);
let opts = SegmentOptions::new(NonZeroU32::new(100).unwrap()).with_overlap(10).unwrap();
let ranges = ("chr1", p(0), p(249)).resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
assert_eq!(segs.len(), 3);
assert_eq!(
(segs[0].start(), segs[0].end(), segs[0].overlap_start(), segs[0].overlap_end()),
(p(0), p(109), 0, 10)
);
assert_eq!(
(segs[1].start(), segs[1].end(), segs[1].overlap_start(), segs[1].overlap_end()),
(p(90), p(209), 10, 10)
);
assert_eq!(
(segs[2].start(), segs[2].end(), segs[2].overlap_start(), segs[2].overlap_end()),
(p(190), p(249), 10, 0)
);
let cores: Vec<_> = segs.iter().map(|s| s.core_range()).collect();
assert_eq!(*cores[0].start(), p(0));
assert_eq!(*cores[0].end(), p(99));
assert_eq!(*cores[1].start(), p(100));
assert_eq!(*cores[1].end(), p(199));
assert_eq!(*cores[2].start(), p(200));
assert_eq!(*cores[2].end(), p(249));
}
#[test]
fn segments_terminate_at_pos0_max() {
let last = Pos0::max_value();
let near_max = Pos0::new(u32::try_from(last.as_u64()).unwrap() - 5).unwrap();
let mut header_text = String::from("@HD\tVN:1.6\n");
header_text.push_str("@SQ\tSN:fake\tLN:1000\n");
let header = crate::bam::BamHeader::from_sam_text(&header_text).unwrap();
let tid = 0u32.resolve_tid(&header).unwrap();
let range = ResolvedRange {
tid,
contig: "fake".into(),
start: near_max,
end: last,
contig_last_pos: last,
};
let opts = SegmentOptions::new(NonZeroU32::new(1_000_000).unwrap());
let segs: Vec<_> = Segments::new(vec![range], opts).collect();
assert_eq!(segs.len(), 1);
assert_eq!(segs[0].end(), last);
assert_eq!(segs[0].start(), near_max);
}
#[test]
fn segments_whole_genome_skips_empty_contigs() {
let text = "@HD\tVN:1.6\n@SQ\tSN:a\tLN:50\n@SQ\tSN:empty\tLN:0\n@SQ\tSN:c\tLN:30\n";
let header = crate::bam::BamHeader::from_sam_text(text).unwrap();
let opts = SegmentOptions::new(NonZeroU32::new(100).unwrap());
let ranges = ().resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
assert_eq!(segs.len(), 2);
assert_eq!(segs[0].contig().as_str(), "a");
assert_eq!(segs[1].contig().as_str(), "c");
}
#[test]
fn into_segment_target_empty_contig_errors_for_named() {
let text = "@HD\tVN:1.6\n@SQ\tSN:empty\tLN:0\n";
let header = crate::bam::BamHeader::from_sam_text(text).unwrap();
let err = "empty".resolve_target(&header).unwrap_err();
assert!(matches!(err, ReaderError::EmptyContig { .. }));
}
#[test]
fn whole_contig_with_name_rejects_stale_tid() {
let header_a = header_with_contigs(&[
("a0", 1000),
("a1", 1000),
("a2", 1000),
("a3", 1000),
("a4", 1000),
]);
let stale_tid = 4u32.resolve_tid(&header_a).unwrap();
let header_b = header_with_contigs(&[("b0", 1000), ("b1", 1000)]);
let err = whole_contig_with_name(&header_b, stale_tid, "b0".into()).unwrap_err();
assert!(
matches!(
err,
ReaderError::Tid {
source: super::super::resolve::TidError::TidOutOfRange { tid: 4, n_targets: 2 }
}
),
"expected TidOutOfRange, got {err:?}"
);
}
#[test]
fn into_segment_target_start_after_end_errors() {
let header = header_with_contigs(&[("chr1", 1000)]);
let err = ("chr1", p(100), p(50)).resolve_target(&header).unwrap_err();
assert!(matches!(err, ReaderError::RegionStartAfterEnd { .. }));
}
#[test]
fn into_segment_target_end_past_contig_errors() {
let header = header_with_contigs(&[("chr1", 100)]);
let err = ("chr1", p(0), p(500)).resolve_target(&header).unwrap_err();
assert!(matches!(err, ReaderError::RegionEndTooLarge { .. }));
}
proptest! {
#[test]
fn segments_match_oracle(
range_start in 0u32..1_000_000,
len in 1u32..1_000_000,
max_len in 1u32..2_000,
overlap in 0u32..2_000,
) {
prop_assume!(overlap < max_len);
let range_end = range_start.saturating_add(len - 1).min(1_999_999);
let opts = SegmentOptions::new(NonZeroU32::new(max_len).unwrap())
.with_overlap(overlap).unwrap();
let header = header_with_contigs(&[("chr1", 2_000_000)]);
let ranges = ("chr1", p(range_start), p(range_end)).resolve_target(&header).unwrap();
let actual: Vec<(u32, u32, u32, u32)> = Segments::new(ranges, opts)
.map(|s| (
u32::try_from(s.start().as_u64()).unwrap(),
u32::try_from(s.end().as_u64()).unwrap(),
s.overlap_start(),
s.overlap_end(),
))
.collect();
let expected = oracle_tiles(range_start, range_end, max_len, overlap);
prop_assert_eq!(actual, expected);
}
#[test]
fn cores_tile_input_exactly(
range_start in 0u32..100_000,
len in 1u32..100_000,
max_len in 1u32..500,
overlap in 0u32..500,
) {
prop_assume!(overlap < max_len);
let range_end = range_start.saturating_add(len - 1).min(199_999);
let opts = SegmentOptions::new(NonZeroU32::new(max_len).unwrap())
.with_overlap(overlap).unwrap();
let header = header_with_contigs(&[("chr1", 200_000)]);
let ranges = ("chr1", p(range_start), p(range_end)).resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
prop_assert!(!segs.is_empty());
let first = &segs[0];
let last = segs.last().unwrap();
prop_assert_eq!(*first.core_range().start(), p(range_start));
prop_assert_eq!(*last.core_range().end(), p(range_end));
for w in segs.windows(2) {
let a_end = w[0].core_range().end().as_u64();
let b_start = w[1].core_range().start().as_u64();
prop_assert_eq!(a_end + 1, b_start, "core ranges must be contiguous");
}
for s in &segs {
prop_assert!(s.len() >= 1);
prop_assert!(s.len() <= max_len + 2 * overlap);
}
}
#[test]
fn first_and_last_overlaps_are_zero(
range_start in 0u32..100_000,
n_full_cores in 1u32..20,
extra_len in 0u32..200,
max_len in 10u32..200,
overlap in 0u32..9,
) {
prop_assume!(overlap < max_len);
let total = u64::from(n_full_cores) * u64::from(max_len) + u64::from(extra_len);
let range_end_u64 = u64::from(range_start).saturating_add(total).saturating_sub(1);
let contig_len = range_end_u64.saturating_add(10);
let contig_len_u32 = u32::try_from(contig_len).unwrap_or(u32::MAX);
let range_end = u32::try_from(range_end_u64).unwrap_or(u32::MAX / 2);
let opts = SegmentOptions::new(NonZeroU32::new(max_len).unwrap())
.with_overlap(overlap).unwrap();
let header = header_with_contigs(&[("chr1", contig_len_u32)]);
let ranges =
("chr1", p(range_start), p(range_end)).resolve_target(&header).unwrap();
let segs: Vec<_> = Segments::new(ranges, opts).collect();
prop_assert!(!segs.is_empty());
prop_assert_eq!(segs.first().unwrap().overlap_start(), 0);
prop_assert_eq!(segs.last().unwrap().overlap_end(), 0);
prop_assert_eq!(segs.first().unwrap().start(), p(range_start));
prop_assert_eq!(segs.last().unwrap().end(), p(range_end));
for s in &segs {
prop_assert!(s.start() >= p(range_start));
prop_assert!(s.end() <= p(range_end));
}
}
}
}