use thiserror::Error;
use crate::Match;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Segment {
pub id: u32,
pub start: u32,
pub len: u32,
}
impl Segment {
#[must_use]
pub const fn new(id: u32, start: u32, len: u32) -> Self {
Self { id, start, len }
}
fn checked_end(self, segment_index: usize) -> Result<u32, SegmentMapError> {
self.start
.checked_add(self.len)
.ok_or(SegmentMapError::SegmentEndOverflow {
segment_index,
start: self.start,
len: self.len,
})
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SegmentedMatch {
pub segment_id: u32,
pub pattern_id: u32,
pub local_start: u32,
pub local_end: u32,
}
impl SegmentedMatch {
#[must_use]
pub const fn new(segment_id: u32, pattern_id: u32, local_start: u32, local_end: u32) -> Self {
Self {
segment_id,
pattern_id,
local_start,
local_end,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Error)]
pub enum SegmentMapError {
#[error("segment {segment_index} overflows: start {start} + len {len}; Fix: split the segment or use a wider offset domain")]
SegmentEndOverflow {
segment_index: usize,
start: u32,
len: u32,
},
#[error("segment {segment_index} starts at {start} before previous start {previous_start}; Fix: sort segments by start offset before mapping")]
SegmentsNotSorted {
segment_index: usize,
previous_start: u32,
start: u32,
},
#[error("segment {segment_index} starts at {start} before previous segment {previous_index} ends at {previous_end}; Fix: coalesce or split overlapping ranges")]
SegmentsOverlap {
previous_index: usize,
segment_index: usize,
previous_end: u32,
start: u32,
},
#[error("match {match_index} has invalid range [{start}, {end}); Fix: emit non-empty half-open match ranges")]
InvalidMatchRange {
match_index: usize,
start: u32,
end: u32,
},
}
#[derive(Debug, Clone, Copy)]
struct NormalizedSegment {
id: u32,
start: u32,
end: u32,
}
pub fn map_offsets_to_segments(
segments: &[Segment],
matches: &[Match],
) -> Result<Vec<SegmentedMatch>, SegmentMapError> {
let normalized = validate_segments(segments)?;
let mut ordered_match_indices: Vec<usize> = (0..matches.len()).collect();
ordered_match_indices.sort_by_key(|&index| {
let item = matches[index];
(item.start, item.end, item.pattern_id, index)
});
let mut segment_cursor = 0usize;
let mut mapped_by_input_order = vec![None; matches.len()];
for match_index in ordered_match_indices {
let item = matches[match_index];
if item.end <= item.start {
return Err(SegmentMapError::InvalidMatchRange {
match_index,
start: item.start,
end: item.end,
});
}
while segment_cursor < normalized.len() && normalized[segment_cursor].end <= item.start {
segment_cursor += 1;
}
let Some(segment) = normalized.get(segment_cursor).copied() else {
continue;
};
if segment.start <= item.start && item.end <= segment.end {
mapped_by_input_order[match_index] = Some(SegmentedMatch::new(
segment.id,
item.pattern_id,
item.start - segment.start,
item.end - segment.start,
));
}
}
Ok(mapped_by_input_order.into_iter().flatten().collect())
}
fn validate_segments(segments: &[Segment]) -> Result<Vec<NormalizedSegment>, SegmentMapError> {
let mut normalized = Vec::with_capacity(segments.len());
let mut previous: Option<(usize, u32, u32)> = None;
for (segment_index, segment) in segments.iter().copied().enumerate() {
let end = segment.checked_end(segment_index)?;
if let Some((previous_index, previous_start, previous_end)) = previous {
if segment.start < previous_start {
return Err(SegmentMapError::SegmentsNotSorted {
segment_index,
previous_start,
start: segment.start,
});
}
if segment.start < previous_end {
return Err(SegmentMapError::SegmentsOverlap {
previous_index,
segment_index,
previous_end,
start: segment.start,
});
}
}
normalized.push(NormalizedSegment {
id: segment.id,
start: segment.start,
end,
});
previous = Some((segment_index, segment.start, end));
}
Ok(normalized)
}