1use thiserror::Error;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
11pub struct Segment {
12 pub id: u32,
14 pub start: u32,
16 pub len: u32,
18}
19
20impl Segment {
21 #[must_use]
23 pub const fn new(id: u32, start: u32, len: u32) -> Self {
24 Self { id, start, len }
25 }
26
27 fn checked_end(self, segment_index: usize) -> Result<u32, SegmentAttributionError> {
28 self.start
29 .checked_add(self.len)
30 .ok_or(SegmentAttributionError::SegmentEndOverflow {
31 segment_index,
32 start: self.start,
33 len: self.len,
34 })
35 }
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
40pub struct GlobalMatch {
41 pub pattern_id: u32,
43 pub start: u32,
45 pub end: u32,
47}
48
49impl GlobalMatch {
50 #[must_use]
52 pub const fn new(pattern_id: u32, start: u32, end: u32) -> Self {
53 Self {
54 pattern_id,
55 start,
56 end,
57 }
58 }
59}
60
61#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
63pub struct AttributedMatch {
64 pub segment_id: u32,
66 pub pattern_id: u32,
68 pub local_start: u32,
70 pub local_end: u32,
72}
73
74impl AttributedMatch {
75 #[must_use]
77 pub const fn new(segment_id: u32, pattern_id: u32, local_start: u32, local_end: u32) -> Self {
78 Self {
79 segment_id,
80 pattern_id,
81 local_start,
82 local_end,
83 }
84 }
85}
86
87#[derive(Debug, Clone, Copy, PartialEq, Eq, Error)]
89pub enum SegmentAttributionError {
90 #[error("segment {segment_index} overflows: start {start} + len {len}")]
92 SegmentEndOverflow {
93 segment_index: usize,
95 start: u32,
97 len: u32,
99 },
100 #[error("segment {segment_index} starts at {start} before previous start {previous_start}")]
102 SegmentsNotSorted {
103 segment_index: usize,
105 previous_start: u32,
107 start: u32,
109 },
110 #[error(
112 "segment {segment_index} starts at {start} before previous segment {previous_index} ends at {previous_end}"
113 )]
114 SegmentsOverlap {
115 previous_index: usize,
117 segment_index: usize,
119 previous_end: u32,
121 start: u32,
123 },
124 #[error("match {match_index} has invalid range [{start}, {end})")]
126 InvalidMatchRange {
127 match_index: usize,
129 start: u32,
131 end: u32,
133 },
134}
135
136#[derive(Debug, Clone, Copy)]
137struct NormalizedSegment {
138 id: u32,
139 start: u32,
140 end: u32,
141}
142
143pub fn map_offsets_to_segments(
148 segments: &[Segment],
149 matches: &[GlobalMatch],
150) -> Result<Vec<AttributedMatch>, SegmentAttributionError> {
151 let normalized = validate_segments(segments)?;
152 let mut ordered_match_indices: Vec<usize> = (0..matches.len()).collect();
153 ordered_match_indices.sort_by_key(|&index| {
154 let item = matches[index];
155 (item.start, item.end, item.pattern_id, index)
156 });
157
158 let mut segment_cursor = 0usize;
159 let mut mapped_by_input_order = vec![None; matches.len()];
160
161 for match_index in ordered_match_indices {
162 let item = matches[match_index];
163 if item.end <= item.start {
164 return Err(SegmentAttributionError::InvalidMatchRange {
165 match_index,
166 start: item.start,
167 end: item.end,
168 });
169 }
170
171 while segment_cursor < normalized.len() && normalized[segment_cursor].end <= item.start {
172 segment_cursor += 1;
173 }
174
175 let Some(segment) = normalized.get(segment_cursor).copied() else {
176 continue;
177 };
178
179 if segment.start <= item.start && item.end <= segment.end {
180 mapped_by_input_order[match_index] = Some(AttributedMatch::new(
181 segment.id,
182 item.pattern_id,
183 item.start - segment.start,
184 item.end - segment.start,
185 ));
186 }
187 }
188
189 Ok(mapped_by_input_order.into_iter().flatten().collect())
190}
191
192fn validate_segments(
193 segments: &[Segment],
194) -> Result<Vec<NormalizedSegment>, SegmentAttributionError> {
195 let mut normalized = Vec::with_capacity(segments.len());
196 let mut previous: Option<(usize, u32, u32)> = None;
197
198 for (segment_index, segment) in segments.iter().copied().enumerate() {
199 let end = segment.checked_end(segment_index)?;
200
201 if let Some((previous_index, previous_start, previous_end)) = previous {
202 if segment.start < previous_start {
203 return Err(SegmentAttributionError::SegmentsNotSorted {
204 segment_index,
205 previous_start,
206 start: segment.start,
207 });
208 }
209 if segment.start < previous_end {
210 return Err(SegmentAttributionError::SegmentsOverlap {
211 previous_index,
212 segment_index,
213 previous_end,
214 start: segment.start,
215 });
216 }
217 }
218
219 normalized.push(NormalizedSegment {
220 id: segment.id,
221 start: segment.start,
222 end,
223 });
224 previous = Some((segment_index, segment.start, end));
225 }
226
227 Ok(normalized)
228}