use regex::Regex;
use tastty::{AbsolutePosition, LogicalLineSpan};
use super::{Capture, SearchMatch};
/// Maps one cell of a logical line to the bytes it contributed to the
/// haystack text assembled by `build_span_haystack`.
#[derive(Clone, Copy, Debug)]
pub(super) struct CellBoundary {
/// Offset of the cell's first byte in the haystack text (inclusive).
byte_start: usize,
/// Offset one past the cell's last byte in the haystack text (exclusive).
byte_end: usize,
/// Position that the span's cell iterator reported for this cell.
pos: AbsolutePosition,
}
/// Searchable text built from a logical line, plus the index needed to map
/// byte offsets in that text back to cell positions.
pub(super) struct SpanHaystack {
/// Concatenated cell contents; a kept cell whose contents are empty is
/// represented by a single space.
pub(super) text: String,
/// One entry per kept cell, in cell order, so byte ranges are ascending
/// and back-to-back.
boundaries: Vec<CellBoundary>,
}
/// Flattens `span` into a [`SpanHaystack`]: a string to run the regex over and
/// a per-cell table of the byte range each cell occupies in that string.
///
/// Every cell after the last one that reports `has_contents()` is dropped, so
/// the text carries no trailing filler. A kept cell whose `contents()` is empty
/// contributes a single space, which keeps every cell at least one byte wide.
pub(super) fn build_span_haystack(span: &LogicalLineSpan<'_>) -> SpanHaystack {
    let cells: Vec<_> = span.cells().collect();

    // Number of leading cells to keep: one past the last populated cell, or
    // zero when no cell has contents.
    let keep = cells
        .iter()
        .rposition(|(_, cell)| cell.has_contents())
        .map_or(0, |last| last + 1);

    let mut haystack = SpanHaystack {
        text: String::new(),
        boundaries: Vec::new(),
    };

    for (pos, cell) in cells.into_iter().take(keep) {
        let byte_start = haystack.text.len();
        match cell.contents() {
            contents if contents.is_empty() => haystack.text.push(' '),
            contents => haystack.text.push_str(contents),
        }
        // The text length after the push is the exclusive end of this cell.
        let byte_end = haystack.text.len();
        haystack.boundaries.push(CellBoundary {
            byte_start,
            byte_end,
            pos,
        });
    }

    haystack
}
pub(super) fn scan_span(
regex: &Regex,
haystack: &SpanHaystack,
names: &[Option<String>],
out: &mut Vec<SearchMatch>,
) {
let text = haystack.text.as_str();
let mut byte_cursor = 0usize;
while byte_cursor <= text.len() {
let Some(caps) = regex.captures_at(text, byte_cursor) else {
break;
};
let m_top = caps.get(0).expect("captures_at yields a top-level match");
let m_range = m_top.range();
let advance_to = if m_range.is_empty() {
match text[m_range.end..].chars().next() {
Some(c) => m_range.end + c.len_utf8(),
None => break,
}
} else {
let (start_pos, end_pos) =
byte_range_to_positions(m_range.start, m_range.end, &haystack.boundaries);
let match_text = text[m_range.start..m_range.end].to_string();
let mut captures: Vec<Capture> = Vec::new();
for (idx, group) in caps.iter().enumerate().skip(1) {
let Some(group) = group else {
continue;
};
let g_range = group.range();
if g_range.is_empty() {
continue;
}
let (cs, ce) =
byte_range_to_positions(g_range.start, g_range.end, &haystack.boundaries);
let cap_text = text[g_range.start..g_range.end].to_string();
let name = names.get(idx).cloned().flatten();
captures.push(Capture {
name,
index: idx,
start: cs,
end: ce,
text: cap_text,
});
}
out.push(SearchMatch {
start: start_pos,
end: end_pos,
text: match_text,
captures,
});
m_range.end
};
if advance_to <= byte_cursor {
break;
}
byte_cursor = advance_to;
}
}
/// Converts the half-open byte range `byte_start..byte_end` into an inclusive
/// pair of cell positions: the cell holding the first byte and the cell
/// holding the last byte (`byte_end - 1`).
///
/// The range must be non-empty; this is checked only in builds with debug
/// assertions enabled.
fn byte_range_to_positions(
    byte_start: usize,
    byte_end: usize,
    boundaries: &[CellBoundary],
) -> (AbsolutePosition, AbsolutePosition) {
    debug_assert!(byte_end > byte_start);
    (
        boundary_at(byte_start, boundaries),
        // `byte_end` is exclusive, so the final byte of the range sits one before it.
        boundary_at(byte_end - 1, boundaries),
    )
}
/// Returns the position of the cell whose byte range contains `byte_offset`.
///
/// `boundaries` is binary-searched, so it must be sorted by byte range. When
/// no cell covers the offset, builds with debug assertions panic if the offset
/// lies past the final cell; otherwise the cell at the insertion point
/// (clamped to the last cell) is used.
///
/// # Panics
///
/// Panics on an empty `boundaries` slice.
fn boundary_at(byte_offset: usize, boundaries: &[CellBoundary]) -> AbsolutePosition {
    use std::cmp::Ordering;

    // Orders a cell relative to the probed offset: cells ending at or before
    // it sort lower, cells starting after it sort higher.
    let locate = |cell: &CellBoundary| {
        if byte_offset >= cell.byte_end {
            Ordering::Less
        } else if byte_offset < cell.byte_start {
            Ordering::Greater
        } else {
            Ordering::Equal
        }
    };

    let idx = boundaries
        .binary_search_by(locate)
        .unwrap_or_else(|insert| {
            debug_assert!(
                insert < boundaries.len(),
                "byte offset {byte_offset} not covered by any cell boundary"
            );
            insert.min(boundaries.len().saturating_sub(1))
        });

    boundaries[idx].pos
}