use crate::source::Source;
use regex::bytes::Regex;
use std::ops::Range;
/// Incrementally built index of line and record boundaries over a [`Source`].
///
/// The index is extended on demand by the `extend_*` / `notice_new_bytes`
/// methods; queries only see what has been scanned so far.
pub struct LineIndex {
    // --- scan position -----------------------------------------------------
    /// Byte offset at which indexing begins; bytes before it are ignored.
    start_byte: usize,
    /// Byte offset up to which the source has been scanned (exclusive).
    scanned_through: usize,
    /// Set when the last scanned byte was a newline sitting at the end of the
    /// source; the following line start is only recorded once more bytes
    /// arrive.
    pending_line_start: bool,
    /// Optional cap on the number of lines reported; scanning stops once more
    /// line starts than this are known.
    head_cap: Option<usize>,

    // --- line state --------------------------------------------------------
    /// Byte offsets of the line starts found so far, seeded with `start_byte`.
    starts: Vec<usize>,

    // --- record state ------------------------------------------------------
    /// Byte offsets of the record starts found so far, seeded with
    /// `start_byte`. Without a regex every line start is also a record start.
    record_starts: Vec<usize>,
    /// When set, a line matching this regex begins a new record.
    record_start_regex: Option<Regex>,
    /// `true` by default; cleared by `set_record_start` and set again once a
    /// matching line is seen while scanning. Consulted by `record_count`.
    record_zero_committed: bool,
}
impl Default for LineIndex {
fn default() -> Self {
Self::new()
}
}
impl LineIndex {
pub fn new() -> Self {
Self::new_starting_at(0)
}
/// Creates an index that ignores everything before `start_byte`.
///
/// `start_byte` is seeded as the first line start and the first record
/// start; nothing has been scanned yet, there is no head cap and no
/// record-start regex.
pub fn new_starting_at(start_byte: usize) -> Self {
    Self {
        start_byte,
        scanned_through: start_byte,
        pending_line_start: false,
        head_cap: None,
        starts: vec![start_byte],
        record_starts: vec![start_byte],
        record_start_regex: None,
        record_zero_committed: true,
    }
}
/// Limits the index to at most `cap` reported lines.
///
/// Scanning stops once more than `cap` line starts are known, and
/// `line_count` never reports more than `cap`.
pub fn set_head_cap(&mut self, cap: usize) {
    self.head_cap = Some(cap);
}
/// Switches the index into records mode: a line matching `re` starts a new
/// record.
///
/// Must be called before any scanning has happened; this is checked with a
/// `debug_assert!` only.
pub fn set_record_start(&mut self, re: Regex) {
    let untouched = self.starts.len() == 1 && self.scanned_through == self.start_byte;
    debug_assert!(untouched, "set_record_start must be called before scanning");
    // Record zero is not confirmed until scanning sees a matching line.
    self.record_zero_committed = false;
    self.record_start_regex = Some(re);
}
/// Returns `true` when a record-start regex has been installed.
pub fn records_mode(&self) -> bool {
    self.record_start_regex.is_some()
}
/// Number of lines known so far, limited by the head cap when one is set.
///
/// Reports 0 while nothing has been scanned, even though the first line
/// start is seeded at construction.
pub fn line_count(&self) -> usize {
    let nothing_scanned = self.starts.len() == 1 && self.scanned_through == self.start_byte;
    let discovered = if nothing_scanned { 0 } else { self.starts.len() };
    self.head_cap.map_or(discovered, |cap| discovered.min(cap))
}
/// `true` once more line starts than the head cap allows are known, i.e.
/// there is no reason to scan any further.
fn at_scan_cap(&self) -> bool {
    self.head_cap.map_or(false, |cap| self.starts.len() > cap)
}
/// Scans forward from `scanned_through` until the first newline at or past
/// `target_byte` (clamped to the source length), or to the end of the source.
///
/// Every newline that is followed by more bytes records a new line start
/// (and possibly a record start). A newline that is the very last byte only
/// sets `pending_line_start`, so the next line is recorded when the source
/// grows. Scanning is a no-op once the head cap has been reached.
fn extend_to_byte(&mut self, src: &dyn Source, target_byte: usize) {
    if self.at_scan_cap() || matches!(self.head_cap, Some(0)) {
        return;
    }

    let total = src.len();
    let stop = target_byte.min(total);
    if self.scanned_through >= stop {
        return;
    }

    // A previous scan ended right after a trailing newline; now that more
    // bytes exist, the line that starts there can be recorded.
    if self.pending_line_start {
        let deferred_start = self.scanned_through;
        self.starts.push(deferred_start);
        self.maybe_push_record_start(deferred_start, src);
        self.pending_line_start = false;
        if self.at_scan_cap() {
            return;
        }
    }

    let base = self.scanned_through;
    let chunk = src.bytes(base..total);
    for (offset, &byte) in chunk.iter().enumerate() {
        if byte != b'\n' {
            continue;
        }
        // Offset of the byte right after this newline.
        let after = base + offset + 1;
        if after < total {
            self.starts.push(after);
            self.maybe_push_record_start(after, src);
            if self.at_scan_cap() {
                self.scanned_through = after;
                return;
            }
        } else {
            // Newline is the final byte: defer the next line start.
            self.pending_line_start = true;
        }
        // Only stop on a line boundary at or beyond the requested byte.
        if after >= stop {
            self.scanned_through = after;
            return;
        }
    }
    self.scanned_through = total;
}
/// Decides whether the line starting at `line_start` also starts a record,
/// and records it if so.
///
/// Without a regex every line is its own record. With a regex, the line
/// (up to its newline or the current end of the source) is matched against
/// it; a match starts a new record, except for an uncommitted record zero
/// located exactly at `start_byte`, which is already seeded.
fn maybe_push_record_start(&mut self, line_start: usize, src: &dyn Source) {
    let is_match = match &self.record_start_regex {
        None => {
            self.record_starts.push(line_start);
            return;
        }
        Some(re) => {
            let line_end = self.find_line_end(line_start, src);
            re.is_match(&src.bytes(line_start..line_end))
        }
    };

    let at_origin = line_start == self.start_byte;
    if is_match {
        // Skip the push only for the not-yet-committed record zero at the
        // origin; it is already present in `record_starts`.
        if self.record_zero_committed || !at_origin {
            self.record_starts.push(line_start);
        }
        self.record_zero_committed = true;
    } else if at_origin {
        self.record_zero_committed = true;
    }
}
/// Returns the offset of the first newline at or after `line_start`, or the
/// source length when no newline follows.
fn find_line_end(&self, line_start: usize, src: &dyn Source) -> usize {
    let total = src.len();
    src.bytes(line_start..total)
        .iter()
        .position(|&b| b == b'\n')
        .map_or(total, |offset| line_start + offset)
}
/// Extends the index until line `n` is known, the source is exhausted, or
/// the head cap is reached.
///
/// Each pass asks the scanner to go up to the current end of the source.
pub fn extend_to_line(&mut self, n: usize, src: &dyn Source) {
    loop {
        let have_line = self.starts.len() > n;
        if have_line || self.scanned_through >= src.len() || self.at_scan_cap() {
            return;
        }
        self.extend_to_byte(src, src.len());
    }
}
/// Scans everything up to the current end of `src`.
pub fn extend_to_end(&mut self, src: &dyn Source) {
    let end = src.len();
    self.extend_to_byte(src, end);
}
/// Picks up bytes appended to `src` since the last scan by scanning up to
/// its current end.
pub fn notice_new_bytes(&mut self, src: &dyn Source) {
    let end = src.len();
    self.extend_to_byte(src, end);
}
/// Byte offset up to which the source has been scanned (exclusive).
pub fn scanned_through(&self) -> usize {
    self.scanned_through
}
/// Public entry point to the scanner: extends the index so that `byte` is
/// covered, stopping at the first newline at or past it.
pub fn extend_to_byte_for_query(&mut self, src: &dyn Source, byte: usize) {
    self.extend_to_byte(src, byte);
}
/// Returns the index of the line containing `byte`.
///
/// Yields `None` when `byte` lies before `start_byte` or has not been
/// scanned yet.
pub fn line_at_byte(&self, byte: usize) -> Option<usize> {
    if !(self.start_byte..self.scanned_through).contains(&byte) {
        return None;
    }
    match self.starts.binary_search(&byte) {
        Ok(exact) => Some(exact),
        // `insert_at` is the first line starting after `byte`; the line
        // containing it is the one before (none if `insert_at` is 0).
        Err(insert_at) => insert_at.checked_sub(1),
    }
}
/// Byte range of line `n`, excluding its terminating newline.
///
/// # Panics
///
/// Panics if line `n` is not in the index.
pub fn line_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
    let start = self.starts[n];
    let end = match self.starts.get(n + 1) {
        // The next line begins right after this line's newline.
        Some(&next_start) => next_start - 1,
        None => {
            // Last known line: it runs to the scanned limit, minus a
            // trailing newline if there is one.
            let limit = src.len().min(self.scanned_through.max(start));
            let ends_with_newline =
                limit > start && src.bytes(limit - 1..limit)[0] == b'\n';
            if ends_with_newline {
                limit - 1
            } else {
                limit
            }
        }
    };
    start..end
}
/// Number of records known so far.
///
/// Without a head cap this is the number of record starts found, or 0 while
/// nothing has been scanned. With a head cap, only records that begin
/// within the visible lines (see `line_count`) are counted: the result is
/// the record index of the last visible line plus one, or 0 when no line is
/// visible.
pub fn record_count(&self) -> usize {
    match self.head_cap {
        None => {
            let nothing_scanned = self.scanned_through == self.start_byte
                && self.record_starts.len() == 1
                && (self.record_start_regex.is_none() || !self.record_zero_committed);
            if nothing_scanned {
                0
            } else {
                self.record_starts.len()
            }
        }
        Some(_) => {
            // Derive the visible span from the *line* count. Using the
            // record count here undercounts whenever records span several
            // lines, and reports one record for an index with no lines.
            let visible_lines = self.line_count();
            if visible_lines == 0 {
                return 0;
            }
            self.line_to_record_inner(visible_lines - 1)
                .map_or(0, |last_record| last_record + 1)
        }
    }
}
/// Byte range of record `n`, excluding the newline that terminates its last
/// line. Newlines between the record's lines are included.
///
/// # Panics
///
/// Panics if record `n` is not in the index.
pub fn record_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
    let start = self.record_starts[n];
    let end = match self.record_starts.get(n + 1) {
        // The next record begins right after this record's final newline.
        Some(&next_record) => next_record - 1,
        None => {
            // Last known record: it runs to the scanned limit, minus a
            // trailing newline if there is one.
            let limit = src.len().min(self.scanned_through.max(start));
            let ends_with_newline =
                limit > start && src.bytes(limit - 1..limit)[0] == b'\n';
            if ends_with_newline {
                limit - 1
            } else {
                limit
            }
        }
    };
    start..end
}
/// Half-open range of line indices that make up record `n`.
///
/// When a head cap is set, the end of the range is clamped to the cap so
/// the result never names a line that `line_count` does not report (the
/// scanner records one line start beyond the cap before it stops). The end
/// is never smaller than the start.
///
/// # Panics
///
/// Panics if record `n` is not in the index.
pub fn record_line_range(&self, n: usize) -> Range<usize> {
    let first_line = self
        .starts
        .binary_search(&self.record_starts[n])
        .expect("record start is always a line start");
    let unclamped_end = match self.record_starts.get(n + 1) {
        Some(next_record) => self
            .starts
            .binary_search(next_record)
            .expect("record start is always a line start"),
        None => self.starts.len(),
    };
    let last_line = match self.head_cap {
        Some(cap) => unclamped_end.min(cap).max(first_line),
        None => unclamped_end,
    };
    first_line..last_line
}
/// Index of the record containing line `line_n`; falls back to 0 when the
/// line is not in the index.
pub fn line_to_record(&self, line_n: usize) -> usize {
    self.line_to_record_inner(line_n).unwrap_or_default()
}
/// Index of the record containing line `line_n`, or `None` when the line is
/// not in the index.
fn line_to_record_inner(&self, line_n: usize) -> Option<usize> {
    let line_start = *self.starts.get(line_n)?;
    let record = match self.record_starts.binary_search(&line_start) {
        Ok(exact) => exact,
        // The line sits inside the record that starts just before the
        // insertion point; an insertion point of 0 maps to record 0.
        Err(insert_at) => insert_at.saturating_sub(1),
    };
    Some(record)
}
/// Raw bytes of record `n`, as delimited by `record_range`.
pub fn record_bytes<'a>(&self, n: usize, src: &'a dyn Source) -> std::borrow::Cow<'a, [u8]> {
    src.bytes(self.record_range(n, src))
}
/// Bytes of line `n` after passing them through `crate::ansi::strip_sgr`.
///
/// The result is always an owned copy.
pub fn line_bytes_stripped<'a>(
    &self,
    n: usize,
    src: &'a dyn Source,
) -> std::borrow::Cow<'a, [u8]> {
    let raw = src.bytes(self.line_range(n, src));
    let visible = crate::ansi::strip_sgr(&raw).into_owned();
    visible.into()
}
/// Bytes of record `n` after passing them through `crate::ansi::strip_sgr`.
///
/// The result is always an owned copy.
pub fn record_bytes_stripped<'a>(
    &self,
    n: usize,
    src: &'a dyn Source,
) -> std::borrow::Cow<'a, [u8]> {
    let raw = src.bytes(self.record_range(n, src));
    let visible = crate::ansi::strip_sgr(&raw).into_owned();
    visible.into()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::MockSource;
    use regex::bytes::Regex;

    // ----- line indexing ---------------------------------------------------

    // An empty source has no lines.
    #[test]
    fn empty_source_zero_lines() {
        let m = MockSource::new();
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 0);
    }

    // A lone line without a newline still counts as one line.
    #[test]
    fn single_line_no_newline() {
        let m = MockSource::new();
        m.append(b"hello");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 1);
        assert_eq!(idx.line_range(0, &m), 0..5);
    }

    // A trailing newline does not create an extra empty line.
    #[test]
    fn single_line_trailing_newline() {
        let m = MockSource::new();
        m.append(b"hello\n");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 1);
        assert_eq!(idx.line_range(0, &m), 0..5);
    }

    // Line ranges exclude the newline terminators.
    #[test]
    fn multiple_lines() {
        let m = MockSource::new();
        m.append(b"a\nbb\nccc\n");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.line_range(0, &m), 0..1);
        assert_eq!(idx.line_range(1, &m), 2..4);
        assert_eq!(idx.line_range(2, &m), 5..8);
    }

    // ----- head cap --------------------------------------------------------

    // The head cap limits the reported line count.
    #[test]
    fn head_cap_truncates_line_count() {
        let m = MockSource::new();
        m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n");
        let mut idx = LineIndex::new();
        idx.set_head_cap(3);
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3, "should be capped to 3 lines");
        assert_eq!(idx.line_range(0, &m), 0..1);
        assert_eq!(idx.line_range(1, &m), 2..3);
        assert_eq!(idx.line_range(2, &m), 4..5);
    }

    // Asking for a line beyond the cap must return rather than loop forever.
    #[test]
    fn head_cap_extend_to_line_terminates() {
        let m = MockSource::new();
        m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
        let mut idx = LineIndex::new();
        idx.set_head_cap(3);
        idx.extend_to_line(20, &m);
        assert_eq!(idx.line_count(), 3);
    }

    // A cap of zero hides every line.
    #[test]
    fn head_cap_zero_yields_empty() {
        let m = MockSource::new();
        m.append(b"1\n2\n3\n");
        let mut idx = LineIndex::new();
        idx.set_head_cap(0);
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 0);
    }

    // ----- start offset ----------------------------------------------------

    // Indexing from a non-zero offset skips the lines before it.
    #[test]
    fn start_byte_skips_head_of_source() {
        let m = MockSource::new();
        m.append(b"alpha\nbeta\ngamma\ndelta\nepsilon\n");
        let mut idx = LineIndex::new_starting_at(11);
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3, "from byte 11 there are 3 lines: gamma, delta, epsilon");
        assert_eq!(idx.line_range(0, &m), 11..16);
        assert_eq!(idx.line_range(1, &m), 17..22);
        assert_eq!(idx.line_range(2, &m), 23..30);
    }

    // Starting exactly at the end of the source yields no lines.
    #[test]
    fn start_byte_with_empty_remainder() {
        let m = MockSource::new();
        m.append(b"alpha\n");
        let mut idx = LineIndex::new_starting_at(6);
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 0);
    }

    // ----- growing input ---------------------------------------------------

    // Appended bytes are picked up, including a line completed later.
    #[test]
    fn incremental_growth_via_notice_new_bytes() {
        let m = MockSource::new();
        let mut idx = LineIndex::new();
        m.append(b"alpha\n");
        idx.notice_new_bytes(&m);
        assert_eq!(idx.line_count(), 1);
        m.append(b"beta\ngamm");
        idx.notice_new_bytes(&m);
        assert_eq!(idx.line_count(), 3);
        m.append(b"a\n");
        idx.notice_new_bytes(&m);
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.line_range(2, &m), 11..16);
    }

    // ----- records ---------------------------------------------------------

    // Compiles a test regex, panicking on an invalid pattern.
    fn re(pat: &str) -> Regex {
        Regex::new(pat).unwrap()
    }

    // Without a regex, records and lines coincide.
    #[test]
    fn records_mirror_lines_when_no_regex() {
        let m = MockSource::new();
        m.append(b"a\nb\nc\n");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.record_count(), 3);
        for i in 0..3 {
            assert_eq!(idx.record_range(i, &m), idx.line_range(i, &m));
        }
    }

    // Records mode on an empty source reports no records.
    #[test]
    fn record_count_zero_for_empty_source_records_mode() {
        let m = MockSource::new();
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.record_count(), 0);
    }

    // Non-matching lines are folded into the preceding record.
    #[test]
    fn records_group_continuations() {
        let m = MockSource::new();
        m.append(b"[1] head\n more\n more\n[2] head\n more\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 5);
        assert_eq!(idx.record_count(), 2);
        let r0 = idx.record_range(0, &m);
        assert_eq!(&m.bytes(r0)[..], b"[1] head\n more\n more");
        let r1 = idx.record_range(1, &m);
        assert_eq!(&m.bytes(r1)[..3], b"[2]");
    }

    // Lines before the first match form a record of their own.
    #[test]
    fn synthetic_record_zero_absorbs_orphan_head() {
        let m = MockSource::new();
        m.append(b"banner line 1\nbanner line 2\n[1] first real record\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.record_count(), 2);
        let r0 = idx.record_range(0, &m);
        assert_eq!(&m.bytes(r0)[..], b"banner line 1\nbanner line 2");
        assert_eq!(idx.record_line_range(0), 0..2);
        assert_eq!(idx.record_line_range(1), 2..3);
    }

    // Every line maps back to the record that contains it.
    #[test]
    fn line_to_record_round_trips() {
        let m = MockSource::new();
        m.append(b"[1] a\n cont\n[2] b\n cont\n cont\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.line_to_record(0), 0);
        assert_eq!(idx.line_to_record(1), 0);
        assert_eq!(idx.line_to_record(2), 1);
        assert_eq!(idx.line_to_record(3), 1);
        assert_eq!(idx.line_to_record(4), 1);
    }

    // Record bytes keep interior newlines but drop the final one.
    #[test]
    fn record_bytes_contains_embedded_newlines() {
        let m = MockSource::new();
        m.append(b"[1] head\n more\n[2] next\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        let bytes = idx.record_bytes(0, &m);
        assert_eq!(&*bytes, b"[1] head\n more");
    }

    // With no matching line at all, everything is a single record.
    #[test]
    fn no_match_at_all_is_one_synthetic_record() {
        let m = MockSource::new();
        m.append(b"plain text\nmore plain\nno brackets here\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.record_count(), 1);
        assert_eq!(idx.record_line_range(0), 0..3);
    }

    // Records appended after the first scan are detected.
    #[test]
    fn pending_record_start_handles_growing_input() {
        let m = MockSource::new();
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        m.append(b"[1] head\n more\n");
        idx.notice_new_bytes(&m);
        assert_eq!(idx.record_count(), 1);
        m.append(b"[2] head\n");
        idx.notice_new_bytes(&m);
        assert_eq!(idx.record_count(), 2);
    }

    // Blank lines inside a record are continuations, not record breaks.
    #[test]
    fn empty_continuation_lines_are_continuations() {
        let m = MockSource::new();
        m.append(b"[1] head\n\n after blank\n[2] next\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 4);
        assert_eq!(idx.record_count(), 2);
        assert_eq!(idx.record_line_range(0), 0..3);
    }

    // ----- stripped accessors ----------------------------------------------

    // SGR escape sequences are removed from the returned line.
    #[test]
    fn line_bytes_stripped_returns_visible_text() {
        let m = MockSource::new();
        m.append(b"\x1b[31merror\x1b[0m\n");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        let stripped = idx.line_bytes_stripped(0, &m);
        assert_eq!(stripped.as_ref(), b"error");
    }

    // Plain input passes through unchanged.
    #[test]
    fn line_bytes_stripped_plain_input() {
        let m = MockSource::new();
        m.append(b"plain\n");
        let mut idx = LineIndex::new();
        idx.extend_to_end(&m);
        let stripped = idx.line_bytes_stripped(0, &m);
        assert_eq!(stripped.as_ref(), b"plain");
    }

    // ----- records mode edge cases -----------------------------------------

    // `records_mode` flips only after a regex is installed.
    #[test]
    fn records_mode_reports_true_only_when_regex_set() {
        let mut idx = LineIndex::new();
        assert!(!idx.records_mode());
        idx.set_record_start(re(r"^\["));
        assert!(idx.records_mode());
    }

    // The last record may end without a newline.
    #[test]
    fn record_range_handles_unterminated_last_record() {
        let m = MockSource::new();
        m.append(b"[1] head\n[2] last line no newline");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.extend_to_end(&m);
        assert_eq!(idx.record_count(), 2);
        let r1 = idx.record_range(1, &m);
        assert_eq!(&m.bytes(r1)[..], b"[2] last line no newline");
    }

    // A head cap of zero hides every record as well as every line.
    #[test]
    fn record_count_with_head_cap_zero_returns_zero_in_records_mode() {
        let m = MockSource::new();
        m.append(b"[1] head\n[2] next\n");
        let mut idx = LineIndex::new();
        idx.set_record_start(re(r"^\["));
        idx.set_head_cap(0);
        idx.extend_to_end(&m);
        assert_eq!(idx.line_count(), 0);
        assert_eq!(idx.record_count(), 0);
    }
}