1use crate::source::Source;
2use regex::bytes::Regex;
3use std::ops::Range;
4
5pub struct LineIndex {
6 starts: Vec<usize>,
7 record_starts: Vec<usize>,
8 record_start_regex: Option<Regex>,
9 scanned_through: usize,
10 start_byte: usize,
11 pending_line_start: bool,
12 head_cap: Option<usize>,
13 record_zero_committed: bool,
17}
18
19impl Default for LineIndex {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl LineIndex {
26 pub fn new() -> Self {
27 Self::new_starting_at(0)
28 }
29
30 pub fn new_starting_at(start_byte: usize) -> Self {
34 Self {
35 starts: vec![start_byte],
36 record_starts: vec![start_byte],
37 record_start_regex: None,
38 scanned_through: start_byte,
39 start_byte,
40 pending_line_start: false,
41 head_cap: None,
42 record_zero_committed: true,
43 }
44 }
45
46 pub fn set_head_cap(&mut self, cap: usize) {
50 self.head_cap = Some(cap);
51 }
52
53 pub fn set_record_start(&mut self, re: Regex) {
56 debug_assert!(
57 self.scanned_through == self.start_byte && self.starts.len() == 1,
58 "set_record_start must be called before scanning"
59 );
60 self.record_start_regex = Some(re);
61 self.record_zero_committed = false;
62 }
63
64 pub fn records_mode(&self) -> bool {
66 self.record_start_regex.is_some()
67 }
68
69 pub fn line_count(&self) -> usize {
70 let raw = if self.scanned_through == self.start_byte && self.starts.len() == 1 {
71 0
72 } else {
73 self.starts.len()
74 };
75 match self.head_cap {
76 Some(cap) => raw.min(cap),
77 None => raw,
78 }
79 }
80
81 fn at_scan_cap(&self) -> bool {
85 matches!(self.head_cap, Some(cap) if self.starts.len() > cap)
86 }
87
88 fn extend_to_byte(&mut self, src: &dyn Source, target_byte: usize) {
90 if self.at_scan_cap() {
91 return;
92 }
93 if matches!(self.head_cap, Some(0)) {
94 return;
95 }
96 let total = src.len();
97 let stop = target_byte.min(total);
98 if self.scanned_through >= stop {
99 return;
100 }
101
102 if self.pending_line_start {
103 let line_start = self.scanned_through;
104 self.starts.push(line_start);
105 self.maybe_push_record_start(line_start, src);
106 self.pending_line_start = false;
107 if self.at_scan_cap() {
108 return;
109 }
110 }
111
112 let chunk = src.bytes(self.scanned_through..total);
113 let mut pos = self.scanned_through;
114 for &b in chunk.iter() {
115 pos += 1;
116 if b == b'\n' {
117 if pos < total {
118 let new_line_start = pos;
119 self.starts.push(new_line_start);
120 self.maybe_push_record_start(new_line_start, src);
121 if self.at_scan_cap() {
122 self.scanned_through = pos;
123 return;
124 }
125 } else {
126 self.pending_line_start = true;
127 }
128 }
129 if pos >= stop && b == b'\n' {
130 self.scanned_through = pos;
131 return;
132 }
133 }
134 self.scanned_through = total;
135 }
136
137 fn maybe_push_record_start(&mut self, line_start: usize, src: &dyn Source) {
141 match &self.record_start_regex {
142 None => {
143 self.record_starts.push(line_start);
145 }
146 Some(re) => {
147 let line_end = self.find_line_end(line_start, src);
148 let line_bytes = src.bytes(line_start..line_end);
149 let is_match = re.is_match(&line_bytes);
150 if is_match {
151 if !self.record_zero_committed {
152 if line_start == self.start_byte {
154 } else {
158 self.record_starts.push(line_start);
162 }
163 self.record_zero_committed = true;
164 } else {
165 self.record_starts.push(line_start);
166 }
167 } else if !self.record_zero_committed && line_start == self.start_byte {
168 self.record_zero_committed = true;
172 }
173 }
175 }
176 }
177
178 fn find_line_end(&self, line_start: usize, src: &dyn Source) -> usize {
182 let total = src.len();
183 let chunk = src.bytes(line_start..total);
184 for (i, &b) in chunk.iter().enumerate() {
185 if b == b'\n' {
186 return line_start + i;
187 }
188 }
189 total
190 }
191
192 pub fn extend_to_line(&mut self, n: usize, src: &dyn Source) {
193 while self.starts.len() <= n && self.scanned_through < src.len() {
194 if self.at_scan_cap() {
195 return;
198 }
199 self.extend_to_byte(src, src.len());
200 }
201 }
202
203 pub fn extend_to_end(&mut self, src: &dyn Source) {
204 self.extend_to_byte(src, src.len());
205 }
206
207 pub fn notice_new_bytes(&mut self, src: &dyn Source) {
208 self.extend_to_byte(src, src.len());
209 }
210
211 pub fn scanned_through(&self) -> usize {
213 self.scanned_through
214 }
215
216 pub fn extend_to_byte_for_query(&mut self, src: &dyn Source, byte: usize) {
220 self.extend_to_byte(src, byte);
221 }
222
223 pub fn line_at_byte(&self, byte: usize) -> Option<usize> {
227 if byte < self.start_byte || byte >= self.scanned_through {
228 return None;
229 }
230 match self.starts.binary_search(&byte) {
231 Ok(idx) => Some(idx),
232 Err(0) => None,
233 Err(idx) => Some(idx - 1),
234 }
235 }
236
237 pub fn line_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
240 let start = self.starts[n];
241 let next_known = self.starts.get(n + 1).copied();
246 let end = if let Some(next_start) = next_known {
247 next_start - 1
249 } else {
250 let total_scanned = src.len().min(self.scanned_through.max(start));
252 if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
253 total_scanned - 1
254 } else {
255 total_scanned
256 }
257 };
258 start..end
259 }
260
261 pub fn record_count(&self) -> usize {
265 let raw = if self.scanned_through == self.start_byte && self.record_starts.len() == 1
266 && self.record_start_regex.is_none()
267 {
268 0
270 } else if self.scanned_through == self.start_byte && self.record_starts.len() == 1
271 && self.record_start_regex.is_some() && !self.record_zero_committed
272 {
273 0
275 } else {
276 self.record_starts.len()
277 };
278 match self.head_cap {
279 Some(0) => 0,
280 Some(cap) => {
281 let visible_lines = raw.min(self.starts.len()).min(cap);
282 self.line_to_record_inner(visible_lines.saturating_sub(1))
283 .map(|r| r + 1)
284 .unwrap_or(0)
285 }
286 None => raw,
287 }
288 }
289
290 pub fn record_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
293 let start = self.record_starts[n];
294 let end = if n + 1 < self.record_starts.len() {
295 self.record_starts[n + 1] - 1
297 } else {
298 let total_scanned = src.len().min(self.scanned_through.max(start));
300 if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
301 total_scanned - 1
302 } else {
303 total_scanned
304 }
305 };
306 start..end
307 }
308
309 pub fn record_line_range(&self, n: usize) -> Range<usize> {
311 let first_line = self.starts.binary_search(&self.record_starts[n])
312 .expect("record start is always a line start");
313 let last_line = if n + 1 < self.record_starts.len() {
314 self.starts.binary_search(&self.record_starts[n + 1])
315 .expect("record start is always a line start")
316 } else {
317 self.starts.len()
318 };
319 first_line..last_line
320 }
321
322 pub fn line_to_record(&self, line_n: usize) -> usize {
324 self.line_to_record_inner(line_n).unwrap_or(0)
325 }
326
327 fn line_to_record_inner(&self, line_n: usize) -> Option<usize> {
328 if self.starts.len() <= line_n {
329 return None;
330 }
331 let line_start = self.starts[line_n];
332 match self.record_starts.binary_search(&line_start) {
333 Ok(idx) => Some(idx),
334 Err(0) => Some(0),
335 Err(idx) => Some(idx - 1),
336 }
337 }
338
339 pub fn record_bytes<'a>(&self, n: usize, src: &'a dyn Source) -> std::borrow::Cow<'a, [u8]> {
341 let r = self.record_range(n, src);
342 src.bytes(r)
343 }
344
345 pub fn line_bytes_stripped<'a>(
349 &self,
350 n: usize,
351 src: &'a dyn Source,
352 ) -> std::borrow::Cow<'a, [u8]> {
353 let range = self.line_range(n, src);
354 let raw = src.bytes(range);
355 crate::ansi::strip_sgr(&raw).into_owned().into()
356 }
357
358 pub fn record_bytes_stripped<'a>(
360 &self,
361 n: usize,
362 src: &'a dyn Source,
363 ) -> std::borrow::Cow<'a, [u8]> {
364 let range = self.record_range(n, src);
365 let raw = src.bytes(range);
366 crate::ansi::strip_sgr(&raw).into_owned().into()
367 }
368}
369
370#[cfg(test)]
371mod tests {
372 use super::*;
373 use crate::source::MockSource;
374 use regex::bytes::Regex;
375
376 #[test]
377 fn empty_source_zero_lines() {
378 let m = MockSource::new();
379 let mut idx = LineIndex::new();
380 idx.extend_to_end(&m);
381 assert_eq!(idx.line_count(), 0);
382 }
383
384 #[test]
385 fn single_line_no_newline() {
386 let m = MockSource::new();
387 m.append(b"hello");
388 let mut idx = LineIndex::new();
389 idx.extend_to_end(&m);
390 assert_eq!(idx.line_count(), 1);
391 assert_eq!(idx.line_range(0, &m), 0..5);
392 }
393
394 #[test]
395 fn single_line_trailing_newline() {
396 let m = MockSource::new();
397 m.append(b"hello\n");
398 let mut idx = LineIndex::new();
399 idx.extend_to_end(&m);
400 assert_eq!(idx.line_count(), 1);
401 assert_eq!(idx.line_range(0, &m), 0..5);
402 }
403
404 #[test]
405 fn multiple_lines() {
406 let m = MockSource::new();
407 m.append(b"a\nbb\nccc\n");
408 let mut idx = LineIndex::new();
409 idx.extend_to_end(&m);
410 assert_eq!(idx.line_count(), 3);
411 assert_eq!(idx.line_range(0, &m), 0..1);
412 assert_eq!(idx.line_range(1, &m), 2..4);
413 assert_eq!(idx.line_range(2, &m), 5..8);
414 }
415
416 #[test]
417 fn head_cap_truncates_line_count() {
418 let m = MockSource::new();
419 m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n"); let mut idx = LineIndex::new();
421 idx.set_head_cap(3);
422 idx.extend_to_end(&m);
423 assert_eq!(idx.line_count(), 3, "should be capped to 3 lines");
424 assert_eq!(idx.line_range(0, &m), 0..1);
426 assert_eq!(idx.line_range(1, &m), 2..3);
427 assert_eq!(idx.line_range(2, &m), 4..5);
428 }
429
430 #[test]
431 fn head_cap_extend_to_line_terminates() {
432 let m = MockSource::new();
436 m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
437 let mut idx = LineIndex::new();
438 idx.set_head_cap(3);
439 idx.extend_to_line(20, &m); assert_eq!(idx.line_count(), 3);
441 }
442
443 #[test]
444 fn head_cap_zero_yields_empty() {
445 let m = MockSource::new();
446 m.append(b"1\n2\n3\n");
447 let mut idx = LineIndex::new();
448 idx.set_head_cap(0);
449 idx.extend_to_end(&m);
450 assert_eq!(idx.line_count(), 0);
451 }
452
453 #[test]
454 fn start_byte_skips_head_of_source() {
455 let m = MockSource::new();
456 m.append(b"alpha\nbeta\ngamma\ndelta\nepsilon\n");
458 let mut idx = LineIndex::new_starting_at(11);
460 idx.extend_to_end(&m);
461 assert_eq!(idx.line_count(), 3, "from byte 11 there are 3 lines: gamma, delta, epsilon");
462 assert_eq!(idx.line_range(0, &m), 11..16); assert_eq!(idx.line_range(1, &m), 17..22); assert_eq!(idx.line_range(2, &m), 23..30); }
466
467 #[test]
468 fn start_byte_with_empty_remainder() {
469 let m = MockSource::new();
470 m.append(b"alpha\n");
471 let mut idx = LineIndex::new_starting_at(6);
472 idx.extend_to_end(&m);
473 assert_eq!(idx.line_count(), 0);
474 }
475
476 #[test]
477 fn incremental_growth_via_notice_new_bytes() {
478 let m = MockSource::new();
479 let mut idx = LineIndex::new();
480
481 m.append(b"alpha\n");
482 idx.notice_new_bytes(&m);
483 assert_eq!(idx.line_count(), 1);
484
485 m.append(b"beta\ngamm");
486 idx.notice_new_bytes(&m);
487 assert_eq!(idx.line_count(), 3); m.append(b"a\n");
490 idx.notice_new_bytes(&m);
491 assert_eq!(idx.line_count(), 3);
492 assert_eq!(idx.line_range(2, &m), 11..16); }
495
496 fn re(pat: &str) -> Regex {
497 Regex::new(pat).unwrap()
498 }
499
500 #[test]
501 fn records_mirror_lines_when_no_regex() {
502 let m = MockSource::new();
503 m.append(b"a\nb\nc\n");
504 let mut idx = LineIndex::new();
505 idx.extend_to_end(&m);
506 assert_eq!(idx.line_count(), 3);
507 assert_eq!(idx.record_count(), 3);
508 for i in 0..3 {
509 assert_eq!(idx.record_range(i, &m), idx.line_range(i, &m));
510 }
511 }
512
513 #[test]
514 fn record_count_zero_for_empty_source_records_mode() {
515 let m = MockSource::new();
516 let mut idx = LineIndex::new();
517 idx.set_record_start(re(r"^\["));
518 idx.extend_to_end(&m);
519 assert_eq!(idx.record_count(), 0);
520 }
521
522 #[test]
523 fn records_group_continuations() {
524 let m = MockSource::new();
525 m.append(b"[1] head\n more\n more\n[2] head\n more\n");
526 let mut idx = LineIndex::new();
527 idx.set_record_start(re(r"^\["));
528 idx.extend_to_end(&m);
529 assert_eq!(idx.line_count(), 5);
530 assert_eq!(idx.record_count(), 2);
531 let r0 = idx.record_range(0, &m);
532 assert_eq!(&m.bytes(r0)[..], b"[1] head\n more\n more");
533 let r1 = idx.record_range(1, &m);
534 assert_eq!(&m.bytes(r1)[..3], b"[2]");
535 }
536
537 #[test]
538 fn synthetic_record_zero_absorbs_orphan_head() {
539 let m = MockSource::new();
540 m.append(b"banner line 1\nbanner line 2\n[1] first real record\n");
541 let mut idx = LineIndex::new();
542 idx.set_record_start(re(r"^\["));
543 idx.extend_to_end(&m);
544 assert_eq!(idx.line_count(), 3);
545 assert_eq!(idx.record_count(), 2);
546 let r0 = idx.record_range(0, &m);
547 assert_eq!(&m.bytes(r0)[..], b"banner line 1\nbanner line 2");
548 assert_eq!(idx.record_line_range(0), 0..2);
549 assert_eq!(idx.record_line_range(1), 2..3);
550 }
551
552 #[test]
553 fn line_to_record_round_trips() {
554 let m = MockSource::new();
555 m.append(b"[1] a\n cont\n[2] b\n cont\n cont\n");
556 let mut idx = LineIndex::new();
557 idx.set_record_start(re(r"^\["));
558 idx.extend_to_end(&m);
559 assert_eq!(idx.line_to_record(0), 0); assert_eq!(idx.line_to_record(1), 0); assert_eq!(idx.line_to_record(2), 1); assert_eq!(idx.line_to_record(3), 1);
563 assert_eq!(idx.line_to_record(4), 1);
564 }
565
566 #[test]
567 fn record_bytes_contains_embedded_newlines() {
568 let m = MockSource::new();
569 m.append(b"[1] head\n more\n[2] next\n");
570 let mut idx = LineIndex::new();
571 idx.set_record_start(re(r"^\["));
572 idx.extend_to_end(&m);
573 let bytes = idx.record_bytes(0, &m);
574 assert_eq!(&*bytes, b"[1] head\n more");
575 }
576
577 #[test]
578 fn no_match_at_all_is_one_synthetic_record() {
579 let m = MockSource::new();
580 m.append(b"plain text\nmore plain\nno brackets here\n");
581 let mut idx = LineIndex::new();
582 idx.set_record_start(re(r"^\["));
583 idx.extend_to_end(&m);
584 assert_eq!(idx.line_count(), 3);
585 assert_eq!(idx.record_count(), 1);
586 assert_eq!(idx.record_line_range(0), 0..3);
587 }
588
589 #[test]
590 fn pending_record_start_handles_growing_input() {
591 let m = MockSource::new();
592 let mut idx = LineIndex::new();
593 idx.set_record_start(re(r"^\["));
594 m.append(b"[1] head\n more\n");
595 idx.notice_new_bytes(&m);
596 assert_eq!(idx.record_count(), 1);
597 m.append(b"[2] head\n");
598 idx.notice_new_bytes(&m);
599 assert_eq!(idx.record_count(), 2);
600 }
601
602 #[test]
603 fn empty_continuation_lines_are_continuations() {
604 let m = MockSource::new();
605 m.append(b"[1] head\n\n after blank\n[2] next\n");
606 let mut idx = LineIndex::new();
607 idx.set_record_start(re(r"^\["));
608 idx.extend_to_end(&m);
609 assert_eq!(idx.line_count(), 4);
610 assert_eq!(idx.record_count(), 2);
611 assert_eq!(idx.record_line_range(0), 0..3);
612 }
613
614 #[test]
615 fn line_bytes_stripped_returns_visible_text() {
616 let m = MockSource::new();
617 m.append(b"\x1b[31merror\x1b[0m\n");
618 let mut idx = LineIndex::new();
619 idx.extend_to_end(&m);
620 let stripped = idx.line_bytes_stripped(0, &m);
621 assert_eq!(stripped.as_ref(), b"error");
622 }
623
624 #[test]
625 fn line_bytes_stripped_plain_input() {
626 let m = MockSource::new();
627 m.append(b"plain\n");
628 let mut idx = LineIndex::new();
629 idx.extend_to_end(&m);
630 let stripped = idx.line_bytes_stripped(0, &m);
631 assert_eq!(stripped.as_ref(), b"plain");
632 }
633
634 #[test]
635 fn records_mode_reports_true_only_when_regex_set() {
636 let mut idx = LineIndex::new();
637 assert!(!idx.records_mode());
638 idx.set_record_start(re(r"^\["));
639 assert!(idx.records_mode());
640 }
641
642 #[test]
643 fn record_range_handles_unterminated_last_record() {
644 let m = MockSource::new();
645 m.append(b"[1] head\n[2] last line no newline");
646 let mut idx = LineIndex::new();
647 idx.set_record_start(re(r"^\["));
648 idx.extend_to_end(&m);
649 assert_eq!(idx.record_count(), 2);
650 let r1 = idx.record_range(1, &m);
651 assert_eq!(&m.bytes(r1)[..], b"[2] last line no newline");
652 }
653
654 #[test]
655 fn record_count_with_head_cap_zero_returns_zero_in_records_mode() {
656 let m = MockSource::new();
657 m.append(b"[1] head\n[2] next\n");
658 let mut idx = LineIndex::new();
659 idx.set_record_start(re(r"^\["));
660 idx.set_head_cap(0);
661 idx.extend_to_end(&m);
662 assert_eq!(idx.line_count(), 0);
663 assert_eq!(idx.record_count(), 0);
664 }
665}