1use crate::source::Source;
2use regex::bytes::Regex;
3use std::ops::Range;
4
5pub struct LineIndex {
6 starts: Vec<usize>,
7 record_starts: Vec<usize>,
8 record_start_regex: Option<Regex>,
9 scanned_through: usize,
10 start_byte: usize,
11 pending_line_start: bool,
12 head_cap: Option<usize>,
13 record_zero_committed: bool,
17}
18
19impl Default for LineIndex {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl LineIndex {
26 pub fn new() -> Self {
27 Self::new_starting_at(0)
28 }
29
30 pub fn new_starting_at(start_byte: usize) -> Self {
34 Self {
35 starts: vec![start_byte],
36 record_starts: vec![start_byte],
37 record_start_regex: None,
38 scanned_through: start_byte,
39 start_byte,
40 pending_line_start: false,
41 head_cap: None,
42 record_zero_committed: true,
43 }
44 }
45
46 pub fn set_head_cap(&mut self, cap: usize) {
50 self.head_cap = Some(cap);
51 }
52
53 pub fn set_record_start(&mut self, re: Regex) {
56 debug_assert!(
57 self.scanned_through == self.start_byte && self.starts.len() == 1,
58 "set_record_start must be called before scanning"
59 );
60 self.record_start_regex = Some(re);
61 self.record_zero_committed = false;
62 }
63
64 pub fn records_mode(&self) -> bool {
66 self.record_start_regex.is_some()
67 }
68
69 pub fn line_count(&self) -> usize {
70 let raw = if self.scanned_through == self.start_byte && self.starts.len() == 1 {
71 0
72 } else {
73 self.starts.len()
74 };
75 match self.head_cap {
76 Some(cap) => raw.min(cap),
77 None => raw,
78 }
79 }
80
81 fn at_scan_cap(&self) -> bool {
85 matches!(self.head_cap, Some(cap) if self.starts.len() > cap)
86 }
87
88 fn extend_to_byte(&mut self, src: &dyn Source, target_byte: usize) {
90 if self.at_scan_cap() {
91 return;
92 }
93 if matches!(self.head_cap, Some(0)) {
94 return;
95 }
96 let total = src.len();
97 let stop = target_byte.min(total);
98 if self.scanned_through >= stop {
99 return;
100 }
101
102 if self.pending_line_start {
103 let line_start = self.scanned_through;
104 self.starts.push(line_start);
105 self.maybe_push_record_start(line_start, src);
106 self.pending_line_start = false;
107 if self.at_scan_cap() {
108 return;
109 }
110 }
111
112 let chunk = src.bytes(self.scanned_through..total);
113 let mut pos = self.scanned_through;
114 for &b in chunk.iter() {
115 pos += 1;
116 if b == b'\n' {
117 if pos < total {
118 let new_line_start = pos;
119 self.starts.push(new_line_start);
120 self.maybe_push_record_start(new_line_start, src);
121 if self.at_scan_cap() {
122 self.scanned_through = pos;
123 return;
124 }
125 } else {
126 self.pending_line_start = true;
127 }
128 }
129 if pos >= stop && b == b'\n' {
130 self.scanned_through = pos;
131 return;
132 }
133 }
134 self.scanned_through = total;
135 }
136
137 fn maybe_push_record_start(&mut self, line_start: usize, src: &dyn Source) {
141 match &self.record_start_regex {
142 None => {
143 self.record_starts.push(line_start);
145 }
146 Some(re) => {
147 let line_end = self.find_line_end(line_start, src);
148 let line_bytes = src.bytes(line_start..line_end);
149 let is_match = re.is_match(&line_bytes);
150 if is_match {
151 if !self.record_zero_committed {
152 if line_start == self.start_byte {
154 } else {
158 self.record_starts.push(line_start);
162 }
163 self.record_zero_committed = true;
164 } else {
165 self.record_starts.push(line_start);
166 }
167 } else if !self.record_zero_committed && line_start == self.start_byte {
168 self.record_zero_committed = true;
172 }
173 }
175 }
176 }
177
178 fn find_line_end(&self, line_start: usize, src: &dyn Source) -> usize {
182 let total = src.len();
183 let chunk = src.bytes(line_start..total);
184 for (i, &b) in chunk.iter().enumerate() {
185 if b == b'\n' {
186 return line_start + i;
187 }
188 }
189 total
190 }
191
192 pub fn extend_to_line(&mut self, n: usize, src: &dyn Source) {
193 while self.starts.len() <= n && self.scanned_through < src.len() {
194 if self.at_scan_cap() {
195 return;
198 }
199 self.extend_to_byte(src, src.len());
200 }
201 }
202
203 pub fn extend_to_end(&mut self, src: &dyn Source) {
204 self.extend_to_byte(src, src.len());
205 }
206
207 pub fn notice_new_bytes(&mut self, src: &dyn Source) {
208 self.extend_to_byte(src, src.len());
209 }
210
211 pub fn scanned_through(&self) -> usize {
213 self.scanned_through
214 }
215
216 pub fn extend_to_byte_for_query(&mut self, src: &dyn Source, byte: usize) {
220 self.extend_to_byte(src, byte);
221 }
222
223 pub fn line_at_byte(&self, byte: usize) -> Option<usize> {
227 if byte < self.start_byte || byte >= self.scanned_through {
228 return None;
229 }
230 match self.starts.binary_search(&byte) {
231 Ok(idx) => Some(idx),
232 Err(0) => None,
233 Err(idx) => Some(idx - 1),
234 }
235 }
236
237 pub fn line_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
240 let start = self.starts[n];
241 let next_known = self.starts.get(n + 1).copied();
246 let end = if let Some(next_start) = next_known {
247 next_start - 1
249 } else {
250 let total_scanned = src.len().min(self.scanned_through.max(start));
252 if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
253 total_scanned - 1
254 } else {
255 total_scanned
256 }
257 };
258 start..end
259 }
260
261 pub fn record_count(&self) -> usize {
265 let raw = if self.scanned_through == self.start_byte && self.record_starts.len() == 1
266 && self.record_start_regex.is_none()
267 {
268 0
270 } else if self.scanned_through == self.start_byte && self.record_starts.len() == 1
271 && self.record_start_regex.is_some() && !self.record_zero_committed
272 {
273 0
275 } else {
276 self.record_starts.len()
277 };
278 match self.head_cap {
279 Some(0) => 0,
280 Some(cap) => {
281 let visible_lines = raw.min(self.starts.len()).min(cap);
282 self.line_to_record_inner(visible_lines.saturating_sub(1))
283 .map(|r| r + 1)
284 .unwrap_or(0)
285 }
286 None => raw,
287 }
288 }
289
290 pub fn record_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
293 let start = self.record_starts[n];
294 let end = if n + 1 < self.record_starts.len() {
295 self.record_starts[n + 1] - 1
297 } else {
298 let total_scanned = src.len().min(self.scanned_through.max(start));
300 if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
301 total_scanned - 1
302 } else {
303 total_scanned
304 }
305 };
306 start..end
307 }
308
309 pub fn record_line_range(&self, n: usize) -> Range<usize> {
311 let first_line = self.starts.binary_search(&self.record_starts[n])
312 .expect("record start is always a line start");
313 let last_line = if n + 1 < self.record_starts.len() {
314 self.starts.binary_search(&self.record_starts[n + 1])
315 .expect("record start is always a line start")
316 } else {
317 self.starts.len()
318 };
319 first_line..last_line
320 }
321
322 pub fn line_to_record(&self, line_n: usize) -> usize {
324 self.line_to_record_inner(line_n).unwrap_or(0)
325 }
326
327 fn line_to_record_inner(&self, line_n: usize) -> Option<usize> {
328 if self.starts.len() <= line_n {
329 return None;
330 }
331 let line_start = self.starts[line_n];
332 match self.record_starts.binary_search(&line_start) {
333 Ok(idx) => Some(idx),
334 Err(0) => Some(0),
335 Err(idx) => Some(idx - 1),
336 }
337 }
338
339 pub fn record_bytes<'a>(&self, n: usize, src: &'a dyn Source) -> std::borrow::Cow<'a, [u8]> {
341 let r = self.record_range(n, src);
342 src.bytes(r)
343 }
344}
345
346#[cfg(test)]
347mod tests {
348 use super::*;
349 use crate::source::MockSource;
350 use regex::bytes::Regex;
351
352 #[test]
353 fn empty_source_zero_lines() {
354 let m = MockSource::new();
355 let mut idx = LineIndex::new();
356 idx.extend_to_end(&m);
357 assert_eq!(idx.line_count(), 0);
358 }
359
360 #[test]
361 fn single_line_no_newline() {
362 let m = MockSource::new();
363 m.append(b"hello");
364 let mut idx = LineIndex::new();
365 idx.extend_to_end(&m);
366 assert_eq!(idx.line_count(), 1);
367 assert_eq!(idx.line_range(0, &m), 0..5);
368 }
369
370 #[test]
371 fn single_line_trailing_newline() {
372 let m = MockSource::new();
373 m.append(b"hello\n");
374 let mut idx = LineIndex::new();
375 idx.extend_to_end(&m);
376 assert_eq!(idx.line_count(), 1);
377 assert_eq!(idx.line_range(0, &m), 0..5);
378 }
379
380 #[test]
381 fn multiple_lines() {
382 let m = MockSource::new();
383 m.append(b"a\nbb\nccc\n");
384 let mut idx = LineIndex::new();
385 idx.extend_to_end(&m);
386 assert_eq!(idx.line_count(), 3);
387 assert_eq!(idx.line_range(0, &m), 0..1);
388 assert_eq!(idx.line_range(1, &m), 2..4);
389 assert_eq!(idx.line_range(2, &m), 5..8);
390 }
391
392 #[test]
393 fn head_cap_truncates_line_count() {
394 let m = MockSource::new();
395 m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n"); let mut idx = LineIndex::new();
397 idx.set_head_cap(3);
398 idx.extend_to_end(&m);
399 assert_eq!(idx.line_count(), 3, "should be capped to 3 lines");
400 assert_eq!(idx.line_range(0, &m), 0..1);
402 assert_eq!(idx.line_range(1, &m), 2..3);
403 assert_eq!(idx.line_range(2, &m), 4..5);
404 }
405
406 #[test]
407 fn head_cap_extend_to_line_terminates() {
408 let m = MockSource::new();
412 m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
413 let mut idx = LineIndex::new();
414 idx.set_head_cap(3);
415 idx.extend_to_line(20, &m); assert_eq!(idx.line_count(), 3);
417 }
418
419 #[test]
420 fn head_cap_zero_yields_empty() {
421 let m = MockSource::new();
422 m.append(b"1\n2\n3\n");
423 let mut idx = LineIndex::new();
424 idx.set_head_cap(0);
425 idx.extend_to_end(&m);
426 assert_eq!(idx.line_count(), 0);
427 }
428
429 #[test]
430 fn start_byte_skips_head_of_source() {
431 let m = MockSource::new();
432 m.append(b"alpha\nbeta\ngamma\ndelta\nepsilon\n");
434 let mut idx = LineIndex::new_starting_at(11);
436 idx.extend_to_end(&m);
437 assert_eq!(idx.line_count(), 3, "from byte 11 there are 3 lines: gamma, delta, epsilon");
438 assert_eq!(idx.line_range(0, &m), 11..16); assert_eq!(idx.line_range(1, &m), 17..22); assert_eq!(idx.line_range(2, &m), 23..30); }
442
443 #[test]
444 fn start_byte_with_empty_remainder() {
445 let m = MockSource::new();
446 m.append(b"alpha\n");
447 let mut idx = LineIndex::new_starting_at(6);
448 idx.extend_to_end(&m);
449 assert_eq!(idx.line_count(), 0);
450 }
451
452 #[test]
453 fn incremental_growth_via_notice_new_bytes() {
454 let m = MockSource::new();
455 let mut idx = LineIndex::new();
456
457 m.append(b"alpha\n");
458 idx.notice_new_bytes(&m);
459 assert_eq!(idx.line_count(), 1);
460
461 m.append(b"beta\ngamm");
462 idx.notice_new_bytes(&m);
463 assert_eq!(idx.line_count(), 3); m.append(b"a\n");
466 idx.notice_new_bytes(&m);
467 assert_eq!(idx.line_count(), 3);
468 assert_eq!(idx.line_range(2, &m), 11..16); }
471
472 fn re(pat: &str) -> Regex {
473 Regex::new(pat).unwrap()
474 }
475
476 #[test]
477 fn records_mirror_lines_when_no_regex() {
478 let m = MockSource::new();
479 m.append(b"a\nb\nc\n");
480 let mut idx = LineIndex::new();
481 idx.extend_to_end(&m);
482 assert_eq!(idx.line_count(), 3);
483 assert_eq!(idx.record_count(), 3);
484 for i in 0..3 {
485 assert_eq!(idx.record_range(i, &m), idx.line_range(i, &m));
486 }
487 }
488
489 #[test]
490 fn record_count_zero_for_empty_source_records_mode() {
491 let m = MockSource::new();
492 let mut idx = LineIndex::new();
493 idx.set_record_start(re(r"^\["));
494 idx.extend_to_end(&m);
495 assert_eq!(idx.record_count(), 0);
496 }
497
498 #[test]
499 fn records_group_continuations() {
500 let m = MockSource::new();
501 m.append(b"[1] head\n more\n more\n[2] head\n more\n");
502 let mut idx = LineIndex::new();
503 idx.set_record_start(re(r"^\["));
504 idx.extend_to_end(&m);
505 assert_eq!(idx.line_count(), 5);
506 assert_eq!(idx.record_count(), 2);
507 let r0 = idx.record_range(0, &m);
508 assert_eq!(&m.bytes(r0)[..], b"[1] head\n more\n more");
509 let r1 = idx.record_range(1, &m);
510 assert_eq!(&m.bytes(r1)[..3], b"[2]");
511 }
512
513 #[test]
514 fn synthetic_record_zero_absorbs_orphan_head() {
515 let m = MockSource::new();
516 m.append(b"banner line 1\nbanner line 2\n[1] first real record\n");
517 let mut idx = LineIndex::new();
518 idx.set_record_start(re(r"^\["));
519 idx.extend_to_end(&m);
520 assert_eq!(idx.line_count(), 3);
521 assert_eq!(idx.record_count(), 2);
522 let r0 = idx.record_range(0, &m);
523 assert_eq!(&m.bytes(r0)[..], b"banner line 1\nbanner line 2");
524 assert_eq!(idx.record_line_range(0), 0..2);
525 assert_eq!(idx.record_line_range(1), 2..3);
526 }
527
528 #[test]
529 fn line_to_record_round_trips() {
530 let m = MockSource::new();
531 m.append(b"[1] a\n cont\n[2] b\n cont\n cont\n");
532 let mut idx = LineIndex::new();
533 idx.set_record_start(re(r"^\["));
534 idx.extend_to_end(&m);
535 assert_eq!(idx.line_to_record(0), 0); assert_eq!(idx.line_to_record(1), 0); assert_eq!(idx.line_to_record(2), 1); assert_eq!(idx.line_to_record(3), 1);
539 assert_eq!(idx.line_to_record(4), 1);
540 }
541
542 #[test]
543 fn record_bytes_contains_embedded_newlines() {
544 let m = MockSource::new();
545 m.append(b"[1] head\n more\n[2] next\n");
546 let mut idx = LineIndex::new();
547 idx.set_record_start(re(r"^\["));
548 idx.extend_to_end(&m);
549 let bytes = idx.record_bytes(0, &m);
550 assert_eq!(&*bytes, b"[1] head\n more");
551 }
552
553 #[test]
554 fn no_match_at_all_is_one_synthetic_record() {
555 let m = MockSource::new();
556 m.append(b"plain text\nmore plain\nno brackets here\n");
557 let mut idx = LineIndex::new();
558 idx.set_record_start(re(r"^\["));
559 idx.extend_to_end(&m);
560 assert_eq!(idx.line_count(), 3);
561 assert_eq!(idx.record_count(), 1);
562 assert_eq!(idx.record_line_range(0), 0..3);
563 }
564
565 #[test]
566 fn pending_record_start_handles_growing_input() {
567 let m = MockSource::new();
568 let mut idx = LineIndex::new();
569 idx.set_record_start(re(r"^\["));
570 m.append(b"[1] head\n more\n");
571 idx.notice_new_bytes(&m);
572 assert_eq!(idx.record_count(), 1);
573 m.append(b"[2] head\n");
574 idx.notice_new_bytes(&m);
575 assert_eq!(idx.record_count(), 2);
576 }
577
578 #[test]
579 fn empty_continuation_lines_are_continuations() {
580 let m = MockSource::new();
581 m.append(b"[1] head\n\n after blank\n[2] next\n");
582 let mut idx = LineIndex::new();
583 idx.set_record_start(re(r"^\["));
584 idx.extend_to_end(&m);
585 assert_eq!(idx.line_count(), 4);
586 assert_eq!(idx.record_count(), 2);
587 assert_eq!(idx.record_line_range(0), 0..3);
588 }
589
590 #[test]
591 fn records_mode_reports_true_only_when_regex_set() {
592 let mut idx = LineIndex::new();
593 assert!(!idx.records_mode());
594 idx.set_record_start(re(r"^\["));
595 assert!(idx.records_mode());
596 }
597
598 #[test]
599 fn record_range_handles_unterminated_last_record() {
600 let m = MockSource::new();
601 m.append(b"[1] head\n[2] last line no newline");
602 let mut idx = LineIndex::new();
603 idx.set_record_start(re(r"^\["));
604 idx.extend_to_end(&m);
605 assert_eq!(idx.record_count(), 2);
606 let r1 = idx.record_range(1, &m);
607 assert_eq!(&m.bytes(r1)[..], b"[2] last line no newline");
608 }
609
610 #[test]
611 fn record_count_with_head_cap_zero_returns_zero_in_records_mode() {
612 let m = MockSource::new();
613 m.append(b"[1] head\n[2] next\n");
614 let mut idx = LineIndex::new();
615 idx.set_record_start(re(r"^\["));
616 idx.set_head_cap(0);
617 idx.extend_to_end(&m);
618 assert_eq!(idx.line_count(), 0);
619 assert_eq!(idx.record_count(), 0);
620 }
621}