Skip to main content

tess/
line_index.rs

1use crate::source::Source;
2use std::ops::Range;
3
4pub struct LineIndex {
5    starts: Vec<usize>,       // byte offset of line N
6    scanned_through: usize,
7    /// Byte offset where indexing begins. Non-zero when `--tail` has skipped
8    /// over the head of the source.
9    start_byte: usize,
10    /// True when the last byte scanned was a '\n'. Used to defer adding the
11    /// next line-start until we know whether more bytes will arrive (i.e. the
12    /// '\n' is not actually trailing).
13    pending_line_start: bool,
14    /// Optional cap on the number of lines exposed via `line_count` and
15    /// scanned via `extend_to_byte`. Used by `--head N`.
16    head_cap: Option<usize>,
17}
18
19impl LineIndex {
20    pub fn new() -> Self {
21        Self::new_starting_at(0)
22    }
23
24    /// Construct a line index that begins at the given byte offset. Bytes
25    /// before `start_byte` are never scanned and never appear in `line_range`.
26    /// Used by `--tail` to skip past the head of the source.
27    pub fn new_starting_at(start_byte: usize) -> Self {
28        Self {
29            starts: vec![start_byte],
30            scanned_through: start_byte,
31            start_byte,
32            pending_line_start: false,
33            head_cap: None,
34        }
35    }
36
37    /// Limit the index to the first N logical lines from the start point.
38    /// `line_count` clamps to this and `extend_to_byte` stops scanning
39    /// past it. Used by `--head N`.
40    pub fn set_head_cap(&mut self, cap: usize) {
41        self.head_cap = Some(cap);
42    }
43
44    pub fn line_count(&self) -> usize {
45        let raw = if self.scanned_through == self.start_byte && self.starts.len() == 1 {
46            0
47        } else {
48            self.starts.len()
49        };
50        match self.head_cap {
51            Some(cap) => raw.min(cap),
52            None => raw,
53        }
54    }
55
56    /// True once we've scanned one entry past the cap. We always keep a
57    /// "sentinel" entry beyond the cap so that `line_range(cap-1)` knows
58    /// where the last visible line ends.
59    fn at_scan_cap(&self) -> bool {
60        matches!(self.head_cap, Some(cap) if self.starts.len() > cap)
61    }
62
63    /// Scan `src` from `scanned_through` to at least byte position `target_byte`.
64    fn extend_to_byte(&mut self, src: &dyn Source, target_byte: usize) {
65        if self.at_scan_cap() {
66            return;
67        }
68        // head_cap == 0 means no lines visible, so don't scan at all.
69        if matches!(self.head_cap, Some(0)) {
70            return;
71        }
72        let total = src.len();
73        let stop = target_byte.min(total);
74        if self.scanned_through >= stop {
75            return;
76        }
77
78        // If the previous scan ended on a '\n' and new bytes have arrived,
79        // that '\n' is no longer trailing — commit the deferred line start now.
80        if self.pending_line_start {
81            self.starts.push(self.scanned_through);
82            self.pending_line_start = false;
83            if self.at_scan_cap() {
84                return;
85            }
86        }
87
88        let chunk = src.bytes(self.scanned_through..total);
89        let mut pos = self.scanned_through;
90        for &b in chunk.iter() {
91            pos += 1;
92            if b == b'\n' {
93                if pos < total {
94                    // Not at EOF: this newline definitely starts a new line.
95                    self.starts.push(pos);
96                    if self.at_scan_cap() {
97                        self.scanned_through = pos;
98                        return;
99                    }
100                } else {
101                    // At EOF: may be trailing. Defer until we know more bytes arrive.
102                    self.pending_line_start = true;
103                }
104            }
105            if pos >= stop && b == b'\n' {
106                self.scanned_through = pos;
107                return;
108            }
109        }
110        self.scanned_through = total;
111    }
112
113    pub fn extend_to_line(&mut self, n: usize, src: &dyn Source) {
114        while self.starts.len() <= n && self.scanned_through < src.len() {
115            if self.at_scan_cap() {
116                // head_cap is set and we've already scanned the sentinel past
117                // the cap; no further progress is possible.
118                return;
119            }
120            self.extend_to_byte(src, src.len());
121        }
122    }
123
124    pub fn extend_to_end(&mut self, src: &dyn Source) {
125        self.extend_to_byte(src, src.len());
126    }
127
128    pub fn notice_new_bytes(&mut self, src: &dyn Source) {
129        self.extend_to_byte(src, src.len());
130    }
131
132    /// Byte range of line `n` (excluding the trailing newline).
133    /// Caller must ensure n < line_count() and the index has scanned through the line.
134    pub fn line_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
135        let start = self.starts[n];
136        // When head_cap is set, line `cap-1` is the last visible line. Its end
137        // is the byte just before line `cap`'s start (a real line break exists
138        // there, otherwise the cap wouldn't have been reached). The "last line
139        // unbounded" branch below should not be entered for a capped line.
140        let next_known = self.starts.get(n + 1).copied();
141        let end = if let Some(next_start) = next_known {
142            // Drop the trailing newline preceding the next line start.
143            next_start - 1
144        } else {
145            // Last line: from start to current scanned end (minus trailing \n if present).
146            let total_scanned = src.len().min(self.scanned_through.max(start));
147            if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
148                total_scanned - 1
149            } else {
150                total_scanned
151            }
152        };
153        start..end
154    }
155}
156
157#[cfg(test)]
158mod tests {
159    use super::*;
160    use crate::source::MockSource;
161
162    #[test]
163    fn empty_source_zero_lines() {
164        let m = MockSource::new();
165        let mut idx = LineIndex::new();
166        idx.extend_to_end(&m);
167        assert_eq!(idx.line_count(), 0);
168    }
169
170    #[test]
171    fn single_line_no_newline() {
172        let m = MockSource::new();
173        m.append(b"hello");
174        let mut idx = LineIndex::new();
175        idx.extend_to_end(&m);
176        assert_eq!(idx.line_count(), 1);
177        assert_eq!(idx.line_range(0, &m), 0..5);
178    }
179
180    #[test]
181    fn single_line_trailing_newline() {
182        let m = MockSource::new();
183        m.append(b"hello\n");
184        let mut idx = LineIndex::new();
185        idx.extend_to_end(&m);
186        assert_eq!(idx.line_count(), 1);
187        assert_eq!(idx.line_range(0, &m), 0..5);
188    }
189
190    #[test]
191    fn multiple_lines() {
192        let m = MockSource::new();
193        m.append(b"a\nbb\nccc\n");
194        let mut idx = LineIndex::new();
195        idx.extend_to_end(&m);
196        assert_eq!(idx.line_count(), 3);
197        assert_eq!(idx.line_range(0, &m), 0..1);
198        assert_eq!(idx.line_range(1, &m), 2..4);
199        assert_eq!(idx.line_range(2, &m), 5..8);
200    }
201
202    #[test]
203    fn head_cap_truncates_line_count() {
204        let m = MockSource::new();
205        m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n");  // 8 lines
206        let mut idx = LineIndex::new();
207        idx.set_head_cap(3);
208        idx.extend_to_end(&m);
209        assert_eq!(idx.line_count(), 3, "should be capped to 3 lines");
210        // Lines 0..2 inclusive must have correct ranges.
211        assert_eq!(idx.line_range(0, &m), 0..1);
212        assert_eq!(idx.line_range(1, &m), 2..3);
213        assert_eq!(idx.line_range(2, &m), 4..5);
214    }
215
216    #[test]
217    fn head_cap_extend_to_line_terminates() {
218        // Regression: extend_to_line(n) used to spin forever when head_cap
219        // had already been hit, because extend_to_byte returned without
220        // advancing scanned_through.
221        let m = MockSource::new();
222        m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
223        let mut idx = LineIndex::new();
224        idx.set_head_cap(3);
225        idx.extend_to_line(20, &m);  // far past the cap
226        assert_eq!(idx.line_count(), 3);
227    }
228
229    #[test]
230    fn head_cap_zero_yields_empty() {
231        let m = MockSource::new();
232        m.append(b"1\n2\n3\n");
233        let mut idx = LineIndex::new();
234        idx.set_head_cap(0);
235        idx.extend_to_end(&m);
236        assert_eq!(idx.line_count(), 0);
237    }
238
239    #[test]
240    fn start_byte_skips_head_of_source() {
241        let m = MockSource::new();
242        // 5 lines: alpha\nbeta\ngamma\ndelta\nepsilon\n
243        m.append(b"alpha\nbeta\ngamma\ndelta\nepsilon\n");
244        // gamma starts at byte 11.
245        let mut idx = LineIndex::new_starting_at(11);
246        idx.extend_to_end(&m);
247        assert_eq!(idx.line_count(), 3, "from byte 11 there are 3 lines: gamma, delta, epsilon");
248        assert_eq!(idx.line_range(0, &m), 11..16); // gamma
249        assert_eq!(idx.line_range(1, &m), 17..22); // delta
250        assert_eq!(idx.line_range(2, &m), 23..30); // epsilon
251    }
252
253    #[test]
254    fn start_byte_with_empty_remainder() {
255        let m = MockSource::new();
256        m.append(b"alpha\n");
257        let mut idx = LineIndex::new_starting_at(6);
258        idx.extend_to_end(&m);
259        assert_eq!(idx.line_count(), 0);
260    }
261
262    #[test]
263    fn incremental_growth_via_notice_new_bytes() {
264        let m = MockSource::new();
265        let mut idx = LineIndex::new();
266
267        m.append(b"alpha\n");
268        idx.notice_new_bytes(&m);
269        assert_eq!(idx.line_count(), 1);
270
271        m.append(b"beta\ngamm");
272        idx.notice_new_bytes(&m);
273        assert_eq!(idx.line_count(), 3); // alpha, beta, gamm (partial, but counted)
274
275        m.append(b"a\n");
276        idx.notice_new_bytes(&m);
277        assert_eq!(idx.line_count(), 3);
278        // "alpha\n" = bytes 0-5, "beta\n" = bytes 6-10, "gamma" = bytes 11-15
279        assert_eq!(idx.line_range(2, &m), 11..16); // "gamma"
280    }
281}