1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
use crate::source::Source;
use std::ops::Range;
pub struct LineIndex {
starts: Vec<usize>, // byte offset of line N
scanned_through: usize,
/// Byte offset where indexing begins. Non-zero when `--tail` has skipped
/// over the head of the source.
start_byte: usize,
/// True when the last byte scanned was a '\n'. Used to defer adding the
/// next line-start until we know whether more bytes will arrive (i.e. the
/// '\n' is not actually trailing).
pending_line_start: bool,
/// Optional cap on the number of lines exposed via `line_count` and
/// scanned via `extend_to_byte`. Used by `--head N`.
head_cap: Option<usize>,
}
impl LineIndex {
pub fn new() -> Self {
Self::new_starting_at(0)
}
/// Construct a line index that begins at the given byte offset. Bytes
/// before `start_byte` are never scanned and never appear in `line_range`.
/// Used by `--tail` to skip past the head of the source.
pub fn new_starting_at(start_byte: usize) -> Self {
Self {
starts: vec![start_byte],
scanned_through: start_byte,
start_byte,
pending_line_start: false,
head_cap: None,
}
}
/// Limit the index to the first N logical lines from the start point.
/// `line_count` clamps to this and `extend_to_byte` stops scanning
/// past it. Used by `--head N`.
pub fn set_head_cap(&mut self, cap: usize) {
self.head_cap = Some(cap);
}
pub fn line_count(&self) -> usize {
let raw = if self.scanned_through == self.start_byte && self.starts.len() == 1 {
0
} else {
self.starts.len()
};
match self.head_cap {
Some(cap) => raw.min(cap),
None => raw,
}
}
/// True once we've scanned one entry past the cap. We always keep a
/// "sentinel" entry beyond the cap so that `line_range(cap-1)` knows
/// where the last visible line ends.
fn at_scan_cap(&self) -> bool {
matches!(self.head_cap, Some(cap) if self.starts.len() > cap)
}
/// Scan `src` from `scanned_through` to at least byte position `target_byte`.
fn extend_to_byte(&mut self, src: &dyn Source, target_byte: usize) {
if self.at_scan_cap() {
return;
}
// head_cap == 0 means no lines visible, so don't scan at all.
if matches!(self.head_cap, Some(0)) {
return;
}
let total = src.len();
let stop = target_byte.min(total);
if self.scanned_through >= stop {
return;
}
// If the previous scan ended on a '\n' and new bytes have arrived,
// that '\n' is no longer trailing — commit the deferred line start now.
if self.pending_line_start {
self.starts.push(self.scanned_through);
self.pending_line_start = false;
if self.at_scan_cap() {
return;
}
}
let chunk = src.bytes(self.scanned_through..total);
let mut pos = self.scanned_through;
for &b in chunk.iter() {
pos += 1;
if b == b'\n' {
if pos < total {
// Not at EOF: this newline definitely starts a new line.
self.starts.push(pos);
if self.at_scan_cap() {
self.scanned_through = pos;
return;
}
} else {
// At EOF: may be trailing. Defer until we know more bytes arrive.
self.pending_line_start = true;
}
}
if pos >= stop && b == b'\n' {
self.scanned_through = pos;
return;
}
}
self.scanned_through = total;
}
pub fn extend_to_line(&mut self, n: usize, src: &dyn Source) {
while self.starts.len() <= n && self.scanned_through < src.len() {
if self.at_scan_cap() {
// head_cap is set and we've already scanned the sentinel past
// the cap; no further progress is possible.
return;
}
self.extend_to_byte(src, src.len());
}
}
pub fn extend_to_end(&mut self, src: &dyn Source) {
self.extend_to_byte(src, src.len());
}
pub fn notice_new_bytes(&mut self, src: &dyn Source) {
self.extend_to_byte(src, src.len());
}
/// Byte range of line `n` (excluding the trailing newline).
/// Caller must ensure n < line_count() and the index has scanned through the line.
pub fn line_range(&self, n: usize, src: &dyn Source) -> Range<usize> {
let start = self.starts[n];
// When head_cap is set, line `cap-1` is the last visible line. Its end
// is the byte just before line `cap`'s start (a real line break exists
// there, otherwise the cap wouldn't have been reached). The "last line
// unbounded" branch below should not be entered for a capped line.
let next_known = self.starts.get(n + 1).copied();
let end = if let Some(next_start) = next_known {
// Drop the trailing newline preceding the next line start.
next_start - 1
} else {
// Last line: from start to current scanned end (minus trailing \n if present).
let total_scanned = src.len().min(self.scanned_through.max(start));
if total_scanned > start && src.bytes(total_scanned - 1..total_scanned)[0] == b'\n' {
total_scanned - 1
} else {
total_scanned
}
};
start..end
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::source::MockSource;
#[test]
fn empty_source_zero_lines() {
let m = MockSource::new();
let mut idx = LineIndex::new();
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 0);
}
#[test]
fn single_line_no_newline() {
let m = MockSource::new();
m.append(b"hello");
let mut idx = LineIndex::new();
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 1);
assert_eq!(idx.line_range(0, &m), 0..5);
}
#[test]
fn single_line_trailing_newline() {
let m = MockSource::new();
m.append(b"hello\n");
let mut idx = LineIndex::new();
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 1);
assert_eq!(idx.line_range(0, &m), 0..5);
}
#[test]
fn multiple_lines() {
let m = MockSource::new();
m.append(b"a\nbb\nccc\n");
let mut idx = LineIndex::new();
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 3);
assert_eq!(idx.line_range(0, &m), 0..1);
assert_eq!(idx.line_range(1, &m), 2..4);
assert_eq!(idx.line_range(2, &m), 5..8);
}
#[test]
fn head_cap_truncates_line_count() {
let m = MockSource::new();
m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n"); // 8 lines
let mut idx = LineIndex::new();
idx.set_head_cap(3);
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 3, "should be capped to 3 lines");
// Lines 0..2 inclusive must have correct ranges.
assert_eq!(idx.line_range(0, &m), 0..1);
assert_eq!(idx.line_range(1, &m), 2..3);
assert_eq!(idx.line_range(2, &m), 4..5);
}
#[test]
fn head_cap_extend_to_line_terminates() {
// Regression: extend_to_line(n) used to spin forever when head_cap
// had already been hit, because extend_to_byte returned without
// advancing scanned_through.
let m = MockSource::new();
m.append(b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
let mut idx = LineIndex::new();
idx.set_head_cap(3);
idx.extend_to_line(20, &m); // far past the cap
assert_eq!(idx.line_count(), 3);
}
#[test]
fn head_cap_zero_yields_empty() {
let m = MockSource::new();
m.append(b"1\n2\n3\n");
let mut idx = LineIndex::new();
idx.set_head_cap(0);
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 0);
}
#[test]
fn start_byte_skips_head_of_source() {
let m = MockSource::new();
// 5 lines: alpha\nbeta\ngamma\ndelta\nepsilon\n
m.append(b"alpha\nbeta\ngamma\ndelta\nepsilon\n");
// gamma starts at byte 11.
let mut idx = LineIndex::new_starting_at(11);
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 3, "from byte 11 there are 3 lines: gamma, delta, epsilon");
assert_eq!(idx.line_range(0, &m), 11..16); // gamma
assert_eq!(idx.line_range(1, &m), 17..22); // delta
assert_eq!(idx.line_range(2, &m), 23..30); // epsilon
}
#[test]
fn start_byte_with_empty_remainder() {
let m = MockSource::new();
m.append(b"alpha\n");
let mut idx = LineIndex::new_starting_at(6);
idx.extend_to_end(&m);
assert_eq!(idx.line_count(), 0);
}
#[test]
fn incremental_growth_via_notice_new_bytes() {
let m = MockSource::new();
let mut idx = LineIndex::new();
m.append(b"alpha\n");
idx.notice_new_bytes(&m);
assert_eq!(idx.line_count(), 1);
m.append(b"beta\ngamm");
idx.notice_new_bytes(&m);
assert_eq!(idx.line_count(), 3); // alpha, beta, gamm (partial, but counted)
m.append(b"a\n");
idx.notice_new_bytes(&m);
assert_eq!(idx.line_count(), 3);
// "alpha\n" = bytes 0-5, "beta\n" = bytes 6-10, "gamma" = bytes 11-15
assert_eq!(idx.line_range(2, &m), 11..16); // "gamma"
}
}