Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
3/// Max IoSlice entries per write_vectored batch.
4/// Linux UIO_MAXIOV is 1024; we use that as our batch limit.
5const MAX_IOV: usize = 1024;
6
7/// Chunk size for the forward-scan-within-backward-chunks strategy.
8/// 512KB balances L2/L3 cache residency with amortised memchr_iter overhead.
9const CHUNK: usize = 512 * 1024;
10
11/// Flush a batch of IoSlice entries using write_vectored.
12/// Falls back to individual write_all for each slice if write_vectored
13/// doesn't write everything (handles partial writes).
14#[inline]
15fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
16    if slices.is_empty() {
17        return Ok(());
18    }
19    // Try write_vectored first for the whole batch
20    let total: usize = slices.iter().map(|s| s.len()).sum();
21
22    // Fast path: single writev call often writes everything
23    let written = match out.write_vectored(slices) {
24        Ok(n) if n >= total => return Ok(()),
25        Ok(n) => n,
26        Err(e) => return Err(e),
27    };
28
29    // Slow path: partial write — fall back to write_all per remaining slice
30    let mut skip = written;
31    for slice in slices {
32        let slen = slice.len();
33        if skip >= slen {
34            skip -= slen;
35            continue;
36        }
37        if skip > 0 {
38            out.write_all(&slice[skip..])?;
39            skip = 0;
40        } else {
41            out.write_all(slice)?;
42        }
43    }
44    Ok(())
45}
46
47/// Reverse records separated by a single byte.
48/// Uses chunk-based forward SIMD scan processed in reverse order for
49/// maximum throughput — eliminates per-line memrchr call overhead.
50/// Output uses write_vectored (writev) for zero-copy from mmap'd data.
51pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
52    if data.is_empty() {
53        return Ok(());
54    }
55    if !before {
56        tac_bytes_backward_after(data, separator, out)
57    } else {
58        tac_bytes_backward_before(data, separator, out)
59    }
60}
61
62/// After-separator mode: chunk-based forward SIMD scan, emitted in reverse.
63///
64/// Instead of calling memrchr once per line (high per-call overhead for short
65/// lines), we process the file backward in large chunks. Within each chunk a
66/// single memchr_iter forward pass finds ALL separator positions with full SIMD
67/// pipeline utilisation, then we emit the records (slices) in reverse order.
68///
69/// This converts ~200K memrchr calls (for a 10MB / 50-byte-line file) into
70/// ~20 memchr_iter calls, each scanning a contiguous 512KB region.
71fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
72    // No pre-count: capacity is always capped at MAX_IOV anyway.
73    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
74
75    // `global_end` tracks where the current (rightmost unseen) record ends.
76    // We walk it leftward as we discover separators.
77    let mut global_end = data.len();
78
79    // Process the file from right to left in CHUNK-sized windows.
80    // Each iteration handles [chunk_start .. chunk_end) where
81    // chunk_end = min(global_end, data.len()).
82    let mut chunk_start = data.len().saturating_sub(CHUNK);
83
84    loop {
85        let chunk_end = global_end.min(data.len());
86        if chunk_start >= chunk_end {
87            // Chunk is empty — nothing to scan in this window.
88            if chunk_start == 0 {
89                break;
90            }
91            chunk_start = chunk_start.saturating_sub(CHUNK);
92            continue;
93        }
94        let chunk = &data[chunk_start..chunk_end];
95
96        // Single SIMD forward pass finds every separator in the chunk.
97        // Collect into a small stack vec; worst case is CHUNK/1 entries
98        // but typical density is much lower, and we only need the positions.
99        let positions: Vec<usize> = memchr::memchr_iter(sep, chunk)
100            .map(|p| p + chunk_start) // convert to global offset
101            .collect();
102
103        // Emit records in reverse (rightmost first).
104        for &pos in positions.iter().rev() {
105            // In "after" mode, separator is at the END of a record.
106            // The record is data[pos+1 .. global_end].
107            // But if pos+1 == global_end the record after the separator is
108            // empty (consecutive separators) — still must emit the empty
109            // IoSlice so the separator byte itself is part of the PREVIOUS
110            // record we haven't emitted yet.
111            if pos + 1 < global_end {
112                iov.push(IoSlice::new(&data[pos + 1..global_end]));
113            }
114            global_end = pos + 1; // include the separator in the next (leftward) record
115
116            if iov.len() >= MAX_IOV {
117                flush_iov(out, &iov)?;
118                iov.clear();
119            }
120        }
121
122        if chunk_start == 0 {
123            break;
124        }
125        chunk_start = chunk_start.saturating_sub(CHUNK);
126    }
127
128    // First record: everything from data start up to global_end (includes
129    // the leftmost separator byte, which belongs to this record in "after" mode).
130    if global_end > 0 {
131        iov.push(IoSlice::new(&data[0..global_end]));
132    }
133    flush_iov(out, &iov)?;
134    Ok(())
135}
136
137/// Before-separator mode: chunk-based forward SIMD scan, emitted in reverse.
138///
139/// Same chunked strategy as after-mode, but each record STARTS with its
140/// separator byte instead of ending with it.
141fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
142    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
143
144    // `global_end` tracks where the current record ends (exclusive).
145    let mut global_end = data.len();
146    let mut chunk_start = data.len().saturating_sub(CHUNK);
147
148    loop {
149        let chunk_end = global_end.min(data.len());
150        if chunk_start >= chunk_end {
151            if chunk_start == 0 {
152                break;
153            }
154            chunk_start = chunk_start.saturating_sub(CHUNK);
155            continue;
156        }
157        let chunk = &data[chunk_start..chunk_end];
158
159        let positions: Vec<usize> = memchr::memchr_iter(sep, chunk)
160            .map(|p| p + chunk_start)
161            .collect();
162
163        // Emit records in reverse (rightmost first).
164        for &pos in positions.iter().rev() {
165            // In "before" mode the separator is at the START of a record.
166            // Record = data[pos .. global_end].
167            iov.push(IoSlice::new(&data[pos..global_end]));
168            global_end = pos;
169
170            if iov.len() >= MAX_IOV {
171                flush_iov(out, &iov)?;
172                iov.clear();
173            }
174        }
175
176        if chunk_start == 0 {
177            break;
178        }
179        chunk_start = chunk_start.saturating_sub(CHUNK);
180    }
181
182    // Leading content before the first separator (if any).
183    if global_end > 0 {
184        iov.push(IoSlice::new(&data[0..global_end]));
185    }
186    flush_iov(out, &iov)?;
187    Ok(())
188}
189
190/// Reverse records using a multi-byte string separator.
191/// Uses chunk-based forward SIMD-accelerated memmem + IoSlice zero-copy output.
192///
193/// For single-byte separators, delegates to tac_bytes which uses memchr (faster).
194pub fn tac_string_separator(
195    data: &[u8],
196    separator: &[u8],
197    before: bool,
198    out: &mut impl Write,
199) -> io::Result<()> {
200    if data.is_empty() {
201        return Ok(());
202    }
203
204    if separator.len() == 1 {
205        return tac_bytes(data, separator[0], before, out);
206    }
207
208    let sep_len = separator.len();
209
210    // For multi-byte separators we use the same chunk-based strategy but with
211    // memmem::find_iter instead of memchr_iter. We need FinderRev only for a
212    // quick "any separator exists?" check — the actual work uses forward Finder.
213    //
214    // We still need to handle the case where a separator straddles a chunk
215    // boundary. We do this by extending each chunk's left edge by (sep_len - 1)
216    // bytes and deduplicating matches that fall in the overlap zone.
217
218    if !before {
219        tac_string_after(data, separator, sep_len, out)
220    } else {
221        tac_string_before(data, separator, sep_len, out)
222    }
223}
224
225/// Multi-byte string separator, after mode (separator at end of record).
226fn tac_string_after(
227    data: &[u8],
228    separator: &[u8],
229    sep_len: usize,
230    out: &mut impl Write,
231) -> io::Result<()> {
232    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
233    let mut global_end = data.len();
234    let mut chunk_start = data.len().saturating_sub(CHUNK);
235
236    loop {
237        let chunk_end = global_end.min(data.len());
238        // Extend left edge by sep_len-1 to catch separators that straddle boundaries.
239        let scan_start = chunk_start.saturating_sub(sep_len - 1);
240        if scan_start >= chunk_end {
241            if chunk_start == 0 {
242                break;
243            }
244            chunk_start = chunk_start.saturating_sub(CHUNK);
245            continue;
246        }
247        let scan_region = &data[scan_start..chunk_end];
248
249        // Forward SIMD pass to find all separator occurrences in the scan region.
250        let positions: Vec<usize> = memchr::memmem::find_iter(scan_region, separator)
251            .map(|p| p + scan_start) // global offset
252            .filter(|&p| p >= chunk_start || chunk_start == 0) // skip overlap duplicates (already processed)
253            .filter(|&p| p + sep_len <= global_end) // skip positions past our current frontier
254            .collect();
255
256        for &pos in positions.iter().rev() {
257            let rec_end_exclusive = pos + sep_len;
258            if rec_end_exclusive < global_end {
259                iov.push(IoSlice::new(&data[rec_end_exclusive..global_end]));
260            }
261            global_end = rec_end_exclusive;
262            if iov.len() >= MAX_IOV {
263                flush_iov(out, &iov)?;
264                iov.clear();
265            }
266        }
267
268        if chunk_start == 0 {
269            break;
270        }
271        chunk_start = chunk_start.saturating_sub(CHUNK);
272    }
273
274    // First record
275    if global_end > 0 {
276        iov.push(IoSlice::new(&data[0..global_end]));
277    }
278    flush_iov(out, &iov)?;
279    Ok(())
280}
281
282/// Multi-byte string separator, before mode (separator at start of record).
283fn tac_string_before(
284    data: &[u8],
285    separator: &[u8],
286    sep_len: usize,
287    out: &mut impl Write,
288) -> io::Result<()> {
289    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
290    let mut global_end = data.len();
291    let mut chunk_start = data.len().saturating_sub(CHUNK);
292
293    loop {
294        let chunk_end = global_end.min(data.len());
295        let scan_start = chunk_start.saturating_sub(sep_len - 1);
296        if scan_start >= chunk_end {
297            if chunk_start == 0 {
298                break;
299            }
300            chunk_start = chunk_start.saturating_sub(CHUNK);
301            continue;
302        }
303        let scan_region = &data[scan_start..chunk_end];
304
305        let positions: Vec<usize> = memchr::memmem::find_iter(scan_region, separator)
306            .map(|p| p + scan_start)
307            .filter(|&p| p >= chunk_start || chunk_start == 0)
308            .filter(|&p| p < global_end)
309            .collect();
310
311        for &pos in positions.iter().rev() {
312            iov.push(IoSlice::new(&data[pos..global_end]));
313            global_end = pos;
314            if iov.len() >= MAX_IOV {
315                flush_iov(out, &iov)?;
316                iov.clear();
317            }
318        }
319
320        if chunk_start == 0 {
321            break;
322        }
323        chunk_start = chunk_start.saturating_sub(CHUNK);
324    }
325
326    // Leading content before first separator
327    if global_end > 0 {
328        iov.push(IoSlice::new(&data[0..global_end]));
329    }
330    flush_iov(out, &iov)?;
331    Ok(())
332}
333
334/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
335fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
336    let mut matches = Vec::new();
337    let mut past_end = data.len();
338
339    while past_end > 0 {
340        let buf = &data[..past_end];
341        let mut found = false;
342
343        let mut pos = past_end;
344        while pos > 0 {
345            pos -= 1;
346            if let Some(m) = re.find_at(buf, pos) {
347                if m.start() == pos {
348                    matches.push((m.start(), m.end()));
349                    past_end = m.start();
350                    found = true;
351                    break;
352                }
353            }
354        }
355
356        if !found {
357            break;
358        }
359    }
360
361    matches.reverse();
362    matches
363}
364
365/// Reverse records using a regex separator.
366pub fn tac_regex_separator(
367    data: &[u8],
368    pattern: &str,
369    before: bool,
370    out: &mut impl Write,
371) -> io::Result<()> {
372    if data.is_empty() {
373        return Ok(());
374    }
375
376    let re = match regex::bytes::Regex::new(pattern) {
377        Ok(r) => r,
378        Err(e) => {
379            return Err(io::Error::new(
380                io::ErrorKind::InvalidInput,
381                format!("invalid regex '{}': {}", pattern, e),
382            ));
383        }
384    };
385
386    let matches = find_regex_matches_backward(data, &re);
387
388    if matches.is_empty() {
389        out.write_all(data)?;
390        return Ok(());
391    }
392
393    let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
394
395    if !before {
396        let last_end = matches.last().unwrap().1;
397
398        if last_end < data.len() {
399            iov.push(IoSlice::new(&data[last_end..]));
400        }
401
402        let mut i = matches.len();
403        while i > 0 {
404            i -= 1;
405            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
406            iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
407            if iov.len() >= MAX_IOV {
408                flush_iov(out, &iov)?;
409                iov.clear();
410            }
411        }
412    } else {
413        let mut i = matches.len();
414        while i > 0 {
415            i -= 1;
416            let start = matches[i].0;
417            let end = if i + 1 < matches.len() {
418                matches[i + 1].0
419            } else {
420                data.len()
421            };
422            iov.push(IoSlice::new(&data[start..end]));
423            if iov.len() >= MAX_IOV {
424                flush_iov(out, &iov)?;
425                iov.clear();
426            }
427        }
428
429        if matches[0].0 > 0 {
430            iov.push(IoSlice::new(&data[..matches[0].0]));
431        }
432    }
433
434    flush_iov(out, &iov)?;
435    Ok(())
436}