Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
3/// Max IoSlice entries per write_vectored batch.
4/// Linux UIO_MAXIOV is 1024; we use that as our batch limit.
5const MAX_IOV: usize = 1024;
6
7/// Chunk size for the forward-scan-within-backward-chunks strategy.
8/// 2MB gives better SIMD throughput per memchr_iter call (fewer calls needed,
9/// each processing a larger contiguous region with full AVX2 pipeline).
10/// For a 10MB file with 50-byte lines, 2MB chunks = 5 calls vs 20 at 512KB.
11const CHUNK: usize = 2 * 1024 * 1024;
12
13/// Flush a batch of IoSlice entries using write_vectored.
14/// Falls back to individual write_all for each slice if write_vectored
15/// doesn't write everything (handles partial writes).
16#[inline]
17fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
18    if slices.is_empty() {
19        return Ok(());
20    }
21    // Try write_vectored first for the whole batch
22    let total: usize = slices.iter().map(|s| s.len()).sum();
23
24    // Fast path: single writev call often writes everything
25    let written = match out.write_vectored(slices) {
26        Ok(n) if n >= total => return Ok(()),
27        Ok(n) => n,
28        Err(e) => return Err(e),
29    };
30
31    // Slow path: partial write — fall back to write_all per remaining slice
32    let mut skip = written;
33    for slice in slices {
34        let slen = slice.len();
35        if skip >= slen {
36            skip -= slen;
37            continue;
38        }
39        if skip > 0 {
40            out.write_all(&slice[skip..])?;
41            skip = 0;
42        } else {
43            out.write_all(slice)?;
44        }
45    }
46    Ok(())
47}
48
49/// Reverse records separated by a single byte.
50/// Uses chunk-based forward SIMD scan processed in reverse order for
51/// maximum throughput — eliminates per-line memrchr call overhead.
52/// Output uses write_vectored (writev) for zero-copy from mmap'd data.
53pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
54    if data.is_empty() {
55        return Ok(());
56    }
57    if !before {
58        tac_bytes_backward_after(data, separator, out)
59    } else {
60        tac_bytes_backward_before(data, separator, out)
61    }
62}
63
64/// After-separator mode: chunk-based forward SIMD scan, emitted in reverse.
65///
66/// Instead of calling memrchr once per line (high per-call overhead for short
67/// lines), we process the file backward in large chunks. Within each chunk a
68/// single memchr_iter forward pass finds ALL separator positions with full SIMD
69/// pipeline utilisation, then we emit the records (slices) in reverse order.
70///
71/// This converts ~200K memrchr calls (for a 10MB / 50-byte-line file) into
72/// ~20 memchr_iter calls, each scanning a contiguous 512KB region.
73///
74/// Optimization: Instead of collecting positions into a Vec, we store them
75/// in a stack-allocated array to avoid heap allocation per chunk.
76fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
77    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
78
79    // `global_end` tracks where the current (rightmost unseen) record ends.
80    let mut global_end = data.len();
81
82    // Stack-allocated positions buffer: avoid heap allocation per chunk.
83    // 512KB chunk / 1 byte per separator = 512K max positions.
84    // We use a heap-allocated buffer once but reuse across all chunks.
85    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
86
87    let mut chunk_start = data.len().saturating_sub(CHUNK);
88
89    loop {
90        let chunk_end = global_end.min(data.len());
91        if chunk_start >= chunk_end {
92            if chunk_start == 0 {
93                break;
94            }
95            chunk_start = chunk_start.saturating_sub(CHUNK);
96            continue;
97        }
98        let chunk = &data[chunk_start..chunk_end];
99
100        // Reuse positions buffer: clear and refill without reallocation.
101        positions_buf.clear();
102        positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
103
104        // Emit records in reverse (rightmost first).
105        for &pos in positions_buf.iter().rev() {
106            if pos + 1 < global_end {
107                iov.push(IoSlice::new(&data[pos + 1..global_end]));
108            }
109            global_end = pos + 1;
110
111            if iov.len() >= MAX_IOV {
112                flush_iov(out, &iov)?;
113                iov.clear();
114            }
115        }
116
117        if chunk_start == 0 {
118            break;
119        }
120        chunk_start = chunk_start.saturating_sub(CHUNK);
121    }
122
123    // First record
124    if global_end > 0 {
125        iov.push(IoSlice::new(&data[0..global_end]));
126    }
127    flush_iov(out, &iov)?;
128    Ok(())
129}
130
131/// Before-separator mode: chunk-based forward SIMD scan, emitted in reverse.
132///
133/// Same chunked strategy as after-mode, but each record STARTS with its
134/// separator byte instead of ending with it.
135fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
136    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
137
138    let mut global_end = data.len();
139    let mut chunk_start = data.len().saturating_sub(CHUNK);
140    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
141
142    loop {
143        let chunk_end = global_end.min(data.len());
144        if chunk_start >= chunk_end {
145            if chunk_start == 0 {
146                break;
147            }
148            chunk_start = chunk_start.saturating_sub(CHUNK);
149            continue;
150        }
151        let chunk = &data[chunk_start..chunk_end];
152
153        positions_buf.clear();
154        positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
155
156        for &pos in positions_buf.iter().rev() {
157            iov.push(IoSlice::new(&data[pos..global_end]));
158            global_end = pos;
159
160            if iov.len() >= MAX_IOV {
161                flush_iov(out, &iov)?;
162                iov.clear();
163            }
164        }
165
166        if chunk_start == 0 {
167            break;
168        }
169        chunk_start = chunk_start.saturating_sub(CHUNK);
170    }
171
172    if global_end > 0 {
173        iov.push(IoSlice::new(&data[0..global_end]));
174    }
175    flush_iov(out, &iov)?;
176    Ok(())
177}
178
179/// Reverse records using a multi-byte string separator.
180/// Uses chunk-based forward SIMD-accelerated memmem + IoSlice zero-copy output.
181///
182/// For single-byte separators, delegates to tac_bytes which uses memchr (faster).
183pub fn tac_string_separator(
184    data: &[u8],
185    separator: &[u8],
186    before: bool,
187    out: &mut impl Write,
188) -> io::Result<()> {
189    if data.is_empty() {
190        return Ok(());
191    }
192
193    if separator.len() == 1 {
194        return tac_bytes(data, separator[0], before, out);
195    }
196
197    let sep_len = separator.len();
198
199    // For multi-byte separators we use the same chunk-based strategy but with
200    // memmem::find_iter instead of memchr_iter. We need FinderRev only for a
201    // quick "any separator exists?" check — the actual work uses forward Finder.
202    //
203    // We still need to handle the case where a separator straddles a chunk
204    // boundary. We do this by extending each chunk's left edge by (sep_len - 1)
205    // bytes and deduplicating matches that fall in the overlap zone.
206
207    if !before {
208        tac_string_after(data, separator, sep_len, out)
209    } else {
210        tac_string_before(data, separator, sep_len, out)
211    }
212}
213
214/// Multi-byte string separator, after mode (separator at end of record).
215fn tac_string_after(
216    data: &[u8],
217    separator: &[u8],
218    sep_len: usize,
219    out: &mut impl Write,
220) -> io::Result<()> {
221    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
222    let mut global_end = data.len();
223    let mut chunk_start = data.len().saturating_sub(CHUNK);
224    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
225
226    loop {
227        let chunk_end = global_end.min(data.len());
228        let scan_start = chunk_start.saturating_sub(sep_len - 1);
229        if scan_start >= chunk_end {
230            if chunk_start == 0 {
231                break;
232            }
233            chunk_start = chunk_start.saturating_sub(CHUNK);
234            continue;
235        }
236        let scan_region = &data[scan_start..chunk_end];
237
238        positions_buf.clear();
239        positions_buf.extend(
240            memchr::memmem::find_iter(scan_region, separator)
241                .map(|p| p + scan_start)
242                .filter(|&p| p >= chunk_start || chunk_start == 0)
243                .filter(|&p| p + sep_len <= global_end),
244        );
245
246        for &pos in positions_buf.iter().rev() {
247            let rec_end_exclusive = pos + sep_len;
248            if rec_end_exclusive < global_end {
249                iov.push(IoSlice::new(&data[rec_end_exclusive..global_end]));
250            }
251            global_end = rec_end_exclusive;
252            if iov.len() >= MAX_IOV {
253                flush_iov(out, &iov)?;
254                iov.clear();
255            }
256        }
257
258        if chunk_start == 0 {
259            break;
260        }
261        chunk_start = chunk_start.saturating_sub(CHUNK);
262    }
263
264    if global_end > 0 {
265        iov.push(IoSlice::new(&data[0..global_end]));
266    }
267    flush_iov(out, &iov)?;
268    Ok(())
269}
270
271/// Multi-byte string separator, before mode (separator at start of record).
272fn tac_string_before(
273    data: &[u8],
274    separator: &[u8],
275    sep_len: usize,
276    out: &mut impl Write,
277) -> io::Result<()> {
278    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
279    let mut global_end = data.len();
280    let mut chunk_start = data.len().saturating_sub(CHUNK);
281    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
282
283    loop {
284        let chunk_end = global_end.min(data.len());
285        let scan_start = chunk_start.saturating_sub(sep_len - 1);
286        if scan_start >= chunk_end {
287            if chunk_start == 0 {
288                break;
289            }
290            chunk_start = chunk_start.saturating_sub(CHUNK);
291            continue;
292        }
293        let scan_region = &data[scan_start..chunk_end];
294
295        positions_buf.clear();
296        positions_buf.extend(
297            memchr::memmem::find_iter(scan_region, separator)
298                .map(|p| p + scan_start)
299                .filter(|&p| p >= chunk_start || chunk_start == 0)
300                .filter(|&p| p < global_end),
301        );
302
303        for &pos in positions_buf.iter().rev() {
304            iov.push(IoSlice::new(&data[pos..global_end]));
305            global_end = pos;
306            if iov.len() >= MAX_IOV {
307                flush_iov(out, &iov)?;
308                iov.clear();
309            }
310        }
311
312        if chunk_start == 0 {
313            break;
314        }
315        chunk_start = chunk_start.saturating_sub(CHUNK);
316    }
317
318    if global_end > 0 {
319        iov.push(IoSlice::new(&data[0..global_end]));
320    }
321    flush_iov(out, &iov)?;
322    Ok(())
323}
324
325/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
326fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
327    let mut matches = Vec::new();
328    let mut past_end = data.len();
329
330    while past_end > 0 {
331        let buf = &data[..past_end];
332        let mut found = false;
333
334        let mut pos = past_end;
335        while pos > 0 {
336            pos -= 1;
337            if let Some(m) = re.find_at(buf, pos) {
338                if m.start() == pos {
339                    matches.push((m.start(), m.end()));
340                    past_end = m.start();
341                    found = true;
342                    break;
343                }
344            }
345        }
346
347        if !found {
348            break;
349        }
350    }
351
352    matches.reverse();
353    matches
354}
355
356/// Reverse records using a regex separator.
357pub fn tac_regex_separator(
358    data: &[u8],
359    pattern: &str,
360    before: bool,
361    out: &mut impl Write,
362) -> io::Result<()> {
363    if data.is_empty() {
364        return Ok(());
365    }
366
367    let re = match regex::bytes::Regex::new(pattern) {
368        Ok(r) => r,
369        Err(e) => {
370            return Err(io::Error::new(
371                io::ErrorKind::InvalidInput,
372                format!("invalid regex '{}': {}", pattern, e),
373            ));
374        }
375    };
376
377    let matches = find_regex_matches_backward(data, &re);
378
379    if matches.is_empty() {
380        out.write_all(data)?;
381        return Ok(());
382    }
383
384    let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
385
386    if !before {
387        let last_end = matches.last().unwrap().1;
388
389        if last_end < data.len() {
390            iov.push(IoSlice::new(&data[last_end..]));
391        }
392
393        let mut i = matches.len();
394        while i > 0 {
395            i -= 1;
396            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
397            iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
398            if iov.len() >= MAX_IOV {
399                flush_iov(out, &iov)?;
400                iov.clear();
401            }
402        }
403    } else {
404        let mut i = matches.len();
405        while i > 0 {
406            i -= 1;
407            let start = matches[i].0;
408            let end = if i + 1 < matches.len() {
409                matches[i + 1].0
410            } else {
411                data.len()
412            };
413            iov.push(IoSlice::new(&data[start..end]));
414            if iov.len() >= MAX_IOV {
415                flush_iov(out, &iov)?;
416                iov.clear();
417            }
418        }
419
420        if matches[0].0 > 0 {
421            iov.push(IoSlice::new(&data[..matches[0].0]));
422        }
423    }
424
425    flush_iov(out, &iov)?;
426    Ok(())
427}