Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
3/// Max IoSlice entries per write_vectored batch.
4/// Linux UIO_MAXIOV is 1024; we use that as our batch limit.
5const MAX_IOV: usize = 1024;
6
7/// Chunk size for the forward-scan-within-backward-chunks strategy.
8/// 512KB balances L2/L3 cache residency with amortised memchr_iter overhead.
9const CHUNK: usize = 512 * 1024;
10
11/// Flush a batch of IoSlice entries using write_vectored.
12/// Falls back to individual write_all for each slice if write_vectored
13/// doesn't write everything (handles partial writes).
14#[inline]
15fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
16    if slices.is_empty() {
17        return Ok(());
18    }
19    // Try write_vectored first for the whole batch
20    let total: usize = slices.iter().map(|s| s.len()).sum();
21
22    // Fast path: single writev call often writes everything
23    let written = match out.write_vectored(slices) {
24        Ok(n) if n >= total => return Ok(()),
25        Ok(n) => n,
26        Err(e) => return Err(e),
27    };
28
29    // Slow path: partial write — fall back to write_all per remaining slice
30    let mut skip = written;
31    for slice in slices {
32        let slen = slice.len();
33        if skip >= slen {
34            skip -= slen;
35            continue;
36        }
37        if skip > 0 {
38            out.write_all(&slice[skip..])?;
39            skip = 0;
40        } else {
41            out.write_all(slice)?;
42        }
43    }
44    Ok(())
45}
46
47/// Reverse records separated by a single byte.
48/// Uses chunk-based forward SIMD scan processed in reverse order for
49/// maximum throughput — eliminates per-line memrchr call overhead.
50/// Output uses write_vectored (writev) for zero-copy from mmap'd data.
51pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
52    if data.is_empty() {
53        return Ok(());
54    }
55    if !before {
56        tac_bytes_backward_after(data, separator, out)
57    } else {
58        tac_bytes_backward_before(data, separator, out)
59    }
60}
61
62/// After-separator mode: chunk-based forward SIMD scan, emitted in reverse.
63///
64/// Instead of calling memrchr once per line (high per-call overhead for short
65/// lines), we process the file backward in large chunks. Within each chunk a
66/// single memchr_iter forward pass finds ALL separator positions with full SIMD
67/// pipeline utilisation, then we emit the records (slices) in reverse order.
68///
69/// This converts ~200K memrchr calls (for a 10MB / 50-byte-line file) into
70/// ~20 memchr_iter calls, each scanning a contiguous 512KB region.
71///
72/// Optimization: Instead of collecting positions into a Vec, we store them
73/// in a stack-allocated array to avoid heap allocation per chunk.
74fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
75    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
76
77    // `global_end` tracks where the current (rightmost unseen) record ends.
78    let mut global_end = data.len();
79
80    // Stack-allocated positions buffer: avoid heap allocation per chunk.
81    // 512KB chunk / 1 byte per separator = 512K max positions.
82    // We use a heap-allocated buffer once but reuse across all chunks.
83    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
84
85    let mut chunk_start = data.len().saturating_sub(CHUNK);
86
87    loop {
88        let chunk_end = global_end.min(data.len());
89        if chunk_start >= chunk_end {
90            if chunk_start == 0 {
91                break;
92            }
93            chunk_start = chunk_start.saturating_sub(CHUNK);
94            continue;
95        }
96        let chunk = &data[chunk_start..chunk_end];
97
98        // Reuse positions buffer: clear and refill without reallocation.
99        positions_buf.clear();
100        positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
101
102        // Emit records in reverse (rightmost first).
103        for &pos in positions_buf.iter().rev() {
104            if pos + 1 < global_end {
105                iov.push(IoSlice::new(&data[pos + 1..global_end]));
106            }
107            global_end = pos + 1;
108
109            if iov.len() >= MAX_IOV {
110                flush_iov(out, &iov)?;
111                iov.clear();
112            }
113        }
114
115        if chunk_start == 0 {
116            break;
117        }
118        chunk_start = chunk_start.saturating_sub(CHUNK);
119    }
120
121    // First record
122    if global_end > 0 {
123        iov.push(IoSlice::new(&data[0..global_end]));
124    }
125    flush_iov(out, &iov)?;
126    Ok(())
127}
128
129/// Before-separator mode: chunk-based forward SIMD scan, emitted in reverse.
130///
131/// Same chunked strategy as after-mode, but each record STARTS with its
132/// separator byte instead of ending with it.
133fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
134    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
135
136    let mut global_end = data.len();
137    let mut chunk_start = data.len().saturating_sub(CHUNK);
138    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
139
140    loop {
141        let chunk_end = global_end.min(data.len());
142        if chunk_start >= chunk_end {
143            if chunk_start == 0 {
144                break;
145            }
146            chunk_start = chunk_start.saturating_sub(CHUNK);
147            continue;
148        }
149        let chunk = &data[chunk_start..chunk_end];
150
151        positions_buf.clear();
152        positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
153
154        for &pos in positions_buf.iter().rev() {
155            iov.push(IoSlice::new(&data[pos..global_end]));
156            global_end = pos;
157
158            if iov.len() >= MAX_IOV {
159                flush_iov(out, &iov)?;
160                iov.clear();
161            }
162        }
163
164        if chunk_start == 0 {
165            break;
166        }
167        chunk_start = chunk_start.saturating_sub(CHUNK);
168    }
169
170    if global_end > 0 {
171        iov.push(IoSlice::new(&data[0..global_end]));
172    }
173    flush_iov(out, &iov)?;
174    Ok(())
175}
176
177/// Reverse records using a multi-byte string separator.
178/// Uses chunk-based forward SIMD-accelerated memmem + IoSlice zero-copy output.
179///
180/// For single-byte separators, delegates to tac_bytes which uses memchr (faster).
181pub fn tac_string_separator(
182    data: &[u8],
183    separator: &[u8],
184    before: bool,
185    out: &mut impl Write,
186) -> io::Result<()> {
187    if data.is_empty() {
188        return Ok(());
189    }
190
191    if separator.len() == 1 {
192        return tac_bytes(data, separator[0], before, out);
193    }
194
195    let sep_len = separator.len();
196
197    // For multi-byte separators we use the same chunk-based strategy but with
198    // memmem::find_iter instead of memchr_iter. We need FinderRev only for a
199    // quick "any separator exists?" check — the actual work uses forward Finder.
200    //
201    // We still need to handle the case where a separator straddles a chunk
202    // boundary. We do this by extending each chunk's left edge by (sep_len - 1)
203    // bytes and deduplicating matches that fall in the overlap zone.
204
205    if !before {
206        tac_string_after(data, separator, sep_len, out)
207    } else {
208        tac_string_before(data, separator, sep_len, out)
209    }
210}
211
212/// Multi-byte string separator, after mode (separator at end of record).
213fn tac_string_after(
214    data: &[u8],
215    separator: &[u8],
216    sep_len: usize,
217    out: &mut impl Write,
218) -> io::Result<()> {
219    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
220    let mut global_end = data.len();
221    let mut chunk_start = data.len().saturating_sub(CHUNK);
222    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
223
224    loop {
225        let chunk_end = global_end.min(data.len());
226        let scan_start = chunk_start.saturating_sub(sep_len - 1);
227        if scan_start >= chunk_end {
228            if chunk_start == 0 {
229                break;
230            }
231            chunk_start = chunk_start.saturating_sub(CHUNK);
232            continue;
233        }
234        let scan_region = &data[scan_start..chunk_end];
235
236        positions_buf.clear();
237        positions_buf.extend(
238            memchr::memmem::find_iter(scan_region, separator)
239                .map(|p| p + scan_start)
240                .filter(|&p| p >= chunk_start || chunk_start == 0)
241                .filter(|&p| p + sep_len <= global_end),
242        );
243
244        for &pos in positions_buf.iter().rev() {
245            let rec_end_exclusive = pos + sep_len;
246            if rec_end_exclusive < global_end {
247                iov.push(IoSlice::new(&data[rec_end_exclusive..global_end]));
248            }
249            global_end = rec_end_exclusive;
250            if iov.len() >= MAX_IOV {
251                flush_iov(out, &iov)?;
252                iov.clear();
253            }
254        }
255
256        if chunk_start == 0 {
257            break;
258        }
259        chunk_start = chunk_start.saturating_sub(CHUNK);
260    }
261
262    if global_end > 0 {
263        iov.push(IoSlice::new(&data[0..global_end]));
264    }
265    flush_iov(out, &iov)?;
266    Ok(())
267}
268
269/// Multi-byte string separator, before mode (separator at start of record).
270fn tac_string_before(
271    data: &[u8],
272    separator: &[u8],
273    sep_len: usize,
274    out: &mut impl Write,
275) -> io::Result<()> {
276    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
277    let mut global_end = data.len();
278    let mut chunk_start = data.len().saturating_sub(CHUNK);
279    let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
280
281    loop {
282        let chunk_end = global_end.min(data.len());
283        let scan_start = chunk_start.saturating_sub(sep_len - 1);
284        if scan_start >= chunk_end {
285            if chunk_start == 0 {
286                break;
287            }
288            chunk_start = chunk_start.saturating_sub(CHUNK);
289            continue;
290        }
291        let scan_region = &data[scan_start..chunk_end];
292
293        positions_buf.clear();
294        positions_buf.extend(
295            memchr::memmem::find_iter(scan_region, separator)
296                .map(|p| p + scan_start)
297                .filter(|&p| p >= chunk_start || chunk_start == 0)
298                .filter(|&p| p < global_end),
299        );
300
301        for &pos in positions_buf.iter().rev() {
302            iov.push(IoSlice::new(&data[pos..global_end]));
303            global_end = pos;
304            if iov.len() >= MAX_IOV {
305                flush_iov(out, &iov)?;
306                iov.clear();
307            }
308        }
309
310        if chunk_start == 0 {
311            break;
312        }
313        chunk_start = chunk_start.saturating_sub(CHUNK);
314    }
315
316    if global_end > 0 {
317        iov.push(IoSlice::new(&data[0..global_end]));
318    }
319    flush_iov(out, &iov)?;
320    Ok(())
321}
322
323/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
324fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
325    let mut matches = Vec::new();
326    let mut past_end = data.len();
327
328    while past_end > 0 {
329        let buf = &data[..past_end];
330        let mut found = false;
331
332        let mut pos = past_end;
333        while pos > 0 {
334            pos -= 1;
335            if let Some(m) = re.find_at(buf, pos) {
336                if m.start() == pos {
337                    matches.push((m.start(), m.end()));
338                    past_end = m.start();
339                    found = true;
340                    break;
341                }
342            }
343        }
344
345        if !found {
346            break;
347        }
348    }
349
350    matches.reverse();
351    matches
352}
353
354/// Reverse records using a regex separator.
355pub fn tac_regex_separator(
356    data: &[u8],
357    pattern: &str,
358    before: bool,
359    out: &mut impl Write,
360) -> io::Result<()> {
361    if data.is_empty() {
362        return Ok(());
363    }
364
365    let re = match regex::bytes::Regex::new(pattern) {
366        Ok(r) => r,
367        Err(e) => {
368            return Err(io::Error::new(
369                io::ErrorKind::InvalidInput,
370                format!("invalid regex '{}': {}", pattern, e),
371            ));
372        }
373    };
374
375    let matches = find_regex_matches_backward(data, &re);
376
377    if matches.is_empty() {
378        out.write_all(data)?;
379        return Ok(());
380    }
381
382    let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
383
384    if !before {
385        let last_end = matches.last().unwrap().1;
386
387        if last_end < data.len() {
388            iov.push(IoSlice::new(&data[last_end..]));
389        }
390
391        let mut i = matches.len();
392        while i > 0 {
393            i -= 1;
394            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
395            iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
396            if iov.len() >= MAX_IOV {
397                flush_iov(out, &iov)?;
398                iov.clear();
399            }
400        }
401    } else {
402        let mut i = matches.len();
403        while i > 0 {
404            i -= 1;
405            let start = matches[i].0;
406            let end = if i + 1 < matches.len() {
407                matches[i + 1].0
408            } else {
409                data.len()
410            };
411            iov.push(IoSlice::new(&data[start..end]));
412            if iov.len() >= MAX_IOV {
413                flush_iov(out, &iov)?;
414                iov.clear();
415            }
416        }
417
418        if matches[0].0 > 0 {
419            iov.push(IoSlice::new(&data[..matches[0].0]));
420        }
421    }
422
423    flush_iov(out, &iov)?;
424    Ok(())
425}