Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
3/// Max IoSlice entries per write_vectored batch.
4/// Linux UIO_MAXIOV is 1024; we use that as our batch limit.
5const MAX_IOV: usize = 1024;
6
7/// Flush a batch of IoSlice entries using write_vectored.
8/// Falls back to individual write_all for each slice if write_vectored
9/// doesn't write everything (handles partial writes).
10#[inline]
11fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
12    if slices.is_empty() {
13        return Ok(());
14    }
15    // Try write_vectored first for the whole batch
16    let total: usize = slices.iter().map(|s| s.len()).sum();
17
18    // Fast path: single writev call often writes everything
19    let written = match out.write_vectored(slices) {
20        Ok(n) if n >= total => return Ok(()),
21        Ok(n) => n,
22        Err(e) => return Err(e),
23    };
24
25    // Slow path: partial write — fall back to write_all per remaining slice
26    let mut skip = written;
27    for slice in slices {
28        let slen = slice.len();
29        if skip >= slen {
30            skip -= slen;
31            continue;
32        }
33        if skip > 0 {
34            out.write_all(&slice[skip..])?;
35            skip = 0;
36        } else {
37            out.write_all(slice)?;
38        }
39    }
40    Ok(())
41}
42
43/// Reverse records separated by a single byte.
44/// Uses backward SIMD scan (memrchr) — zero Vec allocation, single pass.
45/// Output uses write_vectored (writev) for zero-copy from mmap'd data.
46pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
47    if data.is_empty() {
48        return Ok(());
49    }
50    if !before {
51        tac_bytes_backward_after(data, separator, out)
52    } else {
53        tac_bytes_backward_before(data, separator, out)
54    }
55}
56
57/// After-separator mode: backward scan with memrchr.
58/// Each record includes its trailing separator byte.
59/// Uses IoSlice batching for zero-copy output directly from mmap'd data.
60///
61/// Pre-counts separators with SIMD-accelerated memchr_iter to right-size
62/// the IoSlice Vec, avoiding reallocations for files with many lines.
63fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
64    // Pre-count separators with SIMD (memchr at 30+ GB/s) to right-size IoSlice Vec.
65    // For a 100MB file with 10M lines, this avoids many Vec reallocations.
66    let sep_count = memchr::memchr_iter(sep, data).count();
67    if sep_count == 0 {
68        return out.write_all(data);
69    }
70
71    // +1 for the first record (before first separator), +1 for possible trailing content
72    let mut iov: Vec<IoSlice> = Vec::with_capacity((sep_count + 2).min(MAX_IOV));
73
74    let mut end = data.len();
75
76    let Some(mut pos) = memchr::memrchr(sep, data) else {
77        // Should not happen since sep_count > 0, but handle gracefully
78        return out.write_all(data);
79    };
80
81    // Trailing content after last separator
82    if pos + 1 < end {
83        iov.push(IoSlice::new(&data[pos + 1..end]));
84    }
85    end = pos + 1;
86
87    // Scan backward for remaining separators
88    while pos > 0 {
89        match memchr::memrchr(sep, &data[..pos]) {
90            Some(prev) => {
91                iov.push(IoSlice::new(&data[prev + 1..end]));
92                if iov.len() >= MAX_IOV {
93                    flush_iov(out, &iov)?;
94                    iov.clear();
95                }
96                end = prev + 1;
97                pos = prev;
98            }
99            None => break,
100        }
101    }
102
103    // First record (from start of data)
104    iov.push(IoSlice::new(&data[0..end]));
105    flush_iov(out, &iov)?;
106
107    Ok(())
108}
109
110/// Before-separator mode: backward scan with memrchr.
111/// Each record starts with its separator byte.
112/// Uses IoSlice batching for zero-copy output.
113///
114/// Pre-counts separators with SIMD-accelerated memchr_iter to right-size
115/// the IoSlice Vec.
116fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
117    // Pre-count separators with SIMD to right-size IoSlice Vec.
118    let sep_count = memchr::memchr_iter(sep, data).count();
119    if sep_count == 0 {
120        return out.write_all(data);
121    }
122
123    // +1 for possible leading content before first separator
124    let mut iov: Vec<IoSlice> = Vec::with_capacity((sep_count + 1).min(MAX_IOV));
125
126    let mut end = data.len();
127
128    let Some(pos) = memchr::memrchr(sep, data) else {
129        // Should not happen since sep_count > 0, but handle gracefully
130        return out.write_all(data);
131    };
132
133    // Last record: from last separator to end
134    iov.push(IoSlice::new(&data[pos..end]));
135    end = pos;
136
137    // Scan backward
138    while end > 0 {
139        match memchr::memrchr(sep, &data[..end]) {
140            Some(prev) => {
141                iov.push(IoSlice::new(&data[prev..end]));
142                if iov.len() >= MAX_IOV {
143                    flush_iov(out, &iov)?;
144                    iov.clear();
145                }
146                end = prev;
147            }
148            None => break,
149        }
150    }
151
152    // Leading content before first separator
153    if end > 0 {
154        iov.push(IoSlice::new(&data[0..end]));
155    }
156
157    flush_iov(out, &iov)?;
158    Ok(())
159}
160
161/// Reverse records using a multi-byte string separator.
162/// Uses backward SIMD-accelerated memmem (FinderRev) + IoSlice zero-copy output.
163///
164/// For single-byte separators, delegates to tac_bytes which uses memchr (faster).
165/// Pre-counts separators to right-size the IoSlice Vec.
166pub fn tac_string_separator(
167    data: &[u8],
168    separator: &[u8],
169    before: bool,
170    out: &mut impl Write,
171) -> io::Result<()> {
172    if data.is_empty() {
173        return Ok(());
174    }
175
176    if separator.len() == 1 {
177        return tac_bytes(data, separator[0], before, out);
178    }
179
180    let sep_len = separator.len();
181    let finder = memchr::memmem::FinderRev::new(separator);
182
183    // Pre-count separators for right-sized IoSlice Vec.
184    // Use forward Finder for counting (more cache-friendly than backward scan).
185    let sep_count = memchr::memmem::find_iter(data, separator).count();
186    if sep_count == 0 {
187        return out.write_all(data);
188    }
189
190    let mut iov: Vec<IoSlice> = Vec::with_capacity((sep_count + 2).min(MAX_IOV));
191
192    if !before {
193        let mut end = data.len();
194
195        let Some(mut pos) = finder.rfind(data) else {
196            // Should not happen since sep_count > 0
197            return out.write_all(data);
198        };
199
200        // Trailing content after last separator
201        if pos + sep_len < end {
202            iov.push(IoSlice::new(&data[pos + sep_len..end]));
203        }
204        end = pos + sep_len;
205
206        // Scan backward
207        while pos > 0 {
208            match finder.rfind(&data[..pos]) {
209                Some(prev) => {
210                    iov.push(IoSlice::new(&data[prev + sep_len..end]));
211                    if iov.len() >= MAX_IOV {
212                        flush_iov(out, &iov)?;
213                        iov.clear();
214                    }
215                    end = prev + sep_len;
216                    pos = prev;
217                }
218                None => break,
219            }
220        }
221
222        // First record
223        iov.push(IoSlice::new(&data[0..end]));
224    } else {
225        let mut end = data.len();
226
227        let Some(pos) = finder.rfind(data) else {
228            // Should not happen since sep_count > 0
229            return out.write_all(data);
230        };
231
232        // Last record: from last separator to end
233        iov.push(IoSlice::new(&data[pos..end]));
234        end = pos;
235
236        // Scan backward
237        while end > 0 {
238            match finder.rfind(&data[..end]) {
239                Some(prev) => {
240                    iov.push(IoSlice::new(&data[prev..end]));
241                    if iov.len() >= MAX_IOV {
242                        flush_iov(out, &iov)?;
243                        iov.clear();
244                    }
245                    end = prev;
246                }
247                None => break,
248            }
249        }
250
251        // Leading content before first separator
252        if end > 0 {
253            iov.push(IoSlice::new(&data[0..end]));
254        }
255    }
256
257    flush_iov(out, &iov)?;
258    Ok(())
259}
260
261/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
262fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
263    let mut matches = Vec::new();
264    let mut past_end = data.len();
265
266    while past_end > 0 {
267        let buf = &data[..past_end];
268        let mut found = false;
269
270        let mut pos = past_end;
271        while pos > 0 {
272            pos -= 1;
273            if let Some(m) = re.find_at(buf, pos) {
274                if m.start() == pos {
275                    matches.push((m.start(), m.end()));
276                    past_end = m.start();
277                    found = true;
278                    break;
279                }
280            }
281        }
282
283        if !found {
284            break;
285        }
286    }
287
288    matches.reverse();
289    matches
290}
291
292/// Reverse records using a regex separator.
293pub fn tac_regex_separator(
294    data: &[u8],
295    pattern: &str,
296    before: bool,
297    out: &mut impl Write,
298) -> io::Result<()> {
299    if data.is_empty() {
300        return Ok(());
301    }
302
303    let re = match regex::bytes::Regex::new(pattern) {
304        Ok(r) => r,
305        Err(e) => {
306            return Err(io::Error::new(
307                io::ErrorKind::InvalidInput,
308                format!("invalid regex '{}': {}", pattern, e),
309            ));
310        }
311    };
312
313    let matches = find_regex_matches_backward(data, &re);
314
315    if matches.is_empty() {
316        out.write_all(data)?;
317        return Ok(());
318    }
319
320    let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
321
322    if !before {
323        let last_end = matches.last().unwrap().1;
324
325        if last_end < data.len() {
326            iov.push(IoSlice::new(&data[last_end..]));
327        }
328
329        let mut i = matches.len();
330        while i > 0 {
331            i -= 1;
332            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
333            iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
334            if iov.len() >= MAX_IOV {
335                flush_iov(out, &iov)?;
336                iov.clear();
337            }
338        }
339    } else {
340        let mut i = matches.len();
341        while i > 0 {
342            i -= 1;
343            let start = matches[i].0;
344            let end = if i + 1 < matches.len() {
345                matches[i + 1].0
346            } else {
347                data.len()
348            };
349            iov.push(IoSlice::new(&data[start..end]));
350            if iov.len() >= MAX_IOV {
351                flush_iov(out, &iov)?;
352                iov.clear();
353            }
354        }
355
356        if matches[0].0 > 0 {
357            iov.push(IoSlice::new(&data[..matches[0].0]));
358        }
359    }
360
361    flush_iov(out, &iov)?;
362    Ok(())
363}