Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
/// Max IoSlice entries per write_vectored batch.
/// Linux UIO_MAXIOV is 1024; we use that as our batch limit.
const MAX_IOV: usize = 1024;

/// Write every byte of every slice in `slices` to `out`, in order.
///
/// The whole batch is first offered to `write_vectored`. If that call
/// accepts all bytes we are done; if it accepts only a prefix, the bytes
/// that were not accepted are written with `write_all`, one slice at a time.
///
/// # Errors
///
/// Returns the first error reported by the writer. `ErrorKind::Interrupted`
/// is not treated as a failure: the vectored write is retried, matching what
/// `write_all` already does on the fallback path.
#[inline]
fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
    if slices.is_empty() {
        return Ok(());
    }

    let total: usize = slices.iter().map(|s| s.len()).sum();

    // Fast path: one vectored write frequently takes the whole batch.
    // An interrupted call has written nothing, so it is safe to reissue it.
    let written = loop {
        match out.write_vectored(slices) {
            Ok(n) if n >= total => return Ok(()),
            Ok(n) => break n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    };

    // Slow path: skip the `written` bytes that already went out, then push
    // the remainder through write_all (which loops until done or error).
    let mut skip = written;
    for slice in slices {
        let len = slice.len();
        if skip >= len {
            // Entirely covered by the partial write (or an empty slice).
            skip -= len;
            continue;
        }
        // `skip` is non-zero only for the one slice the partial write split.
        out.write_all(&slice[skip..])?;
        skip = 0;
    }
    Ok(())
}
42
43/// Reverse records separated by a single byte.
44/// Uses backward SIMD scan (memrchr) — zero Vec allocation, single pass.
45/// Output uses write_vectored (writev) for zero-copy from mmap'd data.
46pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
47    if data.is_empty() {
48        return Ok(());
49    }
50    if !before {
51        tac_bytes_backward_after(data, separator, out)
52    } else {
53        tac_bytes_backward_before(data, separator, out)
54    }
55}
56
57/// After-separator mode: backward scan with memrchr.
58/// Each record includes its trailing separator byte.
59/// Uses IoSlice batching for zero-copy output directly from mmap'd data.
60fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
61    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
62
63    let mut end = data.len();
64
65    let Some(mut pos) = memchr::memrchr(sep, data) else {
66        return out.write_all(data);
67    };
68
69    // Trailing content after last separator
70    if pos + 1 < end {
71        iov.push(IoSlice::new(&data[pos + 1..end]));
72    }
73    end = pos + 1;
74
75    // Scan backward for remaining separators
76    while pos > 0 {
77        match memchr::memrchr(sep, &data[..pos]) {
78            Some(prev) => {
79                iov.push(IoSlice::new(&data[prev + 1..end]));
80                if iov.len() >= MAX_IOV {
81                    flush_iov(out, &iov)?;
82                    iov.clear();
83                }
84                end = prev + 1;
85                pos = prev;
86            }
87            None => break,
88        }
89    }
90
91    // First record (from start of data)
92    iov.push(IoSlice::new(&data[0..end]));
93    flush_iov(out, &iov)?;
94
95    Ok(())
96}
97
98/// Before-separator mode: backward scan with memrchr.
99/// Each record starts with its separator byte.
100/// Uses IoSlice batching for zero-copy output.
101fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
102    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
103
104    let mut end = data.len();
105
106    let Some(pos) = memchr::memrchr(sep, data) else {
107        return out.write_all(data);
108    };
109
110    // Last record: from last separator to end
111    iov.push(IoSlice::new(&data[pos..end]));
112    end = pos;
113
114    // Scan backward
115    while end > 0 {
116        match memchr::memrchr(sep, &data[..end]) {
117            Some(prev) => {
118                iov.push(IoSlice::new(&data[prev..end]));
119                if iov.len() >= MAX_IOV {
120                    flush_iov(out, &iov)?;
121                    iov.clear();
122                }
123                end = prev;
124            }
125            None => break,
126        }
127    }
128
129    // Leading content before first separator
130    if end > 0 {
131        iov.push(IoSlice::new(&data[0..end]));
132    }
133
134    flush_iov(out, &iov)?;
135    Ok(())
136}
137
138/// Reverse records using a multi-byte string separator.
139/// Uses backward SIMD-accelerated memmem (FinderRev) + IoSlice zero-copy output.
140pub fn tac_string_separator(
141    data: &[u8],
142    separator: &[u8],
143    before: bool,
144    out: &mut impl Write,
145) -> io::Result<()> {
146    if data.is_empty() {
147        return Ok(());
148    }
149
150    if separator.len() == 1 {
151        return tac_bytes(data, separator[0], before, out);
152    }
153
154    let sep_len = separator.len();
155    let finder = memchr::memmem::FinderRev::new(separator);
156    let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
157
158    if !before {
159        let mut end = data.len();
160
161        let Some(mut pos) = finder.rfind(data) else {
162            return out.write_all(data);
163        };
164
165        // Trailing content after last separator
166        if pos + sep_len < end {
167            iov.push(IoSlice::new(&data[pos + sep_len..end]));
168        }
169        end = pos + sep_len;
170
171        // Scan backward
172        while pos > 0 {
173            match finder.rfind(&data[..pos]) {
174                Some(prev) => {
175                    iov.push(IoSlice::new(&data[prev + sep_len..end]));
176                    if iov.len() >= MAX_IOV {
177                        flush_iov(out, &iov)?;
178                        iov.clear();
179                    }
180                    end = prev + sep_len;
181                    pos = prev;
182                }
183                None => break,
184            }
185        }
186
187        // First record
188        iov.push(IoSlice::new(&data[0..end]));
189    } else {
190        let mut end = data.len();
191
192        let Some(pos) = finder.rfind(data) else {
193            return out.write_all(data);
194        };
195
196        // Last record: from last separator to end
197        iov.push(IoSlice::new(&data[pos..end]));
198        end = pos;
199
200        // Scan backward
201        while end > 0 {
202            match finder.rfind(&data[..end]) {
203                Some(prev) => {
204                    iov.push(IoSlice::new(&data[prev..end]));
205                    if iov.len() >= MAX_IOV {
206                        flush_iov(out, &iov)?;
207                        iov.clear();
208                    }
209                    end = prev;
210                }
211                None => break,
212            }
213        }
214
215        // Leading content before first separator
216        if end > 0 {
217            iov.push(IoSlice::new(&data[0..end]));
218        }
219    }
220
221    flush_iov(out, &iov)?;
222    Ok(())
223}
224
225/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
226fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
227    let mut matches = Vec::new();
228    let mut past_end = data.len();
229
230    while past_end > 0 {
231        let buf = &data[..past_end];
232        let mut found = false;
233
234        let mut pos = past_end;
235        while pos > 0 {
236            pos -= 1;
237            if let Some(m) = re.find_at(buf, pos) {
238                if m.start() == pos {
239                    matches.push((m.start(), m.end()));
240                    past_end = m.start();
241                    found = true;
242                    break;
243                }
244            }
245        }
246
247        if !found {
248            break;
249        }
250    }
251
252    matches.reverse();
253    matches
254}
255
256/// Reverse records using a regex separator.
257pub fn tac_regex_separator(
258    data: &[u8],
259    pattern: &str,
260    before: bool,
261    out: &mut impl Write,
262) -> io::Result<()> {
263    if data.is_empty() {
264        return Ok(());
265    }
266
267    let re = match regex::bytes::Regex::new(pattern) {
268        Ok(r) => r,
269        Err(e) => {
270            return Err(io::Error::new(
271                io::ErrorKind::InvalidInput,
272                format!("invalid regex '{}': {}", pattern, e),
273            ));
274        }
275    };
276
277    let matches = find_regex_matches_backward(data, &re);
278
279    if matches.is_empty() {
280        out.write_all(data)?;
281        return Ok(());
282    }
283
284    let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
285
286    if !before {
287        let last_end = matches.last().unwrap().1;
288
289        if last_end < data.len() {
290            iov.push(IoSlice::new(&data[last_end..]));
291        }
292
293        let mut i = matches.len();
294        while i > 0 {
295            i -= 1;
296            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
297            iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
298            if iov.len() >= MAX_IOV {
299                flush_iov(out, &iov)?;
300                iov.clear();
301            }
302        }
303    } else {
304        let mut i = matches.len();
305        while i > 0 {
306            i -= 1;
307            let start = matches[i].0;
308            let end = if i + 1 < matches.len() {
309                matches[i + 1].0
310            } else {
311                data.len()
312            };
313            iov.push(IoSlice::new(&data[start..end]));
314            if iov.len() >= MAX_IOV {
315                flush_iov(out, &iov)?;
316                iov.clear();
317            }
318        }
319
320        if matches[0].0 > 0 {
321            iov.push(IoSlice::new(&data[..matches[0].0]));
322        }
323    }
324
325    flush_iov(out, &iov)?;
326    Ok(())
327}