Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, IoSlice, Write};
2
3/// Maximum number of iovecs per writev() call (Linux IOV_MAX is 1024).
4const IOV_BATCH: usize = 1024;
5
6/// Write all IoSlices to the writer, handling partial writes.
7/// For large numbers of slices, batches into IOV_BATCH-sized groups.
8fn write_all_slices(out: &mut impl Write, slices: &[IoSlice<'_>]) -> io::Result<()> {
9    // Small number of slices: use simple write_all for each
10    if slices.len() <= 4 {
11        for s in slices {
12            out.write_all(s)?;
13        }
14        return Ok(());
15    }
16
17    let mut offset = 0;
18    while offset < slices.len() {
19        let end = (offset + IOV_BATCH).min(slices.len());
20        let n = out.write_vectored(&slices[offset..end])?;
21        if n == 0 {
22            return Err(io::Error::new(
23                io::ErrorKind::WriteZero,
24                "failed to write any data",
25            ));
26        }
27        let mut remaining = n;
28        while offset < end && remaining >= slices[offset].len() {
29            remaining -= slices[offset].len();
30            offset += 1;
31        }
32        if remaining > 0 && offset < end {
33            out.write_all(&slices[offset][remaining..])?;
34            offset += 1;
35        }
36    }
37    Ok(())
38}
39
40/// Reverse records separated by a single byte.
41/// Uses forward SIMD scan (memchr_iter) to collect separator positions,
42/// then builds IoSlice references in reverse order for zero-copy writev output.
43/// No output buffer allocation — references mmap'd data directly.
44pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
45    if data.is_empty() {
46        return Ok(());
47    }
48
49    // Forward SIMD scan: collect all separator positions in one pass.
50    let positions: Vec<usize> = memchr::memchr_iter(separator, data).collect();
51
52    if positions.is_empty() {
53        return out.write_all(data);
54    }
55
56    // Build IoSlice references in reverse order, write in batches.
57    // Zero-copy: references mmap'd data directly, no output buffer needed.
58    let mut batch: Vec<IoSlice<'_>> = Vec::with_capacity(IOV_BATCH.min(positions.len() + 2));
59
60    if !before {
61        // separator-after mode: records end with separator
62        let last_sep = *positions.last().unwrap();
63
64        // Trailing content without separator — output first
65        if last_sep + 1 < data.len() {
66            batch.push(IoSlice::new(&data[last_sep + 1..]));
67        }
68
69        // Records in reverse: each record is from (prev_sep+1) to (cur_sep+1)
70        for i in (0..positions.len()).rev() {
71            let start = if i == 0 { 0 } else { positions[i - 1] + 1 };
72            let end = positions[i] + 1;
73            batch.push(IoSlice::new(&data[start..end]));
74            if batch.len() >= IOV_BATCH {
75                write_all_slices(out, &batch)?;
76                batch.clear();
77            }
78        }
79    } else {
80        // separator-before mode: records start with separator
81        for i in (0..positions.len()).rev() {
82            let start = positions[i];
83            let end = if i + 1 < positions.len() {
84                positions[i + 1]
85            } else {
86                data.len()
87            };
88            batch.push(IoSlice::new(&data[start..end]));
89            if batch.len() >= IOV_BATCH {
90                write_all_slices(out, &batch)?;
91                batch.clear();
92            }
93        }
94
95        // Leading content before first separator
96        if positions[0] > 0 {
97            batch.push(IoSlice::new(&data[..positions[0]]));
98        }
99    }
100
101    if !batch.is_empty() {
102        write_all_slices(out, &batch)?;
103    }
104
105    Ok(())
106}
107
108/// Reverse records using a multi-byte string separator.
109/// Uses SIMD-accelerated memmem for substring search + IoSlice/writev output.
110pub fn tac_string_separator(
111    data: &[u8],
112    separator: &[u8],
113    before: bool,
114    out: &mut impl Write,
115) -> io::Result<()> {
116    if data.is_empty() {
117        return Ok(());
118    }
119
120    if separator.len() == 1 {
121        return tac_bytes(data, separator[0], before, out);
122    }
123
124    // Find all occurrences of the separator using SIMD-accelerated memmem
125    let positions: Vec<usize> = memchr::memmem::find_iter(data, separator).collect();
126
127    if positions.is_empty() {
128        return out.write_all(data);
129    }
130
131    let sep_len = separator.len();
132
133    // Build IoSlice references in reverse order, write in batches.
134    let mut batch: Vec<IoSlice<'_>> = Vec::with_capacity(IOV_BATCH.min(positions.len() + 2));
135
136    if !before {
137        let last_end = positions.last().unwrap() + sep_len;
138        if last_end < data.len() {
139            batch.push(IoSlice::new(&data[last_end..]));
140        }
141        for i in (0..positions.len()).rev() {
142            let rec_start = if i == 0 {
143                0
144            } else {
145                positions[i - 1] + sep_len
146            };
147            batch.push(IoSlice::new(&data[rec_start..positions[i] + sep_len]));
148            if batch.len() >= IOV_BATCH {
149                write_all_slices(out, &batch)?;
150                batch.clear();
151            }
152        }
153    } else {
154        for i in (0..positions.len()).rev() {
155            let start = positions[i];
156            let end = if i + 1 < positions.len() {
157                positions[i + 1]
158            } else {
159                data.len()
160            };
161            batch.push(IoSlice::new(&data[start..end]));
162            if batch.len() >= IOV_BATCH {
163                write_all_slices(out, &batch)?;
164                batch.clear();
165            }
166        }
167        if positions[0] > 0 {
168            batch.push(IoSlice::new(&data[..positions[0]]));
169        }
170    }
171
172    if !batch.is_empty() {
173        write_all_slices(out, &batch)?;
174    }
175
176    Ok(())
177}
178
179/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
180fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
181    let mut matches = Vec::new();
182    let mut past_end = data.len();
183
184    while past_end > 0 {
185        let buf = &data[..past_end];
186        let mut found = false;
187
188        let mut pos = past_end;
189        while pos > 0 {
190            pos -= 1;
191            if let Some(m) = re.find_at(buf, pos) {
192                if m.start() == pos {
193                    matches.push((m.start(), m.end()));
194                    past_end = m.start();
195                    found = true;
196                    break;
197                }
198            }
199        }
200
201        if !found {
202            break;
203        }
204    }
205
206    matches.reverse();
207    matches
208}
209
210/// Reverse records using a regex separator.
211pub fn tac_regex_separator(
212    data: &[u8],
213    pattern: &str,
214    before: bool,
215    out: &mut impl Write,
216) -> io::Result<()> {
217    if data.is_empty() {
218        return Ok(());
219    }
220
221    let re = match regex::bytes::Regex::new(pattern) {
222        Ok(r) => r,
223        Err(e) => {
224            return Err(io::Error::new(
225                io::ErrorKind::InvalidInput,
226                format!("invalid regex '{}': {}", pattern, e),
227            ));
228        }
229    };
230
231    let matches = find_regex_matches_backward(data, &re);
232
233    if matches.is_empty() {
234        out.write_all(data)?;
235        return Ok(());
236    }
237
238    // Build IoSlice references in reverse order, write in batches.
239    let mut batch: Vec<IoSlice<'_>> = Vec::with_capacity(IOV_BATCH);
240
241    if !before {
242        let last_end = matches.last().unwrap().1;
243
244        if last_end < data.len() {
245            batch.push(IoSlice::new(&data[last_end..]));
246        }
247
248        let mut i = matches.len();
249        while i > 0 {
250            i -= 1;
251            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
252            batch.push(IoSlice::new(&data[rec_start..matches[i].1]));
253            if batch.len() >= IOV_BATCH {
254                write_all_slices(out, &batch)?;
255                batch.clear();
256            }
257        }
258    } else {
259        let mut i = matches.len();
260        while i > 0 {
261            i -= 1;
262            let start = matches[i].0;
263            let end = if i + 1 < matches.len() {
264                matches[i + 1].0
265            } else {
266                data.len()
267            };
268            batch.push(IoSlice::new(&data[start..end]));
269            if batch.len() >= IOV_BATCH {
270                write_all_slices(out, &batch)?;
271                batch.clear();
272            }
273        }
274
275        if matches[0].0 > 0 {
276            batch.push(IoSlice::new(&data[..matches[0].0]));
277        }
278    }
279
280    if !batch.is_empty() {
281        write_all_slices(out, &batch)?;
282    }
283
284    Ok(())
285}