Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, Write};
2
3/// Reverse the records in `data` separated by a single byte `separator` and write to `out`.
4/// If `before` is true, the separator is attached before the record instead of after.
5/// Uses forward memchr scan for SIMD-accelerated separator finding with optimal prefetch.
6pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
7    if data.is_empty() {
8        return Ok(());
9    }
10
11    // Forward SIMD scan to collect all separator positions — better prefetch than backward scanning
12    let positions: Vec<usize> = memchr::memchr_iter(separator, data).collect();
13
14    if positions.is_empty() {
15        out.write_all(data)?;
16        return Ok(());
17    }
18
19    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
20
21    if !before {
22        // Default mode: separator is AFTER the record (like newline at end of line)
23        let has_trailing_sep = *positions.last().unwrap() == data.len() - 1;
24
25        // Trailing content without separator — write without adding separator
26        // (it gets concatenated with the next reversed record, matching GNU behavior)
27        if !has_trailing_sep {
28            let last_sep = *positions.last().unwrap();
29            buf.write_all(&data[last_sep + 1..])?;
30        }
31
32        // Records in reverse order
33        let mut i = positions.len();
34        while i > 0 {
35            i -= 1;
36            let end = positions[i] + 1; // include separator
37            let start = if i == 0 { 0 } else { positions[i - 1] + 1 };
38            buf.write_all(&data[start..end])?;
39        }
40    } else {
41        // Before mode: separator is BEFORE the record
42        // Write records in reverse
43        let mut i = positions.len();
44        while i > 0 {
45            i -= 1;
46            let start = positions[i];
47            let end = if i + 1 < positions.len() {
48                positions[i + 1]
49            } else {
50                data.len()
51            };
52            buf.write_all(&data[start..end])?;
53        }
54
55        // Leading content before first separator
56        if positions[0] > 0 {
57            buf.write_all(&data[..positions[0]])?;
58        }
59    }
60
61    buf.flush()?;
62    Ok(())
63}
64
65/// Reverse records using a multi-byte string separator.
66/// Uses SIMD-accelerated memmem for substring search.
67pub fn tac_string_separator(
68    data: &[u8],
69    separator: &[u8],
70    before: bool,
71    out: &mut impl Write,
72) -> io::Result<()> {
73    if data.is_empty() {
74        return Ok(());
75    }
76
77    if separator.len() == 1 {
78        return tac_bytes(data, separator[0], before, out);
79    }
80
81    // Find all occurrences of the separator using SIMD-accelerated memmem
82    let positions: Vec<usize> = memchr::memmem::find_iter(data, separator).collect();
83
84    if positions.is_empty() {
85        out.write_all(data)?;
86        return Ok(());
87    }
88
89    let sep_len = separator.len();
90    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
91
92    if !before {
93        // Default: separator after record
94        let last_end = positions.last().unwrap() + sep_len;
95        let has_trailing_sep = last_end == data.len();
96
97        // Trailing chunk without separator — write without adding separator
98        if !has_trailing_sep {
99            buf.write_all(&data[last_end..])?;
100        }
101
102        // Records in reverse
103        let mut i = positions.len();
104        while i > 0 {
105            i -= 1;
106            let sep_start = positions[i];
107            let rec_start = if i == 0 {
108                0
109            } else {
110                positions[i - 1] + sep_len
111            };
112            buf.write_all(&data[rec_start..sep_start + sep_len])?;
113        }
114    } else {
115        // Before mode: separator before record
116        let mut i = positions.len();
117        while i > 0 {
118            i -= 1;
119            let start = positions[i];
120            let end = if i + 1 < positions.len() {
121                positions[i + 1]
122            } else {
123                data.len()
124            };
125            buf.write_all(&data[start..end])?;
126        }
127
128        if positions[0] > 0 {
129            buf.write_all(&data[..positions[0]])?;
130        }
131    }
132
133    buf.flush()?;
134    Ok(())
135}
136
137/// Find regex matches using backward scanning, matching GNU tac's re_search behavior.
138/// GNU tac scans backward from the end, finding the rightmost starting position first.
139/// This produces different matches than forward scanning for patterns like [0-9]+.
140/// The matches are returned in left-to-right order.
141fn find_regex_matches_backward(data: &[u8], re: &regex::bytes::Regex) -> Vec<(usize, usize)> {
142    let mut matches = Vec::new();
143    let mut past_end = data.len();
144
145    while past_end > 0 {
146        let buf = &data[..past_end];
147        let mut found = false;
148
149        // Scan backward: try positions from past_end-1 down to 0
150        // We need the LAST match starting position in buf, so we try from the end
151        let mut pos = past_end;
152        while pos > 0 {
153            pos -= 1;
154            if let Some(m) = re.find_at(buf, pos) {
155                if m.start() == pos {
156                    // Match starts at exactly this position — this is the rightmost match start
157                    matches.push((m.start(), m.end()));
158                    past_end = m.start();
159                    found = true;
160                    break;
161                }
162                // Match starts later than pos — skip to before that match
163                // No point checking positions between pos and m.start() since
164                // find_at already told us the leftmost match from pos starts at m.start()
165                // But we need matches that START before m.start(), so continue decrementing
166            }
167            // If None, there's no match at pos or later, but there might be one earlier
168            // (find_at only searches forward from pos)
169        }
170
171        if !found {
172            break;
173        }
174    }
175
176    matches.reverse(); // Convert from backward order to left-to-right order
177    matches
178}
179
180/// Reverse records using a regex separator.
181/// Uses regex::bytes for direct byte-level matching (no UTF-8 conversion needed).
182/// NOTE: GNU tac uses POSIX Basic Regular Expressions (BRE), so we convert to ERE first.
183/// Uses backward scanning to match GNU tac's re_search behavior.
184pub fn tac_regex_separator(
185    data: &[u8],
186    pattern: &str,
187    before: bool,
188    out: &mut impl Write,
189) -> io::Result<()> {
190    if data.is_empty() {
191        return Ok(());
192    }
193
194    let re = match regex::bytes::Regex::new(pattern) {
195        Ok(r) => r,
196        Err(e) => {
197            return Err(io::Error::new(
198                io::ErrorKind::InvalidInput,
199                format!("invalid regex '{}': {}", pattern, e),
200            ));
201        }
202    };
203
204    // Use backward scanning to match GNU tac's re_search behavior
205    let matches = find_regex_matches_backward(data, &re);
206
207    if matches.is_empty() {
208        out.write_all(data)?;
209        return Ok(());
210    }
211
212    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
213
214    if !before {
215        let last_end = matches.last().unwrap().1;
216        let has_trailing_sep = last_end == data.len();
217
218        // Trailing content after last separator — write without adding separator
219        if !has_trailing_sep {
220            buf.write_all(&data[last_end..])?;
221        }
222
223        // Records in reverse: each record = text + separator
224        let mut i = matches.len();
225        while i > 0 {
226            i -= 1;
227            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
228            let rec_end = matches[i].1;
229            buf.write_all(&data[rec_start..rec_end])?;
230        }
231    } else {
232        // Before mode: separator before record
233        let mut i = matches.len();
234        while i > 0 {
235            i -= 1;
236            let start = matches[i].0;
237            let end = if i + 1 < matches.len() {
238                matches[i + 1].0
239            } else {
240                data.len()
241            };
242            buf.write_all(&data[start..end])?;
243        }
244
245        if matches[0].0 > 0 {
246            buf.write_all(&data[..matches[0].0])?;
247        }
248    }
249
250    buf.flush()?;
251    Ok(())
252}