Skip to main content

coreutils_rs/tac/
core.rs

1use std::io::{self, Write};
2
3/// Reverse the records in `data` separated by a single byte `separator` and write to `out`.
4/// If `before` is true, the separator is attached before the record instead of after.
5/// Uses forward memchr scan for SIMD-accelerated separator finding with optimal prefetch.
6pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
7    if data.is_empty() {
8        return Ok(());
9    }
10
11    // Forward SIMD scan to collect all separator positions — better prefetch than backward scanning
12    let positions: Vec<usize> = memchr::memchr_iter(separator, data).collect();
13
14    if positions.is_empty() {
15        out.write_all(data)?;
16        return Ok(());
17    }
18
19    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
20
21    if !before {
22        // Default mode: separator is AFTER the record (like newline at end of line)
23        let has_trailing_sep = *positions.last().unwrap() == data.len() - 1;
24
25        // Trailing content without separator — GNU tac appends the separator
26        if !has_trailing_sep {
27            let last_sep = *positions.last().unwrap();
28            buf.write_all(&data[last_sep + 1..])?;
29            buf.write_all(&[separator])?;
30        }
31
32        // Records in reverse order
33        let mut i = positions.len();
34        while i > 0 {
35            i -= 1;
36            let end = positions[i] + 1; // include separator
37            let start = if i == 0 { 0 } else { positions[i - 1] + 1 };
38            buf.write_all(&data[start..end])?;
39        }
40    } else {
41        // Before mode: separator is BEFORE the record
42        // Write records in reverse
43        let mut i = positions.len();
44        while i > 0 {
45            i -= 1;
46            let start = positions[i];
47            let end = if i + 1 < positions.len() {
48                positions[i + 1]
49            } else {
50                data.len()
51            };
52            buf.write_all(&data[start..end])?;
53        }
54
55        // Leading content before first separator
56        if positions[0] > 0 {
57            buf.write_all(&data[..positions[0]])?;
58        }
59    }
60
61    buf.flush()?;
62    Ok(())
63}
64
65/// Reverse records using a multi-byte string separator.
66/// Uses SIMD-accelerated memmem for substring search.
67pub fn tac_string_separator(
68    data: &[u8],
69    separator: &[u8],
70    before: bool,
71    out: &mut impl Write,
72) -> io::Result<()> {
73    if data.is_empty() {
74        return Ok(());
75    }
76
77    if separator.len() == 1 {
78        return tac_bytes(data, separator[0], before, out);
79    }
80
81    // Find all occurrences of the separator using SIMD-accelerated memmem
82    let positions: Vec<usize> = memchr::memmem::find_iter(data, separator).collect();
83
84    if positions.is_empty() {
85        out.write_all(data)?;
86        return Ok(());
87    }
88
89    let sep_len = separator.len();
90    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
91
92    if !before {
93        // Default: separator after record
94        let last_end = positions.last().unwrap() + sep_len;
95        let has_trailing_sep = last_end == data.len();
96
97        // Trailing chunk without separator
98        if !has_trailing_sep {
99            buf.write_all(&data[last_end..])?;
100            buf.write_all(separator)?;
101        }
102
103        // Records in reverse
104        let mut i = positions.len();
105        while i > 0 {
106            i -= 1;
107            let sep_start = positions[i];
108            let rec_start = if i == 0 {
109                0
110            } else {
111                positions[i - 1] + sep_len
112            };
113            buf.write_all(&data[rec_start..sep_start + sep_len])?;
114        }
115    } else {
116        // Before mode: separator before record
117        let mut i = positions.len();
118        while i > 0 {
119            i -= 1;
120            let start = positions[i];
121            let end = if i + 1 < positions.len() {
122                positions[i + 1]
123            } else {
124                data.len()
125            };
126            buf.write_all(&data[start..end])?;
127        }
128
129        if positions[0] > 0 {
130            buf.write_all(&data[..positions[0]])?;
131        }
132    }
133
134    buf.flush()?;
135    Ok(())
136}
137
138/// Reverse records using a regex separator.
139/// Uses regex::bytes for direct byte-level matching (no UTF-8 conversion needed).
140pub fn tac_regex_separator(
141    data: &[u8],
142    pattern: &str,
143    before: bool,
144    out: &mut impl Write,
145) -> io::Result<()> {
146    if data.is_empty() {
147        return Ok(());
148    }
149
150    let re = match regex::bytes::Regex::new(pattern) {
151        Ok(r) => r,
152        Err(e) => {
153            return Err(io::Error::new(
154                io::ErrorKind::InvalidInput,
155                format!("invalid regex '{}': {}", pattern, e),
156            ));
157        }
158    };
159
160    // Collect all match positions (start, end) in forward order
161    let matches: Vec<(usize, usize)> = re.find_iter(data).map(|m| (m.start(), m.end())).collect();
162
163    if matches.is_empty() {
164        out.write_all(data)?;
165        return Ok(());
166    }
167
168    let mut buf = io::BufWriter::with_capacity(1024 * 1024, out);
169
170    if !before {
171        let last_end = matches.last().unwrap().1;
172        let has_trailing_sep = last_end == data.len();
173
174        // Trailing content after last separator
175        if !has_trailing_sep {
176            buf.write_all(&data[last_end..])?;
177            // Append the last separator match to close this record
178            let last_match = matches.last().unwrap();
179            buf.write_all(&data[last_match.0..last_match.1])?;
180        }
181
182        // Records in reverse
183        let mut i = matches.len();
184        while i > 0 {
185            i -= 1;
186            let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
187            let rec_end = matches[i].1;
188            buf.write_all(&data[rec_start..rec_end])?;
189        }
190    } else {
191        // Before mode
192        let mut i = matches.len();
193        while i > 0 {
194            i -= 1;
195            let start = matches[i].0;
196            let end = if i + 1 < matches.len() {
197                matches[i + 1].0
198            } else {
199                data.len()
200            };
201            buf.write_all(&data[start..end])?;
202        }
203
204        if matches[0].0 > 0 {
205            buf.write_all(&data[..matches[0].0])?;
206        }
207    }
208
209    buf.flush()?;
210    Ok(())
211}