Skip to main content

coreutils_rs/rev/
core.rs

1use std::io::Write;
2
3/// Reverse each line in the input data and write to output.
4/// Lines are delimited by newline (b'\n').
5/// ASCII lines are reversed byte-by-byte (fast path).
6/// Non-ASCII lines are reversed by Unicode characters.
7pub fn rev_bytes(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
8    if data.is_empty() {
9        return Ok(());
10    }
11
12    // Pre-allocate output buffer same size as input
13    // Use 256KB flush threshold for batched output
14    const FLUSH_THRESHOLD: usize = 256 * 1024;
15    let mut output = Vec::with_capacity(data.len().min(FLUSH_THRESHOLD * 2));
16    let mut start = 0;
17
18    for pos in memchr::memchr_iter(b'\n', data) {
19        let line = &data[start..pos];
20        reverse_line(line, &mut output);
21        output.push(b'\n');
22        start = pos + 1;
23
24        // Flush in batches for large files
25        if output.len() >= FLUSH_THRESHOLD {
26            out.write_all(&output)?;
27            output.clear();
28        }
29    }
30
31    // Handle last line without trailing newline
32    if start < data.len() {
33        let line = &data[start..];
34        reverse_line(line, &mut output);
35    }
36
37    if !output.is_empty() {
38        out.write_all(&output)?;
39    }
40    Ok(())
41}
42
43/// Reverse a single line (without the newline delimiter).
44/// Fast path for pure ASCII, slow path for UTF-8 multibyte.
45#[inline]
46fn reverse_line(line: &[u8], output: &mut Vec<u8>) {
47    if line.is_empty() {
48        return;
49    }
50
51    // Check if all bytes are ASCII (< 128)
52    if is_ascii(line) {
53        // ASCII fast path: reverse bytes directly
54        let start = output.len();
55        output.extend_from_slice(line);
56        output[start..].reverse();
57    } else {
58        // UTF-8 path: reverse by characters without intermediate Vec
59        match std::str::from_utf8(line) {
60            Ok(s) => {
61                // Pre-reserve space
62                output.reserve(line.len());
63                // Write reversed chars directly
64                for ch in s.chars().rev() {
65                    let mut buf = [0u8; 4];
66                    let encoded = ch.encode_utf8(&mut buf);
67                    output.extend_from_slice(encoded.as_bytes());
68                }
69            }
70            Err(_) => {
71                // Invalid UTF-8: reverse bytes (same as GNU rev behavior)
72                let start = output.len();
73                output.extend_from_slice(line);
74                output[start..].reverse();
75            }
76        }
77    }
78}
79
/// Report whether every byte in `data` is ASCII (high bit clear, i.e. < 128).
///
/// An empty slice counts as ASCII. This delegates to the standard library's
/// `<[u8]>::is_ascii` instead of a hand-written 8-byte-word scan, which also
/// removes the `try_into().unwrap()` conversion on each chunk.
#[inline]
fn is_ascii(data: &[u8]) -> bool {
    data.is_ascii()
}