Skip to main content

coreutils_rs/rev/
core.rs

1use std::io::Write;
2
3/// Reverse each line in the input data and write to output.
4/// Lines are delimited by newline (b'\n').
5/// ASCII lines are reversed byte-by-byte (fast path).
6/// Non-ASCII lines are reversed by Unicode characters.
7pub fn rev_bytes(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
8    if data.is_empty() {
9        return Ok(());
10    }
11
12    // Pre-allocate output buffer same size as input
13    let mut output = Vec::with_capacity(data.len());
14    let mut start = 0;
15
16    for pos in memchr::memchr_iter(b'\n', data) {
17        let line = &data[start..pos];
18        reverse_line(line, &mut output);
19        output.push(b'\n');
20        start = pos + 1;
21    }
22
23    // Handle last line without trailing newline
24    if start < data.len() {
25        let line = &data[start..];
26        reverse_line(line, &mut output);
27    }
28
29    out.write_all(&output)
30}
31
32/// Reverse a single line (without the newline delimiter).
33/// Fast path for pure ASCII, slow path for UTF-8 multibyte.
34#[inline]
35fn reverse_line(line: &[u8], output: &mut Vec<u8>) {
36    if line.is_empty() {
37        return;
38    }
39
40    // Check if all bytes are ASCII (< 128)
41    if is_ascii(line) {
42        // ASCII fast path: reverse bytes directly
43        let start = output.len();
44        output.extend_from_slice(line);
45        output[start..].reverse();
46    } else {
47        // UTF-8 path: reverse by characters
48        // Use unsafe from_utf8_unchecked only if valid UTF-8, otherwise reverse bytes
49        match std::str::from_utf8(line) {
50            Ok(s) => {
51                // Reverse chars
52                let chars: Vec<char> = s.chars().rev().collect();
53                for ch in chars {
54                    let mut buf = [0u8; 4];
55                    let encoded = ch.encode_utf8(&mut buf);
56                    output.extend_from_slice(encoded.as_bytes());
57                }
58            }
59            Err(_) => {
60                // Invalid UTF-8: reverse bytes (same as GNU rev behavior)
61                let start = output.len();
62                output.extend_from_slice(line);
63                output[start..].reverse();
64            }
65        }
66    }
67}
68
/// Returns `true` when every byte in `data` is ASCII (value below 128).
///
/// An empty slice counts as ASCII. The check is delegated to the standard
/// library's slice `is_ascii` rather than a hand-written word loop, which
/// also removes the `try_into().unwrap()` the manual version needed.
#[inline]
fn is_ascii(data: &[u8]) -> bool {
    data.is_ascii()
}