rust_expect/util/
bytes.rs

1//! Byte manipulation utilities.
2//!
3//! This module provides utilities for working with byte sequences,
4//! including pattern matching, escaping, and conversion.
5
6use std::fmt::{self, Write};
7
/// Render `data` as a hexdump with offsets, hex bytes, and an ASCII gutter.
///
/// Every output line covers up to 16 input bytes and ends with `\n`:
/// an 8-digit lowercase hex offset, the bytes as two-digit lowercase hex
/// (with one extra space after the eighth column), and the same bytes between
/// `|` markers where anything other than printable ASCII or a space is shown
/// as `.`. A short final row is space-padded so the gutter stays aligned.
///
/// Empty input produces an empty string.
#[must_use]
pub fn hexdump(data: &[u8]) -> String {
    const ROW_WIDTH: usize = 16;

    let mut out = String::new();

    for (row, bytes) in data.chunks(ROW_WIDTH).enumerate() {
        // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
        let _ = write!(out, "{:08x}  ", row * ROW_WIDTH);

        // One pass over all 16 columns: real bytes print as hex, missing
        // columns of a short row print as blanks of the same width.
        for col in 0..ROW_WIDTH {
            match bytes.get(col) {
                Some(value) => {
                    let _ = write!(out, "{value:02x} ");
                }
                None => out.push_str("   "),
            }
            // Visual gap between the two groups of eight columns.
            if col == 7 {
                out.push(' ');
            }
        }

        out.push_str(" |");
        out.extend(bytes.iter().map(|&value| {
            if value == b' ' || value.is_ascii_graphic() {
                char::from(value)
            } else {
                '.'
            }
        }));
        out.push_str("|\n");
    }

    out
}
50
/// Produce a printable, backslash-escaped rendering of `data`.
///
/// Mapping, applied byte by byte:
/// - newline, carriage return, tab, NUL, backslash, ESC (0x1b), BEL (0x07) and
///   backspace (0x08) become `\n`, `\r`, `\t`, `\0`, `\\`, `\e`, `\a`, `\b`;
/// - printable ASCII and the space character are copied unchanged;
/// - every other byte becomes `\x` followed by two lowercase hex digits.
#[must_use]
pub fn escape_bytes(data: &[u8]) -> String {
    let mut escaped = String::new();

    for &value in data {
        // Bytes with a dedicated mnemonic take priority over the generic rules.
        let mnemonic = match value {
            b'\n' => Some("\\n"),
            b'\r' => Some("\\r"),
            b'\t' => Some("\\t"),
            0x00 => Some("\\0"),
            b'\\' => Some("\\\\"),
            0x1b => Some("\\e"),
            0x07 => Some("\\a"),
            0x08 => Some("\\b"),
            _ => None,
        };

        if let Some(text) = mnemonic {
            escaped.push_str(text);
        } else if value == b' ' || value.is_ascii_graphic() {
            escaped.push(char::from(value));
        } else {
            // Writing into a `String` cannot fail, so the result is discarded.
            let _ = write!(escaped, "\\x{value:02x}");
        }
    }

    escaped
}
75
/// Parse a backslash-escaped string back into raw bytes.
///
/// Recognized escapes:
/// - `\n`, `\r`, `\t`, `\0`, `\\`, `\e` (0x1b), `\a` (0x07), `\b` (0x08);
/// - `\x` followed by one or two hexadecimal digits (either case), which
///   decodes to the byte with that value.
///
/// Parsing is lenient and never discards input:
/// - a `\x` that is not followed by a hex digit is kept literally as `\x`,
///   and the characters after it are processed normally;
/// - a backslash followed by any other character is kept as the backslash
///   plus that character;
/// - a trailing lone backslash is kept as a backslash.
///
/// All unescaped characters are emitted as their UTF-8 encoding.
#[must_use]
pub fn unescape_bytes(s: &str) -> Vec<u8> {
    // Every escape is at least as long as the bytes it decodes to, so the
    // input length is an upper bound on the output length.
    let mut result = Vec::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    let mut utf8 = [0u8; 4];

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            continue;
        }

        match chars.next() {
            Some('n') => result.push(b'\n'),
            Some('r') => result.push(b'\r'),
            Some('t') => result.push(b'\t'),
            Some('0') => result.push(b'\0'),
            Some('\\') => result.push(b'\\'),
            Some('e') => result.push(0x1b),
            Some('a') => result.push(0x07),
            Some('b') => result.push(0x08),
            Some('x') => {
                // Peek before consuming so that characters which are not hex
                // digits stay in the stream instead of being swallowed. Only
                // real hex digits are accepted (no sign characters).
                let mut value = 0u8;
                let mut digits = 0;
                while digits < 2 {
                    match chars.peek().and_then(|next| next.to_digit(16)) {
                        Some(nibble) => {
                            // `nibble` is below 16 and at most two are folded
                            // in, so the value always fits in a `u8`.
                            value = value * 16 + nibble as u8;
                            digits += 1;
                            chars.next();
                        }
                        None => break,
                    }
                }

                if digits == 0 {
                    // Malformed escape: keep it verbatim rather than drop it.
                    result.extend_from_slice(b"\\x");
                } else {
                    result.push(value);
                }
            }
            Some(other) => {
                // Unknown escape: keep the backslash and the character as-is.
                result.push(b'\\');
                result.extend_from_slice(other.encode_utf8(&mut utf8).as_bytes());
            }
            None => result.push(b'\\'),
        }
    }

    result
}
114
/// Locate the first occurrence of `needle` inside `haystack`.
///
/// Returns the byte offset of the first match, or `None` when `needle` does
/// not occur. An empty `needle` matches at offset 0, even in an empty
/// `haystack`.
#[must_use]
pub fn find_pattern(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle.len() {
        0 => Some(0),
        // A needle longer than the haystack can never match.
        width if width > haystack.len() => None,
        width => haystack
            .windows(width)
            .position(|candidate| candidate == needle),
    }
}
129
/// Collect the start offsets of every occurrence of `needle` in `haystack`.
///
/// Offsets are returned in ascending order and overlapping matches are all
/// reported (searching `aaaa` for `aa` yields `[0, 1, 2]`). An empty `needle`,
/// or one longer than `haystack`, yields an empty vector.
#[must_use]
pub fn find_all_patterns(haystack: &[u8], needle: &[u8]) -> Vec<usize> {
    let width = needle.len();
    if width == 0 || width > haystack.len() {
        return Vec::new();
    }

    // Try every start position at which a full-width window still fits.
    let last_start = haystack.len() - width;
    (0..=last_start)
        .filter(|&start| &haystack[start..start + width] == needle)
        .collect()
}
143
/// Return a copy of `haystack` with every occurrence of `needle` replaced by
/// `replacement`.
///
/// Matching proceeds left to right and resumes after each replaced
/// occurrence, so matches never overlap and replaced text is not rescanned.
/// An empty `needle` matches nothing: the input is returned unchanged.
#[must_use]
pub fn replace_pattern(haystack: &[u8], needle: &[u8], replacement: &[u8]) -> Vec<u8> {
    if needle.is_empty() {
        return haystack.to_vec();
    }

    let mut output = Vec::with_capacity(haystack.len());
    // `rest` is the part of the input that has not been emitted yet.
    let mut rest = haystack;

    while let Some((&first, tail)) = rest.split_first() {
        if rest.starts_with(needle) {
            output.extend_from_slice(replacement);
            rest = &rest[needle.len()..];
        } else {
            output.push(first);
            rest = tail;
        }
    }

    output
}
166
/// Strip ANSI escape sequences from bytes.
///
/// Two forms are removed:
/// - CSI sequences: `ESC [`, any number of parameter/intermediate bytes, and
///   one final byte in the range `0x40..=0x7E` (`@` through `~`). This covers
///   letter-terminated sequences such as `ESC [ 31 m` as well as ones that end
///   in punctuation such as `ESC [ 2 ~`.
/// - Any other escape: `ESC` plus the single byte that follows it.
///
/// A sequence cut off by the end of the input is removed up to the end of
/// the input. All other bytes are copied through unchanged.
///
/// Multi-byte string sequences (for example OSC, `ESC ] ... BEL`) are not
/// recognized as a unit: only their two-byte introducer is removed.
#[must_use]
pub fn strip_ansi(data: &[u8]) -> Vec<u8> {
    const ESC: u8 = 0x1b;

    let mut result = Vec::with_capacity(data.len());
    let mut i = 0;

    while i < data.len() {
        if data[i] != ESC {
            result.push(data[i]);
            i += 1;
            continue;
        }

        if data.get(i + 1) == Some(&b'[') {
            // CSI: skip the introducer, then everything up to and including
            // the final byte. `i + 2 <= data.len()` holds here, so the slice
            // below is always in bounds.
            i += 2;
            match data[i..].iter().position(|byte| (0x40..=0x7e).contains(byte)) {
                Some(offset) => i += offset + 1,
                // Unterminated sequence: nothing after it is plain text.
                None => i = data.len(),
            }
        } else {
            // Simple escape: ESC plus one byte. Overshooting the end when ESC
            // is the last byte is harmless; the loop condition ends the scan.
            i += 2;
        }
    }

    result
}
197
198/// A wrapper for bytes that implements Display with escaping.
199pub struct EscapedBytes<'a>(pub &'a [u8]);
200
201impl fmt::Display for EscapedBytes<'_> {
202    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203        write!(f, "{}", escape_bytes(self.0))
204    }
205}
206
207impl fmt::Debug for EscapedBytes<'_> {
208    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
209        write!(f, "\"{}\"", escape_bytes(self.0))
210    }
211}
212
/// Decode `data` as lossy UTF-8 and make control characters visible.
///
/// Invalid UTF-8 is replaced by U+FFFD during decoding. Afterwards:
/// - newline and tab are kept as-is;
/// - control characters with code 0 through 26 are shown in caret notation
///   (`^@`, `^A` ... `^Z`);
/// - any other control character is shown as `\x` followed by its code point
///   in lowercase hex, at least two digits wide;
/// - everything else is copied unchanged.
#[must_use]
pub fn to_visible_string(data: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(data);
    let mut visible = String::new();

    for ch in decoded.chars() {
        match ch {
            // Whitespace that stays readable on its own.
            '\n' | '\t' => visible.push(ch),
            // Caret notation: offset the code from '@' (0 -> '@', 1 -> 'A', ...).
            // Every character in this range is a control character and fits in a `u8`.
            '\0'..='\x1a' => {
                visible.push('^');
                visible.push(char::from(b'@' + ch as u8));
            }
            _ if ch.is_control() => {
                // Writing into a `String` cannot fail, so the result is discarded.
                let _ = write!(visible, "\\x{:02x}", u32::from(ch));
            }
            _ => visible.push(ch),
        }
    }

    visible
}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// The dump shows both the hex column and the ASCII gutter.
    #[test]
    fn test_hexdump() {
        let dump = hexdump(b"Hello, World!");
        // "Hell" rendered as hex bytes.
        assert!(dump.contains("48 65 6c 6c"));
        assert!(dump.contains("|Hello, World!|"));
    }

    /// Escaping followed by unescaping returns the original bytes.
    #[test]
    fn test_escape_unescape() {
        let original: &[u8] = b"Hello\n\tWorld\x1b[31m";
        let round_tripped = unescape_bytes(&escape_bytes(original));
        assert_eq!(original, round_tripped.as_slice());
    }

    /// A present needle reports its offset; an absent one reports `None`.
    #[test]
    fn test_find_pattern() {
        let haystack = b"Hello, World!";
        assert_eq!(find_pattern(haystack, b"World"), Some(7));
        assert_eq!(find_pattern(haystack, b"foo"), None);
    }

    /// The matched word is swapped for the replacement.
    #[test]
    fn test_replace_pattern() {
        let replaced = replace_pattern(b"Hello, World!", b"World", b"Rust");
        assert_eq!(replaced, b"Hello, Rust!");
    }

    /// Colour sequences around the text are removed.
    #[test]
    fn test_strip_ansi() {
        let plain = strip_ansi(b"\x1b[31mHello\x1b[0m");
        assert_eq!(plain, b"Hello");
    }

    /// ETX (0x03) is rendered in caret notation.
    #[test]
    fn test_visible_string() {
        let rendered = to_visible_string(b"Hello\x03World");
        assert!(rendered.contains("^C"));
    }
}