Skip to main content

tess/
hex.rs

//! xxd-style hex dump rendering. One row = 16 bytes, with offset prefix
//! and ASCII gutter.
3
/// Format one row of an xxd-style hex dump.
///
/// Layout: `<offset>: <hex column>  <ASCII gutter>`, where
///
/// * the offset is rendered as lowercase hex, zero-padded to at least 8
///   digits (the width is a minimum, so offsets above `0xFFFF_FFFF` simply
///   use more digits);
/// * the hex column always spans 16 byte slots, with a single space between
///   groups of `bytes_per_group` bytes. Slots past the end of `bytes` are
///   filled with spaces, so the gutter of a short row starts in the same
///   column as the gutter of a full row;
/// * the ASCII gutter holds one character per input byte and is *not*
///   padded: bytes in `0x20..=0x7E` are shown as-is, everything else as `.`.
///
/// `bytes_per_group` must be one of 1, 2, 4, 8, 16, i.e. 2, 4, 8, 16, or 32
/// hex characters per group (16 renders the whole row as a single unspaced
/// group).
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if `bytes.len() > 16` or
/// if `bytes_per_group` is not one of the values listed above. A
/// `bytes_per_group` of 0 panics in every build (remainder by zero). Without
/// debug assertions, a slice longer than 16 bytes yields a hex column for the
/// first 16 bytes only, while the gutter still shows every byte.
pub fn format_hex_row(offset: usize, bytes: &[u8], bytes_per_group: usize) -> String {
    // Brings `write!` support for `String` into scope so formatted text is
    // appended in place instead of going through a temporary `format!` String.
    use std::fmt::Write as _;

    const ROW_BYTES: usize = 16;

    debug_assert!(bytes.len() <= ROW_BYTES, "hex row must be <= 16 bytes");
    debug_assert!(
        matches!(bytes_per_group, 1 | 2 | 4 | 8 | 16),
        "bytes_per_group must be 1, 2, 4, 8, or 16"
    );

    let mut out = String::with_capacity(80);

    // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
    let _ = write!(out, "{:08x}: ", offset);

    for slot in 0..ROW_BYTES {
        // Group separator goes before every group except the first.
        if slot > 0 && slot % bytes_per_group == 0 {
            out.push(' ');
        }
        match bytes.get(slot) {
            Some(&byte) => {
                let _ = write!(out, "{:02x}", byte);
            }
            // Blank slot keeps the gutter column-aligned on short rows.
            None => out.push_str("  "),
        }
    }

    out.push_str("  ");
    out.extend(bytes.iter().map(|&byte| {
        if (0x20..=0x7E).contains(&byte) {
            char::from(byte)
        } else {
            '.'
        }
    }));
    out
}
46
/// Convert a user-facing group width, expressed in hex characters, into the
/// internal bytes-per-group unit.
///
/// Accepted widths are 2, 4, 8, 16 and 32 hex characters, which map to 1, 2,
/// 4, 8 and 16 bytes respectively. Every other value yields `None`.
pub fn hex_chars_to_bytes_per_group(hex_chars: usize) -> Option<usize> {
    let supported = matches!(hex_chars, 2 | 4 | 8 | 16 | 32);
    if supported {
        // Two hex characters encode one byte.
        Some(hex_chars / 2)
    } else {
        None
    }
}
60
#[cfg(test)]
mod tests {
    //! Unit tests for hex-row rendering and the group-width translation.

    use super::*;

    #[test]
    fn aligned_input_16_bytes_renders_full_row() {
        let bytes = b"Hello world. tes";
        let row = format_hex_row(0, bytes, 2);
        assert_eq!(
            row,
            "00000000: 4865 6c6c 6f20 776f 726c 642e 2074 6573  Hello world. tes"
        );
    }

    #[test]
    fn empty_input_renders_offset_and_blank_columns() {
        // 16 blank byte slots (32 spaces) + 7 group separators + the
        // 2-space gutter separator = 41 spaces after the offset prefix.
        let row = format_hex_row(0, b"", 2);
        assert_eq!(row, format!("00000000: {}", " ".repeat(41)));
    }

    #[test]
    fn short_tail_pads_ascii_gutter_columns() {
        let bytes = b"t.";
        let row = format_hex_row(0x10, bytes, 2);
        assert!(row.starts_with("00000010: 742e "));
        assert!(row.ends_with("  t."));
        // The gutter separator of a short row must sit in the same column as
        // the one in a full row.
        let ascii_start = row.find("  t.").unwrap();
        let full_row = format_hex_row(0, b"0123456789abcdef", 2);
        let full_ascii_start = full_row.rfind("  ").unwrap();
        assert_eq!(
            ascii_start, full_ascii_start,
            "short-row ASCII column should align with full-row ASCII column"
        );
    }

    #[test]
    fn all_printable_bytes_show_in_gutter() {
        let bytes = b"abcdefghijklmnop";
        let row = format_hex_row(0, bytes, 2);
        assert!(row.ends_with("  abcdefghijklmnop"));
    }

    #[test]
    fn all_non_printable_bytes_show_as_dots() {
        let bytes = &[0x00, 0x01, 0x02, 0x1f, 0x7f, 0x80, 0xff];
        let row = format_hex_row(0, bytes, 2);
        assert!(row.ends_with("  ......."));
    }

    #[test]
    fn utf8_multibyte_renders_as_dots_in_gutter() {
        let bytes = "ä".as_bytes();
        let row = format_hex_row(0, bytes, 2);
        assert!(row.contains("c3a4"));
        assert!(row.ends_with("  .."));
    }

    #[test]
    fn offset_grows_past_0x10000() {
        let bytes = b"X";
        let row = format_hex_row(0x123456, bytes, 2);
        assert!(row.starts_with("00123456: "));
    }

    // The offset literal below needs more than 32 bits, so it only fits in
    // `usize` on 64-bit targets; elsewhere it would be rejected at compile
    // time as an out-of-range literal.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn offset_grows_past_8_digits() {
        let bytes = b"X";
        let row = format_hex_row(0x1_2345_6789, bytes, 2);
        assert!(row.starts_with("123456789: "));
    }

    #[test]
    fn group_size_1_byte_renders_2_hex_per_group() {
        let bytes = b"Hello world. tes";
        let row = format_hex_row(0, bytes, 1);
        assert_eq!(
            row,
            "00000000: 48 65 6c 6c 6f 20 77 6f 72 6c 64 2e 20 74 65 73  Hello world. tes"
        );
    }

    #[test]
    fn group_size_4_bytes_renders_8_hex_per_group() {
        let bytes = b"Hello world. tes";
        let row = format_hex_row(0, bytes, 4);
        assert_eq!(
            row,
            "00000000: 48656c6c 6f20776f 726c642e 20746573  Hello world. tes"
        );
    }

    #[test]
    fn group_size_8_bytes_renders_16_hex_per_group() {
        let bytes = b"Hello world. tes";
        let row = format_hex_row(0, bytes, 8);
        assert_eq!(
            row,
            "00000000: 48656c6c6f20776f 726c642e20746573  Hello world. tes"
        );
    }

    #[test]
    fn group_size_16_bytes_renders_whole_row_unspaced() {
        let bytes = b"Hello world. tes";
        let row = format_hex_row(0, bytes, 16);
        assert_eq!(
            row,
            "00000000: 48656c6c6f20776f726c642e20746573  Hello world. tes"
        );
    }

    #[test]
    fn short_tail_aligns_across_all_group_sizes() {
        let short = b"t.";
        let full = b"0123456789abcdef";
        for &bpg in &[1usize, 2, 4, 8, 16] {
            let short_row = format_hex_row(0x10, short, bpg);
            let full_row = format_hex_row(0, full, bpg);
            let short_ascii = short_row.find("  t.").unwrap();
            let full_ascii = full_row.rfind("  ").unwrap();
            assert_eq!(
                short_ascii, full_ascii,
                "ascii column misaligned for bytes_per_group={bpg}"
            );
        }
    }

    #[test]
    fn hex_chars_to_bytes_per_group_maps_valid_values() {
        assert_eq!(hex_chars_to_bytes_per_group(2), Some(1));
        assert_eq!(hex_chars_to_bytes_per_group(4), Some(2));
        assert_eq!(hex_chars_to_bytes_per_group(8), Some(4));
        assert_eq!(hex_chars_to_bytes_per_group(16), Some(8));
        assert_eq!(hex_chars_to_bytes_per_group(32), Some(16));
    }

    #[test]
    fn hex_chars_to_bytes_per_group_rejects_invalid() {
        assert_eq!(hex_chars_to_bytes_per_group(0), None);
        assert_eq!(hex_chars_to_bytes_per_group(1), None);
        assert_eq!(hex_chars_to_bytes_per_group(3), None);
        assert_eq!(hex_chars_to_bytes_per_group(64), None);
    }
}