Skip to main content

nexcore_codec/
hex.rs

//! Hexadecimal encoding and decoding (RFC 4648 §8, Base16).
//!
//! Zero-dependency replacement for the `hex` crate.
//!
//! # Supply Chain Sovereignty
//!
//! This module has **zero external dependencies**. It replaces the `hex` crate
//! for the `nexcore` ecosystem, eliminating supply chain risk for hex encoding.
//!
//! # Examples
//!
//! ```
//! use nexcore_codec::hex;
//!
//! let encoded = hex::encode(b"Hello");
//! assert_eq!(encoded, "48656c6c6f");
//!
//! let decoded = hex::decode("48656c6c6f").unwrap();
//! assert_eq!(decoded, b"Hello");
//! ```
21
/// Digit alphabet for lowercase output; entry `i` is the ASCII digit for
/// nibble value `i`.
const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef";

/// Digit alphabet for uppercase output; same layout as the lowercase table.
const HEX_CHARS_UPPER: &[u8; 16] = b"0123456789ABCDEF";

/// Map a 4-bit value to its ASCII hex digit taken from `table`.
///
/// `nibble` is expected to be in `0..=15`. The encoders in this module obtain
/// it from `byte >> 4` or `byte & 0x0f`, neither of which can exceed 15. A
/// larger value would index past the 16-entry table and panic.
#[inline]
fn hex_char(table: &[u8; 16], nibble: u8) -> char {
    #[allow(
        clippy::indexing_slicing,
        reason = "nibble is always byte >> 4 or byte & 0x0f, which is 0..=15; table has 16 elements"
    )]
    let digit = table[usize::from(nibble)];
    // `char::from(u8)` is an infallible, lossless conversion, so no `as` cast
    // (and no lint allowance for one) is needed.
    char::from(digit)
}
49
50/// Encode bytes to a lowercase hex string.
51///
52/// Equivalent to `hex::encode()`.
53#[must_use]
54pub fn encode(input: impl AsRef<[u8]>) -> String {
55    let input = input.as_ref();
56    // Each byte expands to exactly 2 hex characters; saturating_mul is
57    // sufficient — inputs large enough to overflow usize would OOM first.
58    let mut out = String::with_capacity(input.len().saturating_mul(2));
59    for &byte in input {
60        out.push(hex_char(HEX_CHARS_LOWER, byte >> 4));
61        out.push(hex_char(HEX_CHARS_LOWER, byte & 0x0f));
62    }
63    out
64}
65
66/// Encode bytes to an uppercase hex string.
67#[must_use]
68pub fn encode_upper(input: impl AsRef<[u8]>) -> String {
69    let input = input.as_ref();
70    let mut out = String::with_capacity(input.len().saturating_mul(2));
71    for &byte in input {
72        out.push(hex_char(HEX_CHARS_UPPER, byte >> 4));
73        out.push(hex_char(HEX_CHARS_UPPER, byte & 0x0f));
74    }
75    out
76}
77
/// Error returned when decoding an invalid hex string.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Input has odd length (hex requires pairs).
    OddLength,
    /// Invalid hex character found in the input.
    InvalidChar {
        /// Zero-based byte index of the offending character, counted from the
        /// start of the whole input (not from the start of its digit pair).
        index: usize,
        /// The offending input byte.
        byte: u8,
    },
}

impl core::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::OddLength => write!(f, "odd-length hex string"),
            Self::InvalidChar { index, byte } => {
                write!(f, "invalid hex char 0x{byte:02x} at index {index}")
            }
        }
    }
}

impl std::error::Error for DecodeError {}

/// Decode a hex string to bytes.
///
/// Accepts both uppercase and lowercase hex characters.
///
/// Equivalent to `hex::decode()`.
///
/// # Errors
///
/// * [`DecodeError::OddLength`] if the input length is not a multiple of two.
///   This is checked before any character is inspected.
/// * [`DecodeError::InvalidChar`] for the first byte that is not a hex digit;
///   `index` is that byte's position within the whole input.
pub fn decode(input: impl AsRef<[u8]>) -> Result<Vec<u8>, DecodeError> {
    let input = input.as_ref();
    if input.len() % 2 != 0 {
        return Err(DecodeError::OddLength);
    }
    let mut out = Vec::with_capacity(input.len() / 2);
    // Enumerate the individual bytes so every digit carries its absolute
    // position in `input`. Taking two items per iteration walks the input one
    // digit pair at a time; the even-length check above guarantees the
    // iterator never ends between the high and the low digit.
    let mut digits = input.iter().copied().enumerate();
    while let (Some((high_index, high_byte)), Some((low_index, low_byte))) =
        (digits.next(), digits.next())
    {
        let high = hex_val(high_byte, high_index)?;
        let low = hex_val(low_byte, low_index)?;
        // `high` is 0..=15 from hex_val, so `high << 4` is 0..=240.
        // `low` is 0..=15, so `(high << 4) | low` is 0..=255; fits in u8.
        #[allow(
            clippy::arithmetic_side_effects,
            reason = "high is 0..=15 (from hex_val), so high << 4 is 0..=240; OR with low (0..=15) gives 0..=255; no overflow"
        )]
        out.push((high << 4) | low);
    }
    Ok(out)
}

/// Convert a hex ASCII byte to its numeric value (0..=15).
///
/// `index` is only used for error reporting: it is stored verbatim in
/// [`DecodeError::InvalidChar`] when `byte` is not a hex digit, so callers
/// should pass the byte's position within the full input.
#[inline]
const fn hex_val(byte: u8, index: usize) -> Result<u8, DecodeError> {
    match byte {
        // Match arm guards prove safety for each arm:
        // b'0'..=b'9': byte >= b'0', so byte - b'0' is in 0..=9.
        // b'a'..=b'f': byte >= b'a', so byte - b'a' is in 0..=5;
        //   adding 10 gives 10..=15, fitting in u8.
        // b'A'..=b'F': byte >= b'A', so byte - b'A' is in 0..=5;
        //   adding 10 gives 10..=15, fitting in u8.
        #[allow(
            clippy::arithmetic_side_effects,
            reason = "match arm guard proves byte >= b'0'; subtraction gives 0..=9 which fits in u8"
        )]
        b'0'..=b'9' => Ok(byte - b'0'),
        #[allow(
            clippy::arithmetic_side_effects,
            reason = "match arm guard proves byte >= b'a' and byte - b'a' <= 5; adding 10 gives 10..=15, fitting in u8"
        )]
        b'a'..=b'f' => Ok(byte - b'a' + 10),
        #[allow(
            clippy::arithmetic_side_effects,
            reason = "match arm guard proves byte >= b'A' and byte - b'A' <= 5; adding 10 gives 10..=15, fitting in u8"
        )]
        b'A'..=b'F' => Ok(byte - b'A' + 10),
        _ => Err(DecodeError::InvalidChar { index, byte }),
    }
}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Every possible byte value in ascending order.
    fn all_byte_values() -> Vec<u8> {
        (0..=255).collect()
    }

    // ---- encode -----------------------------------------------------------

    #[test]
    fn encode_empty() {
        let encoded = encode(b"");
        assert_eq!(encoded, "");
    }

    #[test]
    fn encode_hello() {
        let encoded = encode(b"Hello");
        assert_eq!(encoded, "48656c6c6f");
    }

    #[test]
    fn encode_all_bytes() {
        let encoded = encode(all_byte_values());
        // 256 bytes, two digits each.
        assert_eq!(encoded.len(), 512);
        assert!(encoded.starts_with("000102"));
        assert!(encoded.ends_with("fdfeff"));
    }

    #[test]
    fn encode_upper_hello() {
        let encoded = encode_upper(b"Hello");
        assert_eq!(encoded, "48656C6C6F");
    }

    // ---- decode -----------------------------------------------------------

    #[test]
    fn decode_empty() {
        assert_eq!(decode(""), Ok(Vec::new()));
    }

    #[test]
    fn decode_hello() {
        assert_eq!(decode("48656c6c6f"), Ok(b"Hello".to_vec()));
    }

    #[test]
    fn decode_uppercase() {
        assert_eq!(decode("48656C6C6F"), Ok(b"Hello".to_vec()));
    }

    #[test]
    fn decode_mixed_case() {
        assert_eq!(decode("48656C6c6F"), Ok(b"Hello".to_vec()));
    }

    #[test]
    fn decode_odd_length() {
        let outcome = decode("abc");
        assert_eq!(outcome, Err(DecodeError::OddLength));
    }

    #[test]
    fn decode_invalid_char() {
        // Only the variant is checked here, not its payload.
        assert!(matches!(
            decode("zz"),
            Err(DecodeError::InvalidChar { .. })
        ));
    }

    // ---- round trips ------------------------------------------------------

    #[test]
    fn roundtrip_all_bytes() {
        let original = all_byte_values();
        let decoded = decode(encode(&original));
        assert_eq!(decoded, Ok(original));
    }

    // RFC 4648 §10 test vectors for Base16. The RFC prints the encoded side
    // in uppercase; the lowercase spelling is used here because `encode`
    // emits lowercase digits.
    #[test]
    fn rfc4648_test_vectors() {
        const VECTORS: [(&str, &str); 7] = [
            ("", ""),
            ("f", "66"),
            ("fo", "666f"),
            ("foo", "666f6f"),
            ("foob", "666f6f62"),
            ("fooba", "666f6f6261"),
            ("foobar", "666f6f626172"),
        ];
        for &(input, expected) in &VECTORS {
            let plain = input.as_bytes();
            assert_eq!(encode(plain), expected, "encode({input:?})");
            assert_eq!(decode(expected), Ok(plain.to_vec()), "decode({expected:?})");
        }
    }
}