hex-conservative 1.1.0

A hex encoding and decoding crate with a conservative MSRV and dependency policy.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
// SPDX-License-Identifier: CC0-1.0

//! # Hex encoding and decoding
//!
//! General purpose hex encoding/decoding library with a conservative MSRV and dependency policy.
//!
//! ## Const hex literals
//!
//! ```
//! use hex_conservative::hex;
//!
//! const GENESIS: [u8; 32] = hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
//! ```
//!
//! ## Runtime hex parsing
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! // In your manifest use the `package` key to improve import ergonomics.
//! // hex = { package = "hex-conservative", version = "*" }
//! # use hex_conservative as hex; // No need for this if using `package` as above.
//! use hex::prelude::*;
//!
//! // Decode an arbitrary length hex string into a vector.
//! let v = hex::decode_to_vec("deadbeef").expect("valid hex digits");
//! // Or a known length hex string into a fixed size array.
//! let a = hex::decode_to_array::<4>("deadbeef").expect("valid length and valid hex digits");
//!
//! // We support `LowerHex` and `UpperHex` out of the box for `[u8]` slices.
//! println!("An array as lower hex: {:x}", a.as_hex());
//! // And for vecs since `Vec` derefs to byte slice.
//! println!("A vector as upper hex: {:X}", v.as_hex());
//!
//! // Allocate a new string (also `to_upper_hex_string`).
//! let s = v.to_lower_hex_string();
//!
//! // Please note, mixed case strings will still parse successfully but we only
//! // support displaying hex in a single case.
//! assert_eq!(
//!     hex::decode_to_vec("dEaDbEeF").expect("valid mixed case hex digits"),
//!     hex::decode_to_vec("deadbeef").expect("valid hex digits"),
//! );
//! # }
//! ```
//!
//! ## Crate feature flags
//!
//! * `std` - enables the standard library, on by default.
//! * `alloc` - enables features that require allocation such as decoding into `Vec<u8>`, implied
//!   by `std`.
//! * `newer-rust-version` - enables Rust version detection and thus newer features, may add
//!   dependency on a feature detection crate to reduce compile times. This feature is expected to
//!   do nothing once the native detection is in Rust and our MSRV is at least that version. We may
//!   also remove the feature gate in 2.0 with semver trick once that happens.
//!
//! ## Minimum Supported Rust Version (MSRV)
//!
//! The current MSRV is Rust `1.74.0`. Policy is to never use an MSRV that is less than two years
//! old and also that ships in Debian stable. We may bump our MSRV in a minor version, but we have
//! no plans to.
//!
//! Note though that the dependencies may have a looser policy. This is not considered
//! breaking/wrong; you would just need to pin them in `Cargo.lock` (not `.toml`).

#![no_std]
// Experimental features we need.
#![cfg_attr(docsrs, feature(doc_cfg))]
// Coding conventions
#![warn(missing_docs)]

#[cfg(feature = "std")]
extern crate std;

#[cfg(feature = "alloc")]
#[allow(unused_imports)] // false positive regarding macro
#[macro_use]
extern crate alloc;

// Hidden from the rendered documentation: this module is not meant to be used directly.
// NOTE(review): presumably it exists so that code generated by this crate's macros can reach
// `core` items through a crate-relative path - no user is visible in this file, confirm.
#[doc(hidden)]
pub mod _export {
    /// A re-export of `core::*`.
    pub mod _core {
        pub use core::*;
    }
}

pub mod buf_encoder;
pub mod display;
pub mod error;
mod iter;

/// Re-exports of the common crate traits.
///
/// Intended to be glob-imported (`use hex_conservative::prelude::*;`) so that the extension
/// methods provided by [`DisplayHex`] are in scope.
pub mod prelude {
    // Inlined so the trait is documented here rather than shown as a bare `pub use`.
    #[doc(inline)]
    pub use crate::display::DisplayHex;
}

#[cfg(feature = "alloc")]
use alloc::vec::Vec;
use core::fmt;

pub(crate) use table::Table;

#[rustfmt::skip]                // Keep public re-exports separate.
#[doc(inline)]
pub use self::{
    display::DisplayHex,
    iter::{BytesToHexIter, HexToBytesIter, HexSliceToBytesIter},
};
#[doc(no_inline)]
pub use self::error::{
    DecodeFixedLengthBytesError, DecodeVariableLengthBytesError, InvalidCharError,
    InvalidLengthError, OddLengthStringError,
};

/// Decodes a hex string of arbitrary (even) length into a freshly allocated `Vec`.
///
/// The output length follows from the input length, so any input with an even number of
/// characters is accepted. When the required length is known at compile time,
/// [`decode_to_array`] is most likely the better choice.
///
/// # Errors
///
/// Returns an error if `hex` contains invalid characters or doesn't have even length.
#[cfg(feature = "alloc")]
pub fn decode_to_vec(hex: &str) -> Result<Vec<u8>, DecodeVariableLengthBytesError> {
    // Constructing the iterator and draining it fail with different error types; `?` converts
    // each of them into the combined error returned from this function.
    let bytes = HexToBytesIter::new(hex)?.drain_to_vec()?;
    Ok(bytes)
}

/// Decodes a hex string into a byte array whose length `N` is fixed at compile time.
///
/// Use [`decode_to_vec`] instead when the required length is only known at runtime.
///
/// # Errors
///
/// Returns an error if `hex` contains invalid characters or has incorrect length. (Should be
/// `N * 2`.)
pub fn decode_to_array<const N: usize>(hex: &str) -> Result<[u8; N], DecodeFixedLengthBytesError> {
    // Every output byte is encoded by exactly two hex digits.
    let expected = N * 2;
    if hex.len() != expected {
        return Err(InvalidLengthError { invalid: hex.len(), expected }.into());
    }

    let mut decoded = [0u8; N];
    // The length was validated by the guard above, so the unchecked constructor is fine here.
    HexToBytesIter::new_unchecked(hex).drain_to_slice(&mut decoded)?;
    Ok(decoded)
}

/// Parses hex strings in const contexts.
///
/// This is primarily useful for testing, panics on all error paths.
///
/// The argument must be usable in a constant expression: its length determines the length of the
/// returned array. The even-length check is always performed at compile time; the digits
/// themselves are decoded at compile time only when the macro is used in a const context
/// (otherwise an invalid digit panics when the expression is evaluated).
///
/// # Returns
///
/// `[u8; N]` array containing the parsed data if valid, where `N` is half the input length.
///
/// # Panics
///
/// Panics on all error paths:
///
/// * If input string is not even length.
/// * If input string contains non-hex characters.
#[macro_export]
macro_rules! hex {
    ($hex:expr) => {{
        // A `const` item is evaluated by the compiler no matter where the macro is invoked.
        const _: () = assert!($hex.len() % 2 == 0, "hex string must have even length");

        // Maps one ASCII hex digit (either case) to its 4-bit value.
        const fn decode_digit(digit: u8) -> u8 {
            match digit {
                b'a'..=b'f' => 10 + (digit - b'a'),
                b'A'..=b'F' => 10 + (digit - b'A'),
                b'0'..=b'9' => digit - b'0',
                _ => panic!("invalid hex digit"),
            }
        }

        let mut decoded = [0u8; $hex.len() / 2];
        let digits = $hex.as_bytes();

        // A `while` loop is used because `for` loops are not available in const contexts.
        let mut idx = 0;
        while idx < decoded.len() {
            // Output byte `idx` is encoded by the digit pair starting at `2 * idx`.
            let pair = idx * 2;
            decoded[idx] = (decode_digit(digits[pair]) << 4) | decode_digit(digits[pair + 1]);
            idx += 1;
        }

        decoded
    }};
}

/// Possible case of hex.
///
/// The [`Default`] implementation is derived and yields [`Case::Lower`].
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Case {
    /// Produce lower-case chars (`[0-9a-f]`).
    ///
    /// This is the default.
    #[default]
    Lower,

    /// Produce upper-case chars (`[0-9A-F]`).
    Upper,
}

impl Case {
    /// Looks up the digit table that encodes nibbles in this case.
    ///
    /// The returned table may only contain displayable ASCII chars.
    #[inline]
    #[rustfmt::skip]
    pub(crate) fn table(self) -> &'static Table {
        // Deliberately exhaustive (no `_` arm) so a new variant forces a decision here.
        match self {
            Self::Upper => &Table::UPPER,
            Self::Lower => &Table::LOWER,
        }
    }
}

/// A valid hex character: one of `[0-9a-fA-F]`.
///
/// There is one variant per character and both cases of the letters `a`-`f` are represented, so
/// a value of this type is always exactly one ASCII hex digit. The discriminant of each variant
/// is the ASCII byte of the character it names (for example `Char::LowerA` is `b'a'`).
//
// The `repr(u8)` guarantees that representation matches the ASCII byte value of the character,
// making transmute between `Char` and `u8` sound whenever the byte is a valid hex digit.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Char {
    // Decimal digits.
    /// `'0'`
    Zero = b'0',
    /// `'1'`
    One = b'1',
    /// `'2'`
    Two = b'2',
    /// `'3'`
    Three = b'3',
    /// `'4'`
    Four = b'4',
    /// `'5'`
    Five = b'5',
    /// `'6'`
    Six = b'6',
    /// `'7'`
    Seven = b'7',
    /// `'8'`
    Eight = b'8',
    /// `'9'`
    Nine = b'9',
    // Lower-case letters.
    /// `'a'`
    LowerA = b'a',
    /// `'b'`
    LowerB = b'b',
    /// `'c'`
    LowerC = b'c',
    /// `'d'`
    LowerD = b'd',
    /// `'e'`
    LowerE = b'e',
    /// `'f'`
    LowerF = b'f',
    // Upper-case letters.
    /// `'A'`
    UpperA = b'A',
    /// `'B'`
    UpperB = b'B',
    /// `'C'`
    UpperC = b'C',
    /// `'D'`
    UpperD = b'D',
    /// `'E'`
    UpperE = b'E',
    /// `'F'`
    UpperF = b'F',
}

impl Char {
    /// Decodes the ASCII byte `b` into the nibble value (0–15) it denotes as a hex digit.
    ///
    /// Upper-case and lower-case digits are both accepted. Returns `None` if `b` is not one of
    /// `[0-9a-fA-F]`.
    #[inline]
    pub(crate) fn decode_nibble(b: u8) -> Option<u8> {
        // Marker stored in every table slot that does not belong to a hex digit. It cannot be
        // confused with a real nibble because those are at most 15.
        const INVALID: u8 = 0xFF;

        // Byte-indexed lookup table, built at compile time. `Char` discriminants equal the ASCII
        // bytes of the digits, so casting a variant to `usize` yields its slot directly.
        #[rustfmt::skip]
        static TABLE: [u8; 256] = {
            // The position of a digit within these arrays is its nibble value.
            let lower = [
                Char::Zero, Char::One, Char::Two, Char::Three,
                Char::Four, Char::Five, Char::Six, Char::Seven,
                Char::Eight, Char::Nine, Char::LowerA, Char::LowerB,
                Char::LowerC, Char::LowerD, Char::LowerE, Char::LowerF,
            ];
            let upper = [
                Char::Zero, Char::One, Char::Two, Char::Three,
                Char::Four, Char::Five, Char::Six, Char::Seven,
                Char::Eight, Char::Nine, Char::UpperA, Char::UpperB,
                Char::UpperC, Char::UpperD, Char::UpperE, Char::UpperF,
            ];

            let mut table = [INVALID; 256];
            let mut nibble = 0;
            while nibble < lower.len() {
                // `nibble` is below 16, so narrowing it to `u8` is lossless. The decimal digits
                // appear in both arrays and are simply written twice with the same value.
                table[lower[nibble] as usize] = nibble as u8;
                table[upper[nibble] as usize] = nibble as u8;
                nibble += 1;
            }
            table
        };

        match TABLE[usize::from(b)] {
            INVALID => None,
            nibble => Some(nibble),
        }
    }

    /// Views a slice of `Char`s as a `&str`.
    ///
    /// No copying or validation takes place; the conversion is zero-cost.
    #[inline]
    pub fn slice_as_str(slice: &[Self]) -> &str {
        // SAFETY: every `Char` is an ASCII byte and ASCII is always valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(Self::slice_as_bytes(slice)) }
    }

    /// Views a slice of `Char`s as a `&[u8]` of their ASCII bytes.
    ///
    /// No copying takes place; the conversion is zero-cost.
    #[inline]
    pub fn slice_as_bytes(slice: &[Self]) -> &[u8] {
        let data = slice.as_ptr().cast::<u8>();
        // SAFETY: `Self` is `repr(u8)`, so each element is laid out as a single byte holding the
        // ASCII value of the digit. Pointer and length come from a live slice and the returned
        // slice borrows for the same lifetime.
        unsafe { core::slice::from_raw_parts(data, slice.len()) }
    }
}

impl fmt::Display for Char {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // View `self` as a one-element string and delegate to `str`'s `Display`: this skips
        // encoding a `char` and keeps support for every formatting option (width, fill, ...).
        let text = Self::slice_as_str(core::slice::from_ref(self));
        fmt::Display::fmt(text, f)
    }
}

impl fmt::Debug for Char {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `char`'s `Debug` output is wrapped in single quotes; mirror that for consistency.
        const QUOTE: u8 = b'\'';
        let quoted = [QUOTE, *self as u8, QUOTE];
        // SAFETY: the two quotes and the hex digit between them are all ASCII bytes.
        let quoted = unsafe { core::str::from_utf8_unchecked(&quoted) };
        // `Display` is intentional: `str`'s `Debug` would add double quotes around the text,
        // which would be incorrect here.
        fmt::Display::fmt(quoted, f)
    }
}

impl From<Char> for char {
    /// Converts the hex digit into the `char` with the same ASCII value.
    #[inline]
    fn from(c: Char) -> Self {
        // The discriminant is the digit's ASCII byte, and every `u8` converts to a `char`.
        Self::from(c as u8)
    }
}

impl From<Char> for u8 {
    /// Converts the hex digit into its ASCII byte.
    #[inline]
    fn from(c: Char) -> Self {
        // `Char` is `repr(u8)` with the ASCII bytes as discriminants, so the cast is exact.
        c as Self
    }
}

/// Correctness boundary for `Table`.
mod table {
    use super::Char;

    /// Lookup table mapping a nibble value (0–15) to the hex digit that encodes it.
    //
    // Correctness invariant: each byte in the table must be ASCII.
    //
    // The inner array is private to this module, so the only tables code outside of it can get
    // hold of are the `LOWER` and `UPPER` constants below.
    #[allow(clippy::derived_hash_with_manual_eq)] // The Eq impl distinguishes the two possible values of Table
    #[derive(Debug, Hash)]
    pub(crate) struct Table([Char; 16]);

    impl Table {
        /// Digits `0-9a-f`.
        #[rustfmt::skip] // rustfmt wants to make these one per line.
        pub(crate) const LOWER: Self = Table([
            Char::Zero, Char::One, Char::Two, Char::Three,
            Char::Four, Char::Five, Char::Six, Char::Seven,
            Char::Eight, Char::Nine, Char::LowerA, Char::LowerB,
            Char::LowerC, Char::LowerD, Char::LowerE, Char::LowerF,
        ]);
        /// Digits `0-9A-F`.
        #[rustfmt::skip] // rustfmt wants to make these one per line.
        pub(crate) const UPPER: Self = Table([
            Char::Zero, Char::One, Char::Two, Char::Three,
            Char::Four, Char::Five, Char::Six, Char::Seven,
            Char::Eight, Char::Nine, Char::UpperA, Char::UpperB,
            Char::UpperC, Char::UpperD, Char::UpperE, Char::UpperF,
        ]);

        /// Encodes a single byte as two `char`s, most significant nibble first.
        ///
        /// The function guarantees only returning values from the provided table.
        #[inline]
        pub(crate) fn byte_to_chars(&self, byte: u8) -> [char; 2] {
            let [high, low] = self.byte_to_hex_chars(byte);
            [char::from(high), char::from(low)]
        }

        /// Writes the two hex digits of `byte` into `dest` (most significant nibble first) and
        /// returns the buffer viewed as a `&str`.
        ///
        /// The function guarantees only returning values from the provided table.
        #[inline]
        pub(crate) fn byte_to_str<'a>(&self, dest: &'a mut [u8; 2], byte: u8) -> &'a str {
            let [high, low] = self.byte_to_hex_chars(byte);
            *dest = [u8::from(high), u8::from(low)];
            // SAFETY: both bytes were just written from `Char`s, which are always ASCII.
            unsafe { core::str::from_utf8_unchecked(dest) }
        }

        /// Encodes a single byte as two [`Char`] values, most significant nibble first.
        ///
        /// The function guarantees only returning values from the provided table.
        #[inline]
        pub(crate) fn byte_to_hex_chars(&self, byte: u8) -> [Char; 2] {
            // Both indices are at most 15, so they are always within the 16-entry table.
            let high = usize::from(byte >> 4);
            let low = usize::from(byte & 0x0F);
            [self.0[high], self.0[low]]
        }
    }

    impl PartialEq for Table {
        fn eq(&self, other: &Self) -> bool {
            // A `Table` is either `Table::LOWER` or `Table::UPPER`, and those differ in every
            // entry from index 10 to 15, so comparing a single one of them is sufficient.
            self.0[10] == other.0[10]
        }
    }
    impl Eq for Table {}
}

// Unit tests for the `hex!` macro and the `Char` type. The whole module is gated on `alloc`
// because several tests use `format!` / `ToString`.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    // `hex!` decodes a literal into the expected bytes when used in a plain `let` binding.
    #[test]
    fn hex_macro() {
        let data = hex!("deadbeef");
        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef]);
    }

    // Upper-case and lower-case input decode to the same bytes.
    #[test]
    fn hex_macro_case_insensitive() {
        assert_eq!(hex!("DEADBEEF"), hex!("deadbeef"));
    }

    // `hex!` can initialise a `const`, i.e. the decoding is evaluated at compile time.
    #[test]
    fn hex_macro_const_context() {
        const HASH: [u8; 32] =
            hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
        assert_eq!(HASH[0], 0x00);
        assert_eq!(HASH[31], 0x6f);
    }

    // The slice casts yield the ASCII representation, for empty, single-element and longer
    // slices alike.
    #[test]
    fn char_slice_casts() {
        use super::Char;

        const BEEF: &[Char] = &[Char::LowerB, Char::LowerE, Char::LowerE, Char::LowerF];

        assert_eq!(Char::slice_as_bytes(&[]), &[]);
        assert_eq!(Char::slice_as_bytes(&BEEF[..1]), b"b");
        assert_eq!(Char::slice_as_bytes(BEEF), b"beef");
        assert_eq!(Char::slice_as_str(&[]), "");
        assert_eq!(Char::slice_as_str(&BEEF[..1]), "b");
        assert_eq!(Char::slice_as_str(BEEF), "beef");
    }

    // `Display` prints the bare character and honours width and alignment.
    #[test]
    fn char_display() {
        use alloc::string::ToString;

        use super::Char;

        assert_eq!(Char::Zero.to_string(), "0");
        assert_eq!(Char::LowerB.to_string(), "b");
        assert_eq!(Char::UpperB.to_string(), "B");
        // Right-aligned, left-aligned and centred within a width of 3.
        assert_eq!(format!("{: >3}", Char::UpperB), "  B");
        assert_eq!(format!("{: <3}", Char::UpperB), "B  ");
        assert_eq!(format!("{: ^3}", Char::UpperB), " B ");
    }

    // `Debug` matches the output of `char`'s `Debug` (single quotes) and honours width and
    // alignment.
    #[test]
    fn char_debug() {
        use super::Char;

        assert_eq!(format!("{:?}", Char::Zero), format!("{:?}", '0'));
        assert_eq!(format!("{:?}", Char::LowerB), format!("{:?}", 'b'));
        assert_eq!(format!("{:?}", Char::UpperB), format!("{:?}", 'B'));
        // We don't test alignment against `char` because it's not supported by `char` which is
        // considered a bug - see https://github.com/rust-lang/rust/issues/30164
        assert_eq!(format!("{: >5?}", Char::UpperB), "  'B'");
        assert_eq!(format!("{: <5?}", Char::UpperB), "'B'  ");
        assert_eq!(format!("{: ^5?}", Char::UpperB), " 'B' ");
    }
}