ffuzzy 0.3.9

Library to generate / parse / compare ssdeep Context Triggered Piecewise Hashes (CTPH)
Documentation
// SPDX-License-Identifier: CC0-1.0
// SPDX-FileCopyrightText: Authored by Tsukasa OI <floss_ssdeep@irq.a4lg.com> in 2023, 2024

//! Base64 handling.

/// The Base64 alphabet as an array of [`u8`], ordered by index value.
///
/// Element `i` of this table is the ASCII character representing the
/// Base64 index `i` (`0..=63`), as used in ssdeep.
///
/// The alphabet set is identical to the one defined in the Table 1 of
/// [RFC 4648](https://datatracker.ietf.org/doc/rfc4648/):
/// uppercase letters, lowercase letters, digits, `+` and then `/`.
pub(crate) const BASE64_TABLE_U8: [u8; 64] =
    *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Lookup table mapping every possible byte to its Base64 character index.
///
/// Bytes that are Base64 alphabets map to their index (`0x00..=0x3f`);
/// all other bytes map to `0x40`, the "invalid" marker.
///
/// The table has all 256 entries so that a lookup by any [`u8`] value is
/// branchless, even on safe Rust.
///
/// # Performance Analysis
///
/// The original, branch-based `base64_index()` implementation
/// (now [`base64_index_simple()`]) did not perform well.  After profiling,
/// we found that it caused over 90% of parser-related branch misses.
///
/// Replacing it with the branchless, table-based [`base64_index()`]
/// significantly improved the parser performance.
const BASE64_REV_TABLE_U8: [u8; 256] = {
    // Start with every byte marked as invalid, then fill in the 64 alphabets.
    let mut table = [0x40u8; 256];
    // `as usize` is used for indexing below because trait-based conversions
    // are not available in constant evaluation; `u8` to `usize` is lossless.
    let mut letter = 0u8;
    while letter < 26 {
        // 'A'..='Z' occupy indices 0..=25 and 'a'..='z' occupy 26..=51.
        table[(b'A' + letter) as usize] = letter;
        table[(b'a' + letter) as usize] = letter + 26;
        letter += 1;
    }
    let mut digit = 0u8;
    while digit < 10 {
        // '0'..='9' occupy indices 52..=61.
        table[(b'0' + digit) as usize] = digit + 52;
        digit += 1;
    }
    table[b'+' as usize] = 62;
    table[b'/' as usize] = 63;
    table
};

/// Sentinel index value meaning "not a Base64 alphabet".
///
/// Valid Base64 indices are `0..=63` (one per entry of
/// [`BASE64_TABLE_U8`]), so this value (`0x40`) never collides with
/// the index of a valid character.
pub(crate) const BASE64_INVALID: u8 = 64;

/// Looks up the Base64 index value corresponding to the byte `ch`.
///
/// Returns the index of `ch` when it is a valid Base64 alphabet.
/// Otherwise, [`BASE64_INVALID`] is returned.
///
/// # Performance Analysis
///
/// Because [`BASE64_REV_TABLE_U8`] has an entry for every possible value
/// of [`u8`], the index can never be out of range and bound checking
/// will not be performed on optimized settings.
#[inline]
pub(crate) fn base64_index(ch: u8) -> u8 {
    let slot = usize::from(ch);
    BASE64_REV_TABLE_U8[slot] // grcov-excl-br-line:ARRAY
}

/// Computes the Base64 index value of the byte `ch` using plain branches.
///
/// Returns [`Some`] with the index (`0..=63`) when `ch` is a valid Base64
/// alphabet and [`None`] otherwise.
///
/// This function is only compiled for tests and documentation.
#[cfg(any(test, doc))]
#[inline]
fn base64_index_simple(ch: u8) -> Option<u8> {
    // Each alphabet class is contiguous in ASCII, so the index is the
    // offset within the class plus the index at which the class begins.
    let index = if ch.is_ascii_uppercase() {
        ch - b'A'
    } else if ch.is_ascii_lowercase() {
        ch - b'a' + 26
    } else if ch.is_ascii_digit() {
        ch - b'0' + 52
    } else if ch == b'+' {
        62
    } else if ch == b'/' {
        63
    } else {
        return None;
    };
    Some(index)
}

mod tests;