// zerotrie 0.2.4
//
// A data structure that efficiently maps strings to integers.
// Documentation
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Options for building and reading from a [`ZeroTrie`](crate::ZeroTrie).
//!
//! These options are internal to the crate. A small selection of them
//! is exposed by way of the different public types of this crate.

/// Selects whether branch nodes of a [`ZeroTrie`](crate::ZeroTrie) may use
/// the perfect hash function.
#[derive(Clone, Copy)]
pub(crate) enum PhfMode {
    /// Every branch node is resolved with binary search.
    BinaryOnly,
    /// Large branch nodes are resolved with the perfect hash function.
    UsePhf,
}

impl PhfMode {
    /// Returns the bit this mode contributes to a `u8` flags value:
    /// `0x1` for [`PhfMode::UsePhf`] and `0` for [`PhfMode::BinaryOnly`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::UsePhf => 1 << 0,
            Self::BinaryOnly => 0,
        }
    }
}

/// Selects whether a [`ZeroTrie`](crate::ZeroTrie) accepts non-ASCII data.
#[derive(Clone, Copy)]
pub(crate) enum AsciiMode {
    /// Only ASCII is accepted; encountering non-ASCII data is an error.
    AsciiOnly,
    /// All data is accepted; non-ASCII bytes are stored in span nodes.
    BinarySpans,
}

impl AsciiMode {
    /// Returns the bit this mode contributes to a `u8` flags value:
    /// `0x2` for [`AsciiMode::BinarySpans`] and `0` for [`AsciiMode::AsciiOnly`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::BinarySpans => 1 << 1,
            Self::AsciiOnly => 0,
        }
    }
}

/// Selects whether the capacity of a [`ZeroTrie`](crate::ZeroTrie) is limited.
#[derive(Clone, Copy)]
pub(crate) enum CapacityMode {
    /// Fail with an error if the trie requires a branch of more than 2^32 bytes.
    Normal,
    /// Build the trie without returning that error.
    Extended,
}

impl CapacityMode {
    /// Returns the bit this mode contributes to a `u8` flags value:
    /// `0x4` for [`CapacityMode::Extended`] and `0` for [`CapacityMode::Normal`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::Extended => 1 << 2,
            Self::Normal => 0,
        }
    }
}

/// Selects how strings with mixed ASCII case at a node, such as "abc" and "Abc",
/// are handled.
#[derive(Clone, Copy)]
pub(crate) enum CaseSensitivity {
    /// All strings are allowed and are sorted by byte value.
    Sensitive,
    /// Strings with different case are rejected, and strings are sorted as if
    /// `to_ascii_lowercase` is called.
    IgnoreCase,
}

impl CaseSensitivity {
    /// Returns the bit this mode contributes to a `u8` flags value:
    /// `0x8` for [`CaseSensitivity::IgnoreCase`] and `0` for
    /// [`CaseSensitivity::Sensitive`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::IgnoreCase => 1 << 3,
            Self::Sensitive => 0,
        }
    }
}

/// The full set of options for a [`ZeroTrie`](crate::ZeroTrie), holding one
/// value for each option enum in this module.
#[derive(Clone, Copy)]
pub(crate) struct ZeroTrieBuilderOptions {
    /// Whether large branch nodes use the perfect hash function.
    pub phf_mode: PhfMode,
    /// Whether non-ASCII data is supported.
    pub ascii_mode: AsciiMode,
    /// Whether a capacity limit is enforced.
    pub capacity_mode: CapacityMode,
    /// How strings with mixed ASCII case are handled.
    pub case_sensitivity: CaseSensitivity,
}

impl ZeroTrieBuilderOptions {
    /// Combines the four options into a single `u8` by OR-ing together the
    /// value returned by each field's `to_u8_flag`.
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    pub(crate) const fn to_u8_flags(self) -> u8 {
        let phf_bit = self.phf_mode.to_u8_flag();
        let ascii_bit = self.ascii_mode.to_u8_flag();
        let capacity_bit = self.capacity_mode.to_u8_flag();
        let case_bit = self.case_sensitivity.to_u8_flag();
        phf_bit | ascii_bit | capacity_bit | case_bit
    }
}

/// Associates a trie type with the fixed [`ZeroTrieBuilderOptions`] it uses.
pub(crate) trait ZeroTrieWithOptions {
    /// The option values for the implementing type.
    const OPTIONS: ZeroTrieBuilderOptions;
}

/// Every branch node uses binary search, and no span nodes are present.
impl<S> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S>
where
    S: ?Sized,
{
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Binary search only; the perfect hash function is not used.
        phf_mode: PhfMode::BinaryOnly,
        // ASCII only, hence no span nodes.
        ascii_mode: AsciiMode::AsciiOnly,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // Strings are compared by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}

impl<S> crate::ZeroTrieSimpleAscii<S>
where
    S: ?Sized,
{
    /// The `u8` flags value computed from this type's `OPTIONS`.
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
}

/// Every branch node uses binary search, and nodes are matched
/// case-insensitively.
impl<S> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S>
where
    S: ?Sized,
{
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Binary search only; the perfect hash function is not used.
        phf_mode: PhfMode::BinaryOnly,
        // ASCII only.
        ascii_mode: AsciiMode::AsciiOnly,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // ASCII case is ignored.
        case_sensitivity: CaseSensitivity::IgnoreCase,
    };
}

/// A branch node may use either binary search or the perfect hash function.
impl<S> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S>
where
    S: ?Sized,
{
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Large branch nodes use the perfect hash function.
        phf_mode: PhfMode::UsePhf,
        // Non-ASCII bytes are supported through span nodes.
        ascii_mode: AsciiMode::BinarySpans,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // Strings are compared by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}

/// The capacity limit is not enforced for this type.
impl<S> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S>
where
    S: ?Sized,
{
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Large branch nodes use the perfect hash function.
        phf_mode: PhfMode::UsePhf,
        // Non-ASCII bytes are supported through span nodes.
        ascii_mode: AsciiMode::BinarySpans,
        // Built without the capacity error.
        capacity_mode: CapacityMode::Extended,
        // Strings are compared by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}