// succinctly 0.7.0
//
// High-performance succinct data structures for Rust
// Documentation
//! PFSM (Parallel Finite State Machine) tables for JSON parsing.
//!
//! Auto-generated by: src/bin/generate_pfsm_tables.rs
//! Based on the state machine from haskellworks succinct library
//!
//! Table layout:
//!   Each entry is a 32-bit word containing 4 bytes (one per state)
//!   Byte offset 0: InJson state
//!   Byte offset 1: InString state
//!   Byte offset 2: InEscape state
//!   Byte offset 3: InValue state
//!
//! Phi bits encoding (3 bits, matching src/json/standard.rs):
//!   Bit 0 (0b001): BP close - marks closing brackets/braces
//!   Bit 1 (0b010): BP open - marks opening brackets/braces
//!   Bit 2 (0b100): IB (Interest Bit) - marks structural chars and value starts

/// Next-state lookup, indexed by input byte.
///
/// Every `u32` packs one successor state per current state, one byte each:
/// bits 0-7 apply when the machine is in state 0 (InJson), bits 8-15 in
/// state 1 (InString), bits 16-23 in state 2 (InEscape) and bits 24-31 in
/// state 3 (InValue). The stored byte is the numeric value of the successor.
///
/// Only four distinct words occur in the table:
///
/// * `"`  - `0x00010001`: InJson → InString, InString → InJson,
///   InEscape → InString, InValue → InJson.
/// * `\`  - `0x00010200`: InString → InEscape, otherwise as the default word.
/// * ASCII letters, digits, `+`, `-`, `.` - `0x03010103`: InJson and InValue
///   both go to InValue; InString and InEscape go to InString.
/// * every other byte (controls, whitespace, punctuation, bytes >= 0x80) -
///   `0x00010100`: InJson and InValue go to InJson; InString and InEscape go
///   to InString.
///
/// NOTE(review): the module header attributes these tables to
/// `src/bin/generate_pfsm_tables.rs`; confirm the generator encodes the same
/// rules before regenerating over this definition.
pub const TRANSITION_TABLE: [u32; 256] = {
    const OTHER: u32 = 0x0001_0100;
    const QUOTE: u32 = 0x0001_0001;
    const BACKSLASH: u32 = 0x0001_0200;
    const VALUE_CHAR: u32 = 0x0301_0103;

    let mut table = [OTHER; 256];
    let mut byte = 0usize;
    // `while` rather than `for`: iterator-based loops are not usable in
    // constant evaluation.
    while byte < 256 {
        // `byte < 256`, so the narrowing cast is lossless.
        table[byte] = match byte as u8 {
            b'"' => QUOTE,
            b'\\' => BACKSLASH,
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => VALUE_CHAR,
            _ => OTHER,
        };
        byte += 1;
    }
    table
};

/// Output-bit lookup, indexed by input byte.
///
/// Every `u32` packs one output byte per current state, in the same lane
/// order as the transition table: bits 0-7 for state 0 (InJson), bits 8-15
/// for state 1 (InString), bits 16-23 for state 2 (InEscape), bits 24-31 for
/// state 3 (InValue). Within a lane, bit 0 is BP close, bit 1 is BP open and
/// bit 2 is the interest bit (IB).
///
/// Only four distinct words occur in the table:
///
/// * `"`, ASCII letters, digits, `+`, `-`, `.` - `0x00000007`: all three
///   bits are emitted when the current state is InJson, nothing otherwise.
/// * `[` and `{` - `0x06000006`: IB + BP open in InJson and InValue.
/// * `]` and `}` - `0x01000001`: BP close in InJson and InValue.
/// * every other byte - `0`: no output in any state.
///
/// NOTE(review): the module header attributes these tables to
/// `src/bin/generate_pfsm_tables.rs`; confirm the generator encodes the same
/// rules before regenerating over this definition.
pub const PHI_TABLE: [u32; 256] = {
    const SILENT: u32 = 0x0000_0000;
    const LEAF_START: u32 = 0x0000_0007;
    const OPEN: u32 = 0x0600_0006;
    const CLOSE: u32 = 0x0100_0001;

    let mut table = [SILENT; 256];
    let mut byte = 0usize;
    // `while` rather than `for`: iterator-based loops are not usable in
    // constant evaluation.
    while byte < 256 {
        // `byte < 256`, so the narrowing cast is lossless.
        table[byte] = match byte as u8 {
            b'[' | b'{' => OPEN,
            b']' | b'}' => CLOSE,
            b'"' | b'+' | b'-' | b'.' => LEAF_START,
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => LEAF_START,
            _ => SILENT,
        };
        byte += 1;
    }
    table
};

/// Parser state, numbered so that the discriminant doubles as the index of
/// the byte lane to read from a packed table word.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PfsmState {
    /// State 0 - lowest byte lane (bits 0-7).
    InJson = 0,
    /// State 1 - byte lane at bits 8-15.
    InString = 1,
    /// State 2 - byte lane at bits 16-23.
    InEscape = 2,
    /// State 3 - highest byte lane (bits 24-31).
    InValue = 3,
}

impl PfsmState {
    /// Right-shift, in bits, that brings this state's byte lane down to the
    /// low byte of a table word: 0, 8, 16 or 24.
    #[inline]
    pub const fn byte_offset(self) -> u32 {
        (self as u32) << 3
    }

    /// Decode the successor state stored for `current_state` in a
    /// transition-table word.
    ///
    /// Lane values 1, 2 and 3 map to `InString`, `InEscape` and `InValue`.
    /// Zero and any value that is not a valid discriminant both yield
    /// `InJson`.
    #[inline]
    pub const fn extract_next_state(table_entry: u32, current_state: PfsmState) -> PfsmState {
        // Lane selection is the same as for phi words, so reuse it.
        match Self::extract_phi(table_entry, current_state) {
            1 => Self::InString,
            2 => Self::InEscape,
            3 => Self::InValue,
            // 0 is the real InJson encoding; anything above 3 is not expected.
            _ => Self::InJson,
        }
    }

    /// Read the output byte stored for `current_state` in a phi-table word.
    ///
    /// The whole lane is returned unmasked beyond its 8 bits.
    #[inline]
    pub const fn extract_phi(table_entry: u32, current_state: PfsmState) -> u8 {
        // Little-endian byte `i` of a u32 is `(word >> 8*i) & 0xFF`, which is
        // exactly the lane for the state with discriminant `i`.
        let lanes = table_entry.to_le_bytes();
        lanes[current_state as usize]
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Each state selects its own byte lane, so the shifts step by 8 bits.
    #[test]
    fn test_state_encoding() {
        assert_eq!(PfsmState::InJson.byte_offset(), 0);
        assert_eq!(PfsmState::InString.byte_offset(), 8);
        assert_eq!(PfsmState::InEscape.byte_offset(), 16);
        assert_eq!(PfsmState::InValue.byte_offset(), 24);
    }

    /// Spot-check two transitions through the packed table.
    #[test]
    fn test_table_lookups() {
        // Quote (") in InJson should transition to InString
        let byte = b'"';
        let entry = TRANSITION_TABLE[byte as usize];
        let next = PfsmState::extract_next_state(entry, PfsmState::InJson);
        assert_eq!(next, PfsmState::InString);

        // Backslash (\) in InString should transition to InEscape
        let byte = b'\\';
        let entry = TRANSITION_TABLE[byte as usize];
        let next = PfsmState::extract_next_state(entry, PfsmState::InString);
        assert_eq!(next, PfsmState::InEscape);
    }

    /// Check the phi bit layout against the encoding in the module header:
    /// bit 0 = BP close (CL), bit 1 = BP open (OP), bit 2 = interest bit (IB).
    #[test]
    fn test_phi_extraction() {
        // Open bracket ([) should have CL=0, OP=1, IB=1
        let byte = b'[';
        let phi_entry = PHI_TABLE[byte as usize];
        let phi = PfsmState::extract_phi(phi_entry, PfsmState::InJson);

        // Verify bit layout (bits 0, 1, 2 = CL, OP, IB)
        assert_eq!(phi, 0x06); // Binary: 00000110
        assert_eq!(phi & (1 << 0), 0, "CL bit should NOT be set for [");
        assert_eq!(phi & (1 << 1), 1 << 1, "OP bit should be set for [");
        assert_eq!(phi & (1 << 2), 1 << 2, "IB bit should be set for [");

        // Close bracket (]) is the mirror case: CL=1, OP=0, IB=0
        let phi = PfsmState::extract_phi(PHI_TABLE[b']' as usize], PfsmState::InJson);
        assert_eq!(phi, 0x01); // Binary: 00000001
        assert_eq!(phi & (1 << 0), 1 << 0, "CL bit should be set for ]");
        assert_eq!(phi & (1 << 1), 0, "OP bit should NOT be set for ]");
        assert_eq!(phi & (1 << 2), 0, "IB bit should NOT be set for ]");
    }
}