//! vyre-conform 0.1.0
//!
//! Conformance suite for vyre backends — proves byte-identical output to the CPU reference.
use crate::spec::EngineInvariant;
use crate::spec::EngineSpec;
use crate::spec::match_ops::dfa_scan::{scan_dfa_cpu, DfaSpec};

/// Unit tests and shared input fixtures for the `engine.dfa` spec.
///
/// The helpers in this module serialise a DFA plus the bytes to scan into the flat buffer that
/// is handed to `cpu_fn`: four little-endian `u32` header words (state count, accept-pair count,
/// pattern-length count, file length), followed by the transition table, the `(state, pattern)`
/// accept pairs and the pattern lengths (all little-endian `u32`), and finally the raw file bytes.
#[cfg(test)]
pub(crate) mod tests {
    // `EngineInvariant` is imported by the parent module only; a child module does not inherit
    // its parent's `use` declarations, so it has to be re-imported here.
    use super::{cpu_fn, spec, EngineInvariant};

    /// Transition-table entries per state; the fixtures index the table as `state * 256 + byte`.
    const ROW: usize = 256;

    /// Bytes per match record that the tests below expect `cpu_fn` to emit.
    const MATCH_RECORD_BYTES: usize = 12;

    /// The spec advertises the expected id and all three engine invariants.
    #[test]
    fn spec_has_correct_invariants() {
        let s = spec();
        assert_eq!(s.id, "engine.dfa");
        assert!(s.invariants.contains(&EngineInvariant::Deterministic));
        assert!(s.invariants.contains(&EngineInvariant::NoOutputLost));
        assert!(s.invariants.contains(&EngineInvariant::OutputOrdered));
    }

    /// Running `cpu_fn` twice on the same buffer yields identical output.
    #[test]
    fn cpu_fn_is_deterministic_on_hand_crafted_input() {
        // NOTE(review): `build_dfa_input` uses an all-zero transition table, so nothing ever
        // transitions into state 1; presumably no match is produced and this test only checks
        // repeatability — confirm against `cpu_fn`.
        let input = build_dfa_input(
            2,
            &[(1, 0)], // state 1 accepts pattern 0
            &[1],      // pattern 0 has length 1
            b"aba",
        );
        let out1 = cpu_fn(&input);
        let out2 = cpu_fn(&input);
        assert_eq!(out1, out2);
    }

    /// Matches for "ab" and "cd" in `b"xxabxxcdxx"` are reported in ascending start order.
    #[test]
    fn cpu_fn_output_is_ordered() {
        // Same two-pattern fixture that other invariant tests in the crate share.
        let input = build_dfa_input_for_invariants();
        let out = cpu_fn(&input);
        // Two matches -> two 12-byte records.
        assert_eq!(out.len(), 2 * MATCH_RECORD_BYTES);
        // Bytes 4..8 of each record are read as the little-endian match start offset.
        let starts: Vec<u32> = out
            .chunks_exact(MATCH_RECORD_BYTES)
            .map(|c| u32::from_le_bytes([c[4], c[5], c[6], c[7]]))
            .collect();
        assert_eq!(starts, vec![2, 6]);
        assert!(starts.windows(2).all(|w| w[0] <= w[1]));
    }

    /// Every accepting step produces a record: three consecutive `a`s give three matches.
    #[test]
    fn cpu_fn_no_output_lost() {
        // DFA that accepts every 'a' as pattern 0.
        let mut transitions = vec![0u32; 2 * ROW];
        transitions[b'a' as usize] = 1;
        transitions[ROW + b'a' as usize] = 1; // loop back on 'a'
        let input = build_dfa_input_from_table(&transitions, 2, &[(1, 0)], &[1], b"aaa");
        let out = cpu_fn(&input);
        // Expected matches: 0..1, 1..2 and 2..3.
        assert_eq!(out.len() % MATCH_RECORD_BYTES, 0);
        let count = out.len() / MATCH_RECORD_BYTES;
        assert_eq!(count, 3);
    }

    /// Serialises a DFA whose transition table is all zeros (`state_count * 256` entries).
    fn build_dfa_input(
        state_count: usize,
        accept_states: &[(u32, u32)],
        pattern_lengths: &[u32],
        file_bytes: &[u8],
    ) -> Vec<u8> {
        let transitions = vec![0u32; state_count * ROW];
        build_dfa_input_from_table(
            &transitions,
            state_count,
            accept_states,
            pattern_lengths,
            file_bytes,
        )
    }

    /// Converts a length to a `u32` header word, panicking instead of silently truncating.
    fn header_word(len: usize, what: &str) -> u32 {
        assert!(
            len <= u32::MAX as usize,
            "{} ({}) does not fit in a u32 header field",
            what,
            len
        );
        len as u32
    }

    /// Serialises an explicit transition table, accept pairs, pattern lengths and file bytes.
    ///
    /// `accept_states` holds `(state, pattern)` pairs. All integers are written little-endian;
    /// `file_bytes` is appended verbatim at the end.
    ///
    /// # Panics
    ///
    /// Panics if any of the four header counts does not fit in a `u32`.
    fn build_dfa_input_from_table(
        transitions: &[u32],
        state_count: usize,
        accept_states: &[(u32, u32)],
        pattern_lengths: &[u32],
        file_bytes: &[u8],
    ) -> Vec<u8> {
        let header = [
            header_word(state_count, "state_count"),
            header_word(accept_states.len(), "accept_states.len()"),
            header_word(pattern_lengths.len(), "pattern_lengths.len()"),
            header_word(file_bytes.len(), "file_bytes.len()"),
        ];

        // Every section except the trailing file bytes is made of 4-byte words.
        let word_count =
            header.len() + transitions.len() + 2 * accept_states.len() + pattern_lengths.len();
        let mut out = Vec::with_capacity(word_count * 4 + file_bytes.len());

        for word in header.iter().chain(transitions) {
            out.extend_from_slice(&word.to_le_bytes());
        }
        for &(state, pattern) in accept_states {
            out.extend_from_slice(&state.to_le_bytes());
            out.extend_from_slice(&pattern.to_le_bytes());
        }
        for length in pattern_lengths {
            out.extend_from_slice(&length.to_le_bytes());
        }
        out.extend_from_slice(file_bytes);
        out
    }

    /// Shared fixture: a five-state DFA for "ab" (pattern 0) and "cd" (pattern 1), scanning
    /// `b"xxabxxcdxx"`.
    #[inline]
    pub(crate) fn build_dfa_input_for_invariants() -> Vec<u8> {
        let mut transitions = vec![0u32; 5 * ROW];
        transitions[b'a' as usize] = 1;
        transitions[ROW + b'b' as usize] = 2;
        transitions[b'c' as usize] = 3;
        transitions[3 * ROW + b'd' as usize] = 4;
        build_dfa_input_from_table(&transitions, 5, &[(2, 0), (4, 1)], &[2, 2], b"xxabxxcdxx")
    }
}