matchkit 0.1.1

Vocabulary types for multi-pattern matching — Match struct, Matcher trait, shared errors
Documentation
//! # matchkit — vocabulary types for multi-pattern matching
//!
//! `matchkit` provides the shared types that every crate in the Santh
//! performance ecosystem depends on: the [`Match`] struct, the [`Matcher`]
//! and [`BlockMatcher`] traits, and common error definitions.
//!
//! By isolating these types into a zero-dependency vocabulary crate,
//! consumer crates like `simdsieve`, `warpstate`, `warpsearch`, and
//! `warpgrep` can all agree on a single match representation without
//! pulling in heavyweight GPU or regex dependencies.
//!
//! # Quick Start
//!
//! ```rust
//! use matchkit::{Match, MatchSet};
//!
//! let mut set = MatchSet::new();
//! set.insert(Match::from_parts(0, 10, 18));
//! set.insert(Match::from_parts(1, 15, 20));
//! set.merge_overlapping();
//!
//! assert_eq!(set.len(), 1);
//! assert_eq!(set.as_slice()[0].end, 20);
//! ```

#![warn(missing_docs, clippy::pedantic)]
#![deny(unsafe_code)]
#![cfg_attr(
    not(test),
    deny(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::todo,
        clippy::unimplemented,
        clippy::panic
    )
)]
#![allow(
    clippy::module_name_repetitions,
    clippy::must_use_candidate,
    clippy::doc_markdown
)]

/// Shared error types for all matching backends.
pub mod error;
/// Internal implementation of the match set type.
#[doc(hidden)]
pub mod match_set;
/// Internal implementation of the match and GPU match types.
#[doc(hidden)]
pub mod match_type;
/// Trait definitions for pattern matching backends.
pub mod matcher;

/// Re-export of the universal error type.
pub use error::Error;
/// Re-export of the result type alias.
pub use error::Result;
/// Re-export of the sorted, deduplicated match collection.
pub use match_set::MatchSet;
/// Re-export of the GPU-internal match representation.
pub use match_type::GpuMatch;
/// Re-export of the match result struct.
pub use match_type::Match;
/// Re-export of the block-based matcher trait.
pub use matcher::BlockMatcher;
/// Re-export of the boxed matcher type alias.
pub use matcher::BoxedMatcher;
/// Re-export of the general matcher trait.
pub use matcher::Matcher;

#[cfg(test)]
mod lib_tests {
    use super::*;

    #[test]
    fn match_from_parts() {
        let m = Match::from_parts(42, 10, 20);
        assert_eq!(m.pattern_id, 42);
        assert_eq!(m.start, 10);
        assert_eq!(m.end, 20);
    }

    #[test]
    fn match_ordering() {
        let a = Match::from_parts(0, 5, 10);
        let b = Match::from_parts(0, 10, 15);
        assert!(a < b);
    }

    #[test]
    fn match_equality() {
        let a = Match::from_parts(1, 5, 10);
        let b = Match::from_parts(1, 5, 10);
        assert_eq!(a, b);
    }

    #[test]
    fn match_set_new_empty() {
        let set = MatchSet::new();
        assert!(set.is_empty());
        assert_eq!(set.len(), 0);
    }

    #[test]
    fn match_set_insert_and_len() {
        let mut set = MatchSet::new();
        set.insert(Match::from_parts(0, 0, 5));
        set.insert(Match::from_parts(1, 10, 15));
        assert_eq!(set.len(), 2);
    }

    #[test]
    fn match_set_merge_overlapping() {
        let mut set = MatchSet::new();
        set.insert(Match::from_parts(0, 0, 10));
        set.insert(Match::from_parts(0, 5, 15));
        set.merge_overlapping();
        assert_eq!(set.len(), 1);
        assert_eq!(set.as_slice()[0].end, 15);
    }

    #[test]
    fn match_set_merge_non_overlapping() {
        let mut set = MatchSet::new();
        set.insert(Match::from_parts(0, 0, 5));
        set.insert(Match::from_parts(0, 10, 15));
        set.merge_overlapping();
        assert_eq!(set.len(), 2, "non-overlapping should not merge");
    }

    #[test]
    fn match_set_merge_adjacent() {
        let mut set = MatchSet::new();
        set.insert(Match::from_parts(0, 0, 10));
        set.insert(Match::from_parts(0, 10, 20));
        set.merge_overlapping();
        // Adjacent matches (end == start) are now coalesced to prevent fragmented findings.
        assert_eq!(set.len(), 1);
        assert_eq!(set.as_slice()[0], Match::from_parts(0, 0, 20));
    }

    #[test]
    fn match_zero_length() {
        let m = Match::from_parts(0, 5, 5);
        assert_eq!(m.start, m.end);
    }

    #[test]
    fn match_set_sorted_after_insert() {
        let mut set = MatchSet::new();
        set.insert(Match::from_parts(0, 20, 25));
        set.insert(Match::from_parts(0, 5, 10));
        set.insert(Match::from_parts(0, 10, 15));
        let slice = set.as_slice();
        for window in slice.windows(2) {
            assert!(
                window[0].start <= window[1].start,
                "matches should be sorted by start"
            );
        }
    }

    #[test]
    fn match_field_offsets_are_gpu_compatible() {
        assert_eq!(std::mem::offset_of!(Match, pattern_id), 0);
        assert_eq!(std::mem::offset_of!(Match, start), 4);
        assert_eq!(std::mem::offset_of!(Match, end), 8);
        assert_eq!(std::mem::offset_of!(Match, padding), 12);
    }

    #[test]
    fn match_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<Match>();
    }

    #[test]
    fn match_roundtrips_to_gpumatch() {
        let original = Match::from_parts_with_padding(7, 100, 200, 42);
        let gpu: GpuMatch = original.into();
        let roundtrip: Match = gpu.into();
        assert_eq!(roundtrip, original);
    }

    #[test]
    fn match_hash_ignores_padding() {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        fn hash(m: &Match) -> u64 {
            let mut hasher = DefaultHasher::new();
            m.hash(&mut hasher);
            hasher.finish()
        }

        let a = Match::from_parts_with_padding(1, 10, 20, 0);
        let b = Match::from_parts_with_padding(1, 10, 20, 99);
        assert_eq!(hash(&a), hash(&b), "hash must ignore padding like PartialEq");
    }
}