neek-core 1.0.0

Core fuzzy matching and scoring engine
Documentation
//! Fuzzy matching and scoring engine.
//!
//! # Quick start
//!
//! ```rust
//! use neek_core::{has_match, score, score_positions};
//!
//! let needle = b"src";
//! let haystack = b"src/main.rs";
//!
//! assert!(has_match(needle, haystack));
//!
//! let s = score(needle, haystack);
//! println!("score: {s}");
//!
//! let mut positions = vec![0usize; needle.len()];
//! let s2 = score_positions(needle, haystack, &mut positions);
//! assert_eq!(s, s2);
//! println!("matched at positions: {:?}", positions);
//! ```
//!
//! # Configurable scoring
//!
//! The default free functions use [`config::Prefer::Contiguous`], which
//! rewards consecutive character runs (Gotoh DP with balanced gap penalties).
//!
//! To prefer prefix or suffix matches, construct a [`Scorer`] and use its
//! methods:
//!
//! ```rust
//! use neek_core::{Scorer, config::Prefer};
//!
//! let scorer = Scorer::new(Prefer::Prefix);
//! // "build" starts with 'b'; "table" has a contiguous "bl" mid-string.
//! // Prefix-biased scoring ranks "build" higher.
//! assert!(scorer.score(b"bl", b"build") > scorer.score(b"bl", b"table"));
//! ```

#![no_std]
extern crate alloc;

mod algorithm;
mod bonus;

pub mod config;

#[cfg(test)] mod tests;

pub use algorithm::{has_match, score, score_positions};
pub use config::Prefer;

/// Numeric type used for all scores.
pub type Score = f64;

/// Returned when a candidate is an exact (case-insensitive) match for the
/// needle, or when the needle is empty and all candidates match trivially.
pub const SCORE_MAX: Score = f64::INFINITY;

/// Returned when a candidate cannot be scored (too long, no match, or the
/// needle is empty at the `score` call site).
pub const SCORE_MIN: Score = f64::NEG_INFINITY;

/// Maximum haystack/needle length that the DP scorer handles. Candidates
/// longer than this are given `SCORE_MIN` by `score` / `score_positions`.
pub const MATCH_MAX_LEN: usize = 1024;

/// A reusable scorer with a fixed [`Prefer`] setting.
///
/// Construct once and call [`Scorer::score`] / [`Scorer::score_positions`] for
/// each candidate.  All three algorithms share the same Gotoh DP core; only
/// the gap-penalty constants differ.
///
/// The free functions [`score`] and [`score_positions`] are thin wrappers
/// around `Scorer::default()` (i.e. [`Prefer::Contiguous`]).
///
/// # Examples
///
/// ```rust
/// use neek_core::{Scorer, config::Prefer};
///
/// // Prefer matches that start early in each candidate.
/// let scorer = Scorer::new(Prefer::Prefix);
/// assert!(scorer.score(b"bl", b"build") > scorer.score(b"bl", b"table"));
///
/// // Prefer matches that end late in each candidate.
/// let scorer = Scorer::new(Prefer::Suffix);
/// assert!(scorer.score(b"le", b"cable") > scorer.score(b"le", b"label"));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Scorer {
  prefer: Prefer,
}

impl Scorer {
  /// Create a new scorer with the given preference.
  #[inline]
  pub fn new(prefer: Prefer) -> Self {
    Self { prefer }
  }

  /// Return this scorer's [`Prefer`] setting.
  #[inline]
  pub fn prefer(self) -> Prefer {
    self.prefer
  }

  /// Compute the fuzzy match score of `needle` against `haystack` using this
  /// scorer's preference.
  ///
  /// Returns [`SCORE_MAX`] for an exact (case-insensitive) match, [`SCORE_MIN`]
  /// when there is no match or the inputs are out of range, and a finite score
  /// otherwise.
  #[inline]
  pub fn score(self, needle: &[u8], haystack: &[u8]) -> Score {
    algorithm::score_raw(
      needle,
      haystack,
      config::GapConfig::from_prefer(self.prefer),
    )
  }

  /// Compute the fuzzy match score and fill `positions` with the haystack
  /// indices of the matched needle characters.
  ///
  /// `positions` must have length >= `needle.len()`. On return, `positions[i]`
  /// is the haystack index of the character matched to `needle[i]`, in
  /// strictly increasing order.
  ///
  /// Returns the same score as [`Scorer::score`] would for the same inputs.
  ///
  /// ## Allocation
  ///
  /// Allocates two `Vec<MaybeUninit<f64>>` of size
  /// `needle.len() × MATCH_MAX_LEN` for the full DP matrices required by
  /// backtracking.
  #[inline]
  pub fn score_positions(
    self,
    needle: &[u8],
    haystack: &[u8],
    positions: &mut [usize],
  ) -> Score {
    algorithm::score_positions_raw(
      needle,
      haystack,
      positions,
      config::GapConfig::from_prefer(self.prefer),
    )
  }
}