Skip to main content

neek_core/
lib.rs

1//! Fuzzy matching and scoring engine.
2//!
3//! # Quick start
4//!
5//! ```rust
6//! use neek_core::{has_match, score, score_positions};
7//!
8//! let needle = b"src";
9//! let haystack = b"src/main.rs";
10//!
11//! assert!(has_match(needle, haystack));
12//!
13//! let s = score(needle, haystack);
14//! println!("score: {s}");
15//!
16//! let mut positions = vec![0usize; needle.len()];
17//! let s2 = score_positions(needle, haystack, &mut positions);
18//! assert_eq!(s, s2);
19//! println!("matched at positions: {:?}", positions);
20//! ```
21//!
22//! # Configurable scoring
23//!
24//! The default free functions use [`config::Prefer::Contiguous`], which
25//! rewards consecutive character runs (Gotoh DP with balanced gap penalties).
26//!
27//! To prefer prefix or suffix matches, construct a [`Scorer`] and use its
28//! methods:
29//!
30//! ```rust
31//! use neek_core::{Scorer, config::Prefer};
32//!
33//! let scorer = Scorer::new(Prefer::Prefix);
34//! // "build" starts with 'b'; "table" has a contiguous "bl" mid-string.
35//! // Prefix-biased scoring ranks "build" higher.
36//! assert!(scorer.score(b"bl", b"build") > scorer.score(b"bl", b"table"));
37//! ```
38
39#![no_std]
40extern crate alloc;
41
42mod algorithm;
43mod bonus;
44
45pub mod config;
46
47#[cfg(test)] mod tests;
48
49pub use algorithm::{has_match, score, score_positions};
50pub use config::Prefer;
51
52/// Numeric type used for all scores.
53pub type Score = f64;
54
55/// Returned when a candidate is an exact (case-insensitive) match for the
56/// needle, or when the needle is empty and all candidates match trivially.
57pub const SCORE_MAX: Score = f64::INFINITY;
58
59/// Returned when a candidate cannot be scored (too long, no match, or the
60/// needle is empty at the `score` call site).
61pub const SCORE_MIN: Score = f64::NEG_INFINITY;
62
63/// Maximum haystack/needle length that the DP scorer handles. Candidates
64/// longer than this are given `SCORE_MIN` by `score` / `score_positions`.
65pub const MATCH_MAX_LEN: usize = 1024;
66
67/// A reusable scorer with a fixed [`Prefer`] setting.
68///
69/// Construct once and call [`Scorer::score`] / [`Scorer::score_positions`] for
70/// each candidate.  All three algorithms share the same Gotoh DP core; only
71/// the gap-penalty constants differ.
72///
73/// The free functions [`score`] and [`score_positions`] are thin wrappers
74/// around `Scorer::default()` (i.e. [`Prefer::Contiguous`]).
75///
76/// # Examples
77///
78/// ```rust
79/// use neek_core::{Scorer, config::Prefer};
80///
81/// // Prefer matches that start early in each candidate.
82/// let scorer = Scorer::new(Prefer::Prefix);
83/// assert!(scorer.score(b"bl", b"build") > scorer.score(b"bl", b"table"));
84///
85/// // Prefer matches that end late in each candidate.
86/// let scorer = Scorer::new(Prefer::Suffix);
87/// assert!(scorer.score(b"le", b"cable") > scorer.score(b"le", b"label"));
88/// ```
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
90pub struct Scorer {
91  prefer: Prefer,
92}
93
94impl Scorer {
95  /// Create a new scorer with the given preference.
96  #[inline]
97  pub fn new(prefer: Prefer) -> Self {
98    Self { prefer }
99  }
100
101  /// Return this scorer's [`Prefer`] setting.
102  #[inline]
103  pub fn prefer(self) -> Prefer {
104    self.prefer
105  }
106
107  /// Compute the fuzzy match score of `needle` against `haystack` using this
108  /// scorer's preference.
109  ///
110  /// Returns [`SCORE_MAX`] for an exact (case-insensitive) match, [`SCORE_MIN`]
111  /// when there is no match or the inputs are out of range, and a finite score
112  /// otherwise.
113  #[inline]
114  pub fn score(self, needle: &[u8], haystack: &[u8]) -> Score {
115    algorithm::score_raw(
116      needle,
117      haystack,
118      config::GapConfig::from_prefer(self.prefer),
119    )
120  }
121
122  /// Compute the fuzzy match score and fill `positions` with the haystack
123  /// indices of the matched needle characters.
124  ///
125  /// `positions` must have length >= `needle.len()`. On return, `positions[i]`
126  /// is the haystack index of the character matched to `needle[i]`, in
127  /// strictly increasing order.
128  ///
129  /// Returns the same score as [`Scorer::score`] would for the same inputs.
130  ///
131  /// ## Allocation
132  ///
133  /// Allocates two `Vec<MaybeUninit<f64>>` of size
134  /// `needle.len() × MATCH_MAX_LEN` for the full DP matrices required by
135  /// backtracking.
136  #[inline]
137  pub fn score_positions(
138    self,
139    needle: &[u8],
140    haystack: &[u8],
141    positions: &mut [usize],
142  ) -> Score {
143    algorithm::score_positions_raw(
144      needle,
145      haystack,
146      positions,
147      config::GapConfig::from_prefer(self.prefer),
148    )
149  }
150}