neek-core 1.0.0

Core fuzzy matching and scoring engine
Documentation
extern crate alloc;

use alloc::{vec, vec::Vec};

use crate::{
  SCORE_MAX,
  SCORE_MIN,
  Scorer,
  config::{Prefer, *},
  has_match,
  score,
  score_positions,
};

const EPS: f64 = 1e-6;

fn approx_eq(a: f64, b: f64) -> bool {
  (a - b).abs() < EPS
}

// has_match

#[test]
fn exact_match_should_return_true() {
  assert!(has_match(b"a", b"a"));
}

#[test]
fn partial_match_should_return_true() {
  assert!(has_match(b"a", b"ab"));
  assert!(has_match(b"a", b"ba"));
}

#[test]
fn match_with_delimiters_in_between() {
  assert!(has_match(b"abc", b"a|b|c"));
}

#[test]
fn non_match_should_return_false() {
  assert!(!has_match(b"a", b""));
  assert!(!has_match(b"a", b"b"));
  assert!(!has_match(b"ass", b"tags"));
}

#[test]
fn empty_query_should_always_match() {
  assert!(has_match(b"", b""));
  assert!(has_match(b"", b"a"));
}

// score ordering preferences

#[test]
fn should_prefer_starts_of_words() {
  assert!(
    score(b"amor", b"app/models/order") > score(b"amor", b"app/models/zrder")
  );
}

#[test]
fn should_prefer_consecutive_letters() {
  assert!(score(b"amo", b"app/m/foo") < score(b"amo", b"app/models/foo"));
}

#[test]
fn should_prefer_contiguous_over_letter_following_period() {
  assert!(score(b"gemfil", b"Gemfile.lock") < score(b"gemfil", b"Gemfile"));
}

#[test]
fn should_prefer_shorter_matches() {
  assert!(score(b"abce", b"abcdef") > score(b"abce", b"abc de"));
  assert!(score(b"abc", b"    a b c ") > score(b"abc", b" a  b  c "));
  assert!(score(b"abc", b" a b c    ") > score(b"abc", b" a  b  c "));
}

#[test]
fn should_prefer_shorter_candidates() {
  assert!(score(b"test", b"tests") > score(b"test", b"testing"));
}

#[test]
fn should_prefer_start_of_candidate() {
  assert!(score(b"test", b"testing") > score(b"test", b"/testing"));
}

// score exact values

#[test]
fn score_exact_match() {
  assert_eq!(SCORE_MAX, score(b"abc", b"abc"));
  assert_eq!(SCORE_MAX, score(b"aBc", b"abC"));
}

#[test]
fn score_equal_length_mismatch_is_not_perfect() {
  assert_eq!(SCORE_MIN, score(b"abc", b"xyz"));
  assert_eq!(SCORE_MIN, score(b"a", b"b"));
}

#[test]
fn score_empty_query() {
  assert_eq!(SCORE_MIN, score(b"", b""));
  assert_eq!(SCORE_MIN, score(b"", b"a"));
  assert_eq!(SCORE_MIN, score(b"", b"bb"));
}

#[test]
fn score_gaps() {
  assert!(approx_eq(SCORE_GAP_LEADING, score(b"a", b"*a")));
  assert!(approx_eq(SCORE_GAP_LEADING * 2.0, score(b"a", b"*ba")));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING,
    score(b"a", b"**a*")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING * 2.0,
    score(b"a", b"**a**")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0
      + SCORE_MATCH_CONSECUTIVE
      + SCORE_GAP_TRAILING * 2.0,
    score(b"aa", b"**aa**")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING
      + SCORE_GAP_LEADING
      + SCORE_GAP_INNER
      + SCORE_GAP_TRAILING
      + SCORE_GAP_TRAILING,
    score(b"aa", b"**a*a**")
  ));
}

#[test]
fn score_consecutive() {
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE,
    score(b"aa", b"*aa")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE * 2.0,
    score(b"aaa", b"*aaa")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_CONSECUTIVE,
    score(b"aaa", b"*a*aa")
  ));
}

#[test]
fn score_slash() {
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_MATCH_SLASH,
    score(b"a", b"/a")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH,
    score(b"a", b"*/a")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH + SCORE_MATCH_CONSECUTIVE,
    score(b"aa", b"a/aa")
  ));
}

#[test]
fn score_capital() {
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_MATCH_CAPITAL,
    score(b"a", b"bA")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL,
    score(b"a", b"baA")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL + SCORE_MATCH_CONSECUTIVE,
    score(b"aa", b"baAa")
  ));
}

#[test]
fn score_dot() {
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_MATCH_DOT,
    score(b"a", b".a")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING * 3.0 + SCORE_MATCH_DOT,
    score(b"a", b"*a.a")
  ));
  assert!(approx_eq(
    SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_DOT,
    score(b"a", b"*a.a")
  ));
}

#[test]
fn score_long_string() {
  let s: Vec<u8> = vec![b'a'; 4096];
  assert_eq!(SCORE_MIN, score(b"aa", &s));
  assert_eq!(SCORE_MIN, score(&s, b"aa"));
  assert_eq!(SCORE_MIN, score(&s, &s));
}

// score_positions

#[test]
fn positions_consecutive() {
  let mut pos = [0usize; 3];
  score_positions(b"amo", b"app/models/foo", &mut pos);
  assert_eq!(0, pos[0]);
  assert_eq!(4, pos[1]);
  assert_eq!(5, pos[2]);
}

#[test]
fn positions_start_of_word() {
  let mut pos = [0usize; 4];
  score_positions(b"amor", b"app/models/order", &mut pos);
  assert_eq!(0, pos[0]);
  assert_eq!(4, pos[1]);
  assert_eq!(11, pos[2]);
  assert_eq!(12, pos[3]);
}

#[test]
fn positions_no_bonuses() {
  let mut pos = [0usize; 2];
  score_positions(b"as", b"tags", &mut pos);
  assert_eq!(1, pos[0]);
  assert_eq!(3, pos[1]);

  score_positions(b"as", b"examples.txt", &mut pos);
  assert_eq!(2, pos[0]);
  assert_eq!(7, pos[1]);
}

#[test]
fn positions_multiple_candidates_start_of_words() {
  let mut pos = [0usize; 3];
  score_positions(b"abc", b"a/a/b/c/c", &mut pos);
  assert_eq!(2, pos[0]);
  assert_eq!(4, pos[1]);
  assert_eq!(6, pos[2]);
}

#[test]
fn positions_exact_match() {
  let mut pos = [0usize; 3];
  score_positions(b"foo", b"foo", &mut pos);
  assert_eq!(0, pos[0]);
  assert_eq!(1, pos[1]);
  assert_eq!(2, pos[2]);
}

#[test]
fn positions_equal_length_mismatch_is_not_perfect() {
  let mut pos = [usize::MAX; 3];
  assert_eq!(SCORE_MIN, score_positions(b"abc", b"xyz", &mut pos));
  assert_eq!([usize::MAX; 3], pos);
}

// Scorer / Prefer

#[test]
fn scorer_default_matches_free_function() {
  let scorer = Scorer::default();
  let candidates: &[&[u8]] = &[
    b"build",
    b"bundle",
    b"table",
    b"app/models/order",
    b"Gemfile",
  ];
  for h in candidates {
    assert_eq!(
      score(b"bl", h),
      scorer.score(b"bl", h),
      "default Scorer should match free fn score() for {:?}",
      h
    );
  }
}

/// `Prefer::Prefix`: matching `bl` against candidates from the user's
/// example ("build bundle table") should rank the prefix-matching candidates
/// (`build`, `bundle`) above `table`, where `bl` appears contiguously but
/// not at the start.
#[test]
fn prefer_prefix_ranks_prefix_match_higher_than_contiguous() {
  let scorer = Scorer::new(Prefer::Prefix);
  let build = scorer.score(b"bl", b"build");
  let bundle = scorer.score(b"bl", b"bundle");
  let table = scorer.score(b"bl", b"table");

  assert!(
    build > table,
    "Prefix: build ({build}) should outscore table ({table})"
  );
  assert!(
    bundle > table,
    "Prefix: bundle ({bundle}) should outscore table ({table})"
  );
}

/// `Prefer::Contiguous` (default): `table` has a contiguous `bl`, so it
/// should outscore `build` where `b` and `l` are separated by two chars.
#[test]
fn prefer_contiguous_ranks_contiguous_match_higher_than_prefix() {
  let scorer = Scorer::new(Prefer::Contiguous);
  let build = scorer.score(b"bl", b"build");
  let table = scorer.score(b"bl", b"table");

  assert!(
    table > build,
    "Contiguous: table ({table}) should outscore build ({build})"
  );
}

/// `Prefer::Suffix`: a match ending at the very last character should
/// outscore one ending several characters before the end.
#[test]
fn prefer_suffix_ranks_end_match_higher_than_interior() {
  let scorer = Scorer::new(Prefer::Suffix);
  // "cable": c,a,b,l,e  - 'l'=3, 'e'=4 -> ends at last char (pos 4 of 5)
  // "label": l,a,b,e,l  - 'l'=0, 'e'=3 -> ends at pos 3 of 5 (one trailing)
  let cable = scorer.score(b"le", b"cable");
  let label = scorer.score(b"le", b"label");

  assert!(
    cable > label,
    "Suffix: cable ({cable}) should outscore label ({label})"
  );
}

/// `Prefer::Suffix` should NOT prefer a match that ends mid-string over one
/// that ends at the tail.
#[test]
fn prefer_suffix_exact_match_still_scores_max() {
  let scorer = Scorer::new(Prefer::Suffix);
  assert_eq!(SCORE_MAX, scorer.score(b"abc", b"abc"));
  assert_eq!(SCORE_MAX, scorer.score(b"aBc", b"abC"));
}

/// `Prefer::Prefix` still returns `SCORE_MAX` for exact matches.
#[test]
fn prefer_prefix_exact_match_still_scores_max() {
  let scorer = Scorer::new(Prefer::Prefix);
  assert_eq!(SCORE_MAX, scorer.score(b"foo", b"foo"));
}

/// `Prefer::Prefix` should prefer shorter candidates that begin with the
/// needle over longer ones where the match starts at the same position.
#[test]
fn prefer_prefix_prefers_shorter_prefix_match() {
  let scorer = Scorer::new(Prefer::Prefix);
  // "build" (5): 'b' at 0, 'l' at 3 → no trailing gap
  // "bundle" (6): 'b' at 0, 'l' at 4 → one trailing char
  let build = scorer.score(b"bl", b"build");
  let bundle = scorer.score(b"bl", b"bundle");
  assert!(
    build > bundle,
    "Prefix: build ({build}) should outscore bundle ({bundle})"
  );
}

/// `Scorer::score_positions` should agree with `Scorer::score`.
#[test]
fn scorer_score_positions_consistent_with_score() {
  let scorer = Scorer::new(Prefer::Prefix);
  let needle = b"bl";
  let haystack = b"build";
  let expected = scorer.score(needle, haystack);
  let mut pos = [0usize; 2];
  let actual = scorer.score_positions(needle, haystack, &mut pos);
  assert_eq!(expected, actual);
  // 'b' at 0, 'l' at 3 in "build"
  assert_eq!(pos[0], 0);
  assert_eq!(pos[1], 3);
}