use crate::crawlers::types::CrawledPackage;
/// How strongly a package matched a search query, strongest tier first.
///
/// Declaration order is load-bearing: the derived `Ord` ranks earlier
/// variants ahead of later ones, and that ranking is the primary sort key
/// for search results. "Full" tiers test the namespace-qualified name,
/// "Name" tiers test the bare package name; all tests ignore case.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum MatchType {
    /// The query equals the qualified name.
    ExactFull,
    /// The query equals the bare name.
    ExactName,
    /// The qualified name starts with the query.
    PrefixFull,
    /// The bare name starts with the query.
    PrefixName,
    /// The qualified name contains the query.
    ContainsFull,
    /// The bare name contains the query.
    ContainsName,
}
/// A package that matched the query, paired with the tier of its match.
///
/// Used as the intermediate record while search results are ranked.
struct MatchResult {
    /// Tier assigned to this hit; results are ordered by it first.
    match_type: MatchType,
    /// Owned copy of the package that matched.
    package: CrawledPackage,
}
/// Returns the name a package is matched and ordered by.
///
/// A package with a namespace yields `namespace/name`; one without yields a
/// copy of its bare `name`.
fn get_full_name(pkg: &CrawledPackage) -> String {
    pkg.namespace
        .as_ref()
        .map_or_else(|| pkg.name.clone(), |ns| format!("{ns}/{}", pkg.name))
}
/// Classifies how `query` matches `pkg`, or returns `None` when it does not.
///
/// Comparison is case-insensitive. Tiers are tried strongest first (exact,
/// then prefix, then substring), and within each strength the qualified name
/// from `get_full_name` is tested before the bare `name`.
///
/// The qualified name always ends with the bare name, so a substring hit on
/// the bare name is already caught by the qualified-name substring test. The
/// last arm is kept as a fallback and is not expected to fire.
fn get_match_type(pkg: &CrawledPackage, query: &str) -> Option<MatchType> {
    let needle = query.to_lowercase();
    let qualified = get_full_name(pkg).to_lowercase();
    let bare = pkg.name.to_lowercase();

    let tier = if qualified == needle {
        MatchType::ExactFull
    } else if bare == needle {
        MatchType::ExactName
    } else if qualified.starts_with(needle.as_str()) {
        MatchType::PrefixFull
    } else if bare.starts_with(needle.as_str()) {
        MatchType::PrefixName
    } else if qualified.contains(needle.as_str()) {
        MatchType::ContainsFull
    } else if bare.contains(needle.as_str()) {
        MatchType::ContainsName
    } else {
        return None;
    };

    Some(tier)
}
/// Searches `packages` for names matching `query` and returns the best hits.
///
/// `query` is trimmed first; an empty or whitespace-only query yields no
/// results. Each package is classified by `get_match_type`, and packages that
/// do not match are dropped.
///
/// Results are ordered by match tier (strongest first), then alphabetically
/// by lower-cased qualified name. Packages that tie on both keep their input
/// order. At most `limit` packages are returned, as owned clones.
pub fn fuzzy_match_packages(
    query: &str,
    packages: &[CrawledPackage],
    limit: usize,
) -> Vec<CrawledPackage> {
    let query = query.trim();
    // A zero limit can never produce output, so skip the scan entirely.
    if query.is_empty() || limit == 0 {
        return Vec::new();
    }

    let mut matches: Vec<MatchResult> = packages
        .iter()
        .filter_map(|pkg| {
            get_match_type(pkg, query).map(|match_type| MatchResult {
                package: pkg.clone(),
                match_type,
            })
        })
        .collect();

    // The tie-break key needs an allocation (qualified name, lower-cased), so
    // compute it once per element instead of twice per comparison. The sort
    // is stable, which preserves input order among fully tied entries.
    matches.sort_by_cached_key(|hit| {
        (
            hit.match_type,
            get_full_name(&hit.package).to_lowercase(),
        )
    });

    matches
        .into_iter()
        .take(limit)
        .map(|hit| hit.package)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds an npm-style fixture package with a placeholder path.
    fn npm_pkg(name: &str, version: &str, namespace: Option<&str>) -> CrawledPackage {
        let namespace = namespace.map(str::to_owned);
        let purl = if let Some(n) = namespace.as_deref() {
            format!("pkg:npm/{n}/{name}@{version}")
        } else {
            format!("pkg:npm/{name}@{version}")
        };

        CrawledPackage {
            name: name.to_owned(),
            version: version.to_owned(),
            namespace,
            purl,
            path: PathBuf::from("/fake"),
        }
    }

    /// A fully qualified query selects only the namespaced package.
    #[test]
    fn test_exact_full_name() {
        let corpus = [
            npm_pkg("node", "20.0.0", Some("@types")),
            npm_pkg("node-fetch", "3.0.0", None),
        ];

        let hits = fuzzy_match_packages("@types/node", &corpus, 20);

        assert_eq!(hits.len(), 1);
        let only = &hits[0];
        assert_eq!(only.name, "node");
        assert_eq!(only.namespace.as_deref(), Some("@types"));
    }

    /// A bare-name query finds a namespaced package by its name alone.
    #[test]
    fn test_exact_name_only() {
        let corpus = [
            npm_pkg("node", "20.0.0", Some("@types")),
            npm_pkg("lodash", "4.17.21", None),
        ];

        let hits = fuzzy_match_packages("node", &corpus, 20);

        assert_eq!(hits[0].name, "node");
    }

    /// The exact hit is listed ahead of the package it merely prefixes.
    #[test]
    fn test_prefix_match() {
        let corpus = [
            npm_pkg("lodash", "4.17.21", None),
            npm_pkg("lodash-es", "4.17.21", None),
        ];

        let hits = fuzzy_match_packages("lodash", &corpus, 20);

        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].name, "lodash");
    }

    /// A query that appears mid-name still matches.
    #[test]
    fn test_contains_match() {
        let corpus = [npm_pkg("string-width", "5.0.0", None)];

        let hits = fuzzy_match_packages("width", &corpus, 20);

        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].name, "string-width");
    }

    /// Unrelated queries produce no results.
    #[test]
    fn test_no_match() {
        let corpus = [npm_pkg("lodash", "4.17.21", None)];

        let hits = fuzzy_match_packages("zzzzz", &corpus, 20);

        assert!(hits.is_empty());
    }

    /// Empty and whitespace-only queries produce no results.
    #[test]
    fn test_empty_query() {
        let corpus = [npm_pkg("lodash", "4.17.21", None)];

        for blank in ["", " "] {
            assert!(fuzzy_match_packages(blank, &corpus, 20).is_empty());
        }
    }

    /// Matching ignores letter case.
    #[test]
    fn test_case_insensitive() {
        let corpus = [npm_pkg("React", "18.0.0", None)];

        let hits = fuzzy_match_packages("react", &corpus, 20);

        assert_eq!(hits.len(), 1);
    }

    /// No more than `limit` results come back.
    #[test]
    fn test_limit() {
        let corpus = (0..50)
            .map(|i| npm_pkg(&format!("pkg-{i}"), "1.0.0", None))
            .collect::<Vec<CrawledPackage>>();

        let hits = fuzzy_match_packages("pkg", &corpus, 10);

        assert_eq!(hits.len(), 10);
    }

    /// Within one tier, ordering is alphabetical regardless of case.
    #[test]
    fn test_tiebreak_is_case_insensitive() {
        let corpus = [
            npm_pkg("Zebra", "1.0.0", None),
            npm_pkg("apple", "1.0.0", None),
        ];

        let hits = fuzzy_match_packages("e", &corpus, 20);

        assert_eq!(hits.len(), 2);
        assert_eq!(
            hits[0].name, "apple",
            "alphabetical tie-break must ignore case"
        );
        assert_eq!(hits[1].name, "Zebra");
    }

    /// Truncation happens after ranking, so the strongest hit is kept.
    #[test]
    fn test_best_tier_survives_limit() {
        let corpus = [
            npm_pkg("ax", "1.0.0", None),
            npm_pkg("bx", "1.0.0", None),
            npm_pkg("x", "1.0.0", None),
        ];

        let hits = fuzzy_match_packages("x", &corpus, 1);

        assert_eq!(hits.len(), 1);
        assert_eq!(
            hits[0].name, "x",
            "exact match must beat alphabetically-earlier contains matches"
        );
    }

    /// A prefix hit on the qualified name outranks one on the bare name only.
    #[test]
    fn test_namespaced_prefix_name_ranks_below_full() {
        let corpus = [
            npm_pkg("lodash", "4.17.21", Some("@scope")),
            npm_pkg("lodash-es", "4.17.21", None),
        ];

        let hits = fuzzy_match_packages("lod", &corpus, 20);

        assert_eq!(hits.len(), 2);
        let best = &hits[0];
        assert_eq!(
            best.name, "lodash-es",
            "PrefixFull (no namespace) outranks PrefixName (namespaced)"
        );
        assert!(best.namespace.is_none());
    }
}