Skip to main content

socket_patch_core/utils/
fuzzy_match.rs

1use crate::crawlers::types::CrawledPackage;
2
3// ---------------------------------------------------------------------------
4// MatchType enum
5// ---------------------------------------------------------------------------
6
7/// Match type for sorting results by relevance.
8///
9/// Lower numeric value = better match. The ordering is:
10/// 1. Exact match on full name (including namespace)
11/// 2. Exact match on package name only
12/// 3. Prefix match on full name
13/// 4. Prefix match on package name
14/// 5. Contains match on full name
15/// 6. Contains match on package name
16///
17/// Internal to this module — `fuzzy_match_packages` is the only
18/// external entry point and it returns plain `Vec<CrawledPackage>`
19/// (sorted), so callers never see the match-type tag.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
21enum MatchType {
22    /// Exact match on full name (including namespace).
23    ExactFull = 0,
24    /// Exact match on package name only.
25    ExactName = 1,
26    /// Query is a prefix of the full name.
27    PrefixFull = 2,
28    /// Query is a prefix of the package name.
29    PrefixName = 3,
30    /// Query is contained in the full name.
31    ContainsFull = 4,
32    /// Query is contained in the package name.
33    ContainsName = 5,
34}
35
36// ---------------------------------------------------------------------------
37// Internal match result
38// ---------------------------------------------------------------------------
39
40struct MatchResult {
41    package: CrawledPackage,
42    match_type: MatchType,
43}
44
45// ---------------------------------------------------------------------------
46// Helpers
47// ---------------------------------------------------------------------------
48
49/// Get the full display name for a package (including namespace if present).
50fn get_full_name(pkg: &CrawledPackage) -> String {
51    match &pkg.namespace {
52        Some(ns) => format!("{ns}/{}", pkg.name),
53        None => pkg.name.clone(),
54    }
55}
56
57/// Determine the match type for a package against a query.
58/// Returns `None` if there is no match.
59fn get_match_type(pkg: &CrawledPackage, query: &str) -> Option<MatchType> {
60    let lower_query = query.to_lowercase();
61    let full_name = get_full_name(pkg).to_lowercase();
62    let name = pkg.name.to_lowercase();
63
64    // Check exact matches
65    if full_name == lower_query {
66        return Some(MatchType::ExactFull);
67    }
68    if name == lower_query {
69        return Some(MatchType::ExactName);
70    }
71
72    // Check prefix matches
73    if full_name.starts_with(&lower_query) {
74        return Some(MatchType::PrefixFull);
75    }
76    if name.starts_with(&lower_query) {
77        return Some(MatchType::PrefixName);
78    }
79
80    // Check contains matches
81    if full_name.contains(&lower_query) {
82        return Some(MatchType::ContainsFull);
83    }
84    if name.contains(&lower_query) {
85        return Some(MatchType::ContainsName);
86    }
87
88    None
89}
90
91// ---------------------------------------------------------------------------
92// Public API
93// ---------------------------------------------------------------------------
94
95/// Fuzzy match packages against a query string.
96///
97/// Matches are sorted by relevance:
98/// 1. Exact match on full name (e.g., `"@types/node"` matches `"@types/node"`)
99/// 2. Exact match on package name (e.g., `"node"` matches `"@types/node"`)
100/// 3. Prefix match on full name
101/// 4. Prefix match on package name
102/// 5. Contains match on full name
103/// 6. Contains match on package name
104///
105/// Within the same match type, results are sorted alphabetically by full name.
106pub fn fuzzy_match_packages(
107    query: &str,
108    packages: &[CrawledPackage],
109    limit: usize,
110) -> Vec<CrawledPackage> {
111    let trimmed = query.trim();
112    if trimmed.is_empty() {
113        return Vec::new();
114    }
115
116    let mut matches: Vec<MatchResult> = Vec::new();
117
118    for pkg in packages {
119        if let Some(match_type) = get_match_type(pkg, trimmed) {
120            matches.push(MatchResult {
121                package: pkg.clone(),
122                match_type,
123            });
124        }
125    }
126
127    // Sort by match type (lower is better), then alphabetically by full name
128    matches.sort_by(|a, b| {
129        let type_cmp = a.match_type.cmp(&b.match_type);
130        if type_cmp != std::cmp::Ordering::Equal {
131            return type_cmp;
132        }
133        get_full_name(&a.package).cmp(&get_full_name(&b.package))
134    });
135
136    matches
137        .into_iter()
138        .take(limit)
139        .map(|m| m.package)
140        .collect()
141}
142
143#[cfg(test)]
144mod tests {
145    use super::*;
146    use std::path::PathBuf;
147
148    fn make_pkg(
149        name: &str,
150        version: &str,
151        namespace: Option<&str>,
152    ) -> CrawledPackage {
153        let ns = namespace.map(|s| s.to_string());
154        let purl = match &ns {
155            Some(n) => format!("pkg:npm/{n}/{name}@{version}"),
156            None => format!("pkg:npm/{name}@{version}"),
157        };
158        CrawledPackage {
159            name: name.to_string(),
160            version: version.to_string(),
161            namespace: ns,
162            purl,
163            path: PathBuf::from("/fake"),
164        }
165    }
166
167    #[test]
168    fn test_exact_full_name() {
169        let packages = vec![
170            make_pkg("node", "20.0.0", Some("@types")),
171            make_pkg("node-fetch", "3.0.0", None),
172        ];
173
174        let results = fuzzy_match_packages("@types/node", &packages, 20);
175        // "node-fetch" does NOT contain "@types/node", so only 1 result
176        assert_eq!(results.len(), 1);
177        assert_eq!(results[0].name, "node"); // ExactFull
178        assert_eq!(results[0].namespace.as_deref(), Some("@types"));
179    }
180
181    #[test]
182    fn test_exact_name_only() {
183        let packages = vec![
184            make_pkg("node", "20.0.0", Some("@types")),
185            make_pkg("lodash", "4.17.21", None),
186        ];
187
188        let results = fuzzy_match_packages("node", &packages, 20);
189        assert_eq!(results[0].name, "node"); // ExactName
190    }
191
192    #[test]
193    fn test_prefix_match() {
194        let packages = vec![
195            make_pkg("lodash", "4.17.21", None),
196            make_pkg("lodash-es", "4.17.21", None),
197        ];
198
199        let results = fuzzy_match_packages("lodash", &packages, 20);
200        assert_eq!(results.len(), 2);
201        assert_eq!(results[0].name, "lodash"); // ExactName is better than PrefixName
202    }
203
204    #[test]
205    fn test_contains_match() {
206        let packages = vec![make_pkg("string-width", "5.0.0", None)];
207
208        let results = fuzzy_match_packages("width", &packages, 20);
209        assert_eq!(results.len(), 1);
210        assert_eq!(results[0].name, "string-width");
211    }
212
213    #[test]
214    fn test_no_match() {
215        let packages = vec![make_pkg("lodash", "4.17.21", None)];
216
217        let results = fuzzy_match_packages("zzzzz", &packages, 20);
218        assert!(results.is_empty());
219    }
220
221    #[test]
222    fn test_empty_query() {
223        let packages = vec![make_pkg("lodash", "4.17.21", None)];
224        assert!(fuzzy_match_packages("", &packages, 20).is_empty());
225        assert!(fuzzy_match_packages("   ", &packages, 20).is_empty());
226    }
227
228    #[test]
229    fn test_case_insensitive() {
230        let packages = vec![make_pkg("React", "18.0.0", None)];
231        let results = fuzzy_match_packages("react", &packages, 20);
232        assert_eq!(results.len(), 1);
233    }
234
235    #[test]
236    fn test_limit() {
237        let packages: Vec<CrawledPackage> = (0..50)
238            .map(|i| make_pkg(&format!("pkg-{i}"), "1.0.0", None))
239            .collect();
240
241        let results = fuzzy_match_packages("pkg", &packages, 10);
242        assert_eq!(results.len(), 10);
243    }
244
245}