Skip to main content

socket_patch_core/utils/
fuzzy_match.rs

1use crate::crawlers::types::CrawledPackage;
2
3// ---------------------------------------------------------------------------
4// MatchType enum
5// ---------------------------------------------------------------------------
6
7/// Match type for sorting results by relevance.
8///
9/// Lower numeric value = better match. The ordering is:
10/// 1. Exact match on full name (including namespace)
11/// 2. Exact match on package name only
12/// 3. Prefix match on full name
13/// 4. Prefix match on package name
14/// 5. Contains match on full name
15/// 6. Contains match on package name
16///
17/// Internal to this module — `fuzzy_match_packages` is the only
18/// external entry point and it returns plain `Vec<CrawledPackage>`
19/// (sorted), so callers never see the match-type tag.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
21enum MatchType {
22    /// Exact match on full name (including namespace).
23    ExactFull = 0,
24    /// Exact match on package name only.
25    ExactName = 1,
26    /// Query is a prefix of the full name.
27    PrefixFull = 2,
28    /// Query is a prefix of the package name.
29    PrefixName = 3,
30    /// Query is contained in the full name.
31    ContainsFull = 4,
32    /// Query is contained in the package name.
33    ContainsName = 5,
34}
35
36// ---------------------------------------------------------------------------
37// Internal match result
38// ---------------------------------------------------------------------------
39
40struct MatchResult {
41    package: CrawledPackage,
42    match_type: MatchType,
43}
44
45// ---------------------------------------------------------------------------
46// Helpers
47// ---------------------------------------------------------------------------
48
49/// Get the full display name for a package (including namespace if present).
50fn get_full_name(pkg: &CrawledPackage) -> String {
51    match &pkg.namespace {
52        Some(ns) => format!("{ns}/{}", pkg.name),
53        None => pkg.name.clone(),
54    }
55}
56
57/// Determine the match type for a package against a query.
58/// Returns `None` if there is no match.
59fn get_match_type(pkg: &CrawledPackage, query: &str) -> Option<MatchType> {
60    let lower_query = query.to_lowercase();
61    let full_name = get_full_name(pkg).to_lowercase();
62    let name = pkg.name.to_lowercase();
63
64    // Check exact matches
65    if full_name == lower_query {
66        return Some(MatchType::ExactFull);
67    }
68    if name == lower_query {
69        return Some(MatchType::ExactName);
70    }
71
72    // Check prefix matches
73    if full_name.starts_with(&lower_query) {
74        return Some(MatchType::PrefixFull);
75    }
76    if name.starts_with(&lower_query) {
77        return Some(MatchType::PrefixName);
78    }
79
80    // Check contains matches
81    if full_name.contains(&lower_query) {
82        return Some(MatchType::ContainsFull);
83    }
84    if name.contains(&lower_query) {
85        return Some(MatchType::ContainsName);
86    }
87
88    None
89}
90
91// ---------------------------------------------------------------------------
92// Public API
93// ---------------------------------------------------------------------------
94
95/// Fuzzy match packages against a query string.
96///
97/// Matches are sorted by relevance:
98/// 1. Exact match on full name (e.g., `"@types/node"` matches `"@types/node"`)
99/// 2. Exact match on package name (e.g., `"node"` matches `"@types/node"`)
100/// 3. Prefix match on full name
101/// 4. Prefix match on package name
102/// 5. Contains match on full name
103/// 6. Contains match on package name
104///
105/// Within the same match type, results are sorted alphabetically by full name.
106pub fn fuzzy_match_packages(
107    query: &str,
108    packages: &[CrawledPackage],
109    limit: usize,
110) -> Vec<CrawledPackage> {
111    let trimmed = query.trim();
112    if trimmed.is_empty() {
113        return Vec::new();
114    }
115
116    let mut matches: Vec<MatchResult> = Vec::new();
117
118    for pkg in packages {
119        if let Some(match_type) = get_match_type(pkg, trimmed) {
120            matches.push(MatchResult {
121                package: pkg.clone(),
122                match_type,
123            });
124        }
125    }
126
127    // Sort by match type (lower is better), then alphabetically by full name
128    matches.sort_by(|a, b| {
129        let type_cmp = a.match_type.cmp(&b.match_type);
130        if type_cmp != std::cmp::Ordering::Equal {
131            return type_cmp;
132        }
133        // Tie-break alphabetically by full name. Matching is case-insensitive,
134        // so the ordering must be too — otherwise byte order sorts uppercase
135        // ('Z' = 0x5A) before lowercase ('a' = 0x61), which is not alphabetical
136        // and can flip which package lands at `matches[0]`.
137        get_full_name(&a.package)
138            .to_lowercase()
139            .cmp(&get_full_name(&b.package).to_lowercase())
140    });
141
142    matches.into_iter().take(limit).map(|m| m.package).collect()
143}
144
145#[cfg(test)]
146mod tests {
147    use super::*;
148    use std::path::PathBuf;
149
150    fn make_pkg(name: &str, version: &str, namespace: Option<&str>) -> CrawledPackage {
151        let ns = namespace.map(|s| s.to_string());
152        let purl = match &ns {
153            Some(n) => format!("pkg:npm/{n}/{name}@{version}"),
154            None => format!("pkg:npm/{name}@{version}"),
155        };
156        CrawledPackage {
157            name: name.to_string(),
158            version: version.to_string(),
159            namespace: ns,
160            purl,
161            path: PathBuf::from("/fake"),
162        }
163    }
164
165    #[test]
166    fn test_exact_full_name() {
167        let packages = vec![
168            make_pkg("node", "20.0.0", Some("@types")),
169            make_pkg("node-fetch", "3.0.0", None),
170        ];
171
172        let results = fuzzy_match_packages("@types/node", &packages, 20);
173        // "node-fetch" does NOT contain "@types/node", so only 1 result
174        assert_eq!(results.len(), 1);
175        assert_eq!(results[0].name, "node"); // ExactFull
176        assert_eq!(results[0].namespace.as_deref(), Some("@types"));
177    }
178
179    #[test]
180    fn test_exact_name_only() {
181        let packages = vec![
182            make_pkg("node", "20.0.0", Some("@types")),
183            make_pkg("lodash", "4.17.21", None),
184        ];
185
186        let results = fuzzy_match_packages("node", &packages, 20);
187        assert_eq!(results[0].name, "node"); // ExactName
188    }
189
190    #[test]
191    fn test_prefix_match() {
192        let packages = vec![
193            make_pkg("lodash", "4.17.21", None),
194            make_pkg("lodash-es", "4.17.21", None),
195        ];
196
197        let results = fuzzy_match_packages("lodash", &packages, 20);
198        assert_eq!(results.len(), 2);
199        assert_eq!(results[0].name, "lodash"); // ExactName is better than PrefixName
200    }
201
202    #[test]
203    fn test_contains_match() {
204        let packages = vec![make_pkg("string-width", "5.0.0", None)];
205
206        let results = fuzzy_match_packages("width", &packages, 20);
207        assert_eq!(results.len(), 1);
208        assert_eq!(results[0].name, "string-width");
209    }
210
211    #[test]
212    fn test_no_match() {
213        let packages = vec![make_pkg("lodash", "4.17.21", None)];
214
215        let results = fuzzy_match_packages("zzzzz", &packages, 20);
216        assert!(results.is_empty());
217    }
218
219    #[test]
220    fn test_empty_query() {
221        let packages = vec![make_pkg("lodash", "4.17.21", None)];
222        assert!(fuzzy_match_packages("", &packages, 20).is_empty());
223        assert!(fuzzy_match_packages("   ", &packages, 20).is_empty());
224    }
225
226    #[test]
227    fn test_case_insensitive() {
228        let packages = vec![make_pkg("React", "18.0.0", None)];
229        let results = fuzzy_match_packages("react", &packages, 20);
230        assert_eq!(results.len(), 1);
231    }
232
233    #[test]
234    fn test_limit() {
235        let packages: Vec<CrawledPackage> = (0..50)
236            .map(|i| make_pkg(&format!("pkg-{i}"), "1.0.0", None))
237            .collect();
238
239        let results = fuzzy_match_packages("pkg", &packages, 10);
240        assert_eq!(results.len(), 10);
241    }
242
243    /// Regression: within a single match tier the alphabetical tie-break must
244    /// be case-insensitive, matching the case-insensitive matching above. With
245    /// a raw byte-order comparison, 'Z' (0x5A) sorts before 'a' (0x61), so
246    /// "Zebra" would wrongly precede "apple" and become `matches[0]`.
247    #[test]
248    fn test_tiebreak_is_case_insensitive() {
249        let packages = vec![
250            make_pkg("Zebra", "1.0.0", None),
251            make_pkg("apple", "1.0.0", None),
252        ];
253        // "e" is a substring of both names but a prefix of neither, so both
254        // land in the same ContainsFull tier and the tie-break decides order.
255        let results = fuzzy_match_packages("e", &packages, 20);
256        assert_eq!(results.len(), 2);
257        assert_eq!(
258            results[0].name, "apple",
259            "alphabetical tie-break must ignore case"
260        );
261        assert_eq!(results[1].name, "Zebra");
262    }
263
264    /// A better match tier must outrank alphabetical order, and the `limit`
265    /// truncation must keep the best matches (it is applied after sorting).
266    #[test]
267    fn test_best_tier_survives_limit() {
268        let packages = vec![
269            make_pkg("ax", "1.0.0", None),
270            make_pkg("bx", "1.0.0", None),
271            make_pkg("x", "1.0.0", None), // ExactFull, but alphabetically last
272        ];
273        let results = fuzzy_match_packages("x", &packages, 1);
274        assert_eq!(results.len(), 1);
275        assert_eq!(
276            results[0].name, "x",
277            "exact match must beat alphabetically-earlier contains matches"
278        );
279    }
280
281    /// A namespaced package whose bare name (but not its namespace-qualified
282    /// full name) prefixes the query is a PrefixName match, which ranks below
283    /// a non-namespaced PrefixFull match for the same query.
284    #[test]
285    fn test_namespaced_prefix_name_ranks_below_full() {
286        let packages = vec![
287            make_pkg("lodash", "4.17.21", Some("@scope")),
288            make_pkg("lodash-es", "4.17.21", None),
289        ];
290        let results = fuzzy_match_packages("lod", &packages, 20);
291        assert_eq!(results.len(), 2);
292        assert_eq!(
293            results[0].name, "lodash-es",
294            "PrefixFull (no namespace) outranks PrefixName (namespaced)"
295        );
296        assert!(results[0].namespace.is_none());
297    }
298}