Skip to main content

socket_patch_core/utils/
fuzzy_match.rs

1use crate::crawlers::types::CrawledPackage;
2
3// ---------------------------------------------------------------------------
4// MatchType enum
5// ---------------------------------------------------------------------------
6
/// Relevance class of a package/query match, used to rank search results.
///
/// Variants are declared from most to least relevant, and the explicit
/// discriminants follow the same order, so the derived `Ord` treats a
/// *smaller* value as a *better* match:
///
/// * `ExactFull` (0) – query equals the full name, namespace included
/// * `ExactName` (1) – query equals the bare package name
/// * `PrefixFull` (2) – full name starts with the query
/// * `PrefixName` (3) – bare package name starts with the query
/// * `ContainsFull` (4) – query occurs somewhere in the full name
/// * `ContainsName` (5) – query occurs somewhere in the bare package name
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    /// The query is identical to the full name (including namespace).
    ExactFull = 0,
    /// The query is identical to the package name alone.
    ExactName = 1,
    /// The full name begins with the query.
    PrefixFull = 2,
    /// The package name begins with the query.
    PrefixName = 3,
    /// The query appears as a substring of the full name.
    ContainsFull = 4,
    /// The query appears as a substring of the package name.
    ContainsName = 5,
}
31
32// ---------------------------------------------------------------------------
33// Internal match result
34// ---------------------------------------------------------------------------
35
36struct MatchResult {
37    package: CrawledPackage,
38    match_type: MatchType,
39}
40
41// ---------------------------------------------------------------------------
42// Helpers
43// ---------------------------------------------------------------------------
44
45/// Get the full display name for a package (including namespace if present).
46fn get_full_name(pkg: &CrawledPackage) -> String {
47    match &pkg.namespace {
48        Some(ns) => format!("{ns}/{}", pkg.name),
49        None => pkg.name.clone(),
50    }
51}
52
53/// Determine the match type for a package against a query.
54/// Returns `None` if there is no match.
55fn get_match_type(pkg: &CrawledPackage, query: &str) -> Option<MatchType> {
56    let lower_query = query.to_lowercase();
57    let full_name = get_full_name(pkg).to_lowercase();
58    let name = pkg.name.to_lowercase();
59
60    // Check exact matches
61    if full_name == lower_query {
62        return Some(MatchType::ExactFull);
63    }
64    if name == lower_query {
65        return Some(MatchType::ExactName);
66    }
67
68    // Check prefix matches
69    if full_name.starts_with(&lower_query) {
70        return Some(MatchType::PrefixFull);
71    }
72    if name.starts_with(&lower_query) {
73        return Some(MatchType::PrefixName);
74    }
75
76    // Check contains matches
77    if full_name.contains(&lower_query) {
78        return Some(MatchType::ContainsFull);
79    }
80    if name.contains(&lower_query) {
81        return Some(MatchType::ContainsName);
82    }
83
84    None
85}
86
87// ---------------------------------------------------------------------------
88// Public API
89// ---------------------------------------------------------------------------
90
91/// Fuzzy match packages against a query string.
92///
93/// Matches are sorted by relevance:
94/// 1. Exact match on full name (e.g., `"@types/node"` matches `"@types/node"`)
95/// 2. Exact match on package name (e.g., `"node"` matches `"@types/node"`)
96/// 3. Prefix match on full name
97/// 4. Prefix match on package name
98/// 5. Contains match on full name
99/// 6. Contains match on package name
100///
101/// Within the same match type, results are sorted alphabetically by full name.
102pub fn fuzzy_match_packages(
103    query: &str,
104    packages: &[CrawledPackage],
105    limit: usize,
106) -> Vec<CrawledPackage> {
107    let trimmed = query.trim();
108    if trimmed.is_empty() {
109        return Vec::new();
110    }
111
112    let mut matches: Vec<MatchResult> = Vec::new();
113
114    for pkg in packages {
115        if let Some(match_type) = get_match_type(pkg, trimmed) {
116            matches.push(MatchResult {
117                package: pkg.clone(),
118                match_type,
119            });
120        }
121    }
122
123    // Sort by match type (lower is better), then alphabetically by full name
124    matches.sort_by(|a, b| {
125        let type_cmp = a.match_type.cmp(&b.match_type);
126        if type_cmp != std::cmp::Ordering::Equal {
127            return type_cmp;
128        }
129        get_full_name(&a.package).cmp(&get_full_name(&b.package))
130    });
131
132    matches
133        .into_iter()
134        .take(limit)
135        .map(|m| m.package)
136        .collect()
137}
138
/// Report whether `s` looks like a package URL, i.e. begins with the
/// literal, case-sensitive scheme prefix `pkg:`.
pub fn is_purl(s: &str) -> bool {
    s.strip_prefix("pkg:").is_some()
}
143
/// Report whether `s` looks like a scoped npm package name: it must begin
/// with `@` and contain a `/` somewhere after it.
pub fn is_scoped_package(s: &str) -> bool {
    matches!(s.strip_prefix('@'), Some(rest) if rest.contains('/'))
}
148
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build an npm-flavoured `CrawledPackage` fixture with a fake path.
    fn make_pkg(
        name: &str,
        version: &str,
        namespace: Option<&str>,
    ) -> CrawledPackage {
        let ns = namespace.map(|s| s.to_string());
        let purl = match &ns {
            Some(n) => format!("pkg:npm/{n}/{name}@{version}"),
            None => format!("pkg:npm/{name}@{version}"),
        };
        CrawledPackage {
            name: name.to_string(),
            version: version.to_string(),
            namespace: ns,
            purl,
            path: PathBuf::from("/fake"),
        }
    }

    /// Full display names of `results`, in result order.
    fn full_names(results: &[CrawledPackage]) -> Vec<String> {
        results.iter().map(get_full_name).collect()
    }

    #[test]
    fn test_exact_full_name() {
        let packages = vec![
            make_pkg("node", "20.0.0", Some("@types")),
            make_pkg("node-fetch", "3.0.0", None),
        ];

        let results = fuzzy_match_packages("@types/node", &packages, 20);
        // "node-fetch" does NOT contain "@types/node", so only 1 result
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "node"); // ExactFull
        assert_eq!(results[0].namespace.as_deref(), Some("@types"));
    }

    #[test]
    fn test_exact_name_only() {
        let packages = vec![
            make_pkg("node", "20.0.0", Some("@types")),
            make_pkg("lodash", "4.17.21", None),
        ];

        let results = fuzzy_match_packages("node", &packages, 20);
        // Check the length first so a regression fails with a clear message
        // instead of an index-out-of-bounds panic.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "node"); // ExactName
    }

    #[test]
    fn test_prefix_match() {
        let packages = vec![
            make_pkg("lodash", "4.17.21", None),
            make_pkg("lodash-es", "4.17.21", None),
        ];

        let results = fuzzy_match_packages("lodash", &packages, 20);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].name, "lodash"); // exact match outranks prefix match
        assert_eq!(results[1].name, "lodash-es");
    }

    #[test]
    fn test_contains_match() {
        let packages = vec![make_pkg("string-width", "5.0.0", None)];

        let results = fuzzy_match_packages("width", &packages, 20);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "string-width");
    }

    #[test]
    fn test_no_match() {
        let packages = vec![make_pkg("lodash", "4.17.21", None)];

        let results = fuzzy_match_packages("zzzzz", &packages, 20);
        assert!(results.is_empty());
    }

    #[test]
    fn test_empty_query() {
        let packages = vec![make_pkg("lodash", "4.17.21", None)];
        assert!(fuzzy_match_packages("", &packages, 20).is_empty());
        assert!(fuzzy_match_packages("   ", &packages, 20).is_empty());
    }

    #[test]
    fn test_query_is_trimmed() {
        let packages = vec![make_pkg("lodash", "4.17.21", None)];
        let results = fuzzy_match_packages("  lodash  ", &packages, 20);
        assert_eq!(full_names(&results), vec!["lodash"]);
    }

    #[test]
    fn test_case_insensitive() {
        let packages = vec![make_pkg("React", "18.0.0", None)];
        let results = fuzzy_match_packages("react", &packages, 20);
        assert_eq!(results.len(), 1);

        let results = fuzzy_match_packages("REACT", &packages, 20);
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_limit() {
        let packages: Vec<CrawledPackage> = (0..50)
            .map(|i| make_pkg(&format!("pkg-{i}"), "1.0.0", None))
            .collect();

        let results = fuzzy_match_packages("pkg", &packages, 10);
        assert_eq!(results.len(), 10);
    }

    #[test]
    fn test_limit_zero() {
        let packages = vec![make_pkg("lodash", "4.17.21", None)];
        assert!(fuzzy_match_packages("lodash", &packages, 0).is_empty());
    }

    #[test]
    fn test_match_type_classification() {
        let plain = make_pkg("node", "20.0.0", None);
        let scoped = make_pkg("node", "20.0.0", Some("@types"));

        assert_eq!(get_match_type(&plain, "node"), Some(MatchType::ExactFull));
        assert_eq!(
            get_match_type(&scoped, "@types/node"),
            Some(MatchType::ExactFull)
        );
        assert_eq!(get_match_type(&scoped, "node"), Some(MatchType::ExactName));
        assert_eq!(get_match_type(&scoped, "@ty"), Some(MatchType::PrefixFull));
        assert_eq!(get_match_type(&scoped, "nod"), Some(MatchType::PrefixName));
        assert_eq!(get_match_type(&scoped, "ode"), Some(MatchType::ContainsFull));
        assert_eq!(
            get_match_type(&scoped, "types"),
            Some(MatchType::ContainsFull)
        );
        assert_eq!(get_match_type(&scoped, "xyz"), None);
    }

    #[test]
    fn test_results_ranked_by_match_type() {
        // Deliberately listed from worst to best match for the query "node".
        let packages = vec![
            make_pkg("my-node", "1.0.0", None),         // contains
            make_pkg("node-fetch", "3.0.0", None),      // prefix on full name
            make_pkg("node", "20.0.0", Some("@types")), // exact on package name
            make_pkg("node", "20.0.0", None),           // exact on full name
        ];

        let results = fuzzy_match_packages("node", &packages, 20);
        assert_eq!(
            full_names(&results),
            vec!["node", "@types/node", "node-fetch", "my-node"]
        );
    }

    #[test]
    fn test_alphabetical_within_same_match_type() {
        let packages = vec![
            make_pkg("pkg-b", "1.0.0", None),
            make_pkg("pkg-c", "1.0.0", None),
            make_pkg("pkg-a", "1.0.0", None),
        ];

        let results = fuzzy_match_packages("pkg", &packages, 20);
        assert_eq!(full_names(&results), vec!["pkg-a", "pkg-b", "pkg-c"]);

        // The limit is applied after sorting, so the best-ranked entries win.
        let results = fuzzy_match_packages("pkg", &packages, 2);
        assert_eq!(full_names(&results), vec!["pkg-a", "pkg-b"]);
    }

    #[test]
    fn test_is_purl() {
        assert!(is_purl("pkg:npm/lodash@4.17.21"));
        assert!(is_purl("pkg:pypi/requests@2.28.0"));
        assert!(!is_purl("lodash"));
        assert!(!is_purl("@types/node"));
    }

    #[test]
    fn test_is_scoped_package() {
        assert!(is_scoped_package("@types/node"));
        assert!(is_scoped_package("@scope/pkg"));
        assert!(!is_scoped_package("lodash"));
        assert!(!is_scoped_package("@scope"));
    }
}