Skip to main content

sbom_tools/matching/
aliases.rs

1//! Curated alias tables for cross-ecosystem package correlation.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6/// Alias table for mapping package names across different conventions.
7#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8pub struct AliasTable {
9    /// Mapping from alias to canonical name
10    alias_to_canonical: HashMap<String, String>,
11    /// Mapping from canonical name to all aliases
12    canonical_to_aliases: HashMap<String, HashSet<String>>,
13}
14
15impl AliasTable {
16    /// Create a new empty alias table
17    pub fn new() -> Self {
18        Self::default()
19    }
20
21    /// Create an alias table with built-in common aliases
22    pub fn with_builtins() -> Self {
23        let mut table = Self::new();
24        table.load_builtins();
25        table
26    }
27
28    /// Load built-in alias mappings
29    fn load_builtins(&mut self) {
30        // PyPI aliases (distribution vs import name differences)
31        self.add_aliases("pkg:pypi/pillow", &["PIL", "python-pillow", "pillow"]);
32        self.add_aliases("pkg:pypi/scikit-learn", &["sklearn", "scikit_learn"]);
33        self.add_aliases(
34            "pkg:pypi/beautifulsoup4",
35            &["bs4", "BeautifulSoup", "beautifulsoup"],
36        );
37        self.add_aliases("pkg:pypi/pyyaml", &["yaml", "PyYAML"]);
38        self.add_aliases(
39            "pkg:pypi/opencv-python",
40            &["cv2", "opencv-python-headless", "opencv"],
41        );
42        self.add_aliases("pkg:pypi/python-dateutil", &["dateutil"]);
43        self.add_aliases("pkg:pypi/attrs", &["attr"]);
44        self.add_aliases("pkg:pypi/importlib-metadata", &["importlib_metadata"]);
45        self.add_aliases("pkg:pypi/typing-extensions", &["typing_extensions"]);
46        self.add_aliases("pkg:pypi/zipp", &["zipfile"]);
47
48        // npm aliases (package variants)
49        self.add_aliases(
50            "pkg:npm/lodash",
51            &["lodash-es", "lodash.merge", "lodash.get"],
52        );
53        self.add_aliases("pkg:npm/react", &["react-dom"]);
54        self.add_aliases("pkg:npm/webpack", &["webpack-cli"]);
55
56        // Cross-ecosystem common libraries
57        self.add_aliases(
58            "yaml-parser",
59            &["pyyaml", "js-yaml", "serde_yaml", "gopkg.in/yaml"],
60        );
61        self.add_aliases("json-parser", &["serde_json", "json", "encoding/json"]);
62    }
63
64    /// Add aliases for a canonical package
65    pub fn add_aliases(&mut self, canonical: &str, aliases: &[&str]) {
66        let canonical_lower = canonical.to_lowercase();
67
68        // Add canonical as its own alias
69        self.alias_to_canonical
70            .insert(canonical_lower.clone(), canonical_lower.clone());
71
72        // Initialize alias set if needed
73        self.canonical_to_aliases
74            .entry(canonical_lower.clone())
75            .or_default()
76            .insert(canonical_lower.clone());
77
78        // Add all aliases
79        for alias in aliases {
80            let alias_lower = alias.to_lowercase();
81            self.alias_to_canonical
82                .insert(alias_lower.clone(), canonical_lower.clone());
83            // Use entry API to avoid unwrap - entry was created above but this is safer
84            self.canonical_to_aliases
85                .entry(canonical_lower.clone())
86                .or_default()
87                .insert(alias_lower);
88        }
89    }
90
91    /// Get the canonical name for an alias
92    pub fn get_canonical(&self, alias: &str) -> Option<String> {
93        self.alias_to_canonical.get(&alias.to_lowercase()).cloned()
94    }
95
96    /// Check if a name is an alias of a canonical name
97    pub fn is_alias(&self, canonical: &str, name: &str) -> bool {
98        let canonical_lower = canonical.to_lowercase();
99        let name_lower = name.to_lowercase();
100
101        if let Some(aliases) = self.canonical_to_aliases.get(&canonical_lower) {
102            aliases.contains(&name_lower)
103        } else {
104            false
105        }
106    }
107
108    /// Get all aliases for a canonical name
109    pub fn get_aliases(&self, canonical: &str) -> Option<&HashSet<String>> {
110        self.canonical_to_aliases.get(&canonical.to_lowercase())
111    }
112
113    /// Load aliases from JSON
114    pub fn load_json(&mut self, json: &str) -> Result<(), serde_json::Error> {
115        let entries: Vec<AliasEntry> = serde_json::from_str(json)?;
116        for entry in entries {
117            let aliases: Vec<&str> = entry.aliases.iter().map(|s| s.as_str()).collect();
118            self.add_aliases(&entry.canonical, &aliases);
119        }
120        Ok(())
121    }
122
123    /// Export aliases to JSON
124    pub fn to_json(&self) -> Result<String, serde_json::Error> {
125        let entries: Vec<AliasEntry> = self
126            .canonical_to_aliases
127            .iter()
128            .map(|(canonical, aliases)| AliasEntry {
129                canonical: canonical.clone(),
130                aliases: aliases.iter().cloned().collect(),
131            })
132            .collect();
133        serde_json::to_string_pretty(&entries)
134    }
135}
136
137/// Entry in the alias table JSON format
138#[derive(Debug, Serialize, Deserialize)]
139struct AliasEntry {
140    canonical: String,
141    aliases: Vec<String>,
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Built-in import-name aliases resolve to their canonical package name.
    #[test]
    fn test_alias_lookup() {
        let builtins = AliasTable::with_builtins();

        // (alias, expected canonical)
        let expectations = [
            ("PIL", "pkg:pypi/pillow"),
            ("sklearn", "pkg:pypi/scikit-learn"),
        ];

        for &(alias, expected) in &expectations {
            assert_eq!(builtins.get_canonical(alias).as_deref(), Some(expected));
        }
    }

    /// Membership checks ignore case and reject unrelated names.
    #[test]
    fn test_is_alias() {
        let builtins = AliasTable::with_builtins();
        let pillow = "pkg:pypi/pillow";

        assert!(builtins.is_alias(pillow, "PIL"));
        assert!(builtins.is_alias(pillow, "pillow"));
        assert!(!builtins.is_alias(pillow, "numpy"));
    }

    /// User-registered aliases are visible through both lookup directions.
    #[test]
    fn test_custom_aliases() {
        let mut custom = AliasTable::new();
        custom.add_aliases("my-package", &["my_package", "mypackage"]);

        let resolved = custom.get_canonical("my_package");
        assert_eq!(resolved, Some(String::from("my-package")));
        assert!(custom.is_alias("my-package", "mypackage"));
    }
}