Skip to main content

sbom_tools/matching/
aliases.rs

1//! Curated alias tables for cross-ecosystem package correlation.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6/// Alias table for mapping package names across different conventions.
7#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8pub struct AliasTable {
9    /// Mapping from alias to canonical name
10    alias_to_canonical: HashMap<String, String>,
11    /// Mapping from canonical name to all aliases
12    canonical_to_aliases: HashMap<String, HashSet<String>>,
13}
14
15impl AliasTable {
16    /// Create a new empty alias table
17    #[must_use]
18    pub fn new() -> Self {
19        Self::default()
20    }
21
22    /// Create an alias table with built-in common aliases
23    #[must_use]
24    pub fn with_builtins() -> Self {
25        let mut table = Self::new();
26        table.load_builtins();
27        table
28    }
29
30    /// Load built-in alias mappings
31    fn load_builtins(&mut self) {
32        // PyPI aliases (distribution vs import name differences)
33        self.add_aliases("pkg:pypi/pillow", &["PIL", "python-pillow", "pillow"]);
34        self.add_aliases("pkg:pypi/scikit-learn", &["sklearn", "scikit_learn"]);
35        self.add_aliases(
36            "pkg:pypi/beautifulsoup4",
37            &["bs4", "BeautifulSoup", "beautifulsoup"],
38        );
39        self.add_aliases("pkg:pypi/pyyaml", &["yaml", "PyYAML"]);
40        self.add_aliases(
41            "pkg:pypi/opencv-python",
42            &["cv2", "opencv-python-headless", "opencv"],
43        );
44        self.add_aliases("pkg:pypi/python-dateutil", &["dateutil"]);
45        self.add_aliases("pkg:pypi/attrs", &["attr"]);
46        self.add_aliases("pkg:pypi/importlib-metadata", &["importlib_metadata"]);
47        self.add_aliases("pkg:pypi/typing-extensions", &["typing_extensions"]);
48        self.add_aliases("pkg:pypi/zipp", &["zipfile"]);
49
50        // npm aliases (package variants)
51        self.add_aliases(
52            "pkg:npm/lodash",
53            &["lodash-es", "lodash.merge", "lodash.get"],
54        );
55        self.add_aliases("pkg:npm/react", &["react-dom"]);
56        self.add_aliases("pkg:npm/webpack", &["webpack-cli"]);
57
58        // Cross-ecosystem common libraries
59        self.add_aliases(
60            "yaml-parser",
61            &["pyyaml", "js-yaml", "serde_yaml", "gopkg.in/yaml"],
62        );
63        self.add_aliases("json-parser", &["serde_json", "json", "encoding/json"]);
64    }
65
66    /// Add aliases for a canonical package
67    pub fn add_aliases(&mut self, canonical: &str, aliases: &[&str]) {
68        let canonical_lower = canonical.to_lowercase();
69
70        // Add canonical as its own alias
71        self.alias_to_canonical
72            .insert(canonical_lower.clone(), canonical_lower.clone());
73
74        // Initialize alias set and insert canonical name
75        let alias_set = self
76            .canonical_to_aliases
77            .entry(canonical_lower.clone())
78            .or_default();
79        alias_set.insert(canonical_lower.clone());
80
81        // Add all aliases
82        for alias in aliases {
83            let alias_lower = alias.to_lowercase();
84            self.alias_to_canonical
85                .insert(alias_lower.clone(), canonical_lower.clone());
86            if let Some(set) = self.canonical_to_aliases.get_mut(&canonical_lower) {
87                set.insert(alias_lower);
88            }
89        }
90    }
91
92    /// Get the canonical name for an alias
93    #[must_use]
94    pub fn get_canonical(&self, alias: &str) -> Option<String> {
95        self.alias_to_canonical.get(&alias.to_lowercase()).cloned()
96    }
97
98    /// Check if a name is an alias of a canonical name
99    #[must_use]
100    pub fn is_alias(&self, canonical: &str, name: &str) -> bool {
101        let canonical_lower = canonical.to_lowercase();
102        let name_lower = name.to_lowercase();
103
104        self.canonical_to_aliases
105            .get(&canonical_lower)
106            .is_some_and(|aliases| aliases.contains(&name_lower))
107    }
108
109    /// Get all aliases for a canonical name
110    #[must_use]
111    pub fn get_aliases(&self, canonical: &str) -> Option<&HashSet<String>> {
112        self.canonical_to_aliases.get(&canonical.to_lowercase())
113    }
114
115    /// Load aliases from JSON
116    pub fn load_json(&mut self, json: &str) -> Result<(), serde_json::Error> {
117        let entries: Vec<AliasEntry> = serde_json::from_str(json)?;
118        for entry in entries {
119            let aliases: Vec<&str> = entry
120                .aliases
121                .iter()
122                .map(std::string::String::as_str)
123                .collect();
124            self.add_aliases(&entry.canonical, &aliases);
125        }
126        Ok(())
127    }
128
129    /// Export aliases to JSON
130    pub fn to_json(&self) -> Result<String, serde_json::Error> {
131        let entries: Vec<AliasEntry> = self
132            .canonical_to_aliases
133            .iter()
134            .map(|(canonical, aliases)| AliasEntry {
135                canonical: canonical.clone(),
136                aliases: aliases.iter().cloned().collect(),
137            })
138            .collect();
139        serde_json::to_string_pretty(&entries)
140    }
141}
142
143/// Entry in the alias table JSON format
144#[derive(Debug, Serialize, Deserialize)]
145struct AliasEntry {
146    canonical: String,
147    aliases: Vec<String>,
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Built-in import names resolve to their canonical package identifiers.
    #[test]
    fn test_alias_lookup() {
        let builtins = AliasTable::with_builtins();

        let expectations = [
            // PIL -> pillow
            ("PIL", "pkg:pypi/pillow"),
            // sklearn -> scikit-learn
            ("sklearn", "pkg:pypi/scikit-learn"),
        ];

        for (alias, canonical) in expectations {
            assert_eq!(
                builtins.get_canonical(alias),
                Some(canonical.to_lowercase())
            );
        }
    }

    /// `is_alias` accepts registered names and rejects unrelated ones.
    #[test]
    fn test_is_alias() {
        let builtins = AliasTable::with_builtins();
        let pillow = "pkg:pypi/pillow";

        for known in ["PIL", "pillow"] {
            assert!(builtins.is_alias(pillow, known));
        }
        assert!(!builtins.is_alias(pillow, "numpy"));
    }

    /// Aliases added at runtime behave like the built-in ones.
    #[test]
    fn test_custom_aliases() {
        let mut custom = AliasTable::new();
        custom.add_aliases("my-package", &["my_package", "mypackage"]);

        let resolved = custom.get_canonical("my_package");
        assert_eq!(resolved, Some("my-package".to_string()));
        assert!(custom.is_alias("my-package", "mypackage"));
    }
}