Skip to main content

cc_audit/sbom/
extractor.rs

1//! Dependency extractor for SBOM generation.
2
3use super::builder::{Component, ComponentType, SbomError};
4use serde_json::Value;
5use std::fs;
6use std::path::Path;
7
/// Extractor for dependencies from various configuration files.
///
/// A stateless unit type: it has no fields, so every method works purely from
/// the directory path it is given. The methods look for MCP server configs
/// (`mcp.json`, `.claude/mcp_servers.json`, `claude_desktop_config.json`),
/// skill files (`.claude/skills/*.md`), `package.json` and `Cargo.toml`.
pub struct DependencyExtractor;
10
11impl DependencyExtractor {
12    /// Create a new dependency extractor.
13    pub fn new() -> Self {
14        Self
15    }
16
17    /// Extract MCP servers from a directory.
18    pub fn extract_mcp_servers(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
19        let mut components = Vec::new();
20
21        // Check for mcp.json
22        let mcp_json_path = path.join("mcp.json");
23        if mcp_json_path.exists() {
24            components.extend(self.parse_mcp_json(&mcp_json_path)?);
25        }
26
27        // Check for .claude/mcp_servers.json (Claude Code config)
28        let claude_mcp_path = path.join(".claude").join("mcp_servers.json");
29        if claude_mcp_path.exists() {
30            components.extend(self.parse_mcp_json(&claude_mcp_path)?);
31        }
32
33        // Check for claude_desktop_config.json
34        let desktop_config = path.join("claude_desktop_config.json");
35        if desktop_config.exists() {
36            components.extend(self.parse_claude_desktop_config(&desktop_config)?);
37        }
38
39        Ok(components)
40    }
41
42    /// Parse mcp.json format.
43    fn parse_mcp_json(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
44        let content = fs::read_to_string(path)?;
45        let json: Value =
46            serde_json::from_str(&content).map_err(|e| SbomError::JsonParse(e.to_string()))?;
47
48        let mut components = Vec::new();
49
50        // Handle mcpServers object format
51        if let Some(servers) = json.get("mcpServers").and_then(|v| v.as_object()) {
52            for (name, config) in servers {
53                let mut comp = Component::new(name, ComponentType::McpServer);
54
55                // Try to extract command/args for npm package detection
56                if let Some(args) = config.get("args").and_then(|v| v.as_array()) {
57                    for arg in args {
58                        if let Some(arg_str) = arg.as_str() {
59                            // Detect npm package names
60                            if arg_str.starts_with('@') || !arg_str.contains('/') {
61                                if let Some(version) = Self::extract_npm_version(arg_str) {
62                                    comp =
63                                        comp.with_purl(Component::npm_purl(arg_str, Some(version)));
64                                } else {
65                                    comp = comp.with_purl(Component::npm_purl(arg_str, None));
66                                }
67                            }
68                        }
69                    }
70                }
71
72                components.push(comp);
73            }
74        }
75
76        Ok(components)
77    }
78
79    /// Parse claude_desktop_config.json format.
80    fn parse_claude_desktop_config(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
81        let content = fs::read_to_string(path)?;
82        let json: Value =
83            serde_json::from_str(&content).map_err(|e| SbomError::JsonParse(e.to_string()))?;
84
85        let mut components = Vec::new();
86
87        // Same format as mcp.json
88        if let Some(servers) = json.get("mcpServers").and_then(|v| v.as_object()) {
89            for (name, _config) in servers {
90                components.push(Component::new(name, ComponentType::McpServer));
91            }
92        }
93
94        Ok(components)
95    }
96
97    /// Extract skills from a directory.
98    pub fn extract_skills(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
99        let mut components = Vec::new();
100
101        // Check for .claude/skills directory
102        let skills_dir = path.join(".claude").join("skills");
103        if skills_dir.is_dir()
104            && let Ok(entries) = fs::read_dir(&skills_dir)
105        {
106            for entry in entries.flatten() {
107                let entry_path = entry.path();
108                if entry_path.is_file()
109                    && entry_path.extension().is_some_and(|e| e == "md")
110                    && let Some(name) = entry_path.file_stem().and_then(|s| s.to_str())
111                {
112                    let mut comp = Component::new(name, ComponentType::Skill);
113
114                    // Try to parse frontmatter for metadata
115                    if let Ok(content) = fs::read_to_string(&entry_path)
116                        && let Some(desc) = Self::extract_skill_description(&content)
117                    {
118                        comp = comp.with_description(desc);
119                    }
120
121                    components.push(comp);
122                }
123            }
124        }
125
126        Ok(components)
127    }
128
129    /// Extract skill description from frontmatter.
130    fn extract_skill_description(content: &str) -> Option<String> {
131        // Simple frontmatter parsing
132        if !content.starts_with("---") {
133            return None;
134        }
135
136        let parts: Vec<&str> = content.splitn(3, "---").collect();
137        if parts.len() < 3 {
138            return None;
139        }
140
141        let frontmatter = parts[1];
142
143        // Look for description field
144        for line in frontmatter.lines() {
145            let line = line.trim();
146            if let Some(desc) = line.strip_prefix("description:") {
147                return Some(desc.trim().trim_matches('"').trim_matches('\'').to_string());
148            }
149        }
150
151        None
152    }
153
154    /// Extract npm dependencies from package.json.
155    pub fn extract_npm_dependencies(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
156        let package_json_path = path.join("package.json");
157        if !package_json_path.exists() {
158            return Ok(Vec::new());
159        }
160
161        let content = fs::read_to_string(&package_json_path)?;
162        let json: Value =
163            serde_json::from_str(&content).map_err(|e| SbomError::JsonParse(e.to_string()))?;
164
165        let mut components = Vec::new();
166
167        // Extract from dependencies
168        if let Some(deps) = json.get("dependencies").and_then(|v| v.as_object()) {
169            for (name, version) in deps {
170                let version_str = version.as_str().unwrap_or("");
171                let clean_version = Self::clean_npm_version(version_str);
172
173                let comp = Component::new(name, ComponentType::Library)
174                    .with_version(&clean_version)
175                    .with_purl(Component::npm_purl(name, Some(&clean_version)));
176
177                components.push(comp);
178            }
179        }
180
181        // Extract from devDependencies (optional)
182        if let Some(deps) = json.get("devDependencies").and_then(|v| v.as_object()) {
183            for (name, version) in deps {
184                let version_str = version.as_str().unwrap_or("");
185                let clean_version = Self::clean_npm_version(version_str);
186
187                let comp = Component::new(name, ComponentType::Library)
188                    .with_version(&clean_version)
189                    .with_purl(Component::npm_purl(name, Some(&clean_version)));
190
191                components.push(comp);
192            }
193        }
194
195        Ok(components)
196    }
197
198    /// Extract Cargo dependencies from Cargo.toml.
199    pub fn extract_cargo_dependencies(&self, path: &Path) -> Result<Vec<Component>, SbomError> {
200        let cargo_toml_path = path.join("Cargo.toml");
201        if !cargo_toml_path.exists() {
202            return Ok(Vec::new());
203        }
204
205        let content = fs::read_to_string(&cargo_toml_path)?;
206        let toml: toml::Value =
207            toml::from_str(&content).map_err(|e| SbomError::TomlParse(e.to_string()))?;
208
209        let mut components = Vec::new();
210
211        // Extract from [dependencies]
212        if let Some(deps) = toml.get("dependencies").and_then(|v| v.as_table()) {
213            for (name, value) in deps {
214                let version = Self::extract_cargo_version(value);
215
216                let comp = Component::new(name, ComponentType::Library)
217                    .with_version(&version)
218                    .with_purl(format!("pkg:cargo/{}@{}", name, version));
219
220                components.push(comp);
221            }
222        }
223
224        // Extract from [dev-dependencies]
225        if let Some(deps) = toml.get("dev-dependencies").and_then(|v| v.as_table()) {
226            for (name, value) in deps {
227                let version = Self::extract_cargo_version(value);
228
229                let comp = Component::new(name, ComponentType::Library)
230                    .with_version(&version)
231                    .with_purl(format!("pkg:cargo/{}@{}", name, version));
232
233                components.push(comp);
234            }
235        }
236
237        Ok(components)
238    }
239
240    /// Extract version from Cargo.toml dependency value.
241    fn extract_cargo_version(value: &toml::Value) -> String {
242        match value {
243            toml::Value::String(v) => v.clone(),
244            toml::Value::Table(t) => t
245                .get("version")
246                .and_then(|v| v.as_str())
247                .unwrap_or("*")
248                .to_string(),
249            _ => "*".to_string(),
250        }
251    }
252
253    /// Extract npm version from package specifier.
254    fn extract_npm_version(spec: &str) -> Option<&str> {
255        // Handle @scope/package@version or package@version
256        if let Some(idx) = spec.rfind('@')
257            && idx > 0
258            && !spec[..idx].ends_with('/')
259        {
260            return Some(&spec[idx + 1..]);
261        }
262        None
263    }
264
265    /// Clean npm version string (remove ^, ~, etc.)
266    fn clean_npm_version(version: &str) -> String {
267        version
268            .trim_start_matches(['^', '~', '>', '<', '=', ' '].as_ref())
269            .split_whitespace()
270            .next()
271            .unwrap_or(version)
272            .to_string()
273    }
274}
275
276impl Default for DependencyExtractor {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fresh, empty temporary project directory.
    fn empty_project() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Write `contents` to `rel` (relative to the project root), creating any
    /// missing parent directories first.
    fn write_file(project: &TempDir, rel: &str, contents: &str) {
        let target = project.path().join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    /// Temporary project directory containing a single file.
    fn project_with(rel: &str, contents: &str) -> TempDir {
        let project = empty_project();
        write_file(&project, rel, contents);
        project
    }

    /// Names of the given components, in order.
    fn names(components: &[Component]) -> Vec<&str> {
        components.iter().map(|c| c.name.as_str()).collect()
    }

    // ---- version helpers -------------------------------------------------

    #[test]
    fn test_clean_npm_version() {
        let cases = [
            ("^1.2.3", "1.2.3"),
            ("~1.2.3", "1.2.3"),
            (">=1.0.0", "1.0.0"),
            ("1.2.3", "1.2.3"),
        ];
        for (input, expected) in cases {
            assert_eq!(DependencyExtractor::clean_npm_version(input), expected);
        }
    }

    #[test]
    fn test_extract_npm_version() {
        let cases = [
            ("express@4.18.0", Some("4.18.0")),
            ("@scope/package@1.0.0", Some("1.0.0")),
            ("express", None),
        ];
        for (spec, expected) in cases {
            assert_eq!(DependencyExtractor::extract_npm_version(spec), expected);
        }
    }

    #[test]
    fn test_extract_npm_version_scoped_without_version() {
        let version = DependencyExtractor::extract_npm_version("@scope/package");
        assert_eq!(version, None);
    }

    #[test]
    fn test_extract_cargo_version_string() {
        let value = toml::Value::String("1.0.0".to_string());
        assert_eq!(DependencyExtractor::extract_cargo_version(&value), "1.0.0");
    }

    #[test]
    fn test_extract_cargo_version_table() {
        let mut table = toml::map::Map::new();
        table.insert(
            "version".to_string(),
            toml::Value::String("2.0.0".to_string()),
        );
        let value = toml::Value::Table(table);
        assert_eq!(DependencyExtractor::extract_cargo_version(&value), "2.0.0");
    }

    #[test]
    fn test_extract_cargo_version_table_no_version() {
        let value = toml::Value::Table(toml::map::Map::new());
        assert_eq!(DependencyExtractor::extract_cargo_version(&value), "*");
    }

    #[test]
    fn test_extract_cargo_version_other() {
        let value = toml::Value::Boolean(true);
        assert_eq!(DependencyExtractor::extract_cargo_version(&value), "*");
    }

    // ---- MCP servers -----------------------------------------------------

    #[test]
    fn test_extract_mcp_servers() {
        let project = project_with(
            "mcp.json",
            r#"{"mcpServers": {"test-server": {"command": "npx"}}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_mcp_servers(project.path())
            .unwrap();

        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "test-server");
        assert_eq!(components[0].component_type, ComponentType::McpServer);
    }

    #[test]
    fn test_parse_claude_desktop_config() {
        let project = project_with(
            "claude_desktop_config.json",
            r#"{"mcpServers": {"desktop-server": {"command": "npx", "args": ["server"]}}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_mcp_servers(project.path())
            .unwrap();

        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "desktop-server");
    }

    #[test]
    fn test_extract_mcp_servers_from_claude_dir() {
        let project = project_with(
            ".claude/mcp_servers.json",
            r#"{"mcpServers": {"claude-server": {"command": "npx"}}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_mcp_servers(project.path())
            .unwrap();

        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "claude-server");
    }

    #[test]
    fn test_extract_mcp_servers_with_npm_args() {
        let project = project_with(
            "mcp.json",
            r#"{"mcpServers": {"npm-server": {"command": "npx", "args": ["@example/mcp-server@1.0.0"]}}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_mcp_servers(project.path())
            .unwrap();

        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "npm-server");
        assert!(components[0].purl.is_some());
    }

    // ---- skills ----------------------------------------------------------

    #[test]
    fn test_extract_skills() {
        let project = project_with(
            ".claude/skills/test-skill.md",
            "---\ndescription: A test skill\n---\n# Test Skill\n",
        );

        let components = DependencyExtractor::new()
            .extract_skills(project.path())
            .unwrap();

        assert_eq!(components.len(), 1);
        assert_eq!(components[0].name, "test-skill");
        assert_eq!(components[0].component_type, ComponentType::Skill);
        assert_eq!(components[0].description, Some("A test skill".to_string()));
    }

    #[test]
    fn test_extract_skills_no_skills_dir() {
        let project = empty_project();

        let components = DependencyExtractor::new()
            .extract_skills(project.path())
            .unwrap();

        assert!(components.is_empty());
    }

    #[test]
    fn test_extract_skills_with_non_md_files() {
        // Only `.md` files count as skills.
        let project = project_with(".claude/skills/not-a-skill.txt", "content");

        let components = DependencyExtractor::new()
            .extract_skills(project.path())
            .unwrap();

        assert!(components.is_empty());
    }

    #[test]
    fn test_extract_skill_description() {
        let content = "---\nname: test\ndescription: This is a test\n---\n# Content\n";
        assert_eq!(
            DependencyExtractor::extract_skill_description(content),
            Some("This is a test".to_string())
        );
    }

    #[test]
    fn test_extract_skill_description_no_frontmatter() {
        let desc = DependencyExtractor::extract_skill_description("# Just content");
        assert!(desc.is_none());
    }

    #[test]
    fn test_extract_skill_description_incomplete_frontmatter() {
        let desc = DependencyExtractor::extract_skill_description("---\nname: test\n---");
        assert!(desc.is_none());
    }

    #[test]
    fn test_extract_skill_description_quoted() {
        let content = "---\ndescription: \"quoted description\"\n---\n# Content\n";
        assert_eq!(
            DependencyExtractor::extract_skill_description(content),
            Some("quoted description".to_string())
        );
    }

    #[test]
    fn test_extract_skill_description_single_quoted() {
        let content = "---\ndescription: 'single quoted'\n---\n# Content\n";
        assert_eq!(
            DependencyExtractor::extract_skill_description(content),
            Some("single quoted".to_string())
        );
    }

    // ---- npm -------------------------------------------------------------

    #[test]
    fn test_extract_npm_dependencies() {
        let project = project_with(
            "package.json",
            r#"{"dependencies": {"express": "^4.18.0", "lodash": "~4.17.21"}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_npm_dependencies(project.path())
            .unwrap();

        assert_eq!(components.len(), 2);
        let found = names(&components);
        assert!(found.contains(&"express"));
        assert!(found.contains(&"lodash"));
    }

    #[test]
    fn test_extract_npm_dependencies_with_dev() {
        let project = project_with(
            "package.json",
            r#"{"dependencies": {"express": "^4.18.0"}, "devDependencies": {"jest": "^29.0.0"}}"#,
        );

        let components = DependencyExtractor::new()
            .extract_npm_dependencies(project.path())
            .unwrap();

        assert_eq!(components.len(), 2);
        let found = names(&components);
        assert!(found.contains(&"express"));
        assert!(found.contains(&"jest"));
    }

    #[test]
    fn test_extract_npm_dependencies_no_package_json() {
        let project = empty_project();

        let components = DependencyExtractor::new()
            .extract_npm_dependencies(project.path())
            .unwrap();

        assert!(components.is_empty());
    }

    // ---- cargo -----------------------------------------------------------

    #[test]
    fn test_extract_cargo_dependencies() {
        let project = project_with(
            "Cargo.toml",
            r#"
[dependencies]
serde = "1.0"
tokio = { version = "1.0", features = ["full"] }
"#,
        );

        let components = DependencyExtractor::new()
            .extract_cargo_dependencies(project.path())
            .unwrap();

        assert_eq!(components.len(), 2);
        let found = names(&components);
        assert!(found.contains(&"serde"));
        assert!(found.contains(&"tokio"));
    }

    #[test]
    fn test_extract_cargo_dependencies_with_dev() {
        let project = project_with(
            "Cargo.toml",
            r#"
[dependencies]
serde = "1.0"

[dev-dependencies]
tempfile = "3.0"
"#,
        );

        let components = DependencyExtractor::new()
            .extract_cargo_dependencies(project.path())
            .unwrap();

        assert_eq!(components.len(), 2);
        let found = names(&components);
        assert!(found.contains(&"serde"));
        assert!(found.contains(&"tempfile"));
    }

    #[test]
    fn test_extract_cargo_dependencies_no_cargo_toml() {
        let project = empty_project();

        let components = DependencyExtractor::new()
            .extract_cargo_dependencies(project.path())
            .unwrap();

        assert!(components.is_empty());
    }

    // ---- construction ----------------------------------------------------

    #[test]
    fn test_new_extractor() {
        // Construction must not panic.
        let _ = DependencyExtractor::new();
    }
}