Skip to main content

chub_core/team/
detect.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use serde::Serialize;
6
7/// A detected dependency from a project file.
8#[derive(Debug, Clone, Serialize)]
9pub struct DetectedDep {
10    pub name: String,
11    pub version: Option<String>,
12    pub source_file: String,
13    pub language: String,
14}
15
16/// A match between a detected dependency and a registry doc.
17#[derive(Debug, Clone, Serialize)]
18pub struct DetectedMatch {
19    pub dep: DetectedDep,
20    pub doc_id: String,
21    pub doc_name: String,
22    pub confidence: f64,
23}
24
25/// Scan the current directory for dependency files and extract dependencies.
26pub fn detect_dependencies(root: &Path) -> Vec<DetectedDep> {
27    let mut deps = Vec::new();
28
29    // package.json (npm/yarn/pnpm)
30    let pkg_json = root.join("package.json");
31    if pkg_json.exists() {
32        deps.extend(detect_npm(&pkg_json));
33    }
34
35    // requirements.txt (Python)
36    let req_txt = root.join("requirements.txt");
37    if req_txt.exists() {
38        deps.extend(detect_requirements_txt(&req_txt));
39    }
40
41    // pyproject.toml (Python)
42    let pyproject = root.join("pyproject.toml");
43    if pyproject.exists() {
44        deps.extend(detect_pyproject(&pyproject));
45    }
46
47    // Cargo.toml (Rust)
48    let cargo_toml = root.join("Cargo.toml");
49    if cargo_toml.exists() {
50        deps.extend(detect_cargo(&cargo_toml));
51    }
52
53    // go.mod (Go)
54    let go_mod = root.join("go.mod");
55    if go_mod.exists() {
56        deps.extend(detect_go_mod(&go_mod));
57    }
58
59    // Gemfile (Ruby)
60    let gemfile = root.join("Gemfile");
61    if gemfile.exists() {
62        deps.extend(detect_gemfile(&gemfile));
63    }
64
65    // Pipfile (Python)
66    let pipfile = root.join("Pipfile");
67    if pipfile.exists() {
68        deps.extend(detect_pipfile(&pipfile));
69    }
70
71    // pom.xml (Java/Maven)
72    let pom_xml = root.join("pom.xml");
73    if pom_xml.exists() {
74        deps.extend(detect_pom_xml(&pom_xml));
75    }
76
77    // build.gradle (Java/Gradle)
78    let build_gradle = root.join("build.gradle");
79    if build_gradle.exists() {
80        deps.extend(detect_build_gradle(&build_gradle));
81    }
82
83    // build.gradle.kts (Kotlin DSL)
84    let build_gradle_kts = root.join("build.gradle.kts");
85    if build_gradle_kts.exists() {
86        deps.extend(detect_build_gradle(&build_gradle_kts));
87    }
88
89    // Deduplicate by name
90    let mut seen = std::collections::HashSet::new();
91    deps.retain(|d| seen.insert(d.name.clone()));
92
93    deps
94}
95
96fn detect_npm(path: &Path) -> Vec<DetectedDep> {
97    let content = match fs::read_to_string(path) {
98        Ok(c) => c,
99        Err(_) => return vec![],
100    };
101    let json: serde_json::Value = match serde_json::from_str(&content) {
102        Ok(v) => v,
103        Err(_) => return vec![],
104    };
105
106    let mut deps = Vec::new();
107    for key in &["dependencies", "devDependencies"] {
108        if let Some(obj) = json.get(key).and_then(|v| v.as_object()) {
109            for (name, version) in obj {
110                deps.push(DetectedDep {
111                    name: name.clone(),
112                    version: version.as_str().map(|s| s.to_string()),
113                    source_file: "package.json".to_string(),
114                    language: "javascript".to_string(),
115                });
116            }
117        }
118    }
119    deps
120}
121
122fn detect_requirements_txt(path: &Path) -> Vec<DetectedDep> {
123    let content = match fs::read_to_string(path) {
124        Ok(c) => c,
125        Err(_) => return vec![],
126    };
127
128    content
129        .lines()
130        .filter(|l| !l.trim().is_empty() && !l.starts_with('#') && !l.starts_with('-'))
131        .map(|line| {
132            let parts: Vec<&str> = line
133                .splitn(2, |c: char| {
134                    c == '=' || c == '>' || c == '<' || c == '!' || c == '~'
135                })
136                .collect();
137            let name = parts[0].trim().to_string();
138            let version = if parts.len() > 1 {
139                Some(
140                    parts[1]
141                        .trim_matches(|c: char| {
142                            c == '='
143                                || c == '>'
144                                || c == '<'
145                                || c == '!'
146                                || c == '~'
147                                || c.is_whitespace()
148                        })
149                        .to_string(),
150                )
151            } else {
152                None
153            };
154            DetectedDep {
155                name,
156                version,
157                source_file: "requirements.txt".to_string(),
158                language: "python".to_string(),
159            }
160        })
161        .collect()
162}
163
164fn detect_pyproject(path: &Path) -> Vec<DetectedDep> {
165    let content = match fs::read_to_string(path) {
166        Ok(c) => c,
167        Err(_) => return vec![],
168    };
169    let toml_val: toml::Value = match content.parse() {
170        Ok(v) => v,
171        Err(_) => return vec![],
172    };
173
174    let mut deps = Vec::new();
175
176    // [project.dependencies]
177    if let Some(project_deps) = toml_val
178        .get("project")
179        .and_then(|p| p.get("dependencies"))
180        .and_then(|d| d.as_array())
181    {
182        for dep in project_deps {
183            if let Some(s) = dep.as_str() {
184                let name = s
185                    .split(|c: char| !c.is_alphanumeric() && c != '-' && c != '_')
186                    .next()
187                    .unwrap_or(s)
188                    .to_string();
189                deps.push(DetectedDep {
190                    name,
191                    version: None,
192                    source_file: "pyproject.toml".to_string(),
193                    language: "python".to_string(),
194                });
195            }
196        }
197    }
198
199    // [tool.poetry.dependencies]
200    if let Some(poetry_deps) = toml_val
201        .get("tool")
202        .and_then(|t| t.get("poetry"))
203        .and_then(|p| p.get("dependencies"))
204        .and_then(|d| d.as_table())
205    {
206        for (name, val) in poetry_deps {
207            if name == "python" {
208                continue;
209            }
210            let version = val.as_str().map(|s| s.to_string());
211            deps.push(DetectedDep {
212                name: name.clone(),
213                version,
214                source_file: "pyproject.toml".to_string(),
215                language: "python".to_string(),
216            });
217        }
218    }
219
220    deps
221}
222
223fn detect_cargo(path: &Path) -> Vec<DetectedDep> {
224    let content = match fs::read_to_string(path) {
225        Ok(c) => c,
226        Err(_) => return vec![],
227    };
228    let toml_val: toml::Value = match content.parse() {
229        Ok(v) => v,
230        Err(_) => return vec![],
231    };
232
233    let mut deps = Vec::new();
234
235    let extract_deps = |table: &toml::map::Map<String, toml::Value>,
236                        deps: &mut Vec<DetectedDep>| {
237        for (name, val) in table {
238            let version = match val {
239                toml::Value::String(s) => Some(s.clone()),
240                toml::Value::Table(t) => t
241                    .get("version")
242                    .and_then(|v| v.as_str())
243                    .map(|s| s.to_string()),
244                _ => None,
245            };
246            deps.push(DetectedDep {
247                name: name.clone(),
248                version,
249                source_file: "Cargo.toml".to_string(),
250                language: "rust".to_string(),
251            });
252        }
253    };
254
255    for section in &["dependencies", "dev-dependencies"] {
256        if let Some(table) = toml_val.get(section).and_then(|d| d.as_table()) {
257            extract_deps(table, &mut deps);
258        }
259    }
260
261    // Workspace Cargo.toml: [workspace.dependencies]
262    if let Some(table) = toml_val
263        .get("workspace")
264        .and_then(|w| w.get("dependencies"))
265        .and_then(|d| d.as_table())
266    {
267        extract_deps(table, &mut deps);
268    }
269
270    deps
271}
272
273fn detect_go_mod(path: &Path) -> Vec<DetectedDep> {
274    let content = match fs::read_to_string(path) {
275        Ok(c) => c,
276        Err(_) => return vec![],
277    };
278
279    let mut deps = Vec::new();
280    let mut in_require = false;
281
282    for line in content.lines() {
283        let trimmed = line.trim();
284        if trimmed.starts_with("require (") || trimmed == "require (" {
285            in_require = true;
286            continue;
287        }
288        if trimmed == ")" {
289            in_require = false;
290            continue;
291        }
292        if in_require || trimmed.starts_with("require ") {
293            let dep_line = if let Some(stripped) = trimmed.strip_prefix("require ") {
294                stripped
295            } else {
296                trimmed
297            };
298            let parts: Vec<&str> = dep_line.split_whitespace().collect();
299            if !parts.is_empty() {
300                let name = parts[0].rsplit('/').next().unwrap_or(parts[0]).to_string();
301                let version = parts.get(1).map(|s| s.to_string());
302                deps.push(DetectedDep {
303                    name,
304                    version,
305                    source_file: "go.mod".to_string(),
306                    language: "go".to_string(),
307                });
308            }
309        }
310    }
311
312    deps
313}
314
315fn detect_gemfile(path: &Path) -> Vec<DetectedDep> {
316    let content = match fs::read_to_string(path) {
317        Ok(c) => c,
318        Err(_) => return vec![],
319    };
320
321    content
322        .lines()
323        .filter_map(|line| {
324            let trimmed = line.trim();
325            if !trimmed.starts_with("gem ") {
326                return None;
327            }
328            let rest = &trimmed[4..];
329            // Extract gem name from quotes
330            let name = rest.split(['\'', '"']).nth(1)?.to_string();
331            Some(DetectedDep {
332                name,
333                version: None,
334                source_file: "Gemfile".to_string(),
335                language: "ruby".to_string(),
336            })
337        })
338        .collect()
339}
340
341fn detect_pipfile(path: &Path) -> Vec<DetectedDep> {
342    let content = match fs::read_to_string(path) {
343        Ok(c) => c,
344        Err(_) => return vec![],
345    };
346    let toml_val: toml::Value = match content.parse() {
347        Ok(v) => v,
348        Err(_) => return vec![],
349    };
350
351    let mut deps = Vec::new();
352    for section in &["packages", "dev-packages"] {
353        if let Some(table) = toml_val.get(section).and_then(|d| d.as_table()) {
354            for (name, val) in table {
355                let version = val.as_str().map(|s| s.to_string());
356                deps.push(DetectedDep {
357                    name: name.clone(),
358                    version,
359                    source_file: "Pipfile".to_string(),
360                    language: "python".to_string(),
361                });
362            }
363        }
364    }
365    deps
366}
367
368fn detect_pom_xml(path: &Path) -> Vec<DetectedDep> {
369    let content = match fs::read_to_string(path) {
370        Ok(c) => c,
371        Err(_) => return vec![],
372    };
373
374    let mut deps = Vec::new();
375    let mut in_dependency = false;
376    let mut group_id = String::new();
377    let mut artifact_id = String::new();
378    let mut version = None;
379
380    for line in content.lines() {
381        let trimmed = line.trim();
382        if trimmed == "<dependency>" {
383            in_dependency = true;
384            group_id.clear();
385            artifact_id.clear();
386            version = None;
387            continue;
388        }
389        if trimmed == "</dependency>" {
390            if in_dependency && !artifact_id.is_empty() {
391                deps.push(DetectedDep {
392                    name: if group_id.is_empty() {
393                        artifact_id.clone()
394                    } else {
395                        format!("{}:{}", group_id, artifact_id)
396                    },
397                    version: version.clone(),
398                    source_file: "pom.xml".to_string(),
399                    language: "java".to_string(),
400                });
401            }
402            in_dependency = false;
403            continue;
404        }
405        if in_dependency {
406            if let Some(val) = extract_xml_value(trimmed, "groupId") {
407                group_id = val;
408            } else if let Some(val) = extract_xml_value(trimmed, "artifactId") {
409                artifact_id = val;
410            } else if let Some(val) = extract_xml_value(trimmed, "version") {
411                if !val.starts_with("${") {
412                    version = Some(val);
413                }
414            }
415        }
416    }
417
418    deps
419}
420
/// Return the trimmed text between `<tag>` and the next `</tag>` on `line`.
///
/// Yields `None` when the opening tag is absent or no closing tag follows it.
/// The closing tag is searched only after the opening tag, so a stray closing
/// tag earlier on the line cannot produce an inverted slice range.
fn extract_xml_value(line: &str, tag: &str) -> Option<String> {
    let open = format!("<{}>", tag);
    let close = format!("</{}>", tag);

    let value_start = line.find(&open)? + open.len();
    let value_len = line[value_start..].find(&close)?;

    Some(line[value_start..value_start + value_len].trim().to_string())
}
432
433fn detect_build_gradle(path: &Path) -> Vec<DetectedDep> {
434    let content = match fs::read_to_string(path) {
435        Ok(c) => c,
436        Err(_) => return vec![],
437    };
438
439    let mut deps = Vec::new();
440
441    for line in content.lines() {
442        let trimmed = line.trim();
443        // Match patterns like: implementation 'group:artifact:version'
444        // or: implementation "group:artifact:version"
445        for keyword in &[
446            "implementation",
447            "api",
448            "compileOnly",
449            "runtimeOnly",
450            "testImplementation",
451        ] {
452            if !trimmed.starts_with(keyword) {
453                continue;
454            }
455            let rest = &trimmed[keyword.len()..].trim_start();
456            // Extract quoted string
457            let quote = if rest.starts_with('\'') {
458                '\''
459            } else if rest.starts_with('"') {
460                '"'
461            } else if rest.starts_with('(') {
462                // implementation("group:artifact:version")
463                let inner = rest.trim_start_matches('(').trim_end_matches(')');
464                if inner.starts_with('\'') {
465                    '\''
466                } else if inner.starts_with('"') {
467                    '"'
468                } else {
469                    continue;
470                }
471            } else {
472                continue;
473            };
474            let content_str = if rest.starts_with('(') {
475                rest.trim_start_matches('(').trim_end_matches(')')
476            } else {
477                rest
478            };
479            let parts: Vec<&str> = content_str.trim_matches(quote).split(':').collect();
480            if parts.len() >= 2 {
481                let name = format!("{}:{}", parts[0], parts[1]);
482                let version = parts.get(2).map(|s| s.to_string());
483                deps.push(DetectedDep {
484                    name,
485                    version,
486                    source_file: path
487                        .file_name()
488                        .unwrap_or_default()
489                        .to_string_lossy()
490                        .to_string(),
491                    language: "java".to_string(),
492                });
493            }
494            break;
495        }
496    }
497
498    deps
499}
500
501/// Match detected dependencies to known docs in the registry.
502/// Uses simple name matching — the dep name is searched against doc IDs.
503pub fn match_deps_to_docs(
504    deps: &[DetectedDep],
505    doc_ids: &[(String, String)], // (id, name) pairs from registry
506) -> Vec<DetectedMatch> {
507    let mut matches = Vec::new();
508
509    // Build a lookup: lowercase name → (id, name)
510    let mut id_by_name: HashMap<String, (String, String)> = HashMap::new();
511    for (id, name) in doc_ids {
512        // Index by last segment of id (e.g., "openai/chat" → "openai")
513        let parts: Vec<&str> = id.split('/').collect();
514        if !parts.is_empty() {
515            id_by_name.insert(parts[0].to_lowercase(), (id.clone(), name.clone()));
516        }
517        // Also index by full id
518        id_by_name.insert(id.to_lowercase(), (id.clone(), name.clone()));
519    }
520
521    for dep in deps {
522        let dep_lower = dep.name.to_lowercase();
523
524        // Try exact match on first segment
525        if let Some((doc_id, doc_name)) = id_by_name.get(&dep_lower) {
526            matches.push(DetectedMatch {
527                dep: dep.clone(),
528                doc_id: doc_id.clone(),
529                doc_name: doc_name.clone(),
530                confidence: 1.0,
531            });
532            continue;
533        }
534
535        // Try partial match (require minimum 4 chars to avoid false positives
536        // like "tar" matching "starlette" or "ray" matching "rayon")
537        if dep_lower.len() >= 4 {
538            for (key, (doc_id, doc_name)) in &id_by_name {
539                if key.len() >= 4 && (key.contains(&dep_lower) || dep_lower.contains(key.as_str()))
540                {
541                    matches.push(DetectedMatch {
542                        dep: dep.clone(),
543                        doc_id: doc_id.clone(),
544                        doc_name: doc_name.clone(),
545                        confidence: 0.5,
546                    });
547                    break;
548                }
549            }
550        }
551    }
552
553    matches
554}