Skip to main content

chub_core/team/
detect.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use serde::Serialize;
6
7/// A detected dependency from a project file.
8#[derive(Debug, Clone, Serialize)]
9pub struct DetectedDep {
10    pub name: String,
11    pub version: Option<String>,
12    pub source_file: String,
13    pub language: String,
14}
15
16/// A match between a detected dependency and a registry doc.
17#[derive(Debug, Clone, Serialize)]
18pub struct DetectedMatch {
19    pub dep: DetectedDep,
20    pub doc_id: String,
21    pub doc_name: String,
22    pub confidence: f64,
23}
24
25/// Scan the current directory for dependency files and extract dependencies.
26pub fn detect_dependencies(root: &Path) -> Vec<DetectedDep> {
27    let mut deps = Vec::new();
28
29    // package.json (npm/yarn/pnpm)
30    let pkg_json = root.join("package.json");
31    if pkg_json.exists() {
32        deps.extend(detect_npm(&pkg_json));
33    }
34
35    // requirements.txt (Python)
36    let req_txt = root.join("requirements.txt");
37    if req_txt.exists() {
38        deps.extend(detect_requirements_txt(&req_txt));
39    }
40
41    // pyproject.toml (Python)
42    let pyproject = root.join("pyproject.toml");
43    if pyproject.exists() {
44        deps.extend(detect_pyproject(&pyproject));
45    }
46
47    // Cargo.toml (Rust)
48    let cargo_toml = root.join("Cargo.toml");
49    if cargo_toml.exists() {
50        deps.extend(detect_cargo(&cargo_toml));
51    }
52
53    // go.mod (Go)
54    let go_mod = root.join("go.mod");
55    if go_mod.exists() {
56        deps.extend(detect_go_mod(&go_mod));
57    }
58
59    // Gemfile (Ruby)
60    let gemfile = root.join("Gemfile");
61    if gemfile.exists() {
62        deps.extend(detect_gemfile(&gemfile));
63    }
64
65    // Pipfile (Python)
66    let pipfile = root.join("Pipfile");
67    if pipfile.exists() {
68        deps.extend(detect_pipfile(&pipfile));
69    }
70
71    // pom.xml (Java/Maven)
72    let pom_xml = root.join("pom.xml");
73    if pom_xml.exists() {
74        deps.extend(detect_pom_xml(&pom_xml));
75    }
76
77    // build.gradle (Java/Gradle)
78    let build_gradle = root.join("build.gradle");
79    if build_gradle.exists() {
80        deps.extend(detect_build_gradle(&build_gradle));
81    }
82
83    // build.gradle.kts (Kotlin DSL)
84    let build_gradle_kts = root.join("build.gradle.kts");
85    if build_gradle_kts.exists() {
86        deps.extend(detect_build_gradle(&build_gradle_kts));
87    }
88
89    // Deduplicate by name
90    let mut seen = std::collections::HashSet::new();
91    deps.retain(|d| seen.insert(d.name.clone()));
92
93    deps
94}
95
96fn detect_npm(path: &Path) -> Vec<DetectedDep> {
97    let content = match fs::read_to_string(path) {
98        Ok(c) => c,
99        Err(_) => return vec![],
100    };
101    let json: serde_json::Value = match serde_json::from_str(&content) {
102        Ok(v) => v,
103        Err(_) => return vec![],
104    };
105
106    let mut deps = Vec::new();
107    for key in &["dependencies", "devDependencies"] {
108        if let Some(obj) = json.get(key).and_then(|v| v.as_object()) {
109            for (name, version) in obj {
110                deps.push(DetectedDep {
111                    name: name.clone(),
112                    version: version.as_str().map(|s| s.to_string()),
113                    source_file: "package.json".to_string(),
114                    language: "javascript".to_string(),
115                });
116            }
117        }
118    }
119    deps
120}
121
122fn detect_requirements_txt(path: &Path) -> Vec<DetectedDep> {
123    let content = match fs::read_to_string(path) {
124        Ok(c) => c,
125        Err(_) => return vec![],
126    };
127
128    content
129        .lines()
130        .filter(|l| !l.trim().is_empty() && !l.starts_with('#') && !l.starts_with('-'))
131        .map(|line| {
132            let parts: Vec<&str> = line
133                .splitn(2, |c: char| {
134                    c == '=' || c == '>' || c == '<' || c == '!' || c == '~'
135                })
136                .collect();
137            let name = parts[0].trim().to_string();
138            let version = if parts.len() > 1 {
139                Some(
140                    parts[1]
141                        .trim_matches(|c: char| {
142                            c == '='
143                                || c == '>'
144                                || c == '<'
145                                || c == '!'
146                                || c == '~'
147                                || c.is_whitespace()
148                        })
149                        .to_string(),
150                )
151            } else {
152                None
153            };
154            DetectedDep {
155                name,
156                version,
157                source_file: "requirements.txt".to_string(),
158                language: "python".to_string(),
159            }
160        })
161        .collect()
162}
163
164fn detect_pyproject(path: &Path) -> Vec<DetectedDep> {
165    let content = match fs::read_to_string(path) {
166        Ok(c) => c,
167        Err(_) => return vec![],
168    };
169    let toml_val: toml::Value = match content.parse() {
170        Ok(v) => v,
171        Err(_) => return vec![],
172    };
173
174    let mut deps = Vec::new();
175
176    // [project.dependencies]
177    if let Some(project_deps) = toml_val
178        .get("project")
179        .and_then(|p| p.get("dependencies"))
180        .and_then(|d| d.as_array())
181    {
182        for dep in project_deps {
183            if let Some(s) = dep.as_str() {
184                let name = s
185                    .split(|c: char| !c.is_alphanumeric() && c != '-' && c != '_')
186                    .next()
187                    .unwrap_or(s)
188                    .to_string();
189                deps.push(DetectedDep {
190                    name,
191                    version: None,
192                    source_file: "pyproject.toml".to_string(),
193                    language: "python".to_string(),
194                });
195            }
196        }
197    }
198
199    // [tool.poetry.dependencies]
200    if let Some(poetry_deps) = toml_val
201        .get("tool")
202        .and_then(|t| t.get("poetry"))
203        .and_then(|p| p.get("dependencies"))
204        .and_then(|d| d.as_table())
205    {
206        for (name, val) in poetry_deps {
207            if name == "python" {
208                continue;
209            }
210            let version = val.as_str().map(|s| s.to_string());
211            deps.push(DetectedDep {
212                name: name.clone(),
213                version,
214                source_file: "pyproject.toml".to_string(),
215                language: "python".to_string(),
216            });
217        }
218    }
219
220    deps
221}
222
223fn detect_cargo(path: &Path) -> Vec<DetectedDep> {
224    let content = match fs::read_to_string(path) {
225        Ok(c) => c,
226        Err(_) => return vec![],
227    };
228    let toml_val: toml::Value = match content.parse() {
229        Ok(v) => v,
230        Err(_) => return vec![],
231    };
232
233    let mut deps = Vec::new();
234    for section in &["dependencies", "dev-dependencies"] {
235        if let Some(table) = toml_val.get(section).and_then(|d| d.as_table()) {
236            for (name, val) in table {
237                let version = match val {
238                    toml::Value::String(s) => Some(s.clone()),
239                    toml::Value::Table(t) => t
240                        .get("version")
241                        .and_then(|v| v.as_str())
242                        .map(|s| s.to_string()),
243                    _ => None,
244                };
245                deps.push(DetectedDep {
246                    name: name.clone(),
247                    version,
248                    source_file: "Cargo.toml".to_string(),
249                    language: "rust".to_string(),
250                });
251            }
252        }
253    }
254
255    deps
256}
257
258fn detect_go_mod(path: &Path) -> Vec<DetectedDep> {
259    let content = match fs::read_to_string(path) {
260        Ok(c) => c,
261        Err(_) => return vec![],
262    };
263
264    let mut deps = Vec::new();
265    let mut in_require = false;
266
267    for line in content.lines() {
268        let trimmed = line.trim();
269        if trimmed.starts_with("require (") || trimmed == "require (" {
270            in_require = true;
271            continue;
272        }
273        if trimmed == ")" {
274            in_require = false;
275            continue;
276        }
277        if in_require || trimmed.starts_with("require ") {
278            let dep_line = if let Some(stripped) = trimmed.strip_prefix("require ") {
279                stripped
280            } else {
281                trimmed
282            };
283            let parts: Vec<&str> = dep_line.split_whitespace().collect();
284            if !parts.is_empty() {
285                let name = parts[0].rsplit('/').next().unwrap_or(parts[0]).to_string();
286                let version = parts.get(1).map(|s| s.to_string());
287                deps.push(DetectedDep {
288                    name,
289                    version,
290                    source_file: "go.mod".to_string(),
291                    language: "go".to_string(),
292                });
293            }
294        }
295    }
296
297    deps
298}
299
300fn detect_gemfile(path: &Path) -> Vec<DetectedDep> {
301    let content = match fs::read_to_string(path) {
302        Ok(c) => c,
303        Err(_) => return vec![],
304    };
305
306    content
307        .lines()
308        .filter_map(|line| {
309            let trimmed = line.trim();
310            if !trimmed.starts_with("gem ") {
311                return None;
312            }
313            let rest = &trimmed[4..];
314            // Extract gem name from quotes
315            let name = rest.split(['\'', '"']).nth(1)?.to_string();
316            Some(DetectedDep {
317                name,
318                version: None,
319                source_file: "Gemfile".to_string(),
320                language: "ruby".to_string(),
321            })
322        })
323        .collect()
324}
325
326fn detect_pipfile(path: &Path) -> Vec<DetectedDep> {
327    let content = match fs::read_to_string(path) {
328        Ok(c) => c,
329        Err(_) => return vec![],
330    };
331    let toml_val: toml::Value = match content.parse() {
332        Ok(v) => v,
333        Err(_) => return vec![],
334    };
335
336    let mut deps = Vec::new();
337    for section in &["packages", "dev-packages"] {
338        if let Some(table) = toml_val.get(section).and_then(|d| d.as_table()) {
339            for (name, val) in table {
340                let version = val.as_str().map(|s| s.to_string());
341                deps.push(DetectedDep {
342                    name: name.clone(),
343                    version,
344                    source_file: "Pipfile".to_string(),
345                    language: "python".to_string(),
346                });
347            }
348        }
349    }
350    deps
351}
352
353fn detect_pom_xml(path: &Path) -> Vec<DetectedDep> {
354    let content = match fs::read_to_string(path) {
355        Ok(c) => c,
356        Err(_) => return vec![],
357    };
358
359    let mut deps = Vec::new();
360    let mut in_dependency = false;
361    let mut group_id = String::new();
362    let mut artifact_id = String::new();
363    let mut version = None;
364
365    for line in content.lines() {
366        let trimmed = line.trim();
367        if trimmed == "<dependency>" {
368            in_dependency = true;
369            group_id.clear();
370            artifact_id.clear();
371            version = None;
372            continue;
373        }
374        if trimmed == "</dependency>" {
375            if in_dependency && !artifact_id.is_empty() {
376                deps.push(DetectedDep {
377                    name: if group_id.is_empty() {
378                        artifact_id.clone()
379                    } else {
380                        format!("{}:{}", group_id, artifact_id)
381                    },
382                    version: version.clone(),
383                    source_file: "pom.xml".to_string(),
384                    language: "java".to_string(),
385                });
386            }
387            in_dependency = false;
388            continue;
389        }
390        if in_dependency {
391            if let Some(val) = extract_xml_value(trimmed, "groupId") {
392                group_id = val;
393            } else if let Some(val) = extract_xml_value(trimmed, "artifactId") {
394                artifact_id = val;
395            } else if let Some(val) = extract_xml_value(trimmed, "version") {
396                if !val.starts_with("${") {
397                    version = Some(val);
398                }
399            }
400        }
401    }
402
403    deps
404}
405
/// Return the trimmed text between `<tag>` and the next `</tag>` on `line`.
///
/// Yields `None` when the opening tag is missing or no closing tag follows
/// it. The closing tag is searched for only after the opening tag, so a line
/// where `</tag>` precedes `<tag>` returns `None` instead of panicking on an
/// inverted slice range.
fn extract_xml_value(line: &str, tag: &str) -> Option<String> {
    let open = format!("<{}>", tag);
    let close = format!("</{}>", tag);

    let after_open = &line[line.find(&open)? + open.len()..];
    let value = &after_open[..after_open.find(&close)?];
    Some(value.trim().to_string())
}
417
418fn detect_build_gradle(path: &Path) -> Vec<DetectedDep> {
419    let content = match fs::read_to_string(path) {
420        Ok(c) => c,
421        Err(_) => return vec![],
422    };
423
424    let mut deps = Vec::new();
425
426    for line in content.lines() {
427        let trimmed = line.trim();
428        // Match patterns like: implementation 'group:artifact:version'
429        // or: implementation "group:artifact:version"
430        for keyword in &[
431            "implementation",
432            "api",
433            "compileOnly",
434            "runtimeOnly",
435            "testImplementation",
436        ] {
437            if !trimmed.starts_with(keyword) {
438                continue;
439            }
440            let rest = &trimmed[keyword.len()..].trim_start();
441            // Extract quoted string
442            let quote = if rest.starts_with('\'') {
443                '\''
444            } else if rest.starts_with('"') {
445                '"'
446            } else if rest.starts_with('(') {
447                // implementation("group:artifact:version")
448                let inner = rest.trim_start_matches('(').trim_end_matches(')');
449                if inner.starts_with('\'') {
450                    '\''
451                } else if inner.starts_with('"') {
452                    '"'
453                } else {
454                    continue;
455                }
456            } else {
457                continue;
458            };
459            let content_str = if rest.starts_with('(') {
460                rest.trim_start_matches('(').trim_end_matches(')')
461            } else {
462                rest
463            };
464            let parts: Vec<&str> = content_str.trim_matches(quote).split(':').collect();
465            if parts.len() >= 2 {
466                let name = format!("{}:{}", parts[0], parts[1]);
467                let version = parts.get(2).map(|s| s.to_string());
468                deps.push(DetectedDep {
469                    name,
470                    version,
471                    source_file: path
472                        .file_name()
473                        .unwrap_or_default()
474                        .to_string_lossy()
475                        .to_string(),
476                    language: "java".to_string(),
477                });
478            }
479            break;
480        }
481    }
482
483    deps
484}
485
486/// Match detected dependencies to known docs in the registry.
487/// Uses simple name matching — the dep name is searched against doc IDs.
488pub fn match_deps_to_docs(
489    deps: &[DetectedDep],
490    doc_ids: &[(String, String)], // (id, name) pairs from registry
491) -> Vec<DetectedMatch> {
492    let mut matches = Vec::new();
493
494    // Build a lookup: lowercase name → (id, name)
495    let mut id_by_name: HashMap<String, (String, String)> = HashMap::new();
496    for (id, name) in doc_ids {
497        // Index by last segment of id (e.g., "openai/chat" → "openai")
498        let parts: Vec<&str> = id.split('/').collect();
499        if !parts.is_empty() {
500            id_by_name.insert(parts[0].to_lowercase(), (id.clone(), name.clone()));
501        }
502        // Also index by full id
503        id_by_name.insert(id.to_lowercase(), (id.clone(), name.clone()));
504    }
505
506    for dep in deps {
507        let dep_lower = dep.name.to_lowercase();
508
509        // Try exact match on first segment
510        if let Some((doc_id, doc_name)) = id_by_name.get(&dep_lower) {
511            matches.push(DetectedMatch {
512                dep: dep.clone(),
513                doc_id: doc_id.clone(),
514                doc_name: doc_name.clone(),
515                confidence: 1.0,
516            });
517            continue;
518        }
519
520        // Try partial match
521        for (key, (doc_id, doc_name)) in &id_by_name {
522            if key.contains(&dep_lower) || dep_lower.contains(key.as_str()) {
523                matches.push(DetectedMatch {
524                    dep: dep.clone(),
525                    doc_id: doc_id.clone(),
526                    doc_name: doc_name.clone(),
527                    confidence: 0.5,
528                });
529                break;
530            }
531        }
532    }
533
534    matches
535}