syncable_cli/analyzer/
language_detector.rs

1use crate::analyzer::{AnalysisConfig, DetectedLanguage};
2use crate::common::file_utils;
3use crate::error::Result;
4use serde_json::Value as JsonValue;
5use std::collections::HashMap;
6use std::path::PathBuf;
7
/// Everything learned about one detected language family: identity, version
/// hints, dependencies, and the files that contributed to the detection.
#[derive(Clone, Debug)]
pub struct LanguageInfo {
    /// Display name of the language (for example "Rust" or "Python").
    pub name: String,
    /// Language or runtime version (or version constraint), when a manifest provided one.
    pub version: Option<String>,
    /// Language edition, when the language has such a concept (set from Cargo.toml's `edition`).
    pub edition: Option<String>,
    /// Package manager or build tool associated with the project, if known.
    pub package_manager: Option<String>,
    /// Names of the dependencies declared by the project's manifests.
    pub main_dependencies: Vec<String>,
    /// Names of development/test-only dependencies.
    pub dev_dependencies: Vec<String>,
    /// Detection confidence assigned by the analyzer; higher means more certain.
    pub confidence: f32,
    /// Source files attributed to this language.
    pub source_files: Vec<PathBuf>,
    /// Manifest files that were considered for this language.
    pub manifest_files: Vec<PathBuf>,
}
21
22/// Detects programming languages with advanced manifest parsing
23pub fn detect_languages(
24    files: &[PathBuf],
25    config: &AnalysisConfig,
26) -> Result<Vec<DetectedLanguage>> {
27    let mut language_info = HashMap::new();
28    
29    // First pass: collect files by extension and find manifests
30    let mut source_files_by_lang = HashMap::new();
31    let mut manifest_files = Vec::new();
32    
33    for file in files {
34        if let Some(extension) = file.extension().and_then(|e| e.to_str()) {
35            match extension {
36                // Rust files
37                "rs" => source_files_by_lang
38                    .entry("rust")
39                    .or_insert_with(Vec::new)
40                    .push(file.clone()),
41                    
42                // JavaScript/TypeScript files
43                "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" => source_files_by_lang
44                    .entry("javascript")
45                    .or_insert_with(Vec::new)
46                    .push(file.clone()),
47                    
48                // Python files
49                "py" | "pyx" | "pyi" => source_files_by_lang
50                    .entry("python")
51                    .or_insert_with(Vec::new)
52                    .push(file.clone()),
53                    
54                // Go files
55                "go" => source_files_by_lang
56                    .entry("go")
57                    .or_insert_with(Vec::new)
58                    .push(file.clone()),
59                    
60                // Java/Kotlin files
61                "java" | "kt" | "kts" => source_files_by_lang
62                    .entry("jvm")
63                    .or_insert_with(Vec::new)
64                    .push(file.clone()),
65                    
66                _ => {}
67            }
68        }
69        
70        // Check for manifest files
71        if let Some(filename) = file.file_name().and_then(|n| n.to_str()) {
72            if is_manifest_file(filename) {
73                manifest_files.push(file.clone());
74            }
75        }
76    }
77    
78    // Second pass: analyze each detected language with manifest parsing
79    if source_files_by_lang.contains_key("rust") || has_manifest(&manifest_files, &["Cargo.toml"]) {
80        if let Ok(info) = analyze_rust_project(&manifest_files, source_files_by_lang.get("rust"), config) {
81            language_info.insert("rust", info);
82        }
83    }
84    
85    if source_files_by_lang.contains_key("javascript") || has_manifest(&manifest_files, &["package.json"]) {
86        if let Ok(info) = analyze_javascript_project(&manifest_files, source_files_by_lang.get("javascript"), config) {
87            language_info.insert("javascript", info);
88        }
89    }
90    
91    if source_files_by_lang.contains_key("python") || has_manifest(&manifest_files, &["requirements.txt", "Pipfile", "pyproject.toml", "setup.py"]) {
92        if let Ok(info) = analyze_python_project(&manifest_files, source_files_by_lang.get("python"), config) {
93            language_info.insert("python", info);
94        }
95    }
96    
97    if source_files_by_lang.contains_key("go") || has_manifest(&manifest_files, &["go.mod"]) {
98        if let Ok(info) = analyze_go_project(&manifest_files, source_files_by_lang.get("go"), config) {
99            language_info.insert("go", info);
100        }
101    }
102    
103    if source_files_by_lang.contains_key("jvm") || has_manifest(&manifest_files, &["pom.xml", "build.gradle", "build.gradle.kts"]) {
104        if let Ok(info) = analyze_jvm_project(&manifest_files, source_files_by_lang.get("jvm"), config) {
105            language_info.insert("jvm", info);
106        }
107    }
108    
109    // Convert to DetectedLanguage format
110    let mut detected_languages = Vec::new();
111    for (_, info) in language_info {
112        detected_languages.push(DetectedLanguage {
113            name: info.name,
114            version: info.version,
115            confidence: info.confidence,
116            files: info.source_files,
117            main_dependencies: info.main_dependencies,
118            dev_dependencies: info.dev_dependencies,
119            package_manager: info.package_manager,
120        });
121    }
122    
123    // Sort by confidence (highest first)
124    detected_languages.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal));
125    
126    Ok(detected_languages)
127}
128
129/// Analyze Rust project from Cargo.toml
130fn analyze_rust_project(
131    manifest_files: &[PathBuf],
132    source_files: Option<&Vec<PathBuf>>,
133    config: &AnalysisConfig,
134) -> Result<LanguageInfo> {
135    let mut info = LanguageInfo {
136        name: "Rust".to_string(),
137        version: None,
138        edition: None,
139        package_manager: Some("cargo".to_string()),
140        main_dependencies: Vec::new(),
141        dev_dependencies: Vec::new(),
142        confidence: 0.5,
143        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
144        manifest_files: Vec::new(),
145    };
146    
147    // Find and parse Cargo.toml
148    for manifest in manifest_files {
149        if manifest.file_name().and_then(|n| n.to_str()) == Some("Cargo.toml") {
150            info.manifest_files.push(manifest.clone());
151            
152            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
153                if let Ok(cargo_toml) = toml::from_str::<toml::Value>(&content) {
154                    // Extract edition
155                    if let Some(package) = cargo_toml.get("package") {
156                        if let Some(edition) = package.get("edition").and_then(|e| e.as_str()) {
157                            info.edition = Some(edition.to_string());
158                        }
159                        
160                        // Estimate Rust version from edition
161                        info.version = match info.edition.as_deref() {
162                            Some("2021") => Some("1.56+".to_string()),
163                            Some("2018") => Some("1.31+".to_string()),
164                            Some("2015") => Some("1.0+".to_string()),
165                            _ => Some("unknown".to_string()),
166                        };
167                    }
168                    
169                    // Extract dependencies
170                    if let Some(deps) = cargo_toml.get("dependencies") {
171                        if let Some(deps_table) = deps.as_table() {
172                            for (name, _) in deps_table {
173                                info.main_dependencies.push(name.clone());
174                            }
175                        }
176                    }
177                    
178                    // Extract dev dependencies if enabled
179                    if config.include_dev_dependencies {
180                        if let Some(dev_deps) = cargo_toml.get("dev-dependencies") {
181                            if let Some(dev_deps_table) = dev_deps.as_table() {
182                                for (name, _) in dev_deps_table {
183                                    info.dev_dependencies.push(name.clone());
184                                }
185                            }
186                        }
187                    }
188                    
189                    info.confidence = 0.95; // High confidence with manifest
190                }
191            }
192            break;
193        }
194    }
195    
196    // Boost confidence if we have source files
197    if !info.source_files.is_empty() {
198        info.confidence = (info.confidence + 0.9) / 2.0;
199    }
200    
201    Ok(info)
202}
203
204/// Analyze JavaScript/TypeScript project from package.json
205fn analyze_javascript_project(
206    manifest_files: &[PathBuf],
207    source_files: Option<&Vec<PathBuf>>,
208    config: &AnalysisConfig,
209) -> Result<LanguageInfo> {
210    let mut info = LanguageInfo {
211        name: "JavaScript/TypeScript".to_string(),
212        version: None,
213        edition: None,
214        package_manager: None,
215        main_dependencies: Vec::new(),
216        dev_dependencies: Vec::new(),
217        confidence: 0.5,
218        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
219        manifest_files: Vec::new(),
220    };
221    
222    // Detect package manager from lock files
223    for manifest in manifest_files {
224        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
225            match filename {
226                "package-lock.json" => info.package_manager = Some("npm".to_string()),
227                "yarn.lock" => info.package_manager = Some("yarn".to_string()),
228                "pnpm-lock.yaml" => info.package_manager = Some("pnpm".to_string()),
229                _ => {}
230            }
231        }
232    }
233    
234    // Default to npm if no package manager detected
235    if info.package_manager.is_none() {
236        info.package_manager = Some("npm".to_string());
237    }
238    
239    // Find and parse package.json
240    for manifest in manifest_files {
241        if manifest.file_name().and_then(|n| n.to_str()) == Some("package.json") {
242            info.manifest_files.push(manifest.clone());
243            
244            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
245                if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
246                    // Extract Node.js version from engines
247                    if let Some(engines) = package_json.get("engines") {
248                        if let Some(node_version) = engines.get("node").and_then(|v| v.as_str()) {
249                            info.version = Some(node_version.to_string());
250                        }
251                    }
252                    
253                    // Extract dependencies (always include all buckets for framework detection)
254                    if let Some(deps) = package_json.get("dependencies").and_then(|d| d.as_object()) {
255                        for (name, _) in deps {
256                            info.main_dependencies.push(name.clone());
257                        }
258                    }
259
260                    // Frameworks like Vite/Remix/Next are often in devDependencies; always include
261                    if let Some(dev_deps) = package_json.get("devDependencies").and_then(|d| d.as_object()) {
262                        for (name, _) in dev_deps {
263                            info.main_dependencies.push(name.clone());
264                            info.dev_dependencies.push(name.clone());
265                        }
266                    }
267
268                    // peerDependencies frequently carry framework identity (e.g., react-router)
269                    if let Some(peer_deps) = package_json.get("peerDependencies").and_then(|d| d.as_object()) {
270                        for (name, _) in peer_deps {
271                            info.main_dependencies.push(name.clone());
272                        }
273                    }
274
275                    // optional/bundled deps can also hold framework markers (rare but cheap to add)
276                    if let Some(opt_deps) = package_json.get("optionalDependencies").and_then(|d| d.as_object()) {
277                        for (name, _) in opt_deps {
278                            info.main_dependencies.push(name.clone());
279                        }
280                    }
281                    if let Some(bundle_deps) = package_json.get("bundledDependencies").and_then(|d| d.as_array()) {
282                        for dep in bundle_deps.iter().filter_map(|d| d.as_str()) {
283                            info.main_dependencies.push(dep.to_string());
284                        }
285                    }
286
287                    info.confidence = 0.95; // High confidence with manifest
288                }
289            }
290            break;
291        }
292    }
293    
294    // Adjust name based on file types
295    if let Some(files) = source_files {
296        let has_typescript = files.iter().any(|f| {
297            f.extension()
298                .and_then(|e| e.to_str())
299                .map_or(false, |ext| ext == "ts" || ext == "tsx")
300        });
301        
302        if has_typescript {
303            info.name = "TypeScript".to_string();
304        } else {
305            info.name = "JavaScript".to_string();
306        }
307    }
308    
309    // Boost confidence if we have source files
310    if !info.source_files.is_empty() {
311        info.confidence = (info.confidence + 0.9) / 2.0;
312    }
313    
314    Ok(info)
315}
316
317/// Analyze Python project from various manifest files
318fn analyze_python_project(
319    manifest_files: &[PathBuf],
320    source_files: Option<&Vec<PathBuf>>,
321    config: &AnalysisConfig,
322) -> Result<LanguageInfo> {
323    let mut info = LanguageInfo {
324        name: "Python".to_string(),
325        version: None,
326        edition: None,
327        package_manager: None,
328        main_dependencies: Vec::new(),
329        dev_dependencies: Vec::new(),
330        confidence: 0.5,
331        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
332        manifest_files: Vec::new(),
333    };
334    
335    // Detect package manager and parse manifest files
336    for manifest in manifest_files {
337        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
338            info.manifest_files.push(manifest.clone());
339            
340            match filename {
341                "requirements.txt" => {
342                    info.package_manager = Some("pip".to_string());
343                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
344                        parse_requirements_txt(&content, &mut info);
345                        info.confidence = 0.85;
346                    }
347                }
348                "Pipfile" => {
349                    info.package_manager = Some("pipenv".to_string());
350                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
351                        parse_pipfile(&content, &mut info, config);
352                        info.confidence = 0.90;
353                    }
354                }
355                "pyproject.toml" => {
356                    info.package_manager = Some("poetry/pip".to_string());
357                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
358                        parse_pyproject_toml(&content, &mut info, config);
359                        info.confidence = 0.95;
360                    }
361                }
362                "setup.py" => {
363                    info.package_manager = Some("setuptools".to_string());
364                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
365                        parse_setup_py(&content, &mut info);
366                        info.confidence = 0.80;
367                    }
368                }
369                _ => {}
370            }
371        }
372    }
373    
374    // Default to pip if no package manager detected
375    if info.package_manager.is_none() && !info.source_files.is_empty() {
376        info.package_manager = Some("pip".to_string());
377        info.confidence = 0.75;
378    }
379    
380    // Boost confidence if we have source files
381    if !info.source_files.is_empty() {
382        info.confidence = (info.confidence + 0.8) / 2.0;
383    }
384    
385    Ok(info)
386}
387
388/// Parse requirements.txt file
389fn parse_requirements_txt(content: &str, info: &mut LanguageInfo) {
390    for line in content.lines() {
391        let line = line.trim();
392        if line.is_empty() || line.starts_with('#') {
393            continue;
394        }
395        
396        // Extract package name (before ==, >=, etc.)
397        if let Some(package_name) = line.split(&['=', '>', '<', '!', '~', ';'][..]).next() {
398            let clean_name = package_name.trim();
399            if !clean_name.is_empty() && !clean_name.starts_with('-') {
400                info.main_dependencies.push(clean_name.to_string());
401            }
402        }
403    }
404}
405
406/// Parse Pipfile (TOML format)
407fn parse_pipfile(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
408    if let Ok(pipfile) = toml::from_str::<toml::Value>(content) {
409        // Extract Python version requirement
410        if let Some(requires) = pipfile.get("requires") {
411            if let Some(python_version) = requires.get("python_version").and_then(|v| v.as_str()) {
412                info.version = Some(format!("~={}", python_version));
413            } else if let Some(python_full) = requires.get("python_full_version").and_then(|v| v.as_str()) {
414                info.version = Some(format!("=={}", python_full));
415            }
416        }
417        
418        // Extract packages
419        if let Some(packages) = pipfile.get("packages") {
420            if let Some(packages_table) = packages.as_table() {
421                for (name, _) in packages_table {
422                    info.main_dependencies.push(name.clone());
423                }
424            }
425        }
426        
427        // Extract dev packages if enabled
428        if config.include_dev_dependencies {
429            if let Some(dev_packages) = pipfile.get("dev-packages") {
430                if let Some(dev_packages_table) = dev_packages.as_table() {
431                    for (name, _) in dev_packages_table {
432                        info.dev_dependencies.push(name.clone());
433                    }
434                }
435            }
436        }
437    }
438}
439
440/// Parse pyproject.toml file
441fn parse_pyproject_toml(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
442    if let Ok(pyproject) = toml::from_str::<toml::Value>(content) {
443        // Extract Python version from project metadata
444        if let Some(project) = pyproject.get("project") {
445            if let Some(requires_python) = project.get("requires-python").and_then(|v| v.as_str()) {
446                info.version = Some(requires_python.to_string());
447            }
448            
449            // Extract dependencies
450            if let Some(dependencies) = project.get("dependencies") {
451                if let Some(deps_array) = dependencies.as_array() {
452                    for dep in deps_array {
453                        if let Some(dep_str) = dep.as_str() {
454                            if let Some(package_name) = dep_str.split(&['=', '>', '<', '!', '~', ';'][..]).next() {
455                                let clean_name = package_name.trim();
456                                if !clean_name.is_empty() {
457                                    info.main_dependencies.push(clean_name.to_string());
458                                }
459                            }
460                        }
461                    }
462                }
463            }
464            
465            // Extract optional dependencies (dev dependencies)
466            if config.include_dev_dependencies {
467                if let Some(optional_deps) = project.get("optional-dependencies") {
468                    if let Some(optional_table) = optional_deps.as_table() {
469                        for (_, deps) in optional_table {
470                            if let Some(deps_array) = deps.as_array() {
471                                for dep in deps_array {
472                                    if let Some(dep_str) = dep.as_str() {
473                                        if let Some(package_name) = dep_str.split(&['=', '>', '<', '!', '~', ';'][..]).next() {
474                                            let clean_name = package_name.trim();
475                                            if !clean_name.is_empty() {
476                                                info.dev_dependencies.push(clean_name.to_string());
477                                            }
478                                        }
479                                    }
480                                }
481                            }
482                        }
483                    }
484                }
485            }
486        }
487        
488        // Check for Poetry configuration
489        if pyproject.get("tool").and_then(|t| t.get("poetry")).is_some() {
490            info.package_manager = Some("poetry".to_string());
491            
492            // Extract Poetry dependencies
493            if let Some(tool) = pyproject.get("tool") {
494                if let Some(poetry) = tool.get("poetry") {
495                    if let Some(dependencies) = poetry.get("dependencies") {
496                        if let Some(deps_table) = dependencies.as_table() {
497                            for (name, _) in deps_table {
498                                if name != "python" {
499                                    info.main_dependencies.push(name.clone());
500                                }
501                            }
502                        }
503                    }
504                    
505                    if config.include_dev_dependencies {
506                        if let Some(dev_dependencies) = poetry.get("group")
507                            .and_then(|g| g.get("dev"))
508                            .and_then(|d| d.get("dependencies")) 
509                        {
510                            if let Some(dev_deps_table) = dev_dependencies.as_table() {
511                                for (name, _) in dev_deps_table {
512                                    info.dev_dependencies.push(name.clone());
513                                }
514                            }
515                        }
516                    }
517                }
518            }
519        }
520    }
521}
522
523/// Parse setup.py file (basic extraction)
524fn parse_setup_py(content: &str, info: &mut LanguageInfo) {
525    // Basic regex-based parsing for common patterns
526    for line in content.lines() {
527        let line = line.trim();
528        
529        // Look for python_requires
530        if line.contains("python_requires") {
531            if let Some(start) = line.find("\"") {
532                if let Some(end) = line[start + 1..].find("\"") {
533                    let version = &line[start + 1..start + 1 + end];
534                    info.version = Some(version.to_string());
535                }
536            } else if let Some(start) = line.find("'") {
537                if let Some(end) = line[start + 1..].find("'") {
538                    let version = &line[start + 1..start + 1 + end];
539                    info.version = Some(version.to_string());
540                }
541            }
542        }
543        
544        // Look for install_requires (basic pattern)
545        if line.contains("install_requires") && line.contains("[") {
546            // This is a simplified parser - could be enhanced
547            info.main_dependencies.push("setuptools-detected".to_string());
548        }
549    }
550}
551
552/// Analyze Go project from go.mod
553fn analyze_go_project(
554    manifest_files: &[PathBuf],
555    source_files: Option<&Vec<PathBuf>>,
556    config: &AnalysisConfig,
557) -> Result<LanguageInfo> {
558    let mut info = LanguageInfo {
559        name: "Go".to_string(),
560        version: None,
561        edition: None,
562        package_manager: Some("go mod".to_string()),
563        main_dependencies: Vec::new(),
564        dev_dependencies: Vec::new(),
565        confidence: 0.5,
566        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
567        manifest_files: Vec::new(),
568    };
569    
570    // Find and parse go.mod
571    for manifest in manifest_files {
572        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
573            match filename {
574                "go.mod" => {
575                    info.manifest_files.push(manifest.clone());
576                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
577                        parse_go_mod(&content, &mut info);
578                        info.confidence = 0.95;
579                    }
580                }
581                "go.sum" => {
582                    info.manifest_files.push(manifest.clone());
583                    // go.sum contains checksums, indicates a real Go project
584                    info.confidence = (info.confidence + 0.9) / 2.0;
585                }
586                _ => {}
587            }
588        }
589    }
590    
591    // Boost confidence if we have source files
592    if !info.source_files.is_empty() {
593        info.confidence = (info.confidence + 0.85) / 2.0;
594    }
595    
596    Ok(info)
597}
598
599/// Parse go.mod file
600fn parse_go_mod(content: &str, info: &mut LanguageInfo) {
601    for line in content.lines() {
602        let line = line.trim();
603        
604        // Parse go version directive
605        if line.starts_with("go ") {
606            let version = line[3..].trim();
607            info.version = Some(version.to_string());
608        }
609        
610        // Parse require block
611        if line.starts_with("require ") {
612            // Single line require
613            let require_line = &line[8..].trim();
614            if let Some(module_name) = require_line.split_whitespace().next() {
615                info.main_dependencies.push(module_name.to_string());
616            }
617        }
618    }
619    
620    // Parse multi-line require blocks
621    let mut in_require_block = false;
622    for line in content.lines() {
623        let line = line.trim();
624        
625        if line == "require (" {
626            in_require_block = true;
627            continue;
628        }
629        
630        if in_require_block {
631            if line == ")" {
632                in_require_block = false;
633                continue;
634            }
635            
636            // Parse dependency line
637            if !line.is_empty() && !line.starts_with("//") {
638                if let Some(module_name) = line.split_whitespace().next() {
639                    info.main_dependencies.push(module_name.to_string());
640                }
641            }
642        }
643    }
644}
645
646/// Analyze JVM project (Java/Kotlin) from build files
647fn analyze_jvm_project(
648    manifest_files: &[PathBuf],
649    source_files: Option<&Vec<PathBuf>>,
650    config: &AnalysisConfig,
651) -> Result<LanguageInfo> {
652    let mut info = LanguageInfo {
653        name: "Java/Kotlin".to_string(),
654        version: None,
655        edition: None,
656        package_manager: None,
657        main_dependencies: Vec::new(),
658        dev_dependencies: Vec::new(),
659        confidence: 0.5,
660        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
661        manifest_files: Vec::new(),
662    };
663    
664    // Detect build tool and parse manifest files
665    for manifest in manifest_files {
666        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
667            info.manifest_files.push(manifest.clone());
668            
669            match filename {
670                "pom.xml" => {
671                    info.package_manager = Some("maven".to_string());
672                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
673                        parse_maven_pom(&content, &mut info, config);
674                        info.confidence = 0.90;
675                    }
676                }
677                "build.gradle" => {
678                    info.package_manager = Some("gradle".to_string());
679                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
680                        parse_gradle_build(&content, &mut info, config);
681                        info.confidence = 0.85;
682                    }
683                }
684                "build.gradle.kts" => {
685                    info.package_manager = Some("gradle".to_string());
686                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
687                        parse_gradle_kts_build(&content, &mut info, config);
688                        info.confidence = 0.85;
689                    }
690                }
691                _ => {}
692            }
693        }
694    }
695    
696    // Adjust name based on file types
697    if let Some(files) = source_files {
698        let has_kotlin = files.iter().any(|f| {
699            f.extension()
700                .and_then(|e| e.to_str())
701                .map_or(false, |ext| ext == "kt" || ext == "kts")
702        });
703        
704        if has_kotlin {
705            info.name = "Kotlin".to_string();
706        } else {
707            info.name = "Java".to_string();
708        }
709    }
710    
711    // Boost confidence if we have source files
712    if !info.source_files.is_empty() {
713        info.confidence = (info.confidence + 0.8) / 2.0;
714    }
715    
716    Ok(info)
717}
718
719/// Parse Maven pom.xml file (basic XML parsing)
720fn parse_maven_pom(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
721    // Simple regex-based XML parsing for common Maven patterns
722    
723    // Extract Java version from maven.compiler.source or java.version
724    for line in content.lines() {
725        let line = line.trim();
726        
727        // Look for Java version in properties
728        if line.contains("<maven.compiler.source>") {
729            if let Some(version) = extract_xml_content(line, "maven.compiler.source") {
730                info.version = Some(version);
731            }
732        } else if line.contains("<java.version>") {
733            if let Some(version) = extract_xml_content(line, "java.version") {
734                info.version = Some(version);
735            }
736        } else if line.contains("<maven.compiler.target>") && info.version.is_none() {
737            if let Some(version) = extract_xml_content(line, "maven.compiler.target") {
738                info.version = Some(version);
739            }
740        }
741        
742        // Extract dependencies
743        if line.contains("<groupId>") && line.contains("<artifactId>") {
744            // This is a simplified approach - real XML parsing would be better
745            if let Some(group_id) = extract_xml_content(line, "groupId") {
746                if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
747                    let dependency = format!("{}:{}", group_id, artifact_id);
748                    info.main_dependencies.push(dependency);
749                }
750            }
751        } else if line.contains("<artifactId>") && !line.contains("<groupId>") {
752            if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
753                info.main_dependencies.push(artifact_id);
754            }
755        }
756    }
757    
758    // Look for dependencies in a more structured way
759    let mut in_dependencies = false;
760    let mut in_test_dependencies = false;
761    
762    for line in content.lines() {
763        let line = line.trim();
764        
765        if line.contains("<dependencies>") {
766            in_dependencies = true;
767            continue;
768        }
769        
770        if line.contains("</dependencies>") {
771            in_dependencies = false;
772            in_test_dependencies = false;
773            continue;
774        }
775        
776        if in_dependencies && line.contains("<scope>test</scope>") {
777            in_test_dependencies = true;
778        }
779        
780        if in_dependencies && line.contains("<artifactId>") {
781            if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
782                if in_test_dependencies && config.include_dev_dependencies {
783                    info.dev_dependencies.push(artifact_id);
784                } else if !in_test_dependencies {
785                    info.main_dependencies.push(artifact_id);
786                }
787            }
788        }
789    }
790}
791
792/// Parse Gradle build.gradle file (Groovy syntax)
793fn parse_gradle_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
794    for line in content.lines() {
795        let line = line.trim();
796        
797        // Look for Java version
798        if line.contains("sourceCompatibility") || line.contains("targetCompatibility") {
799            if let Some(version) = extract_gradle_version(line) {
800                info.version = Some(version);
801            }
802        } else if line.contains("JavaVersion.VERSION_") {
803            if let Some(pos) = line.find("VERSION_") {
804                let version_part = &line[pos + 8..];
805                if let Some(end) = version_part.find(|c: char| !c.is_numeric() && c != '_') {
806                    let version = &version_part[..end].replace('_', ".");
807                    info.version = Some(version.to_string());
808                }
809            }
810        }
811        
812        // Look for dependencies
813        if line.starts_with("implementation ") || line.starts_with("compile ") {
814            if let Some(dep) = extract_gradle_dependency(line) {
815                info.main_dependencies.push(dep);
816            }
817        } else if (line.starts_with("testImplementation ") || line.starts_with("testCompile ")) && config.include_dev_dependencies {
818            if let Some(dep) = extract_gradle_dependency(line) {
819                info.dev_dependencies.push(dep);
820            }
821        }
822    }
823}
824
825/// Parse Gradle build.gradle.kts file (Kotlin syntax)
826fn parse_gradle_kts_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
827    // Kotlin DSL is similar to Groovy but with some syntax differences
828    parse_gradle_build(content, info, config); // Reuse the same logic for now
829}
830
/// Extract the trimmed text between `<tag>` and `</tag>` on a single line.
///
/// Only the exact, attribute-free opening tag `<tag>` is recognised. The closing
/// tag is searched *after* the opening tag, so an unrelated closing tag earlier
/// on the line does not hide the element.
///
/// Returns `None` when either tag is missing or when the enclosed text is empty
/// or whitespace-only.
fn extract_xml_content(line: &str, tag: &str) -> Option<String> {
    let open_tag = format!("<{}>", tag);
    let close_tag = format!("</{}>", tag);

    let content_start = line.find(&open_tag)? + open_tag.len();
    let rest = &line[content_start..];
    let content = rest[..rest.find(&close_tag)?].trim();

    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}
846
/// Extract a Java version from a Gradle configuration line.
///
/// Recognised forms, in order of precedence:
/// 1. a `JavaVersion.VERSION_<n>` constant anywhere on the line, with
///    underscores turned into dots (`VERSION_1_8` -> `1.8`);
/// 2. an unquoted number right after `=` (`sourceCompatibility = 11`,
///    `targetCompatibility = 1.8`);
/// 3. the first quoted string after `=` (`sourceCompatibility = '11'`,
///    `sourceCompatibility = "11"`).
///
/// Returns `None` when none of these forms is present.
fn extract_gradle_version(line: &str) -> Option<String> {
    const JAVA_VERSION_PREFIX: &str = "JavaVersion.VERSION_";

    if let Some(pos) = line.find(JAVA_VERSION_PREFIX) {
        let constant = &line[pos + JAVA_VERSION_PREFIX.len()..];
        // The constant may run to the end of the line.
        let end = constant
            .find(|c: char| !c.is_ascii_digit() && c != '_')
            .unwrap_or(constant.len());
        let version = constant[..end].trim_end_matches('_').replace('_', ".");
        if !version.is_empty() {
            return Some(version);
        }
    }

    let value = line[line.find('=')? + 1..].trim();

    // Unquoted numeric value, possibly followed by a comment or other tokens.
    if value.starts_with(|c: char| c.is_ascii_digit()) {
        let end = value
            .find(|c: char| !c.is_ascii_digit() && c != '.')
            .unwrap_or(value.len());
        return Some(value[..end].trim_end_matches('.').to_string());
    }

    // Quoted value. `char_indices` yields the byte offset together with the quote
    // character itself, so slicing stays correct with non-ASCII text before it.
    let (open, quote) = value
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let rest = &value[open + quote.len_utf8()..];
    let end = rest.find(quote)?;
    Some(rest[..end].to_string())
}
862
/// Extract the artifact name from a Gradle dependency declaration.
///
/// The first single- or double-quoted string on the line is taken as the
/// dependency coordinates, e.g. `implementation 'group:artifact:version'` or
/// `implementation("group:artifact:version")`.
///
/// For colon-separated coordinates the second component (the artifact name) is
/// returned, whether or not a version or classifier follows. Strings without a
/// colon, or with an empty second component, are returned unchanged.
///
/// Returns `None` when the line has no complete quoted string.
fn extract_gradle_dependency(line: &str) -> Option<String> {
    // `char_indices` yields the byte offset together with the quote character
    // itself, so slicing stays correct when non-ASCII text precedes the quote.
    let (open, quote) = line
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let rest = &line[open + quote.len_utf8()..];
    let coordinates = &rest[..rest.find(quote)?];

    let artifact = coordinates
        .split(':')
        .nth(1)
        .filter(|name| !name.is_empty());

    Some(artifact.unwrap_or(coordinates).to_string())
}
881
/// Report whether `filename` is one of the manifest or lock file names this
/// detector knows about.
///
/// The whole string must match exactly (case-sensitive).
fn is_manifest_file(filename: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        // Rust
        "Cargo.toml",
        "Cargo.lock",
        // JavaScript / TypeScript
        "package.json",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        // Python
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "setup.py",
        // Go
        "go.mod",
        "go.sum",
        // JVM
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
    ];

    KNOWN_MANIFESTS.iter().any(|&known| known == filename)
}
893
/// Report whether at least one path in `manifest_files` has a final component
/// equal to one of the names in `target_files`.
///
/// Paths without a final component, or whose final component is not valid
/// UTF-8, never match.
fn has_manifest(manifest_files: &[PathBuf], target_files: &[&str]) -> bool {
    manifest_files
        .iter()
        .filter_map(|path| path.file_name()?.to_str())
        .any(|name| target_files.contains(&name))
}
902
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Writes each `(relative path, contents)` entry into a fresh temporary
    /// directory, creating parent directories as needed.
    ///
    /// Returns the directory guard (the files live only as long as it does)
    /// together with the absolute paths, in the order the entries were given.
    fn scaffold_project(entries: &[(&str, &str)]) -> (TempDir, Vec<PathBuf>) {
        let temp_dir = TempDir::new().unwrap();
        let paths: Vec<PathBuf> = entries
            .iter()
            .map(|&(relative, contents)| {
                let path = temp_dir.path().join(relative);
                if let Some(parent) = path.parent() {
                    fs::create_dir_all(parent).unwrap();
                }
                fs::write(&path, contents).unwrap();
                path
            })
            .collect();
        (temp_dir, paths)
    }

    /// Runs detection with the default configuration and returns the single
    /// language that is expected to be reported.
    fn detect_single(files: &[PathBuf]) -> DetectedLanguage {
        let config = AnalysisConfig::default();
        let mut languages = detect_languages(files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        languages.remove(0)
    }

    #[test]
    fn test_rust_project_detection() {
        // Cargo manifest declaring the 2021 edition
        let cargo_toml = r#"
[package]
name = "test-project"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = "1.0"
tokio = "1.0"

[dev-dependencies]
assert_cmd = "2.0"
"#;
        let (_project, files) = scaffold_project(&[
            ("Cargo.toml", cargo_toml),
            ("src/main.rs", "fn main() {}"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Rust");
        assert_eq!(detected.version, Some("1.56+".to_string()));
        assert!(detected.confidence > 0.9);
    }

    #[test]
    fn test_javascript_project_detection() {
        // package.json with an engines constraint
        let package_json = r#"
{
  "name": "test-project",
  "version": "1.0.0",
  "engines": {
    "node": ">=16.0.0"
  },
  "dependencies": {
    "express": "^4.18.0",
    "lodash": "^4.17.21"
  },
  "devDependencies": {
    "jest": "^29.0.0"
  }
}
"#;
        let (_project, files) = scaffold_project(&[
            ("package.json", package_json),
            ("index.js", "console.log('hello');"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "JavaScript");
        assert_eq!(detected.version, Some(">=16.0.0".to_string()));
        assert!(detected.confidence > 0.9);
    }

    #[test]
    fn test_python_project_detection() {
        // pyproject.toml with a requires-python constraint
        let pyproject_toml = r#"
[project]
name = "test-project"
version = "0.1.0"
requires-python = ">=3.8"
dependencies = [
    "flask>=2.0.0",
    "requests>=2.25.0",
    "pandas>=1.3.0"
]

[project.optional-dependencies]
dev = [
    "pytest>=6.0.0",
    "black>=21.0.0"
]
"#;
        let (_project, files) = scaffold_project(&[
            ("pyproject.toml", pyproject_toml),
            ("app.py", "print('Hello, World!')"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Python");
        assert_eq!(detected.version, Some(">=3.8".to_string()));
        assert!(detected.confidence > 0.8);
    }

    #[test]
    fn test_go_project_detection() {
        // go.mod with a go directive
        let go_mod = r#"
module example.com/myproject

go 1.21

require (
    github.com/gin-gonic/gin v1.9.1
    github.com/stretchr/testify v1.8.4
    golang.org/x/time v0.3.0
)
"#;
        let (_project, files) = scaffold_project(&[
            ("go.mod", go_mod),
            ("main.go", "package main\n\nfunc main() {}"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Go");
        assert_eq!(detected.version, Some("1.21".to_string()));
        assert!(detected.confidence > 0.8);
    }

    #[test]
    fn test_java_maven_project_detection() {
        // pom.xml with compiler properties and one test-scoped dependency
        let pom_xml = r#"
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    
    <groupId>com.example</groupId>
    <artifactId>test-project</artifactId>
    <version>1.0.0</version>
    
    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
    </properties>
    
    <dependencies>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>5.3.21</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
"#;
        let (_project, files) = scaffold_project(&[
            ("pom.xml", pom_xml),
            ("src/main/java/App.java", "public class App {}"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Java");
        assert_eq!(detected.version, Some("17".to_string()));
        assert!(detected.confidence > 0.8);
    }

    #[test]
    fn test_kotlin_gradle_project_detection() {
        // Kotlin DSL build script next to a Kotlin source file
        let build_gradle_kts = r#"
plugins {
    kotlin("jvm") version "1.9.0"
    application
}

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
}

dependencies {
    implementation("org.jetbrains.kotlin:kotlin-stdlib")
    implementation("io.ktor:ktor-server-core:2.3.2")
    testImplementation("org.jetbrains.kotlin:kotlin-test")
}
"#;
        let (_project, files) = scaffold_project(&[
            ("build.gradle.kts", build_gradle_kts),
            ("src/main/kotlin/Main.kt", "fun main() {}"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Kotlin");
        assert!(detected.confidence > 0.8);
    }

    #[test]
    fn test_python_requirements_txt_detection() {
        // requirements.txt only, no pyproject.toml
        let requirements_txt = r#"
Flask==2.3.2
requests>=2.28.0
pandas==1.5.3
pytest==7.4.0
black>=23.0.0
"#;
        let (_project, files) = scaffold_project(&[
            ("requirements.txt", requirements_txt),
            ("app.py", "import flask"),
        ]);

        let detected = detect_single(&files);
        assert_eq!(detected.name, "Python");
        assert!(detected.confidence > 0.8);
    }
}