syncable_cli/analyzer/
language_detector.rs

1use crate::analyzer::{AnalysisConfig, DetectedLanguage};
2use crate::common::file_utils;
3use crate::error::Result;
4use serde_json::Value as JsonValue;
5use std::collections::HashMap;
6use std::path::PathBuf;
7
/// Language detection results with detailed information.
///
/// One value is built per language family by the `analyze_*` functions in
/// this module and later converted into a `DetectedLanguage`.
#[derive(Debug, Clone)]
pub struct LanguageInfo {
    /// Display name of the language (e.g. "Rust", "TypeScript", "Python").
    pub name: String,
    /// Version or version constraint taken (or estimated) from a manifest, if any.
    pub version: Option<String>,
    /// Language edition; in the code visible here only the Rust analyzer sets it.
    pub edition: Option<String>,
    /// Package manager or build tool associated with the project, if determined.
    pub package_manager: Option<String>,
    /// Names of dependencies collected from the parsed manifests.
    pub main_dependencies: Vec<String>,
    /// Names of development-only dependencies collected from the parsed manifests.
    pub dev_dependencies: Vec<String>,
    /// Heuristic detection score; higher means more evidence was found.
    pub confidence: f32,
    /// Source files attributed to this language.
    pub source_files: Vec<PathBuf>,
    /// Manifest files recorded for this language.
    pub manifest_files: Vec<PathBuf>,
}
21
22/// Detects programming languages with advanced manifest parsing
23pub fn detect_languages(
24    files: &[PathBuf],
25    config: &AnalysisConfig,
26) -> Result<Vec<DetectedLanguage>> {
27    let mut language_info = HashMap::new();
28
29    // First pass: collect files by extension and find manifests
30    let mut source_files_by_lang = HashMap::new();
31    let mut manifest_files = Vec::new();
32
33    for file in files {
34        if let Some(extension) = file.extension().and_then(|e| e.to_str()) {
35            match extension {
36                // Rust files
37                "rs" => source_files_by_lang
38                    .entry("rust")
39                    .or_insert_with(Vec::new)
40                    .push(file.clone()),
41
42                // JavaScript/TypeScript files
43                "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" => source_files_by_lang
44                    .entry("javascript")
45                    .or_insert_with(Vec::new)
46                    .push(file.clone()),
47
48                // Python files
49                "py" | "pyx" | "pyi" => source_files_by_lang
50                    .entry("python")
51                    .or_insert_with(Vec::new)
52                    .push(file.clone()),
53
54                // Go files
55                "go" => source_files_by_lang
56                    .entry("go")
57                    .or_insert_with(Vec::new)
58                    .push(file.clone()),
59
60                // Java/Kotlin files
61                "java" | "kt" | "kts" => source_files_by_lang
62                    .entry("jvm")
63                    .or_insert_with(Vec::new)
64                    .push(file.clone()),
65
66                _ => {}
67            }
68        }
69
70        // Check for manifest files
71        if let Some(filename) = file.file_name().and_then(|n| n.to_str()) {
72            if is_manifest_file(filename) {
73                manifest_files.push(file.clone());
74            }
75        }
76    }
77
78    // Second pass: analyze each detected language with manifest parsing
79    if source_files_by_lang.contains_key("rust") || has_manifest(&manifest_files, &["Cargo.toml"]) {
80        if let Ok(info) =
81            analyze_rust_project(&manifest_files, source_files_by_lang.get("rust"), config)
82        {
83            language_info.insert("rust", info);
84        }
85    }
86
87    if source_files_by_lang.contains_key("javascript")
88        || has_manifest(&manifest_files, &["package.json"])
89    {
90        if let Ok(info) = analyze_javascript_project(
91            &manifest_files,
92            source_files_by_lang.get("javascript"),
93            config,
94        ) {
95            language_info.insert("javascript", info);
96        }
97    }
98
99    if source_files_by_lang.contains_key("python")
100        || has_manifest(
101            &manifest_files,
102            &["requirements.txt", "Pipfile", "pyproject.toml", "setup.py"],
103        )
104    {
105        if let Ok(info) =
106            analyze_python_project(&manifest_files, source_files_by_lang.get("python"), config)
107        {
108            language_info.insert("python", info);
109        }
110    }
111
112    if source_files_by_lang.contains_key("go") || has_manifest(&manifest_files, &["go.mod"]) {
113        if let Ok(info) =
114            analyze_go_project(&manifest_files, source_files_by_lang.get("go"), config)
115        {
116            language_info.insert("go", info);
117        }
118    }
119
120    if source_files_by_lang.contains_key("jvm")
121        || has_manifest(
122            &manifest_files,
123            &["pom.xml", "build.gradle", "build.gradle.kts"],
124        )
125    {
126        if let Ok(info) =
127            analyze_jvm_project(&manifest_files, source_files_by_lang.get("jvm"), config)
128        {
129            language_info.insert("jvm", info);
130        }
131    }
132
133    // Convert to DetectedLanguage format
134    let mut detected_languages = Vec::new();
135    for (_, info) in language_info {
136        detected_languages.push(DetectedLanguage {
137            name: info.name,
138            version: info.version,
139            confidence: info.confidence,
140            files: info.source_files,
141            main_dependencies: info.main_dependencies,
142            dev_dependencies: info.dev_dependencies,
143            package_manager: info.package_manager,
144        });
145    }
146
147    // Sort by confidence (highest first)
148    detected_languages.sort_by(|a, b| {
149        b.confidence
150            .partial_cmp(&a.confidence)
151            .unwrap_or(std::cmp::Ordering::Equal)
152    });
153
154    Ok(detected_languages)
155}
156
157/// Analyze Rust project from Cargo.toml
158fn analyze_rust_project(
159    manifest_files: &[PathBuf],
160    source_files: Option<&Vec<PathBuf>>,
161    config: &AnalysisConfig,
162) -> Result<LanguageInfo> {
163    let mut info = LanguageInfo {
164        name: "Rust".to_string(),
165        version: None,
166        edition: None,
167        package_manager: Some("cargo".to_string()),
168        main_dependencies: Vec::new(),
169        dev_dependencies: Vec::new(),
170        confidence: 0.5,
171        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
172        manifest_files: Vec::new(),
173    };
174
175    // Find and parse Cargo.toml
176    for manifest in manifest_files {
177        if manifest.file_name().and_then(|n| n.to_str()) == Some("Cargo.toml") {
178            info.manifest_files.push(manifest.clone());
179
180            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
181                if let Ok(cargo_toml) = toml::from_str::<toml::Value>(&content) {
182                    // Extract edition
183                    if let Some(package) = cargo_toml.get("package") {
184                        if let Some(edition) = package.get("edition").and_then(|e| e.as_str()) {
185                            info.edition = Some(edition.to_string());
186                        }
187
188                        // Estimate Rust version from edition
189                        info.version = match info.edition.as_deref() {
190                            Some("2021") => Some("1.56+".to_string()),
191                            Some("2018") => Some("1.31+".to_string()),
192                            Some("2015") => Some("1.0+".to_string()),
193                            _ => Some("unknown".to_string()),
194                        };
195                    }
196
197                    // Extract dependencies
198                    if let Some(deps) = cargo_toml.get("dependencies") {
199                        if let Some(deps_table) = deps.as_table() {
200                            for (name, _) in deps_table {
201                                info.main_dependencies.push(name.clone());
202                            }
203                        }
204                    }
205
206                    // Extract dev dependencies if enabled
207                    if config.include_dev_dependencies {
208                        if let Some(dev_deps) = cargo_toml.get("dev-dependencies") {
209                            if let Some(dev_deps_table) = dev_deps.as_table() {
210                                for (name, _) in dev_deps_table {
211                                    info.dev_dependencies.push(name.clone());
212                                }
213                            }
214                        }
215                    }
216
217                    info.confidence = 0.95; // High confidence with manifest
218                }
219            }
220            break;
221        }
222    }
223
224    // Boost confidence if we have source files
225    if !info.source_files.is_empty() {
226        info.confidence = (info.confidence + 0.9) / 2.0;
227    }
228
229    Ok(info)
230}
231
232/// Analyze JavaScript/TypeScript project from package.json
233fn analyze_javascript_project(
234    manifest_files: &[PathBuf],
235    source_files: Option<&Vec<PathBuf>>,
236    config: &AnalysisConfig,
237) -> Result<LanguageInfo> {
238    let mut info = LanguageInfo {
239        name: "JavaScript/TypeScript".to_string(),
240        version: None,
241        edition: None,
242        package_manager: None,
243        main_dependencies: Vec::new(),
244        dev_dependencies: Vec::new(),
245        confidence: 0.5,
246        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
247        manifest_files: Vec::new(),
248    };
249
250    // Detect package manager from lock files
251    for manifest in manifest_files {
252        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
253            match filename {
254                "package-lock.json" => info.package_manager = Some("npm".to_string()),
255                "yarn.lock" => info.package_manager = Some("yarn".to_string()),
256                "pnpm-lock.yaml" => info.package_manager = Some("pnpm".to_string()),
257                _ => {}
258            }
259        }
260    }
261
262    // Default to npm if no package manager detected
263    if info.package_manager.is_none() {
264        info.package_manager = Some("npm".to_string());
265    }
266
267    // Find and parse package.json
268    for manifest in manifest_files {
269        if manifest.file_name().and_then(|n| n.to_str()) == Some("package.json") {
270            info.manifest_files.push(manifest.clone());
271
272            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size) {
273                if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
274                    // Extract Node.js version from engines
275                    if let Some(engines) = package_json.get("engines") {
276                        if let Some(node_version) = engines.get("node").and_then(|v| v.as_str()) {
277                            info.version = Some(node_version.to_string());
278                        }
279                    }
280
281                    // Extract dependencies (always include all buckets for framework detection)
282                    if let Some(deps) = package_json.get("dependencies").and_then(|d| d.as_object())
283                    {
284                        for (name, _) in deps {
285                            info.main_dependencies.push(name.clone());
286                        }
287                    }
288
289                    // Frameworks like Vite/Remix/Next are often in devDependencies; always include
290                    if let Some(dev_deps) = package_json
291                        .get("devDependencies")
292                        .and_then(|d| d.as_object())
293                    {
294                        for (name, _) in dev_deps {
295                            info.main_dependencies.push(name.clone());
296                            info.dev_dependencies.push(name.clone());
297                        }
298                    }
299
300                    // peerDependencies frequently carry framework identity (e.g., react-router)
301                    if let Some(peer_deps) = package_json
302                        .get("peerDependencies")
303                        .and_then(|d| d.as_object())
304                    {
305                        for (name, _) in peer_deps {
306                            info.main_dependencies.push(name.clone());
307                        }
308                    }
309
310                    // optional/bundled deps can also hold framework markers (rare but cheap to add)
311                    if let Some(opt_deps) = package_json
312                        .get("optionalDependencies")
313                        .and_then(|d| d.as_object())
314                    {
315                        for (name, _) in opt_deps {
316                            info.main_dependencies.push(name.clone());
317                        }
318                    }
319                    if let Some(bundle_deps) = package_json
320                        .get("bundledDependencies")
321                        .and_then(|d| d.as_array())
322                    {
323                        for dep in bundle_deps.iter().filter_map(|d| d.as_str()) {
324                            info.main_dependencies.push(dep.to_string());
325                        }
326                    }
327
328                    info.confidence = 0.95; // High confidence with manifest
329                }
330            }
331            break;
332        }
333    }
334
335    // Adjust name based on file types
336    if let Some(files) = source_files {
337        let has_typescript = files.iter().any(|f| {
338            f.extension()
339                .and_then(|e| e.to_str())
340                .map_or(false, |ext| ext == "ts" || ext == "tsx")
341        });
342
343        if has_typescript {
344            info.name = "TypeScript".to_string();
345        } else {
346            info.name = "JavaScript".to_string();
347        }
348    }
349
350    // Boost confidence if we have source files
351    if !info.source_files.is_empty() {
352        info.confidence = (info.confidence + 0.9) / 2.0;
353    }
354
355    Ok(info)
356}
357
358/// Analyze Python project from various manifest files
359fn analyze_python_project(
360    manifest_files: &[PathBuf],
361    source_files: Option<&Vec<PathBuf>>,
362    config: &AnalysisConfig,
363) -> Result<LanguageInfo> {
364    let mut info = LanguageInfo {
365        name: "Python".to_string(),
366        version: None,
367        edition: None,
368        package_manager: None,
369        main_dependencies: Vec::new(),
370        dev_dependencies: Vec::new(),
371        confidence: 0.5,
372        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
373        manifest_files: Vec::new(),
374    };
375
376    // Detect package manager and parse manifest files
377    for manifest in manifest_files {
378        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
379            info.manifest_files.push(manifest.clone());
380
381            match filename {
382                "requirements.txt" => {
383                    info.package_manager = Some("pip".to_string());
384                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
385                    {
386                        parse_requirements_txt(&content, &mut info);
387                        info.confidence = 0.85;
388                    }
389                }
390                "Pipfile" => {
391                    info.package_manager = Some("pipenv".to_string());
392                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
393                    {
394                        parse_pipfile(&content, &mut info, config);
395                        info.confidence = 0.90;
396                    }
397                }
398                "pyproject.toml" => {
399                    info.package_manager = Some("poetry/pip".to_string());
400                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
401                    {
402                        parse_pyproject_toml(&content, &mut info, config);
403                        info.confidence = 0.95;
404                    }
405                }
406                "setup.py" => {
407                    info.package_manager = Some("setuptools".to_string());
408                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
409                    {
410                        parse_setup_py(&content, &mut info);
411                        info.confidence = 0.80;
412                    }
413                }
414                _ => {}
415            }
416        }
417    }
418
419    // Default to pip if no package manager detected
420    if info.package_manager.is_none() && !info.source_files.is_empty() {
421        info.package_manager = Some("pip".to_string());
422        info.confidence = 0.75;
423    }
424
425    // Boost confidence if we have source files
426    if !info.source_files.is_empty() {
427        info.confidence = (info.confidence + 0.8) / 2.0;
428    }
429
430    Ok(info)
431}
432
433/// Parse requirements.txt file
434fn parse_requirements_txt(content: &str, info: &mut LanguageInfo) {
435    for line in content.lines() {
436        let line = line.trim();
437        if line.is_empty() || line.starts_with('#') {
438            continue;
439        }
440
441        // Extract package name (before ==, >=, etc.)
442        if let Some(package_name) = line.split(&['=', '>', '<', '!', '~', ';'][..]).next() {
443            let clean_name = package_name.trim();
444            if !clean_name.is_empty() && !clean_name.starts_with('-') {
445                info.main_dependencies.push(clean_name.to_string());
446            }
447        }
448    }
449}
450
451/// Parse Pipfile (TOML format)
452fn parse_pipfile(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
453    if let Ok(pipfile) = toml::from_str::<toml::Value>(content) {
454        // Extract Python version requirement
455        if let Some(requires) = pipfile.get("requires") {
456            if let Some(python_version) = requires.get("python_version").and_then(|v| v.as_str()) {
457                info.version = Some(format!("~={}", python_version));
458            } else if let Some(python_full) =
459                requires.get("python_full_version").and_then(|v| v.as_str())
460            {
461                info.version = Some(format!("=={}", python_full));
462            }
463        }
464
465        // Extract packages
466        if let Some(packages) = pipfile.get("packages") {
467            if let Some(packages_table) = packages.as_table() {
468                for (name, _) in packages_table {
469                    info.main_dependencies.push(name.clone());
470                }
471            }
472        }
473
474        // Extract dev packages if enabled
475        if config.include_dev_dependencies {
476            if let Some(dev_packages) = pipfile.get("dev-packages") {
477                if let Some(dev_packages_table) = dev_packages.as_table() {
478                    for (name, _) in dev_packages_table {
479                        info.dev_dependencies.push(name.clone());
480                    }
481                }
482            }
483        }
484    }
485}
486
487/// Parse pyproject.toml file
488fn parse_pyproject_toml(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
489    if let Ok(pyproject) = toml::from_str::<toml::Value>(content) {
490        // Extract Python version from project metadata
491        if let Some(project) = pyproject.get("project") {
492            if let Some(requires_python) = project.get("requires-python").and_then(|v| v.as_str()) {
493                info.version = Some(requires_python.to_string());
494            }
495
496            // Extract dependencies
497            if let Some(dependencies) = project.get("dependencies") {
498                if let Some(deps_array) = dependencies.as_array() {
499                    for dep in deps_array {
500                        if let Some(dep_str) = dep.as_str() {
501                            if let Some(package_name) =
502                                dep_str.split(&['=', '>', '<', '!', '~', ';'][..]).next()
503                            {
504                                let clean_name = package_name.trim();
505                                if !clean_name.is_empty() {
506                                    info.main_dependencies.push(clean_name.to_string());
507                                }
508                            }
509                        }
510                    }
511                }
512            }
513
514            // Extract optional dependencies (dev dependencies)
515            if config.include_dev_dependencies {
516                if let Some(optional_deps) = project.get("optional-dependencies") {
517                    if let Some(optional_table) = optional_deps.as_table() {
518                        for (_, deps) in optional_table {
519                            if let Some(deps_array) = deps.as_array() {
520                                for dep in deps_array {
521                                    if let Some(dep_str) = dep.as_str() {
522                                        if let Some(package_name) = dep_str
523                                            .split(&['=', '>', '<', '!', '~', ';'][..])
524                                            .next()
525                                        {
526                                            let clean_name = package_name.trim();
527                                            if !clean_name.is_empty() {
528                                                info.dev_dependencies.push(clean_name.to_string());
529                                            }
530                                        }
531                                    }
532                                }
533                            }
534                        }
535                    }
536                }
537            }
538        }
539
540        // Check for Poetry configuration
541        if pyproject
542            .get("tool")
543            .and_then(|t| t.get("poetry"))
544            .is_some()
545        {
546            info.package_manager = Some("poetry".to_string());
547
548            // Extract Poetry dependencies
549            if let Some(tool) = pyproject.get("tool") {
550                if let Some(poetry) = tool.get("poetry") {
551                    if let Some(dependencies) = poetry.get("dependencies") {
552                        if let Some(deps_table) = dependencies.as_table() {
553                            for (name, _) in deps_table {
554                                if name != "python" {
555                                    info.main_dependencies.push(name.clone());
556                                }
557                            }
558                        }
559                    }
560
561                    if config.include_dev_dependencies {
562                        if let Some(dev_dependencies) = poetry
563                            .get("group")
564                            .and_then(|g| g.get("dev"))
565                            .and_then(|d| d.get("dependencies"))
566                        {
567                            if let Some(dev_deps_table) = dev_dependencies.as_table() {
568                                for (name, _) in dev_deps_table {
569                                    info.dev_dependencies.push(name.clone());
570                                }
571                            }
572                        }
573                    }
574                }
575            }
576        }
577    }
578}
579
580/// Parse setup.py file (basic extraction)
581fn parse_setup_py(content: &str, info: &mut LanguageInfo) {
582    // Basic regex-based parsing for common patterns
583    for line in content.lines() {
584        let line = line.trim();
585
586        // Look for python_requires
587        if line.contains("python_requires") {
588            if let Some(start) = line.find("\"") {
589                if let Some(end) = line[start + 1..].find("\"") {
590                    let version = &line[start + 1..start + 1 + end];
591                    info.version = Some(version.to_string());
592                }
593            } else if let Some(start) = line.find("'") {
594                if let Some(end) = line[start + 1..].find("'") {
595                    let version = &line[start + 1..start + 1 + end];
596                    info.version = Some(version.to_string());
597                }
598            }
599        }
600
601        // Look for install_requires (basic pattern)
602        if line.contains("install_requires") && line.contains("[") {
603            // This is a simplified parser - could be enhanced
604            info.main_dependencies
605                .push("setuptools-detected".to_string());
606        }
607    }
608}
609
610/// Analyze Go project from go.mod
611fn analyze_go_project(
612    manifest_files: &[PathBuf],
613    source_files: Option<&Vec<PathBuf>>,
614    config: &AnalysisConfig,
615) -> Result<LanguageInfo> {
616    let mut info = LanguageInfo {
617        name: "Go".to_string(),
618        version: None,
619        edition: None,
620        package_manager: Some("go mod".to_string()),
621        main_dependencies: Vec::new(),
622        dev_dependencies: Vec::new(),
623        confidence: 0.5,
624        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
625        manifest_files: Vec::new(),
626    };
627
628    // Find and parse go.mod
629    for manifest in manifest_files {
630        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
631            match filename {
632                "go.mod" => {
633                    info.manifest_files.push(manifest.clone());
634                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
635                    {
636                        parse_go_mod(&content, &mut info);
637                        info.confidence = 0.95;
638                    }
639                }
640                "go.sum" => {
641                    info.manifest_files.push(manifest.clone());
642                    // go.sum contains checksums, indicates a real Go project
643                    info.confidence = (info.confidence + 0.9) / 2.0;
644                }
645                _ => {}
646            }
647        }
648    }
649
650    // Boost confidence if we have source files
651    if !info.source_files.is_empty() {
652        info.confidence = (info.confidence + 0.85) / 2.0;
653    }
654
655    Ok(info)
656}
657
658/// Parse go.mod file
659fn parse_go_mod(content: &str, info: &mut LanguageInfo) {
660    for line in content.lines() {
661        let line = line.trim();
662
663        // Parse go version directive
664        if line.starts_with("go ") {
665            let version = line[3..].trim();
666            info.version = Some(version.to_string());
667        }
668
669        // Parse require block
670        if line.starts_with("require ") {
671            // Single line require
672            let require_line = &line[8..].trim();
673            if let Some(module_name) = require_line.split_whitespace().next() {
674                info.main_dependencies.push(module_name.to_string());
675            }
676        }
677    }
678
679    // Parse multi-line require blocks
680    let mut in_require_block = false;
681    for line in content.lines() {
682        let line = line.trim();
683
684        if line == "require (" {
685            in_require_block = true;
686            continue;
687        }
688
689        if in_require_block {
690            if line == ")" {
691                in_require_block = false;
692                continue;
693            }
694
695            // Parse dependency line
696            if !line.is_empty() && !line.starts_with("//") {
697                if let Some(module_name) = line.split_whitespace().next() {
698                    info.main_dependencies.push(module_name.to_string());
699                }
700            }
701        }
702    }
703}
704
705/// Analyze JVM project (Java/Kotlin) from build files
706fn analyze_jvm_project(
707    manifest_files: &[PathBuf],
708    source_files: Option<&Vec<PathBuf>>,
709    config: &AnalysisConfig,
710) -> Result<LanguageInfo> {
711    let mut info = LanguageInfo {
712        name: "Java/Kotlin".to_string(),
713        version: None,
714        edition: None,
715        package_manager: None,
716        main_dependencies: Vec::new(),
717        dev_dependencies: Vec::new(),
718        confidence: 0.5,
719        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
720        manifest_files: Vec::new(),
721    };
722
723    // Detect build tool and parse manifest files
724    for manifest in manifest_files {
725        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
726            info.manifest_files.push(manifest.clone());
727
728            match filename {
729                "pom.xml" => {
730                    info.package_manager = Some("maven".to_string());
731                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
732                    {
733                        parse_maven_pom(&content, &mut info, config);
734                        info.confidence = 0.90;
735                    }
736                }
737                "build.gradle" => {
738                    info.package_manager = Some("gradle".to_string());
739                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
740                    {
741                        parse_gradle_build(&content, &mut info, config);
742                        info.confidence = 0.85;
743                    }
744                }
745                "build.gradle.kts" => {
746                    info.package_manager = Some("gradle".to_string());
747                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
748                    {
749                        parse_gradle_kts_build(&content, &mut info, config);
750                        info.confidence = 0.85;
751                    }
752                }
753                _ => {}
754            }
755        }
756    }
757
758    // Adjust name based on file types
759    if let Some(files) = source_files {
760        let has_kotlin = files.iter().any(|f| {
761            f.extension()
762                .and_then(|e| e.to_str())
763                .map_or(false, |ext| ext == "kt" || ext == "kts")
764        });
765
766        if has_kotlin {
767            info.name = "Kotlin".to_string();
768        } else {
769            info.name = "Java".to_string();
770        }
771    }
772
773    // Boost confidence if we have source files
774    if !info.source_files.is_empty() {
775        info.confidence = (info.confidence + 0.8) / 2.0;
776    }
777
778    Ok(info)
779}
780
781/// Parse Maven pom.xml file (basic XML parsing)
782fn parse_maven_pom(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
783    // Simple regex-based XML parsing for common Maven patterns
784
785    // Extract Java version from maven.compiler.source or java.version
786    for line in content.lines() {
787        let line = line.trim();
788
789        // Look for Java version in properties
790        if line.contains("<maven.compiler.source>") {
791            if let Some(version) = extract_xml_content(line, "maven.compiler.source") {
792                info.version = Some(version);
793            }
794        } else if line.contains("<java.version>") {
795            if let Some(version) = extract_xml_content(line, "java.version") {
796                info.version = Some(version);
797            }
798        } else if line.contains("<maven.compiler.target>") && info.version.is_none() {
799            if let Some(version) = extract_xml_content(line, "maven.compiler.target") {
800                info.version = Some(version);
801            }
802        }
803
804        // Extract dependencies
805        if line.contains("<groupId>") && line.contains("<artifactId>") {
806            // This is a simplified approach - real XML parsing would be better
807            if let Some(group_id) = extract_xml_content(line, "groupId") {
808                if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
809                    let dependency = format!("{}:{}", group_id, artifact_id);
810                    info.main_dependencies.push(dependency);
811                }
812            }
813        } else if line.contains("<artifactId>") && !line.contains("<groupId>") {
814            if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
815                info.main_dependencies.push(artifact_id);
816            }
817        }
818    }
819
820    // Look for dependencies in a more structured way
821    let mut in_dependencies = false;
822    let mut in_test_dependencies = false;
823
824    for line in content.lines() {
825        let line = line.trim();
826
827        if line.contains("<dependencies>") {
828            in_dependencies = true;
829            continue;
830        }
831
832        if line.contains("</dependencies>") {
833            in_dependencies = false;
834            in_test_dependencies = false;
835            continue;
836        }
837
838        if in_dependencies && line.contains("<scope>test</scope>") {
839            in_test_dependencies = true;
840        }
841
842        if in_dependencies && line.contains("<artifactId>") {
843            if let Some(artifact_id) = extract_xml_content(line, "artifactId") {
844                if in_test_dependencies && config.include_dev_dependencies {
845                    info.dev_dependencies.push(artifact_id);
846                } else if !in_test_dependencies {
847                    info.main_dependencies.push(artifact_id);
848                }
849            }
850        }
851    }
852}
853
854/// Parse Gradle build.gradle file (Groovy syntax)
855fn parse_gradle_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
856    for line in content.lines() {
857        let line = line.trim();
858
859        // Look for Java version
860        if line.contains("sourceCompatibility") || line.contains("targetCompatibility") {
861            if let Some(version) = extract_gradle_version(line) {
862                info.version = Some(version);
863            }
864        } else if line.contains("JavaVersion.VERSION_") {
865            if let Some(pos) = line.find("VERSION_") {
866                let version_part = &line[pos + 8..];
867                if let Some(end) = version_part.find(|c: char| !c.is_numeric() && c != '_') {
868                    let version = &version_part[..end].replace('_', ".");
869                    info.version = Some(version.to_string());
870                }
871            }
872        }
873
874        // Look for dependencies
875        if line.starts_with("implementation ") || line.starts_with("compile ") {
876            if let Some(dep) = extract_gradle_dependency(line) {
877                info.main_dependencies.push(dep);
878            }
879        } else if (line.starts_with("testImplementation ") || line.starts_with("testCompile "))
880            && config.include_dev_dependencies
881        {
882            if let Some(dep) = extract_gradle_dependency(line) {
883                info.dev_dependencies.push(dep);
884            }
885        }
886    }
887}
888
889/// Parse Gradle build.gradle.kts file (Kotlin syntax)
890fn parse_gradle_kts_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
891    // Kotlin DSL is similar to Groovy but with some syntax differences
892    parse_gradle_build(content, info, config); // Reuse the same logic for now
893}
894
/// Return the trimmed text between `<tag>` and `</tag>` when both appear on `line`.
///
/// Yields `None` if either tag is missing, if the closing tag does not come after the
/// opening tag, or if nothing at all sits between them.
fn extract_xml_content(line: &str, tag: &str) -> Option<String> {
    let opening = format!("<{}>", tag);
    let closing = format!("</{}>", tag);

    let body_start = line.find(&opening)? + opening.len();
    let body_end = line.find(&closing)?;

    // An empty or inverted span means there is no content to report.
    if body_start >= body_end {
        return None;
    }

    Some(line[body_start..body_end].trim().to_string())
}
910
/// Extract a version value from a Gradle assignment such as `sourceCompatibility = '11'`.
///
/// Only the text after the first `=` is considered. Two forms are understood:
/// * a bare numeric literal (`= 11`, `= 1.8`) — the leading run of digits and dots;
/// * a quoted string (`= '11'`, `= "1.8"`, `= JavaVersion.toVersion("17")`) — the text
///   between the first quote character and the next matching one.
///
/// Returns `None` when the line has no `=`, or when the value is neither numeric nor
/// contains a complete quoted string.
fn extract_gradle_version(line: &str) -> Option<String> {
    let (_, value) = line.split_once('=')?;
    let value = value.trim();

    // Bare numeric literal, e.g. `sourceCompatibility = 1.8`.
    if value.starts_with(|c: char| c.is_ascii_digit()) {
        let literal_end = value
            .find(|c: char| !(c.is_ascii_digit() || c == '.'))
            .unwrap_or(value.len());
        return Some(value[..literal_end].trim_end_matches('.').to_string());
    }

    // `char_indices` yields the byte offset together with the character, so the offset is
    // safe to slice with even when non-ASCII text precedes the quote.
    let (open, quote) = value
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let quoted = &value[open + quote.len_utf8()..];
    let close = quoted.find(quote)?;
    Some(quoted[..close].to_string())
}
926
/// Extract the dependency named on a Gradle declaration line.
///
/// Looks at the first quoted string on the line, e.g. `implementation 'group:artifact:1.0'`
/// or `implementation("group:artifact")`. When that string has the shape
/// `group:artifact[:version[:classifier]]` with a non-empty group and artifact, only the
/// artifact name is returned. Any other quoted text (for example a project path such as
/// `':core'`) is returned unchanged.
///
/// Returns `None` when the line contains no complete quoted string.
fn extract_gradle_dependency(line: &str) -> Option<String> {
    // `char_indices` yields the byte offset together with the character, so the offset is
    // safe to slice with even when non-ASCII text precedes the quote.
    let (open, quote) = line
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let after_quote = &line[open + quote.len_utf8()..];
    let notation = &after_quote[..after_quote.find(quote)?];

    // The artifact is always the second segment, whether or not a version or classifier
    // follows it.
    let mut segments = notation.split(':');
    match (segments.next(), segments.next()) {
        (Some(group), Some(artifact)) if !group.is_empty() && !artifact.is_empty() => {
            Some(artifact.to_string())
        }
        _ => Some(notation.to_string()),
    }
}
945
/// Report whether `filename` is exactly one of the manifest or lock file names this
/// detector knows about. The comparison is case-sensitive and expects a bare file name,
/// not a path.
fn is_manifest_file(filename: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        // Rust
        "Cargo.toml",
        "Cargo.lock",
        // JavaScript / TypeScript
        "package.json",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        // Python
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "setup.py",
        // Go
        "go.mod",
        "go.sum",
        // JVM
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
    ];

    KNOWN_MANIFESTS.iter().any(|known| *known == filename)
}
968
/// Report whether at least one path in `manifest_files` has a final component equal to one
/// of `target_files`. Paths without a file name, or whose name is not valid UTF-8, never
/// match.
fn has_manifest(manifest_files: &[PathBuf], target_files: &[&str]) -> bool {
    manifest_files
        .iter()
        .filter_map(|path| path.file_name()?.to_str())
        .any(|name| target_files.contains(&name))
}
977
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Materialise a throwaway project and run language detection on it.
    ///
    /// Each `(relative_path, contents)` entry is written beneath a fresh temporary
    /// directory (parent directories are created as needed). The resulting paths are passed
    /// to `detect_languages` in the order given, using the default `AnalysisConfig`.
    fn detect_in_project(entries: &[(&str, &str)]) -> Vec<DetectedLanguage> {
        let workspace = TempDir::new().unwrap();

        let mut paths = Vec::with_capacity(entries.len());
        for (relative, contents) in entries {
            let path = workspace.path().join(relative);
            if let Some(parent) = path.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&path, contents).unwrap();
            paths.push(path);
        }

        detect_languages(&paths, &AnalysisConfig::default()).unwrap()
    }

    /// Cargo manifest with an edition plus one Rust source file.
    #[test]
    fn test_rust_project_detection() {
        let cargo_toml = r#"
[package]
name = "test-project"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = "1.0"
tokio = "1.0"

[dev-dependencies]
assert_cmd = "2.0"
"#;

        let detected = detect_in_project(&[
            ("Cargo.toml", cargo_toml),
            ("src/main.rs", "fn main() {}"),
        ]);

        assert_eq!(detected.len(), 1);
        let rust = &detected[0];
        assert_eq!(rust.name, "Rust");
        assert_eq!(rust.version, Some("1.56+".to_string()));
        assert!(rust.confidence > 0.9);
    }

    /// `package.json` with an `engines.node` constraint plus one JavaScript file.
    #[test]
    fn test_javascript_project_detection() {
        let package_json = r#"
{
  "name": "test-project",
  "version": "1.0.0",
  "engines": {
    "node": ">=16.0.0"
  },
  "dependencies": {
    "express": "^4.18.0",
    "lodash": "^4.17.21"
  },
  "devDependencies": {
    "jest": "^29.0.0"
  }
}
"#;

        let detected = detect_in_project(&[
            ("package.json", package_json),
            ("index.js", "console.log('hello');"),
        ]);

        assert_eq!(detected.len(), 1);
        let javascript = &detected[0];
        assert_eq!(javascript.name, "JavaScript");
        assert_eq!(javascript.version, Some(">=16.0.0".to_string()));
        assert!(javascript.confidence > 0.9);
    }

    /// `pyproject.toml` with `requires-python` plus one Python file.
    #[test]
    fn test_python_project_detection() {
        let pyproject_toml = r#"
[project]
name = "test-project"
version = "0.1.0"
requires-python = ">=3.8"
dependencies = [
    "flask>=2.0.0",
    "requests>=2.25.0",
    "pandas>=1.3.0"
]

[project.optional-dependencies]
dev = [
    "pytest>=6.0.0",
    "black>=21.0.0"
]
"#;

        let detected = detect_in_project(&[
            ("pyproject.toml", pyproject_toml),
            ("app.py", "print('Hello, World!')"),
        ]);

        assert_eq!(detected.len(), 1);
        let python = &detected[0];
        assert_eq!(python.name, "Python");
        assert_eq!(python.version, Some(">=3.8".to_string()));
        assert!(python.confidence > 0.8);
    }

    /// `go.mod` with a `go` directive plus one Go file.
    #[test]
    fn test_go_project_detection() {
        let go_mod = r#"
module example.com/myproject

go 1.21

require (
    github.com/gin-gonic/gin v1.9.1
    github.com/stretchr/testify v1.8.4
    golang.org/x/time v0.3.0
)
"#;

        let detected = detect_in_project(&[
            ("go.mod", go_mod),
            ("main.go", "package main\n\nfunc main() {}"),
        ]);

        assert_eq!(detected.len(), 1);
        let go = &detected[0];
        assert_eq!(go.name, "Go");
        assert_eq!(go.version, Some("1.21".to_string()));
        assert!(go.confidence > 0.8);
    }

    /// Maven `pom.xml` declaring compiler source/target 17 plus one Java file.
    #[test]
    fn test_java_maven_project_detection() {
        let pom_xml = r#"
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    
    <groupId>com.example</groupId>
    <artifactId>test-project</artifactId>
    <version>1.0.0</version>
    
    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
    </properties>
    
    <dependencies>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>5.3.21</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
"#;

        let detected = detect_in_project(&[
            ("pom.xml", pom_xml),
            ("src/main/java/App.java", "public class App {}"),
        ]);

        assert_eq!(detected.len(), 1);
        let java = &detected[0];
        assert_eq!(java.name, "Java");
        assert_eq!(java.version, Some("17".to_string()));
        assert!(java.confidence > 0.8);
    }

    /// Gradle Kotlin DSL build script plus one Kotlin file.
    #[test]
    fn test_kotlin_gradle_project_detection() {
        let build_gradle_kts = r#"
plugins {
    kotlin("jvm") version "1.9.0"
    application
}

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
}

dependencies {
    implementation("org.jetbrains.kotlin:kotlin-stdlib")
    implementation("io.ktor:ktor-server-core:2.3.2")
    testImplementation("org.jetbrains.kotlin:kotlin-test")
}
"#;

        let detected = detect_in_project(&[
            ("build.gradle.kts", build_gradle_kts),
            ("src/main/kotlin/Main.kt", "fun main() {}"),
        ]);

        assert_eq!(detected.len(), 1);
        let kotlin = &detected[0];
        assert_eq!(kotlin.name, "Kotlin");
        assert!(kotlin.confidence > 0.8);
    }

    /// Plain `requirements.txt` plus one Python file.
    #[test]
    fn test_python_requirements_txt_detection() {
        let requirements_txt = r#"
Flask==2.3.2
requests>=2.28.0
pandas==1.5.3
pytest==7.4.0
black>=23.0.0
"#;

        let detected = detect_in_project(&[
            ("requirements.txt", requirements_txt),
            ("app.py", "import flask"),
        ]);

        assert_eq!(detected.len(), 1);
        let python = &detected[0];
        assert_eq!(python.name, "Python");
        assert!(python.confidence > 0.8);
    }
}