syncable_cli/analyzer/
language_detector.rs

1use crate::analyzer::{AnalysisConfig, DetectedLanguage};
2use crate::common::file_utils;
3use crate::error::Result;
4use serde_json::Value as JsonValue;
5use std::collections::HashMap;
6use std::path::PathBuf;
7
/// Detailed findings about a single detected language or ecosystem.
///
/// The per-language analyzers in this module fill one of these in, and
/// `detect_languages` then converts it into a `DetectedLanguage`.
#[derive(Clone, Debug)]
pub struct LanguageInfo {
    /// Display name of the language, e.g. `"Rust"` or `"TypeScript"`.
    pub name: String,
    /// Language/runtime version (or version requirement) when a manifest reveals one.
    pub version: Option<String>,
    /// Language edition, when the ecosystem has such a notion (e.g. Cargo's `edition`).
    pub edition: Option<String>,
    /// Package manager or build tool, e.g. `"cargo"`, `"npm"`, `"maven"`.
    pub package_manager: Option<String>,
    /// Names of runtime dependencies read from the manifests.
    pub main_dependencies: Vec<String>,
    /// Names of development/test dependencies read from the manifests.
    pub dev_dependencies: Vec<String>,
    /// Heuristic confidence score; `detect_languages` sorts highest first.
    pub confidence: f32,
    /// Source files attributed to this language.
    pub source_files: Vec<PathBuf>,
    /// Manifest files that were considered for this language.
    pub manifest_files: Vec<PathBuf>,
}
21
22/// Detects programming languages with advanced manifest parsing
23pub fn detect_languages(
24    files: &[PathBuf],
25    config: &AnalysisConfig,
26) -> Result<Vec<DetectedLanguage>> {
27    let mut language_info = HashMap::new();
28
29    // First pass: collect files by extension and find manifests
30    let mut source_files_by_lang = HashMap::new();
31    let mut manifest_files = Vec::new();
32
33    for file in files {
34        if let Some(extension) = file.extension().and_then(|e| e.to_str()) {
35            match extension {
36                // Rust files
37                "rs" => source_files_by_lang
38                    .entry("rust")
39                    .or_insert_with(Vec::new)
40                    .push(file.clone()),
41
42                // JavaScript/TypeScript files
43                "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" => source_files_by_lang
44                    .entry("javascript")
45                    .or_insert_with(Vec::new)
46                    .push(file.clone()),
47
48                // Python files
49                "py" | "pyx" | "pyi" => source_files_by_lang
50                    .entry("python")
51                    .or_insert_with(Vec::new)
52                    .push(file.clone()),
53
54                // Go files
55                "go" => source_files_by_lang
56                    .entry("go")
57                    .or_insert_with(Vec::new)
58                    .push(file.clone()),
59
60                // Java/Kotlin files
61                "java" | "kt" | "kts" => source_files_by_lang
62                    .entry("jvm")
63                    .or_insert_with(Vec::new)
64                    .push(file.clone()),
65
66                _ => {}
67            }
68        }
69
70        // Check for manifest files
71        if let Some(filename) = file.file_name().and_then(|n| n.to_str())
72            && is_manifest_file(filename)
73        {
74            manifest_files.push(file.clone());
75        }
76    }
77
78    // Second pass: analyze each detected language with manifest parsing
79    if (source_files_by_lang.contains_key("rust") || has_manifest(&manifest_files, &["Cargo.toml"]))
80        && let Ok(info) =
81            analyze_rust_project(&manifest_files, source_files_by_lang.get("rust"), config)
82    {
83        language_info.insert("rust", info);
84    }
85
86    if (source_files_by_lang.contains_key("javascript")
87        || has_manifest(&manifest_files, &["package.json"]))
88        && let Ok(info) = analyze_javascript_project(
89            &manifest_files,
90            source_files_by_lang.get("javascript"),
91            config,
92        )
93    {
94        language_info.insert("javascript", info);
95    }
96
97    if (source_files_by_lang.contains_key("python")
98        || has_manifest(
99            &manifest_files,
100            &["requirements.txt", "Pipfile", "pyproject.toml", "setup.py"],
101        ))
102        && let Ok(info) =
103            analyze_python_project(&manifest_files, source_files_by_lang.get("python"), config)
104    {
105        language_info.insert("python", info);
106    }
107
108    if (source_files_by_lang.contains_key("go") || has_manifest(&manifest_files, &["go.mod"]))
109        && let Ok(info) =
110            analyze_go_project(&manifest_files, source_files_by_lang.get("go"), config)
111    {
112        language_info.insert("go", info);
113    }
114
115    if (source_files_by_lang.contains_key("jvm")
116        || has_manifest(
117            &manifest_files,
118            &["pom.xml", "build.gradle", "build.gradle.kts"],
119        ))
120        && let Ok(info) =
121            analyze_jvm_project(&manifest_files, source_files_by_lang.get("jvm"), config)
122    {
123        language_info.insert("jvm", info);
124    }
125
126    // Convert to DetectedLanguage format
127    let mut detected_languages = Vec::new();
128    for (_, info) in language_info {
129        detected_languages.push(DetectedLanguage {
130            name: info.name,
131            version: info.version,
132            confidence: info.confidence,
133            files: info.source_files,
134            main_dependencies: info.main_dependencies,
135            dev_dependencies: info.dev_dependencies,
136            package_manager: info.package_manager,
137        });
138    }
139
140    // Sort by confidence (highest first)
141    detected_languages.sort_by(|a, b| {
142        b.confidence
143            .partial_cmp(&a.confidence)
144            .unwrap_or(std::cmp::Ordering::Equal)
145    });
146
147    Ok(detected_languages)
148}
149
150/// Analyze Rust project from Cargo.toml
151fn analyze_rust_project(
152    manifest_files: &[PathBuf],
153    source_files: Option<&Vec<PathBuf>>,
154    config: &AnalysisConfig,
155) -> Result<LanguageInfo> {
156    let mut info = LanguageInfo {
157        name: "Rust".to_string(),
158        version: None,
159        edition: None,
160        package_manager: Some("cargo".to_string()),
161        main_dependencies: Vec::new(),
162        dev_dependencies: Vec::new(),
163        confidence: 0.5,
164        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
165        manifest_files: Vec::new(),
166    };
167
168    // Find and parse Cargo.toml
169    for manifest in manifest_files {
170        if manifest.file_name().and_then(|n| n.to_str()) == Some("Cargo.toml") {
171            info.manifest_files.push(manifest.clone());
172
173            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
174                && let Ok(cargo_toml) = toml::from_str::<toml::Value>(&content)
175            {
176                // Extract edition
177                if let Some(package) = cargo_toml.get("package")
178                    && let Some(edition) = package.get("edition").and_then(|e| e.as_str())
179                {
180                    info.edition = Some(edition.to_string());
181                }
182
183                // Estimate Rust version from edition
184                info.version = match info.edition.as_deref() {
185                    Some("2021") => Some("1.56+".to_string()),
186                    Some("2018") => Some("1.31+".to_string()),
187                    Some("2015") => Some("1.0+".to_string()),
188                    _ => Some("unknown".to_string()),
189                };
190
191                // Extract dependencies
192                if let Some(deps_table) = cargo_toml.get("dependencies").and_then(|d| d.as_table())
193                {
194                    for (name, _) in deps_table {
195                        info.main_dependencies.push(name.clone());
196                    }
197                }
198
199                // Extract dev dependencies if enabled
200                if config.include_dev_dependencies
201                    && let Some(dev_deps_table) = cargo_toml
202                        .get("dev-dependencies")
203                        .and_then(|d| d.as_table())
204                {
205                    for (name, _) in dev_deps_table {
206                        info.dev_dependencies.push(name.clone());
207                    }
208                }
209
210                info.confidence = 0.95; // High confidence with manifest
211            }
212            break;
213        }
214    }
215
216    // Boost confidence if we have source files
217    if !info.source_files.is_empty() {
218        info.confidence = (info.confidence + 0.9) / 2.0;
219    }
220
221    Ok(info)
222}
223
224/// Analyze JavaScript/TypeScript project from package.json
225fn analyze_javascript_project(
226    manifest_files: &[PathBuf],
227    source_files: Option<&Vec<PathBuf>>,
228    config: &AnalysisConfig,
229) -> Result<LanguageInfo> {
230    let mut info = LanguageInfo {
231        name: "JavaScript/TypeScript".to_string(),
232        version: None,
233        edition: None,
234        package_manager: None,
235        main_dependencies: Vec::new(),
236        dev_dependencies: Vec::new(),
237        confidence: 0.5,
238        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
239        manifest_files: Vec::new(),
240    };
241
242    // Detect package manager from lock files
243    for manifest in manifest_files {
244        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
245            match filename {
246                "package-lock.json" => info.package_manager = Some("npm".to_string()),
247                "yarn.lock" => info.package_manager = Some("yarn".to_string()),
248                "pnpm-lock.yaml" => info.package_manager = Some("pnpm".to_string()),
249                _ => {}
250            }
251        }
252    }
253
254    // Default to npm if no package manager detected
255    if info.package_manager.is_none() {
256        info.package_manager = Some("npm".to_string());
257    }
258
259    // Find and parse package.json
260    for manifest in manifest_files {
261        if manifest.file_name().and_then(|n| n.to_str()) == Some("package.json") {
262            info.manifest_files.push(manifest.clone());
263
264            if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
265                && let Ok(package_json) = serde_json::from_str::<JsonValue>(&content)
266            {
267                // Extract Node.js version from engines
268                if let Some(node_version) = package_json
269                    .get("engines")
270                    .and_then(|e| e.get("node"))
271                    .and_then(|v| v.as_str())
272                {
273                    info.version = Some(node_version.to_string());
274                }
275
276                // Extract dependencies (always include all buckets for framework detection)
277                if let Some(deps) = package_json.get("dependencies").and_then(|d| d.as_object()) {
278                    for (name, _) in deps {
279                        info.main_dependencies.push(name.clone());
280                    }
281                }
282
283                // Frameworks like Vite/Remix/Next are often in devDependencies; always include
284                if let Some(dev_deps) = package_json
285                    .get("devDependencies")
286                    .and_then(|d| d.as_object())
287                {
288                    for (name, _) in dev_deps {
289                        info.main_dependencies.push(name.clone());
290                        info.dev_dependencies.push(name.clone());
291                    }
292                }
293
294                // peerDependencies frequently carry framework identity (e.g., react-router)
295                if let Some(peer_deps) = package_json
296                    .get("peerDependencies")
297                    .and_then(|d| d.as_object())
298                {
299                    for (name, _) in peer_deps {
300                        info.main_dependencies.push(name.clone());
301                    }
302                }
303
304                // optional/bundled deps can also hold framework markers (rare but cheap to add)
305                if let Some(opt_deps) = package_json
306                    .get("optionalDependencies")
307                    .and_then(|d| d.as_object())
308                {
309                    for (name, _) in opt_deps {
310                        info.main_dependencies.push(name.clone());
311                    }
312                }
313                if let Some(bundle_deps) = package_json
314                    .get("bundledDependencies")
315                    .and_then(|d| d.as_array())
316                {
317                    for dep in bundle_deps.iter().filter_map(|d| d.as_str()) {
318                        info.main_dependencies.push(dep.to_string());
319                    }
320                }
321
322                info.confidence = 0.95; // High confidence with manifest
323            }
324            break;
325        }
326    }
327
328    // Adjust name based on file types
329    if let Some(files) = source_files {
330        let has_typescript = files.iter().any(|f| {
331            f.extension()
332                .and_then(|e| e.to_str())
333                .is_some_and(|ext| ext == "ts" || ext == "tsx")
334        });
335
336        if has_typescript {
337            info.name = "TypeScript".to_string();
338        } else {
339            info.name = "JavaScript".to_string();
340        }
341    }
342
343    // Boost confidence if we have source files
344    if !info.source_files.is_empty() {
345        info.confidence = (info.confidence + 0.9) / 2.0;
346    }
347
348    Ok(info)
349}
350
351/// Analyze Python project from various manifest files
352fn analyze_python_project(
353    manifest_files: &[PathBuf],
354    source_files: Option<&Vec<PathBuf>>,
355    config: &AnalysisConfig,
356) -> Result<LanguageInfo> {
357    let mut info = LanguageInfo {
358        name: "Python".to_string(),
359        version: None,
360        edition: None,
361        package_manager: None,
362        main_dependencies: Vec::new(),
363        dev_dependencies: Vec::new(),
364        confidence: 0.5,
365        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
366        manifest_files: Vec::new(),
367    };
368
369    // Detect package manager and parse manifest files
370    for manifest in manifest_files {
371        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
372            info.manifest_files.push(manifest.clone());
373
374            match filename {
375                "requirements.txt" => {
376                    info.package_manager = Some("pip".to_string());
377                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
378                    {
379                        parse_requirements_txt(&content, &mut info);
380                        info.confidence = 0.85;
381                    }
382                }
383                "Pipfile" => {
384                    info.package_manager = Some("pipenv".to_string());
385                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
386                    {
387                        parse_pipfile(&content, &mut info, config);
388                        info.confidence = 0.90;
389                    }
390                }
391                "pyproject.toml" => {
392                    info.package_manager = Some("poetry/pip".to_string());
393                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
394                    {
395                        parse_pyproject_toml(&content, &mut info, config);
396                        info.confidence = 0.95;
397                    }
398                }
399                "setup.py" => {
400                    info.package_manager = Some("setuptools".to_string());
401                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
402                    {
403                        parse_setup_py(&content, &mut info);
404                        info.confidence = 0.80;
405                    }
406                }
407                _ => {}
408            }
409        }
410    }
411
412    // Default to pip if no package manager detected
413    if info.package_manager.is_none() && !info.source_files.is_empty() {
414        info.package_manager = Some("pip".to_string());
415        info.confidence = 0.75;
416    }
417
418    // Boost confidence if we have source files
419    if !info.source_files.is_empty() {
420        info.confidence = (info.confidence + 0.8) / 2.0;
421    }
422
423    Ok(info)
424}
425
426/// Parse requirements.txt file
427fn parse_requirements_txt(content: &str, info: &mut LanguageInfo) {
428    for line in content.lines() {
429        let line = line.trim();
430        if line.is_empty() || line.starts_with('#') {
431            continue;
432        }
433
434        // Extract package name (before ==, >=, etc.)
435        if let Some(package_name) = line.split(&['=', '>', '<', '!', '~', ';'][..]).next() {
436            let clean_name = package_name.trim();
437            if !clean_name.is_empty() && !clean_name.starts_with('-') {
438                info.main_dependencies.push(clean_name.to_string());
439            }
440        }
441    }
442}
443
444/// Parse Pipfile (TOML format)
445fn parse_pipfile(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
446    if let Ok(pipfile) = toml::from_str::<toml::Value>(content) {
447        // Extract Python version requirement
448        if let Some(requires) = pipfile.get("requires") {
449            if let Some(python_version) = requires.get("python_version").and_then(|v| v.as_str()) {
450                info.version = Some(format!("~={}", python_version));
451            } else if let Some(python_full) =
452                requires.get("python_full_version").and_then(|v| v.as_str())
453            {
454                info.version = Some(format!("=={}", python_full));
455            }
456        }
457
458        // Extract packages
459        if let Some(packages_table) = pipfile.get("packages").and_then(|p| p.as_table()) {
460            for (name, _) in packages_table {
461                info.main_dependencies.push(name.clone());
462            }
463        }
464
465        // Extract dev packages if enabled
466        if config.include_dev_dependencies
467            && let Some(dev_packages_table) = pipfile.get("dev-packages").and_then(|d| d.as_table())
468        {
469            for (name, _) in dev_packages_table {
470                info.dev_dependencies.push(name.clone());
471            }
472        }
473    }
474}
475
476/// Parse pyproject.toml file
477fn parse_pyproject_toml(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
478    if let Ok(pyproject) = toml::from_str::<toml::Value>(content) {
479        // Extract Python version from project metadata
480        if let Some(project) = pyproject.get("project") {
481            if let Some(requires_python) = project.get("requires-python").and_then(|v| v.as_str()) {
482                info.version = Some(requires_python.to_string());
483            }
484
485            // Extract dependencies
486            if let Some(deps_array) = project.get("dependencies").and_then(|d| d.as_array()) {
487                for dep in deps_array {
488                    if let Some(dep_str) = dep.as_str()
489                        && let Some(package_name) =
490                            dep_str.split(&['=', '>', '<', '!', '~', ';'][..]).next()
491                    {
492                        let clean_name = package_name.trim();
493                        if !clean_name.is_empty() {
494                            info.main_dependencies.push(clean_name.to_string());
495                        }
496                    }
497                }
498            }
499
500            // Extract optional dependencies (dev dependencies)
501            if config.include_dev_dependencies
502                && let Some(optional_table) = project
503                    .get("optional-dependencies")
504                    .and_then(|o| o.as_table())
505            {
506                for (_, deps) in optional_table {
507                    if let Some(deps_array) = deps.as_array() {
508                        for dep in deps_array {
509                            if let Some(dep_str) = dep.as_str()
510                                && let Some(package_name) =
511                                    dep_str.split(&['=', '>', '<', '!', '~', ';'][..]).next()
512                            {
513                                let clean_name = package_name.trim();
514                                if !clean_name.is_empty() {
515                                    info.dev_dependencies.push(clean_name.to_string());
516                                }
517                            }
518                        }
519                    }
520                }
521            }
522        }
523
524        // Check for Poetry configuration
525        if let Some(poetry) = pyproject.get("tool").and_then(|t| t.get("poetry")) {
526            info.package_manager = Some("poetry".to_string());
527
528            // Extract Poetry dependencies
529            if let Some(deps_table) = poetry.get("dependencies").and_then(|d| d.as_table()) {
530                for (name, _) in deps_table {
531                    if name != "python" {
532                        info.main_dependencies.push(name.clone());
533                    }
534                }
535            }
536
537            if config.include_dev_dependencies
538                && let Some(dev_deps_table) = poetry
539                    .get("group")
540                    .and_then(|g| g.get("dev"))
541                    .and_then(|d| d.get("dependencies"))
542                    .and_then(|d| d.as_table())
543            {
544                for (name, _) in dev_deps_table {
545                    info.dev_dependencies.push(name.clone());
546                }
547            }
548        }
549    }
550}
551
552/// Parse setup.py file (basic extraction)
553fn parse_setup_py(content: &str, info: &mut LanguageInfo) {
554    // Basic regex-based parsing for common patterns
555    for line in content.lines() {
556        let line = line.trim();
557
558        // Look for python_requires
559        if line.contains("python_requires") {
560            if let Some(start) = line.find('"')
561                && let Some(end) = line[start + 1..].find('"')
562            {
563                let version = &line[start + 1..start + 1 + end];
564                info.version = Some(version.to_string());
565            } else if let Some(start) = line.find('\'')
566                && let Some(end) = line[start + 1..].find('\'')
567            {
568                let version = &line[start + 1..start + 1 + end];
569                info.version = Some(version.to_string());
570            }
571        }
572
573        // Look for install_requires (basic pattern)
574        if line.contains("install_requires") && line.contains("[") {
575            // This is a simplified parser - could be enhanced
576            info.main_dependencies
577                .push("setuptools-detected".to_string());
578        }
579    }
580}
581
582/// Analyze Go project from go.mod
583fn analyze_go_project(
584    manifest_files: &[PathBuf],
585    source_files: Option<&Vec<PathBuf>>,
586    config: &AnalysisConfig,
587) -> Result<LanguageInfo> {
588    let mut info = LanguageInfo {
589        name: "Go".to_string(),
590        version: None,
591        edition: None,
592        package_manager: Some("go mod".to_string()),
593        main_dependencies: Vec::new(),
594        dev_dependencies: Vec::new(),
595        confidence: 0.5,
596        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
597        manifest_files: Vec::new(),
598    };
599
600    // Find and parse go.mod
601    for manifest in manifest_files {
602        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
603            match filename {
604                "go.mod" => {
605                    info.manifest_files.push(manifest.clone());
606                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
607                    {
608                        parse_go_mod(&content, &mut info);
609                        info.confidence = 0.95;
610                    }
611                }
612                "go.sum" => {
613                    info.manifest_files.push(manifest.clone());
614                    // go.sum contains checksums, indicates a real Go project
615                    info.confidence = (info.confidence + 0.9) / 2.0;
616                }
617                _ => {}
618            }
619        }
620    }
621
622    // Boost confidence if we have source files
623    if !info.source_files.is_empty() {
624        info.confidence = (info.confidence + 0.85) / 2.0;
625    }
626
627    Ok(info)
628}
629
630/// Parse go.mod file
631fn parse_go_mod(content: &str, info: &mut LanguageInfo) {
632    for line in content.lines() {
633        let line = line.trim();
634
635        // Parse go version directive
636        if let Some(version) = line.strip_prefix("go ") {
637            info.version = Some(version.trim().to_string());
638        }
639
640        // Parse require block
641        if let Some(require_line) = line.strip_prefix("require ") {
642            // Single line require
643            let require_line = require_line.trim();
644            if let Some(module_name) = require_line.split_whitespace().next() {
645                info.main_dependencies.push(module_name.to_string());
646            }
647        }
648    }
649
650    // Parse multi-line require blocks
651    let mut in_require_block = false;
652    for line in content.lines() {
653        let line = line.trim();
654
655        if line == "require (" {
656            in_require_block = true;
657            continue;
658        }
659
660        if in_require_block {
661            if line == ")" {
662                in_require_block = false;
663                continue;
664            }
665
666            // Parse dependency line
667            if !line.is_empty()
668                && !line.starts_with("//")
669                && let Some(module_name) = line.split_whitespace().next()
670            {
671                info.main_dependencies.push(module_name.to_string());
672            }
673        }
674    }
675}
676
677/// Analyze JVM project (Java/Kotlin) from build files
678fn analyze_jvm_project(
679    manifest_files: &[PathBuf],
680    source_files: Option<&Vec<PathBuf>>,
681    config: &AnalysisConfig,
682) -> Result<LanguageInfo> {
683    let mut info = LanguageInfo {
684        name: "Java/Kotlin".to_string(),
685        version: None,
686        edition: None,
687        package_manager: None,
688        main_dependencies: Vec::new(),
689        dev_dependencies: Vec::new(),
690        confidence: 0.5,
691        source_files: source_files.map_or(Vec::new(), |f| f.clone()),
692        manifest_files: Vec::new(),
693    };
694
695    // Detect build tool and parse manifest files
696    for manifest in manifest_files {
697        if let Some(filename) = manifest.file_name().and_then(|n| n.to_str()) {
698            info.manifest_files.push(manifest.clone());
699
700            match filename {
701                "pom.xml" => {
702                    info.package_manager = Some("maven".to_string());
703                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
704                    {
705                        parse_maven_pom(&content, &mut info, config);
706                        info.confidence = 0.90;
707                    }
708                }
709                "build.gradle" => {
710                    info.package_manager = Some("gradle".to_string());
711                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
712                    {
713                        parse_gradle_build(&content, &mut info, config);
714                        info.confidence = 0.85;
715                    }
716                }
717                "build.gradle.kts" => {
718                    info.package_manager = Some("gradle".to_string());
719                    if let Ok(content) = file_utils::read_file_safe(manifest, config.max_file_size)
720                    {
721                        parse_gradle_kts_build(&content, &mut info, config);
722                        info.confidence = 0.85;
723                    }
724                }
725                _ => {}
726            }
727        }
728    }
729
730    // Adjust name based on file types
731    if let Some(files) = source_files {
732        let has_kotlin = files.iter().any(|f| {
733            f.extension()
734                .and_then(|e| e.to_str())
735                .is_some_and(|ext| ext == "kt" || ext == "kts")
736        });
737
738        if has_kotlin {
739            info.name = "Kotlin".to_string();
740        } else {
741            info.name = "Java".to_string();
742        }
743    }
744
745    // Boost confidence if we have source files
746    if !info.source_files.is_empty() {
747        info.confidence = (info.confidence + 0.8) / 2.0;
748    }
749
750    Ok(info)
751}
752
753/// Parse Maven pom.xml file (basic XML parsing)
754fn parse_maven_pom(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
755    // Simple regex-based XML parsing for common Maven patterns
756
757    // Extract Java version from maven.compiler.source or java.version
758    for line in content.lines() {
759        let line = line.trim();
760
761        // Look for Java version in properties
762        if line.contains("<maven.compiler.source>")
763            && let Some(version) = extract_xml_content(line, "maven.compiler.source")
764        {
765            info.version = Some(version);
766        } else if line.contains("<java.version>")
767            && let Some(version) = extract_xml_content(line, "java.version")
768        {
769            info.version = Some(version);
770        } else if line.contains("<maven.compiler.target>")
771            && info.version.is_none()
772            && let Some(version) = extract_xml_content(line, "maven.compiler.target")
773        {
774            info.version = Some(version);
775        }
776
777        // Extract dependencies
778        if line.contains("<groupId>")
779            && line.contains("<artifactId>")
780            && let Some(group_id) = extract_xml_content(line, "groupId")
781            && let Some(artifact_id) = extract_xml_content(line, "artifactId")
782        {
783            // This is a simplified approach - real XML parsing would be better
784            let dependency = format!("{}:{}", group_id, artifact_id);
785            info.main_dependencies.push(dependency);
786        } else if line.contains("<artifactId>")
787            && !line.contains("<groupId>")
788            && let Some(artifact_id) = extract_xml_content(line, "artifactId")
789        {
790            info.main_dependencies.push(artifact_id);
791        }
792    }
793
794    // Look for dependencies in a more structured way
795    let mut in_dependencies = false;
796    let mut in_test_dependencies = false;
797
798    for line in content.lines() {
799        let line = line.trim();
800
801        if line.contains("<dependencies>") {
802            in_dependencies = true;
803            continue;
804        }
805
806        if line.contains("</dependencies>") {
807            in_dependencies = false;
808            in_test_dependencies = false;
809            continue;
810        }
811
812        if in_dependencies && line.contains("<scope>test</scope>") {
813            in_test_dependencies = true;
814        }
815
816        if in_dependencies
817            && line.contains("<artifactId>")
818            && let Some(artifact_id) = extract_xml_content(line, "artifactId")
819        {
820            if in_test_dependencies && config.include_dev_dependencies {
821                info.dev_dependencies.push(artifact_id);
822            } else if !in_test_dependencies {
823                info.main_dependencies.push(artifact_id);
824            }
825        }
826    }
827}
828
829/// Parse Gradle build.gradle file (Groovy syntax)
830fn parse_gradle_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
831    for line in content.lines() {
832        let line = line.trim();
833
834        // Look for Java version
835        if (line.contains("sourceCompatibility") || line.contains("targetCompatibility"))
836            && let Some(version) = extract_gradle_version(line)
837        {
838            info.version = Some(version);
839        } else if line.contains("JavaVersion.VERSION_")
840            && let Some(pos) = line.find("VERSION_")
841        {
842            let version_part = &line[pos + 8..];
843            if let Some(end) = version_part.find(|c: char| !c.is_numeric() && c != '_') {
844                let version = &version_part[..end].replace('_', ".");
845                info.version = Some(version.to_string());
846            }
847        }
848
849        // Look for dependencies
850        if (line.starts_with("implementation ") || line.starts_with("compile "))
851            && let Some(dep) = extract_gradle_dependency(line)
852        {
853            info.main_dependencies.push(dep);
854        } else if (line.starts_with("testImplementation ") || line.starts_with("testCompile "))
855            && config.include_dev_dependencies
856            && let Some(dep) = extract_gradle_dependency(line)
857        {
858            info.dev_dependencies.push(dep);
859        }
860    }
861}
862
/// Parse Gradle build.gradle.kts file (Kotlin syntax)
///
/// Forwards `content`, `info` and `config` unchanged to `parse_gradle_build`;
/// no Kotlin-specific handling is performed in this function itself.
fn parse_gradle_kts_build(content: &str, info: &mut LanguageInfo, config: &AnalysisConfig) {
    // Kotlin DSL is similar to Groovy but with some syntax differences
    parse_gradle_build(content, info, config); // Reuse the same logic for now
}
868
/// Extract the text between `<tag>` and the next `</tag>` on a single line.
///
/// The closing tag is searched for only *after* the first opening tag, so a
/// stray closing tag earlier on the line does not hide a valid element.
/// The content is trimmed.
///
/// Returns `None` when the opening tag is missing, when no closing tag
/// follows it, or when the element is empty or contains only whitespace.
/// Opening tags that carry attributes (`<tag attr="…">`) are not matched.
fn extract_xml_content(line: &str, tag: &str) -> Option<String> {
    let open_tag = format!("<{}>", tag);
    let close_tag = format!("</{}>", tag);

    let (_, after_open) = line.split_once(open_tag.as_str())?;
    let (content, _) = after_open.split_once(close_tag.as_str())?;

    let content = content.trim();
    (!content.is_empty()).then(|| content.to_string())
}
884
/// Extract the version from a Gradle `name = value` configuration line.
///
/// Recognised values, looked up after the first `=`:
///
/// * an unquoted number: `sourceCompatibility = 11`, `sourceCompatibility = 1.8`
/// * a quoted string: `sourceCompatibility = '11'`, `sourceCompatibility = "11"`
///
/// Returns `None` when the line has no `=`, or when the value is neither a
/// number nor a properly terminated quoted string (for example
/// `JavaVersion.VERSION_17`).
fn extract_gradle_version(line: &str) -> Option<String> {
    let (_, value) = line.split_once('=')?;
    let value = value.trim();

    // Unquoted numeric literal: keep the leading run of digits and dots.
    if value.starts_with(|c: char| c.is_ascii_digit()) {
        let end = value
            .find(|c: char| !c.is_ascii_digit() && c != '.')
            .unwrap_or(value.len());
        return Some(value[..end].trim_end_matches('.').to_string());
    }

    // Quoted literal. `char_indices` yields the byte offset together with the
    // quote character itself, so text with multi-byte characters in front of
    // the quote cannot make us pick the wrong delimiter.
    let (open, quote) = value
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let quoted = &value[open + quote.len_utf8()..];
    let close = quoted.find(quote)?;
    Some(quoted[..close].to_string())
}
896
/// Extract the dependency name from a Gradle dependency declaration line.
///
/// The first single- or double-quoted string on the line is taken as the
/// dependency notation, e.g. `implementation 'group:artifact:version'` or
/// `implementation("group:artifact:version")`.
///
/// For colon-separated coordinates the *artifact* (second component) is
/// returned, regardless of how many components follow it:
/// `group:artifact`, `group:artifact:version` and
/// `group:artifact:version:classifier` all yield `artifact`. A notation
/// without a usable second component is returned unchanged.
///
/// Returns `None` when the line contains no terminated quoted string.
fn extract_gradle_dependency(line: &str) -> Option<String> {
    // `char_indices` gives the byte offset and the quote character together,
    // so slicing stays correct when multi-byte characters precede the quote.
    let (open, quote) = line
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let quoted = &line[open + quote.len_utf8()..];
    let close = quoted.find(quote)?;
    let notation = &quoted[..close];

    // Extract just the artifact name for simplicity
    let artifact = notation
        .split(':')
        .nth(1)
        .filter(|artifact| !artifact.is_empty())
        .unwrap_or(notation);
    Some(artifact.to_string())
}
912
/// Report whether `filename` is one of the manifest or lock file names this
/// module recognises.
///
/// The comparison is an exact, case-sensitive match against the bare file
/// name; a path with directory components never matches.
fn is_manifest_file(filename: &str) -> bool {
    const KNOWN_MANIFESTS: &[&str] = &[
        // Rust
        "Cargo.toml",
        "Cargo.lock",
        // JavaScript / TypeScript
        "package.json",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        // Python
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "setup.py",
        // Go
        "go.mod",
        "go.sum",
        // JVM (Maven / Gradle)
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
    ];

    KNOWN_MANIFESTS.iter().any(|known| *known == filename)
}
935
/// Report whether at least one path in `manifest_files` has a final
/// component equal to one of the names in `target_files`.
///
/// Only the file name is compared (exact, case-sensitive); paths without a
/// file name, or whose file name is not valid UTF-8, are skipped.
fn has_manifest(manifest_files: &[PathBuf], target_files: &[&str]) -> bool {
    manifest_files
        .iter()
        .filter_map(|path| path.file_name()?.to_str())
        .any(|file_name| target_files.contains(&file_name))
}
944
// End-to-end tests for `detect_languages`.
//
// Each test writes a manifest plus one source file into a fresh temporary
// directory, passes exactly those two paths to `detect_languages` with the
// default `AnalysisConfig`, and checks the single language that comes back.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // Cargo.toml (edition 2021) + src/main.rs: expects one result named
    // "Rust", version "1.56+" and confidence above 0.9.
    // NOTE(review): "1.56+" is presumably derived from the edition field —
    // the mapping is outside this module's tests; confirm in the Cargo parser.
    #[test]
    fn test_rust_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create Cargo.toml
        let cargo_toml = r#"
[package]
name = "test-project"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = "1.0"
tokio = "1.0"

[dev-dependencies]
assert_cmd = "2.0"
"#;
        fs::write(root.join("Cargo.toml"), cargo_toml).unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("Cargo.toml"), root.join("src/main.rs")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Rust");
        assert_eq!(languages[0].version, Some("1.56+".to_string()));
        assert!(languages[0].confidence > 0.9);
    }

    // package.json (with an `engines.node` constraint) + index.js: expects one
    // result named "JavaScript" whose version equals the `engines.node` string
    // and whose confidence is above 0.9.
    #[test]
    fn test_javascript_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create package.json
        let package_json = r#"
{
  "name": "test-project",
  "version": "1.0.0",
  "engines": {
    "node": ">=16.0.0"
  },
  "dependencies": {
    "express": "^4.18.0",
    "lodash": "^4.17.21"
  },
  "devDependencies": {
    "jest": "^29.0.0"
  }
}
"#;
        fs::write(root.join("package.json"), package_json).unwrap();
        fs::write(root.join("index.js"), "console.log('hello');").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("package.json"), root.join("index.js")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "JavaScript");
        assert_eq!(languages[0].version, Some(">=16.0.0".to_string()));
        assert!(languages[0].confidence > 0.9);
    }

    // pyproject.toml (PEP 621 `[project]` table) + app.py: expects one result
    // named "Python" whose version equals the `requires-python` string and
    // whose confidence is above 0.8.
    #[test]
    fn test_python_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create pyproject.toml
        let pyproject_toml = r#"
[project]
name = "test-project"
version = "0.1.0"
requires-python = ">=3.8"
dependencies = [
    "flask>=2.0.0",
    "requests>=2.25.0",
    "pandas>=1.3.0"
]

[project.optional-dependencies]
dev = [
    "pytest>=6.0.0",
    "black>=21.0.0"
]
"#;
        fs::write(root.join("pyproject.toml"), pyproject_toml).unwrap();
        fs::write(root.join("app.py"), "print('Hello, World!')").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("pyproject.toml"), root.join("app.py")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Python");
        assert_eq!(languages[0].version, Some(">=3.8".to_string()));
        assert!(languages[0].confidence > 0.8);
    }

    // go.mod (with a `go 1.21` directive) + main.go: expects one result named
    // "Go", version "1.21" and confidence above 0.8.
    #[test]
    fn test_go_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create go.mod
        let go_mod = r#"
module example.com/myproject

go 1.21

require (
    github.com/gin-gonic/gin v1.9.1
    github.com/stretchr/testify v1.8.4
    golang.org/x/time v0.3.0
)
"#;
        fs::write(root.join("go.mod"), go_mod).unwrap();
        fs::write(root.join("main.go"), "package main\n\nfunc main() {}").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("go.mod"), root.join("main.go")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Go");
        assert_eq!(languages[0].version, Some("1.21".to_string()));
        assert!(languages[0].confidence > 0.8);
    }

    // pom.xml (maven.compiler.source/target = 17, one regular and one
    // test-scoped dependency) + App.java: expects one result named "Java",
    // version "17" and confidence above 0.8. Dependency lists are not asserted.
    #[test]
    fn test_java_maven_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create pom.xml
        let pom_xml = r#"
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    
    <groupId>com.example</groupId>
    <artifactId>test-project</artifactId>
    <version>1.0.0</version>
    
    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
    </properties>
    
    <dependencies>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>5.3.21</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
"#;
        fs::create_dir_all(root.join("src/main/java")).unwrap();
        fs::write(root.join("pom.xml"), pom_xml).unwrap();
        fs::write(root.join("src/main/java/App.java"), "public class App {}").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("pom.xml"), root.join("src/main/java/App.java")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Java");
        assert_eq!(languages[0].version, Some("17".to_string()));
        assert!(languages[0].confidence > 0.8);
    }

    // build.gradle.kts + Main.kt: expects one result named "Kotlin" with
    // confidence above 0.8. Neither the version nor the dependencies declared
    // in the fixture are asserted here.
    #[test]
    fn test_kotlin_gradle_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create build.gradle.kts
        let build_gradle_kts = r#"
plugins {
    kotlin("jvm") version "1.9.0"
    application
}

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
}

dependencies {
    implementation("org.jetbrains.kotlin:kotlin-stdlib")
    implementation("io.ktor:ktor-server-core:2.3.2")
    testImplementation("org.jetbrains.kotlin:kotlin-test")
}
"#;
        fs::create_dir_all(root.join("src/main/kotlin")).unwrap();
        fs::write(root.join("build.gradle.kts"), build_gradle_kts).unwrap();
        fs::write(root.join("src/main/kotlin/Main.kt"), "fun main() {}").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![
            root.join("build.gradle.kts"),
            root.join("src/main/kotlin/Main.kt"),
        ];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Kotlin");
        assert!(languages[0].confidence > 0.8);
    }

    // requirements.txt + app.py (no pyproject.toml): expects one result named
    // "Python" with confidence above 0.8. The version is not asserted.
    #[test]
    fn test_python_requirements_txt_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create requirements.txt
        let requirements_txt = r#"
Flask==2.3.2
requests>=2.28.0
pandas==1.5.3
pytest==7.4.0
black>=23.0.0
"#;
        fs::write(root.join("requirements.txt"), requirements_txt).unwrap();
        fs::write(root.join("app.py"), "import flask").unwrap();

        let config = AnalysisConfig::default();
        let files = vec![root.join("requirements.txt"), root.join("app.py")];

        let languages = detect_languages(&files, &config).unwrap();
        assert_eq!(languages.len(), 1);
        assert_eq!(languages[0].name, "Python");
        assert!(languages[0].confidence > 0.8);
    }
}