//! Monorepo detection for `syncable_cli` (`analyzer/monorepo_detector.rs`):
//! finds candidate project directories under a root, decides whether the tree
//! is a monorepo, and aggregates per-project analyses into one summary.

1use crate::analyzer::{
2    AnalysisConfig, ProjectInfo, ProjectCategory, MonorepoAnalysis, TechnologySummary, 
3    ArchitecturePattern, analyze_project_with_config, ProjectAnalysis, AnalysisMetadata
4};
5use crate::error::Result;
6use crate::common::file_utils;
7use std::path::{Path, PathBuf};
8use std::collections::HashSet;
9use serde_json::Value as JsonValue;
10use chrono::Utc;
11
/// Configuration for monorepo detection.
#[derive(Debug, Clone)]
pub struct MonorepoDetectionConfig {
    /// Maximum directory depth to descend while searching for projects.
    pub max_depth: usize,
    /// Minimum confidence threshold for considering a directory as a project.
    // NOTE(review): this field is not read anywhere in this file — confirm it
    // is consumed elsewhere, otherwise it is dead configuration.
    pub min_project_confidence: f32,
    /// Whether to analyze subdirectories that might be projects
    /// (when false, only the root itself is considered).
    pub deep_scan: bool,
    /// Directory names to skip entirely during project detection
    /// (dependency caches, build output, virtualenvs, ...).
    pub exclude_patterns: Vec<String>,
}
24
25impl Default for MonorepoDetectionConfig {
26    fn default() -> Self {
27        Self {
28            max_depth: 3,
29            min_project_confidence: 0.6,
30            deep_scan: true,
31            exclude_patterns: vec![
32                "node_modules".to_string(),
33                ".git".to_string(),
34                "target".to_string(),
35                "build".to_string(),
36                "dist".to_string(),
37                ".next".to_string(),
38                "__pycache__".to_string(),
39                "vendor".to_string(),
40                ".venv".to_string(),
41                "venv".to_string(),
42                ".env".to_string(),
43                "coverage".to_string(),
44                "docs".to_string(),
45                "tmp".to_string(),
46                "temp".to_string(),
47            ],
48        }
49    }
50}
51
52/// Detects if a path contains a monorepo and analyzes all projects within it
53pub fn analyze_monorepo(path: &Path) -> Result<MonorepoAnalysis> {
54    analyze_monorepo_with_config(path, &MonorepoDetectionConfig::default(), &AnalysisConfig::default())
55}
56
57/// Analyzes a monorepo with custom configuration
58pub fn analyze_monorepo_with_config(
59    path: &Path, 
60    monorepo_config: &MonorepoDetectionConfig,
61    analysis_config: &AnalysisConfig,
62) -> Result<MonorepoAnalysis> {
63    let start_time = std::time::Instant::now();
64    let root_path = file_utils::validate_project_path(path)?;
65    
66    log::info!("Starting monorepo analysis of: {}", root_path.display());
67    
68    // Detect potential projects within the path
69    let potential_projects = detect_potential_projects(&root_path, monorepo_config)?;
70    
71    log::debug!("Found {} potential projects", potential_projects.len());
72    
73    // Determine if this is actually a monorepo or just a single project
74    let is_monorepo = determine_if_monorepo(&root_path, &potential_projects, monorepo_config)?;
75    
76    let mut projects = Vec::new();
77    
78    if is_monorepo && potential_projects.len() > 1 {
79        // Analyze each project separately
80        for project_path in potential_projects {
81            if let Ok(project_info) = analyze_individual_project(&root_path, &project_path, analysis_config) {
82                projects.push(project_info);
83            }
84        }
85        
86        // If we didn't find multiple valid projects, treat as single project
87        if projects.len() <= 1 {
88            log::info!("Detected potential monorepo but only found {} valid project(s), treating as single project", projects.len());
89            projects.clear();
90            let single_analysis = analyze_project_with_config(&root_path, analysis_config)?;
91            projects.push(ProjectInfo {
92                path: PathBuf::from("."),
93                name: extract_project_name(&root_path, &single_analysis),
94                project_category: determine_project_category(&single_analysis, &root_path),
95                analysis: single_analysis,
96            });
97        }
98    } else {
99        // Single project analysis
100        let single_analysis = analyze_project_with_config(&root_path, analysis_config)?;
101        projects.push(ProjectInfo {
102            path: PathBuf::from("."),
103            name: extract_project_name(&root_path, &single_analysis),
104            project_category: determine_project_category(&single_analysis, &root_path),
105            analysis: single_analysis,
106        });
107    }
108    
109    // Generate technology summary
110    let technology_summary = generate_technology_summary(&projects);
111    
112    let duration = start_time.elapsed();
113    let metadata = AnalysisMetadata {
114        timestamp: Utc::now().to_rfc3339(),
115        analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
116        analysis_duration_ms: duration.as_millis() as u64,
117        files_analyzed: projects.iter().map(|p| p.analysis.analysis_metadata.files_analyzed).sum(),
118        confidence_score: calculate_overall_confidence(&projects),
119    };
120    
121    Ok(MonorepoAnalysis {
122        root_path,
123        is_monorepo: projects.len() > 1,
124        projects,
125        metadata,
126        technology_summary,
127    })
128}
129
130/// Detects potential project directories within a given path
131fn detect_potential_projects(
132    root_path: &Path, 
133    config: &MonorepoDetectionConfig
134) -> Result<Vec<PathBuf>> {
135    let mut potential_projects = Vec::new();
136    
137    // Check if root itself is a project
138    if is_project_directory(root_path)? {
139        potential_projects.push(root_path.to_path_buf());
140    }
141    
142    if config.deep_scan {
143        // Recursively check subdirectories
144        scan_for_projects(root_path, root_path, &mut potential_projects, 0, config)?;
145    }
146    
147    // Remove duplicates and sort by path depth (shallower first)
148    potential_projects.sort_by_key(|p| p.components().count());
149    potential_projects.dedup();
150    
151    // Filter out nested projects (prefer parent projects)
152    filter_nested_projects(potential_projects)
153}
154
155/// Recursively scans for project directories
156fn scan_for_projects(
157    root_path: &Path,
158    current_path: &Path,
159    projects: &mut Vec<PathBuf>,
160    depth: usize,
161    config: &MonorepoDetectionConfig,
162) -> Result<()> {
163    if depth >= config.max_depth {
164        return Ok(());
165    }
166    
167    if let Ok(entries) = std::fs::read_dir(current_path) {
168        for entry in entries.flatten() {
169            if !entry.file_type()?.is_dir() {
170                continue;
171            }
172            
173            let dir_name = entry.file_name().to_string_lossy().to_string();
174            let dir_path = entry.path();
175            
176            // Skip excluded patterns
177            if should_exclude_directory(&dir_name, config) {
178                continue;
179            }
180            
181            // Check if this directory looks like a project
182            if is_project_directory(&dir_path)? {
183                projects.push(dir_path.clone());
184            }
185            
186            // Continue scanning subdirectories
187            scan_for_projects(root_path, &dir_path, projects, depth + 1, config)?;
188        }
189    }
190    
191    Ok(())
192}
193
194/// Determines if a directory should be excluded from scanning
195fn should_exclude_directory(dir_name: &str, config: &MonorepoDetectionConfig) -> bool {
196    // Skip hidden directories
197    if dir_name.starts_with('.') {
198        return true;
199    }
200    
201    // Skip excluded patterns
202    config.exclude_patterns.iter().any(|pattern| dir_name == pattern)
203}
204
205/// Checks if a directory appears to be a project directory
206fn is_project_directory(path: &Path) -> Result<bool> {
207    // Common project indicator files
208    let project_indicators = [
209        // JavaScript/TypeScript
210        "package.json",
211        // Rust
212        "Cargo.toml",
213        // Python
214        "requirements.txt", "pyproject.toml", "Pipfile", "setup.py",
215        // Go
216        "go.mod",
217        // Java/Kotlin
218        "pom.xml", "build.gradle", "build.gradle.kts",
219        // .NET
220        "*.csproj", "*.fsproj", "*.vbproj",
221        // Ruby
222        "Gemfile",
223        // PHP
224        "composer.json",
225        // Docker
226        "Dockerfile",
227    ];
228    
229    // Check for manifest files
230    for indicator in &project_indicators {
231        if indicator.contains('*') {
232            // Handle glob patterns
233            if let Ok(entries) = std::fs::read_dir(path) {
234                for entry in entries.flatten() {
235                    if let Some(file_name) = entry.file_name().to_str() {
236                        let pattern = indicator.replace('*', "");
237                        if file_name.ends_with(&pattern) {
238                            return Ok(true);
239                        }
240                    }
241                }
242            }
243        } else {
244            if path.join(indicator).exists() {
245                return Ok(true);
246            }
247        }
248    }
249    
250    // Check for common source directories with code
251    let source_dirs = ["src", "lib", "app", "pages", "components"];
252    for src_dir in &source_dirs {
253        let src_path = path.join(src_dir);
254        if src_path.is_dir() && directory_contains_code(&src_path)? {
255            return Ok(true);
256        }
257    }
258    
259    Ok(false)
260}
261
262/// Checks if a directory contains source code files
263fn directory_contains_code(path: &Path) -> Result<bool> {
264    let code_extensions = ["js", "ts", "jsx", "tsx", "py", "rs", "go", "java", "kt", "cs", "rb", "php"];
265    
266    if let Ok(entries) = std::fs::read_dir(path) {
267        for entry in entries.flatten() {
268            if let Some(extension) = entry.path().extension() {
269                if let Some(ext_str) = extension.to_str() {
270                    if code_extensions.contains(&ext_str) {
271                        return Ok(true);
272                    }
273                }
274            }
275            
276            // Recursively check subdirectories (limited depth)
277            if entry.file_type()?.is_dir() {
278                if directory_contains_code(&entry.path())? {
279                    return Ok(true);
280                }
281            }
282        }
283    }
284    
285    Ok(false)
286}
287
288/// Filters out nested projects, keeping only top-level ones
289fn filter_nested_projects(mut projects: Vec<PathBuf>) -> Result<Vec<PathBuf>> {
290    projects.sort_by_key(|p| p.components().count());
291    
292    let mut filtered = Vec::new();
293    
294    for project in projects {
295        let is_nested = filtered.iter().any(|parent: &PathBuf| {
296            project.starts_with(parent) && project != *parent
297        });
298        
299        if !is_nested {
300            filtered.push(project);
301        }
302    }
303    
304    Ok(filtered)
305}
306
307/// Determines if the detected projects constitute a monorepo
308fn determine_if_monorepo(
309    root_path: &Path,
310    potential_projects: &[PathBuf],
311    _config: &MonorepoDetectionConfig,
312) -> Result<bool> {
313    // If we have multiple project directories, likely a monorepo
314    if potential_projects.len() > 1 {
315        return Ok(true);
316    }
317    
318    // Check for common monorepo indicators
319    let monorepo_indicators = [
320        "lerna.json",           // Lerna
321        "nx.json",              // Nx
322        "rush.json",            // Rush
323        "pnpm-workspace.yaml",  // pnpm workspaces
324        "yarn.lock",            // Yarn workspaces (need to check package.json)
325        "packages",             // Common packages directory
326        "apps",                 // Common apps directory
327        "services",             // Common services directory
328        "libs",                 // Common libs directory
329    ];
330    
331    for indicator in &monorepo_indicators {
332        if root_path.join(indicator).exists() {
333            return Ok(true);
334        }
335    }
336    
337    // Check package.json for workspace configuration
338    let package_json_path = root_path.join("package.json");
339    if package_json_path.exists() {
340        if let Ok(content) = std::fs::read_to_string(&package_json_path) {
341            if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
342                // Check for workspaces
343                if package_json.get("workspaces").is_some() {
344                    return Ok(true);
345                }
346            }
347        }
348    }
349    
350    Ok(false)
351}
352
353/// Analyzes an individual project within a monorepo
354fn analyze_individual_project(
355    root_path: &Path,
356    project_path: &Path,
357    config: &AnalysisConfig,
358) -> Result<ProjectInfo> {
359    log::debug!("Analyzing individual project: {}", project_path.display());
360    
361    let analysis = analyze_project_with_config(project_path, config)?;
362    let relative_path = project_path.strip_prefix(root_path)
363        .unwrap_or(project_path)
364        .to_path_buf();
365    
366    let name = extract_project_name(project_path, &analysis);
367    let category = determine_project_category(&analysis, project_path);
368    
369    Ok(ProjectInfo {
370        path: relative_path,
371        name,
372        project_category: category,
373        analysis,
374    })
375}
376
377/// Extracts a meaningful project name from path and analysis
378fn extract_project_name(project_path: &Path, _analysis: &ProjectAnalysis) -> String {
379    // Try to get name from package.json
380    let package_json_path = project_path.join("package.json");
381    if package_json_path.exists() {
382        if let Ok(content) = std::fs::read_to_string(&package_json_path) {
383            if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
384                if let Some(name) = package_json.get("name").and_then(|n| n.as_str()) {
385                    return name.to_string();
386                }
387            }
388        }
389    }
390    
391    // Try to get name from Cargo.toml
392    let cargo_toml_path = project_path.join("Cargo.toml");
393    if cargo_toml_path.exists() {
394        if let Ok(content) = std::fs::read_to_string(&cargo_toml_path) {
395            if let Ok(cargo_toml) = toml::from_str::<toml::Value>(&content) {
396                if let Some(name) = cargo_toml.get("package")
397                    .and_then(|p| p.get("name"))
398                    .and_then(|n| n.as_str()) {
399                    return name.to_string();
400                }
401            }
402        }
403    }
404    
405    // Try to get name from pyproject.toml
406    let pyproject_toml_path = project_path.join("pyproject.toml");
407    if pyproject_toml_path.exists() {
408        if let Ok(content) = std::fs::read_to_string(&pyproject_toml_path) {
409            if let Ok(pyproject) = toml::from_str::<toml::Value>(&content) {
410                if let Some(name) = pyproject.get("project")
411                    .and_then(|p| p.get("name"))
412                    .and_then(|n| n.as_str()) {
413                    return name.to_string();
414                } else if let Some(name) = pyproject.get("tool")
415                    .and_then(|t| t.get("poetry"))
416                    .and_then(|p| p.get("name"))
417                    .and_then(|n| n.as_str()) {
418                    return name.to_string();
419                }
420            }
421        }
422    }
423    
424    // Fall back to directory name
425    project_path.file_name()
426        .and_then(|n| n.to_str())
427        .unwrap_or("unknown")
428        .to_string()
429}
430
/// Determines the category of a project based on its analysis.
///
/// Two-tier heuristic: the directory name is consulted first and wins
/// outright when it matches; only an inconclusive name falls through to the
/// detected technologies. Arm order matters in both tiers — e.g. a directory
/// named "web-api" matches the frontend arm ("web") before the api arm is
/// ever tested.
fn determine_project_category(analysis: &ProjectAnalysis, project_path: &Path) -> ProjectCategory {
    let dir_name = project_path.file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("")
        .to_lowercase();
    
    // Check directory name patterns first.
    // Note: substring matching is intentionally loose ("lib" also matches
    // "library"; "doc" also matches "docs"), so some guards are redundant
    // but harmless.
    let category_from_name = match dir_name.as_str() {
        name if name.contains("frontend") || name.contains("client") || name.contains("web") => Some(ProjectCategory::Frontend),
        name if name.contains("backend") || name.contains("server") => Some(ProjectCategory::Backend),
        name if name.contains("api") => Some(ProjectCategory::Api),
        name if name.contains("service") => Some(ProjectCategory::Service),
        name if name.contains("lib") || name.contains("library") => Some(ProjectCategory::Library),
        name if name.contains("tool") || name.contains("cli") => Some(ProjectCategory::Tool),
        name if name.contains("docs") || name.contains("doc") => Some(ProjectCategory::Documentation),
        name if name.contains("infra") || name.contains("deploy") => Some(ProjectCategory::Infrastructure),
        _ => None,
    };
    
    // If we found a category from the directory name, return it
    if let Some(category) = category_from_name {
        return category;
    }
    
    // Analyze technologies to determine category. Matching here is against
    // exact technology names produced by the analyzer.
    let has_frontend_tech = analysis.technologies.iter().any(|t| {
        matches!(t.name.as_str(), 
            "React" | "Vue.js" | "Angular" | "Next.js" | "Nuxt.js" | "Svelte" | 
            "Astro" | "Gatsby" | "Vite" | "Webpack" | "Parcel"
        )
    });
    
    let has_backend_tech = analysis.technologies.iter().any(|t| {
        matches!(t.name.as_str(),
            "Express.js" | "FastAPI" | "Django" | "Flask" | "Actix Web" | "Rocket" |
            "Spring Boot" | "Gin" | "Echo" | "Fiber" | "ASP.NET"
        )
    });
    
    let has_api_tech = analysis.technologies.iter().any(|t| {
        matches!(t.name.as_str(),
            "REST API" | "GraphQL" | "gRPC" | "FastAPI" | "Express.js"
        )
    });
    
    let has_database = analysis.technologies.iter().any(|t| {
        matches!(t.category, crate::analyzer::TechnologyCategory::Database)
    });
    
    // Exclusive frontend/backend beats the mixed cases; a project with both
    // (or explicit API tech, or backend + database) is classed as an API.
    if has_frontend_tech && !has_backend_tech {
        ProjectCategory::Frontend
    } else if has_backend_tech && !has_frontend_tech {
        ProjectCategory::Backend
    } else if has_api_tech || (has_backend_tech && has_database) {
        ProjectCategory::Api
    } else if matches!(analysis.project_type, crate::analyzer::ProjectType::Library) {
        ProjectCategory::Library
    } else if matches!(analysis.project_type, crate::analyzer::ProjectType::CliTool) {
        ProjectCategory::Tool
    } else {
        ProjectCategory::Unknown
    }
}
495
496/// Generates a summary of technologies across all projects
497fn generate_technology_summary(projects: &[ProjectInfo]) -> TechnologySummary {
498    let mut all_languages = HashSet::new();
499    let mut all_frameworks = HashSet::new();
500    let mut all_databases = HashSet::new();
501    
502    for project in projects {
503        // Collect languages
504        for lang in &project.analysis.languages {
505            all_languages.insert(lang.name.clone());
506        }
507        
508        // Collect technologies
509        for tech in &project.analysis.technologies {
510            match tech.category {
511                crate::analyzer::TechnologyCategory::FrontendFramework |
512                crate::analyzer::TechnologyCategory::BackendFramework |
513                crate::analyzer::TechnologyCategory::MetaFramework => {
514                    all_frameworks.insert(tech.name.clone());
515                }
516                crate::analyzer::TechnologyCategory::Database => {
517                    all_databases.insert(tech.name.clone());
518                }
519                _ => {}
520            }
521        }
522    }
523    
524    let architecture_pattern = determine_architecture_pattern(projects);
525    
526    TechnologySummary {
527        languages: all_languages.into_iter().collect(),
528        frameworks: all_frameworks.into_iter().collect(),
529        databases: all_databases.into_iter().collect(),
530        total_projects: projects.len(),
531        architecture_pattern,
532    }
533}
534
535/// Determines the overall architecture pattern
536fn determine_architecture_pattern(projects: &[ProjectInfo]) -> ArchitecturePattern {
537    if projects.len() == 1 {
538        return ArchitecturePattern::Monolithic;
539    }
540    
541    let has_frontend = projects.iter().any(|p| p.project_category == ProjectCategory::Frontend);
542    let has_backend = projects.iter().any(|p| matches!(p.project_category, ProjectCategory::Backend | ProjectCategory::Api));
543    let service_count = projects.iter().filter(|p| p.project_category == ProjectCategory::Service).count();
544    
545    if service_count >= 2 {
546        ArchitecturePattern::Microservices
547    } else if has_frontend && has_backend {
548        ArchitecturePattern::Fullstack
549    } else if projects.iter().all(|p| p.project_category == ProjectCategory::Api) {
550        ArchitecturePattern::ApiFirst
551    } else {
552        ArchitecturePattern::Mixed
553    }
554}
555
556/// Calculates overall confidence score across all projects
557fn calculate_overall_confidence(projects: &[ProjectInfo]) -> f32 {
558    if projects.is_empty() {
559        return 0.0;
560    }
561    
562    let total_confidence: f32 = projects.iter()
563        .map(|p| p.analysis.analysis_metadata.confidence_score)
564        .sum();
565    
566    total_confidence / projects.len() as f32
567}
568
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;

    /// A lone package.json + entry file must be reported as a single
    /// project whose name comes from the manifest's `name` field.
    #[test]
    fn test_single_project_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        
        // Create a simple Node.js project
        fs::write(root.join("package.json"), r#"{"name": "test-app"}"#).unwrap();
        fs::write(root.join("index.js"), "console.log('hello');").unwrap();
        
        let analysis = analyze_monorepo(root).unwrap();
        
        assert!(!analysis.is_monorepo);
        assert_eq!(analysis.projects.len(), 1);
        assert_eq!(analysis.projects[0].name, "test-app");
    }
    
    /// A root with `workspaces` in package.json plus React frontend and
    /// Express backend subdirectories must be detected as a monorepo and
    /// classified as a Fullstack architecture.
    #[test]
    fn test_monorepo_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        
        // Create frontend project
        let frontend_dir = root.join("frontend");
        fs::create_dir_all(&frontend_dir).unwrap();
        fs::write(frontend_dir.join("package.json"), r#"{"name": "frontend-app", "dependencies": {"react": "^18.0.0"}}"#).unwrap();
        
        // Create backend project
        let backend_dir = root.join("backend");
        fs::create_dir_all(&backend_dir).unwrap();
        fs::write(backend_dir.join("package.json"), r#"{"name": "backend-api", "dependencies": {"express": "^4.18.0"}}"#).unwrap();
        
        // Create root package.json with workspaces
        fs::write(root.join("package.json"), r#"{"name": "monorepo", "workspaces": ["frontend", "backend"]}"#).unwrap();
        
        let analysis = analyze_monorepo(root).unwrap();
        
        assert!(analysis.is_monorepo);
        assert_eq!(analysis.projects.len(), 2);
        assert_eq!(analysis.technology_summary.architecture_pattern, ArchitecturePattern::Fullstack);
    }
}
615}