1use crate::analyzer::{
2 AnalysisConfig, ProjectInfo, ProjectCategory, MonorepoAnalysis, TechnologySummary,
3 ArchitecturePattern, analyze_project_with_config, ProjectAnalysis, AnalysisMetadata
4};
5use crate::error::Result;
6use crate::common::file_utils;
7use std::path::{Path, PathBuf};
8use std::collections::HashSet;
9use serde_json::Value as JsonValue;
10use chrono::Utc;
11
/// Tuning knobs for monorepo/project detection.
///
/// Defaults (see the `Default` impl below) are conservative: depth-limited
/// recursive scanning with common build/dependency directories excluded.
#[derive(Debug, Clone)]
pub struct MonorepoDetectionConfig {
    /// Maximum directory depth to descend while scanning for nested projects.
    pub max_depth: usize,
    /// Minimum confidence score for accepting a detected project.
    /// NOTE(review): not consulted anywhere in this module — presumably used
    /// by callers; confirm before removing.
    pub min_project_confidence: f32,
    /// Whether to recursively scan subdirectories for additional projects
    /// (when false, only the root itself is considered).
    pub deep_scan: bool,
    /// Directory names skipped entirely during scanning (exact-name matches).
    pub exclude_patterns: Vec<String>,
}
24
25impl Default for MonorepoDetectionConfig {
26 fn default() -> Self {
27 Self {
28 max_depth: 3,
29 min_project_confidence: 0.6,
30 deep_scan: true,
31 exclude_patterns: vec![
32 "node_modules".to_string(),
33 ".git".to_string(),
34 "target".to_string(),
35 "build".to_string(),
36 "dist".to_string(),
37 ".next".to_string(),
38 "__pycache__".to_string(),
39 "vendor".to_string(),
40 ".venv".to_string(),
41 "venv".to_string(),
42 ".env".to_string(),
43 "coverage".to_string(),
44 "docs".to_string(),
45 "tmp".to_string(),
46 "temp".to_string(),
47 ],
48 }
49 }
50}
51
/// Analyzes the repository at `path`, detecting whether it is a monorepo,
/// using default detection and analysis settings.
///
/// Convenience wrapper around [`analyze_monorepo_with_config`].
///
/// # Errors
/// Propagates any error from the underlying analysis (e.g. path validation).
pub fn analyze_monorepo(path: &Path) -> Result<MonorepoAnalysis> {
    analyze_monorepo_with_config(path, &MonorepoDetectionConfig::default(), &AnalysisConfig::default())
}
56
57pub fn analyze_monorepo_with_config(
59 path: &Path,
60 monorepo_config: &MonorepoDetectionConfig,
61 analysis_config: &AnalysisConfig,
62) -> Result<MonorepoAnalysis> {
63 let start_time = std::time::Instant::now();
64 let root_path = file_utils::validate_project_path(path)?;
65
66 log::info!("Starting monorepo analysis of: {}", root_path.display());
67
68 let potential_projects = detect_potential_projects(&root_path, monorepo_config)?;
70
71 log::debug!("Found {} potential projects", potential_projects.len());
72
73 let is_monorepo = determine_if_monorepo(&root_path, &potential_projects, monorepo_config)?;
75
76 let mut projects = Vec::new();
77
78 if is_monorepo && potential_projects.len() > 1 {
79 for project_path in potential_projects {
81 if let Ok(project_info) = analyze_individual_project(&root_path, &project_path, analysis_config) {
82 projects.push(project_info);
83 }
84 }
85
86 if projects.len() <= 1 {
88 log::info!("Detected potential monorepo but only found {} valid project(s), treating as single project", projects.len());
89 projects.clear();
90 let single_analysis = analyze_project_with_config(&root_path, analysis_config)?;
91 projects.push(ProjectInfo {
92 path: PathBuf::from("."),
93 name: extract_project_name(&root_path, &single_analysis),
94 project_category: determine_project_category(&single_analysis, &root_path),
95 analysis: single_analysis,
96 });
97 }
98 } else {
99 let single_analysis = analyze_project_with_config(&root_path, analysis_config)?;
101 projects.push(ProjectInfo {
102 path: PathBuf::from("."),
103 name: extract_project_name(&root_path, &single_analysis),
104 project_category: determine_project_category(&single_analysis, &root_path),
105 analysis: single_analysis,
106 });
107 }
108
109 let technology_summary = generate_technology_summary(&projects);
111
112 let duration = start_time.elapsed();
113 let metadata = AnalysisMetadata {
114 timestamp: Utc::now().to_rfc3339(),
115 analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
116 analysis_duration_ms: duration.as_millis() as u64,
117 files_analyzed: projects.iter().map(|p| p.analysis.analysis_metadata.files_analyzed).sum(),
118 confidence_score: calculate_overall_confidence(&projects),
119 };
120
121 Ok(MonorepoAnalysis {
122 root_path,
123 is_monorepo: projects.len() > 1,
124 projects,
125 metadata,
126 technology_summary,
127 })
128}
129
130fn detect_potential_projects(
132 root_path: &Path,
133 config: &MonorepoDetectionConfig
134) -> Result<Vec<PathBuf>> {
135 let mut potential_projects = Vec::new();
136
137 if is_project_directory(root_path)? {
139 potential_projects.push(root_path.to_path_buf());
140 }
141
142 if config.deep_scan {
143 scan_for_projects(root_path, root_path, &mut potential_projects, 0, config)?;
145 }
146
147 potential_projects.sort_by_key(|p| p.components().count());
149 potential_projects.dedup();
150
151 filter_nested_projects(potential_projects)
153}
154
155fn scan_for_projects(
157 root_path: &Path,
158 current_path: &Path,
159 projects: &mut Vec<PathBuf>,
160 depth: usize,
161 config: &MonorepoDetectionConfig,
162) -> Result<()> {
163 if depth >= config.max_depth {
164 return Ok(());
165 }
166
167 if let Ok(entries) = std::fs::read_dir(current_path) {
168 for entry in entries.flatten() {
169 if !entry.file_type()?.is_dir() {
170 continue;
171 }
172
173 let dir_name = entry.file_name().to_string_lossy().to_string();
174 let dir_path = entry.path();
175
176 if should_exclude_directory(&dir_name, config) {
178 continue;
179 }
180
181 if is_project_directory(&dir_path)? {
183 projects.push(dir_path.clone());
184 }
185
186 scan_for_projects(root_path, &dir_path, projects, depth + 1, config)?;
188 }
189 }
190
191 Ok(())
192}
193
194fn should_exclude_directory(dir_name: &str, config: &MonorepoDetectionConfig) -> bool {
196 if dir_name.starts_with('.') {
198 return true;
199 }
200
201 config.exclude_patterns.iter().any(|pattern| dir_name == pattern)
203}
204
205fn is_project_directory(path: &Path) -> Result<bool> {
207 let project_indicators = [
209 "package.json",
211 "Cargo.toml",
213 "requirements.txt", "pyproject.toml", "Pipfile", "setup.py",
215 "go.mod",
217 "pom.xml", "build.gradle", "build.gradle.kts",
219 "*.csproj", "*.fsproj", "*.vbproj",
221 "Gemfile",
223 "composer.json",
225 "Dockerfile",
227 ];
228
229 for indicator in &project_indicators {
231 if indicator.contains('*') {
232 if let Ok(entries) = std::fs::read_dir(path) {
234 for entry in entries.flatten() {
235 if let Some(file_name) = entry.file_name().to_str() {
236 let pattern = indicator.replace('*', "");
237 if file_name.ends_with(&pattern) {
238 return Ok(true);
239 }
240 }
241 }
242 }
243 } else {
244 if path.join(indicator).exists() {
245 return Ok(true);
246 }
247 }
248 }
249
250 let source_dirs = ["src", "lib", "app", "pages", "components"];
252 for src_dir in &source_dirs {
253 let src_path = path.join(src_dir);
254 if src_path.is_dir() && directory_contains_code(&src_path)? {
255 return Ok(true);
256 }
257 }
258
259 Ok(false)
260}
261
262fn directory_contains_code(path: &Path) -> Result<bool> {
264 let code_extensions = ["js", "ts", "jsx", "tsx", "py", "rs", "go", "java", "kt", "cs", "rb", "php"];
265
266 if let Ok(entries) = std::fs::read_dir(path) {
267 for entry in entries.flatten() {
268 if let Some(extension) = entry.path().extension() {
269 if let Some(ext_str) = extension.to_str() {
270 if code_extensions.contains(&ext_str) {
271 return Ok(true);
272 }
273 }
274 }
275
276 if entry.file_type()?.is_dir() {
278 if directory_contains_code(&entry.path())? {
279 return Ok(true);
280 }
281 }
282 }
283 }
284
285 Ok(false)
286}
287
288fn filter_nested_projects(mut projects: Vec<PathBuf>) -> Result<Vec<PathBuf>> {
290 projects.sort_by_key(|p| p.components().count());
291
292 let mut filtered = Vec::new();
293
294 for project in projects {
295 let is_nested = filtered.iter().any(|parent: &PathBuf| {
296 project.starts_with(parent) && project != *parent
297 });
298
299 if !is_nested {
300 filtered.push(project);
301 }
302 }
303
304 Ok(filtered)
305}
306
307fn determine_if_monorepo(
309 root_path: &Path,
310 potential_projects: &[PathBuf],
311 _config: &MonorepoDetectionConfig,
312) -> Result<bool> {
313 if potential_projects.len() > 1 {
315 return Ok(true);
316 }
317
318 let monorepo_indicators = [
320 "lerna.json", "nx.json", "rush.json", "pnpm-workspace.yaml", "yarn.lock", "packages", "apps", "services", "libs", ];
330
331 for indicator in &monorepo_indicators {
332 if root_path.join(indicator).exists() {
333 return Ok(true);
334 }
335 }
336
337 let package_json_path = root_path.join("package.json");
339 if package_json_path.exists() {
340 if let Ok(content) = std::fs::read_to_string(&package_json_path) {
341 if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
342 if package_json.get("workspaces").is_some() {
344 return Ok(true);
345 }
346 }
347 }
348 }
349
350 Ok(false)
351}
352
353fn analyze_individual_project(
355 root_path: &Path,
356 project_path: &Path,
357 config: &AnalysisConfig,
358) -> Result<ProjectInfo> {
359 log::debug!("Analyzing individual project: {}", project_path.display());
360
361 let analysis = analyze_project_with_config(project_path, config)?;
362 let relative_path = project_path.strip_prefix(root_path)
363 .unwrap_or(project_path)
364 .to_path_buf();
365
366 let name = extract_project_name(project_path, &analysis);
367 let category = determine_project_category(&analysis, project_path);
368
369 Ok(ProjectInfo {
370 path: relative_path,
371 name,
372 project_category: category,
373 analysis,
374 })
375}
376
377fn extract_project_name(project_path: &Path, _analysis: &ProjectAnalysis) -> String {
379 let package_json_path = project_path.join("package.json");
381 if package_json_path.exists() {
382 if let Ok(content) = std::fs::read_to_string(&package_json_path) {
383 if let Ok(package_json) = serde_json::from_str::<JsonValue>(&content) {
384 if let Some(name) = package_json.get("name").and_then(|n| n.as_str()) {
385 return name.to_string();
386 }
387 }
388 }
389 }
390
391 let cargo_toml_path = project_path.join("Cargo.toml");
393 if cargo_toml_path.exists() {
394 if let Ok(content) = std::fs::read_to_string(&cargo_toml_path) {
395 if let Ok(cargo_toml) = toml::from_str::<toml::Value>(&content) {
396 if let Some(name) = cargo_toml.get("package")
397 .and_then(|p| p.get("name"))
398 .and_then(|n| n.as_str()) {
399 return name.to_string();
400 }
401 }
402 }
403 }
404
405 let pyproject_toml_path = project_path.join("pyproject.toml");
407 if pyproject_toml_path.exists() {
408 if let Ok(content) = std::fs::read_to_string(&pyproject_toml_path) {
409 if let Ok(pyproject) = toml::from_str::<toml::Value>(&content) {
410 if let Some(name) = pyproject.get("project")
411 .and_then(|p| p.get("name"))
412 .and_then(|n| n.as_str()) {
413 return name.to_string();
414 } else if let Some(name) = pyproject.get("tool")
415 .and_then(|t| t.get("poetry"))
416 .and_then(|p| p.get("name"))
417 .and_then(|n| n.as_str()) {
418 return name.to_string();
419 }
420 }
421 }
422 }
423
424 project_path.file_name()
426 .and_then(|n| n.to_str())
427 .unwrap_or("unknown")
428 .to_string()
429}
430
431fn determine_project_category(analysis: &ProjectAnalysis, project_path: &Path) -> ProjectCategory {
433 let dir_name = project_path.file_name()
434 .and_then(|n| n.to_str())
435 .unwrap_or("")
436 .to_lowercase();
437
438 let category_from_name = match dir_name.as_str() {
440 name if name.contains("frontend") || name.contains("client") || name.contains("web") => Some(ProjectCategory::Frontend),
441 name if name.contains("backend") || name.contains("server") => Some(ProjectCategory::Backend),
442 name if name.contains("api") => Some(ProjectCategory::Api),
443 name if name.contains("service") => Some(ProjectCategory::Service),
444 name if name.contains("lib") || name.contains("library") => Some(ProjectCategory::Library),
445 name if name.contains("tool") || name.contains("cli") => Some(ProjectCategory::Tool),
446 name if name.contains("docs") || name.contains("doc") => Some(ProjectCategory::Documentation),
447 name if name.contains("infra") || name.contains("deploy") => Some(ProjectCategory::Infrastructure),
448 _ => None,
449 };
450
451 if let Some(category) = category_from_name {
453 return category;
454 }
455
456 let has_frontend_tech = analysis.technologies.iter().any(|t| {
458 matches!(t.name.as_str(),
459 "React" | "Vue.js" | "Angular" | "Next.js" | "Nuxt.js" | "Svelte" |
460 "Astro" | "Gatsby" | "Vite" | "Webpack" | "Parcel"
461 )
462 });
463
464 let has_backend_tech = analysis.technologies.iter().any(|t| {
465 matches!(t.name.as_str(),
466 "Express.js" | "FastAPI" | "Django" | "Flask" | "Actix Web" | "Rocket" |
467 "Spring Boot" | "Gin" | "Echo" | "Fiber" | "ASP.NET"
468 )
469 });
470
471 let has_api_tech = analysis.technologies.iter().any(|t| {
472 matches!(t.name.as_str(),
473 "REST API" | "GraphQL" | "gRPC" | "FastAPI" | "Express.js"
474 )
475 });
476
477 let has_database = analysis.technologies.iter().any(|t| {
478 matches!(t.category, crate::analyzer::TechnologyCategory::Database)
479 });
480
481 if has_frontend_tech && !has_backend_tech {
482 ProjectCategory::Frontend
483 } else if has_backend_tech && !has_frontend_tech {
484 ProjectCategory::Backend
485 } else if has_api_tech || (has_backend_tech && has_database) {
486 ProjectCategory::Api
487 } else if matches!(analysis.project_type, crate::analyzer::ProjectType::Library) {
488 ProjectCategory::Library
489 } else if matches!(analysis.project_type, crate::analyzer::ProjectType::CliTool) {
490 ProjectCategory::Tool
491 } else {
492 ProjectCategory::Unknown
493 }
494}
495
496fn generate_technology_summary(projects: &[ProjectInfo]) -> TechnologySummary {
498 let mut all_languages = HashSet::new();
499 let mut all_frameworks = HashSet::new();
500 let mut all_databases = HashSet::new();
501
502 for project in projects {
503 for lang in &project.analysis.languages {
505 all_languages.insert(lang.name.clone());
506 }
507
508 for tech in &project.analysis.technologies {
510 match tech.category {
511 crate::analyzer::TechnologyCategory::FrontendFramework |
512 crate::analyzer::TechnologyCategory::BackendFramework |
513 crate::analyzer::TechnologyCategory::MetaFramework => {
514 all_frameworks.insert(tech.name.clone());
515 }
516 crate::analyzer::TechnologyCategory::Database => {
517 all_databases.insert(tech.name.clone());
518 }
519 _ => {}
520 }
521 }
522 }
523
524 let architecture_pattern = determine_architecture_pattern(projects);
525
526 TechnologySummary {
527 languages: all_languages.into_iter().collect(),
528 frameworks: all_frameworks.into_iter().collect(),
529 databases: all_databases.into_iter().collect(),
530 total_projects: projects.len(),
531 architecture_pattern,
532 }
533}
534
535fn determine_architecture_pattern(projects: &[ProjectInfo]) -> ArchitecturePattern {
537 if projects.len() == 1 {
538 return ArchitecturePattern::Monolithic;
539 }
540
541 let has_frontend = projects.iter().any(|p| p.project_category == ProjectCategory::Frontend);
542 let has_backend = projects.iter().any(|p| matches!(p.project_category, ProjectCategory::Backend | ProjectCategory::Api));
543 let service_count = projects.iter().filter(|p| p.project_category == ProjectCategory::Service).count();
544
545 if service_count >= 2 {
546 ArchitecturePattern::Microservices
547 } else if has_frontend && has_backend {
548 ArchitecturePattern::Fullstack
549 } else if projects.iter().all(|p| p.project_category == ProjectCategory::Api) {
550 ArchitecturePattern::ApiFirst
551 } else {
552 ArchitecturePattern::Mixed
553 }
554}
555
556fn calculate_overall_confidence(projects: &[ProjectInfo]) -> f32 {
558 if projects.is_empty() {
559 return 0.0;
560 }
561
562 let total_confidence: f32 = projects.iter()
563 .map(|p| p.analysis.analysis_metadata.confidence_score)
564 .sum();
565
566 total_confidence / projects.len() as f32
567}
568
569#[cfg(test)]
570mod tests {
571 use super::*;
572 use tempfile::TempDir;
573 use std::fs;
574
575 #[test]
576 fn test_single_project_detection() {
577 let temp_dir = TempDir::new().unwrap();
578 let root = temp_dir.path();
579
580 fs::write(root.join("package.json"), r#"{"name": "test-app"}"#).unwrap();
582 fs::write(root.join("index.js"), "console.log('hello');").unwrap();
583
584 let analysis = analyze_monorepo(root).unwrap();
585
586 assert!(!analysis.is_monorepo);
587 assert_eq!(analysis.projects.len(), 1);
588 assert_eq!(analysis.projects[0].name, "test-app");
589 }
590
591 #[test]
592 fn test_monorepo_detection() {
593 let temp_dir = TempDir::new().unwrap();
594 let root = temp_dir.path();
595
596 let frontend_dir = root.join("frontend");
598 fs::create_dir_all(&frontend_dir).unwrap();
599 fs::write(frontend_dir.join("package.json"), r#"{"name": "frontend-app", "dependencies": {"react": "^18.0.0"}}"#).unwrap();
600
601 let backend_dir = root.join("backend");
603 fs::create_dir_all(&backend_dir).unwrap();
604 fs::write(backend_dir.join("package.json"), r#"{"name": "backend-api", "dependencies": {"express": "^4.18.0"}}"#).unwrap();
605
606 fs::write(root.join("package.json"), r#"{"name": "monorepo", "workspaces": ["frontend", "backend"]}"#).unwrap();
608
609 let analysis = analyze_monorepo(root).unwrap();
610
611 assert!(analysis.is_monorepo);
612 assert_eq!(analysis.projects.len(), 2);
613 assert_eq!(analysis.technology_summary.architecture_pattern, ArchitecturePattern::Fullstack);
614 }
615}