syncable_cli/analyzer/
mod.rs

1//! # Analyzer Module
2//!
3//! This module provides project analysis capabilities for detecting:
4//! - Programming languages and their versions
5//! - Frameworks and libraries
6//! - Dependencies and their versions
7//! - Entry points and exposed ports
8
9use crate::error::Result;
10use chrono::Utc;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14
15pub mod context;
16pub mod dclint;
17pub mod dependency_parser;
18pub mod display;
19pub mod docker_analyzer;
20pub mod framework_detector;
21pub mod frameworks;
22pub mod hadolint;
23pub mod helmlint;
24pub mod k8s_optimize;
25pub mod kubelint;
26pub mod language_detector;
27pub mod monorepo;
28pub mod runtime;
29pub mod security;
30pub mod security_analyzer;
31pub mod tool_management;
32pub mod vulnerability;
33
34// Re-export dependency analysis types
35pub use dependency_parser::{DependencyAnalysis, DependencyInfo, DetailedDependencyMap};
36
37// Re-export security analysis types
38pub use security_analyzer::{
39    ComplianceStatus, SecurityAnalysisConfig, SecurityAnalyzer, SecurityCategory, SecurityFinding,
40    SecurityReport, SecuritySeverity,
41};
42
// Re-export secret pattern management and security configuration preset types
44pub use security::SecretPatternManager;
45pub use security::config::SecurityConfigPreset;
46
47// Re-export tool management types
48pub use tool_management::{InstallationSource, ToolDetector, ToolInstaller, ToolStatus};
49
50// Re-export runtime detection types
51pub use runtime::{
52    DetectionConfidence, JavaScriptRuntime, PackageManager, RuntimeDetectionResult, RuntimeDetector,
53};
54
55// Re-export vulnerability checking types
56pub use vulnerability::types::VulnerabilitySeverity as VulnSeverity;
57pub use vulnerability::{
58    VulnerabilityChecker, VulnerabilityInfo, VulnerabilityReport, VulnerableDependency,
59};
60
61// Re-export monorepo analysis types
62pub use monorepo::{MonorepoDetectionConfig, analyze_monorepo, analyze_monorepo_with_config};
63
64// Re-export Docker analysis types
65pub use docker_analyzer::{
66    ComposeFileInfo, DiscoveredDockerfile, DockerAnalysis, DockerEnvironment, DockerService,
67    DockerfileInfo, NetworkingConfig, OrchestrationPattern, analyze_docker_infrastructure,
68    discover_dockerfiles_for_deployment,
69};
70
/// Represents a detected programming language
///
/// `analyze_project_with_config` obtains these from
/// `language_detector::detect_languages` and stores them in
/// `ProjectAnalysis::languages`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DetectedLanguage {
    /// Language name (the unit test in this module uses `"Rust"`)
    pub name: String,
    /// Language version, when one is known
    pub version: Option<String>,
    /// Detection confidence; `calculate_confidence_score` averages this value
    /// across all detected languages (presumably in `0.0..=1.0` — not enforced here)
    pub confidence: f32,
    /// Paths recorded for this language (presumably the files that triggered
    /// the detection — confirm against `language_detector`)
    pub files: Vec<PathBuf>,
    /// Names of the main (non-dev) dependencies
    pub main_dependencies: Vec<String>,
    /// Names of the development-only dependencies
    pub dev_dependencies: Vec<String>,
    /// Package manager name, when known (the unit test uses `"cargo"`)
    pub package_manager: Option<String>,
}
82
/// Categories of detected technologies with proper classification
///
/// Used as the `category` of a `DetectedTechnology`. Derives `Eq` and `Hash`,
/// so a category can serve as a hash-map or hash-set key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum TechnologyCategory {
    /// Full-stack meta-frameworks that provide complete application structure
    MetaFramework,
    /// Frontend frameworks that provide application structure (Angular, Svelte)
    FrontendFramework,
    /// Backend frameworks that provide server structure (Express, Django, Spring Boot)
    BackendFramework,
    /// Libraries that provide specific functionality (React, Tanstack Query, Axios);
    /// the payload narrows down what kind of library it is
    Library(LibraryType),
    /// Build and development tools (Vite, Webpack, Rollup)
    BuildTool,
    /// Database and ORM tools (Prisma, TypeORM, SQLAlchemy)
    Database,
    /// Testing frameworks and libraries (Jest, Vitest, Cypress)
    Testing,
    /// JavaScript/Python/etc runtimes (Node.js, Bun, Deno)
    Runtime,
    /// Package managers (npm, yarn, pnpm, pip, cargo)
    PackageManager,
}
105
/// Specific types of libraries for better classification
///
/// Carried as the payload of `TechnologyCategory::Library`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum LibraryType {
    /// UI libraries (React, Vue, Preact)
    UI,
    /// State management (Zustand, Redux, Pinia)
    StateManagement,
    /// Data fetching (Tanstack Query, Apollo, Relay)
    DataFetching,
    /// Routing (React Router, Vue Router - when not meta-framework)
    Routing,
    /// Styling (Styled Components, Emotion, Tailwind)
    Styling,
    /// Utilities (Lodash, Date-fns, Zod)
    Utility,
    /// HTTP clients (Axios, Fetch libraries)
    HttpClient,
    /// Authentication (Auth0, Firebase Auth)
    Authentication,
    /// CLI frameworks (clap, structopt, argh)
    CLI,
    /// Any other library kind, identified by a free-form label
    Other(String),
}
130
/// Represents a detected technology (framework, library, or tool)
///
/// Also reachable through the legacy alias `DetectedFramework`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DetectedTechnology {
    /// Technology name (the unit test in this module uses `"Actix Web"`)
    pub name: String,
    /// Technology version, when one is known
    pub version: Option<String>,
    /// Classification of this technology
    pub category: TechnologyCategory,
    /// Detection confidence; `calculate_confidence_score` averages this value
    /// across all detected technologies (presumably in `0.0..=1.0` — not enforced here)
    pub confidence: f32,
    /// Dependencies this technology requires (e.g., Next.js requires React)
    pub requires: Vec<String>,
    /// Technologies that conflict with this one (e.g., Tanstack Start conflicts with React Router v7)
    pub conflicts_with: Vec<String>,
    /// Whether this is the primary technology driving the architecture
    pub is_primary: bool,
    /// File indicators that helped identify this technology
    pub file_indicators: Vec<String>,
}
147
/// Represents a service within a microservice architecture
///
/// Stored in `ProjectAnalysis::services`; the fields mirror the per-project
/// fields of `ProjectAnalysis`, scoped to one service.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ServiceAnalysis {
    /// Service name
    pub name: String,
    /// Location of the service (presumably relative to the project root — confirm with producer)
    pub path: PathBuf,
    /// Languages detected for this service
    pub languages: Vec<DetectedLanguage>,
    /// Technologies detected for this service
    pub technologies: Vec<DetectedTechnology>,
    /// Entry points detected for this service
    pub entry_points: Vec<EntryPoint>,
    /// Network ports detected for this service
    pub ports: Vec<Port>,
    /// Environment variables detected for this service
    pub environment_variables: Vec<EnvVar>,
    /// Build scripts detected for this service
    pub build_scripts: Vec<BuildScript>,
    /// Kind of project this service represents
    pub service_type: ProjectType,
}
161
/// Represents application entry points
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EntryPoint {
    /// File that holds the entry point
    pub file: PathBuf,
    /// Function associated with the entry point, when one was identified
    pub function: Option<String>,
    /// Command associated with the entry point, when one was identified
    /// (presumably the command that launches it — confirm with producer)
    pub command: Option<String>,
}
169
/// Source of port detection - indicates where the port was discovered
///
/// Stored in `Port::source`; `PortSource::description` yields a display
/// label for each variant.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PortSource {
    /// Detected from Dockerfile EXPOSE directive
    Dockerfile,
    /// Detected from docker-compose.yml ports section
    DockerCompose,
    /// Detected from package.json scripts (Node.js)
    PackageJson,
    /// Inferred from framework defaults (e.g., Express=3000, FastAPI=8000)
    FrameworkDefault,
    /// Detected from environment variable reference (e.g., process.env.PORT)
    EnvVar,
    /// Detected from source code analysis (e.g., .listen(3000))
    SourceCode,
    /// Detected from configuration files (e.g., config.yaml, settings.py)
    ConfigFile,
}
188
189impl PortSource {
190    /// Returns a human-readable description of the port source
191    pub fn description(&self) -> &'static str {
192        match self {
193            PortSource::Dockerfile => "Dockerfile EXPOSE",
194            PortSource::DockerCompose => "docker-compose.yml",
195            PortSource::PackageJson => "package.json scripts",
196            PortSource::FrameworkDefault => "framework default",
197            PortSource::EnvVar => "environment variable",
198            PortSource::SourceCode => "source code",
199            PortSource::ConfigFile => "configuration file",
200        }
201    }
202}
203
/// Represents exposed network ports
///
/// Derives `Eq` and `Hash`, so ports can be deduplicated through hash sets.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Port {
    /// Port number
    pub number: u16,
    /// Protocol associated with the port
    pub protocol: Protocol,
    /// Optional free-form description of the port
    pub description: Option<String>,
    /// Source where this port was detected (optional for backward compatibility);
    /// left out of the serialized output entirely when it is `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<PortSource>,
}
214
215impl Port {
216    /// Create a new port with source information
217    pub fn with_source(number: u16, protocol: Protocol, source: PortSource) -> Self {
218        Self {
219            number,
220            protocol,
221            description: None,
222            source: Some(source),
223        }
224    }
225
226    /// Create a new port with source and description
227    pub fn with_source_and_description(
228        number: u16,
229        protocol: Protocol,
230        source: PortSource,
231        description: impl Into<String>,
232    ) -> Self {
233        Self {
234            number,
235            protocol,
236            description: Some(description.into()),
237            source: Some(source),
238        }
239    }
240}
241
/// Protocol associated with a detected `Port`
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Protocol {
    /// TCP
    Tcp,
    /// UDP
    Udp,
    /// HTTP
    Http,
    /// HTTPS
    Https,
}
249
/// Source of health endpoint detection
///
/// Stored in `HealthEndpoint::source`; `HealthEndpointSource::description`
/// yields a display label for each variant.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum HealthEndpointSource {
    /// Found by analyzing source code patterns
    CodePattern,
    /// Known framework convention (e.g., Spring Actuator)
    FrameworkDefault,
    /// Found in configuration files (e.g., K8s manifests, docker-compose)
    ConfigFile,
}
260
261impl HealthEndpointSource {
262    /// Returns a human-readable description of the detection source
263    pub fn description(&self) -> &'static str {
264        match self {
265            HealthEndpointSource::CodePattern => "source code analysis",
266            HealthEndpointSource::FrameworkDefault => "framework convention",
267            HealthEndpointSource::ConfigFile => "configuration file",
268        }
269    }
270}
271
/// Represents a detected health check endpoint
///
/// `analyze_project_with_config` fills `ProjectAnalysis::health_endpoints`
/// with the result of `context::detect_health_endpoints`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct HealthEndpoint {
    /// The HTTP path for the health check (e.g., "/health", "/healthz")
    pub path: String,
    /// Confidence level (0.0-1.0) in this detection; the constructors in this
    /// module store the value as given without range checking
    pub confidence: f32,
    /// Where this endpoint was detected from
    pub source: HealthEndpointSource,
    /// Optional description or context
    pub description: Option<String>,
}
284
285impl HealthEndpoint {
286    /// Create a new health endpoint with high confidence from code analysis
287    pub fn from_code(path: impl Into<String>, confidence: f32) -> Self {
288        Self {
289            path: path.into(),
290            confidence,
291            source: HealthEndpointSource::CodePattern,
292            description: None,
293        }
294    }
295
296    /// Create a health endpoint from a framework default
297    pub fn from_framework(path: impl Into<String>, framework: &str) -> Self {
298        Self {
299            path: path.into(),
300            confidence: 0.7, // Framework defaults have moderate confidence
301            source: HealthEndpointSource::FrameworkDefault,
302            description: Some(format!("{} default health endpoint", framework)),
303        }
304    }
305}
306
/// Represents environment variables
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnvVar {
    /// Variable name
    pub name: String,
    /// Default value, when one is known
    pub default_value: Option<String>,
    /// Whether the variable is required
    pub required: bool,
    /// Optional free-form description of the variable
    pub description: Option<String>,
}
315
/// Represents different project types
///
/// Used for `ProjectAnalysis::project_type` and `ServiceAnalysis::service_type`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ProjectType {
    /// Web application
    WebApplication,
    /// API service
    ApiService,
    /// Command-line tool
    CliTool,
    /// Library
    Library,
    /// Mobile application
    MobileApp,
    /// Desktop application
    DesktopApp,
    /// Microservice
    Microservice,
    /// Static site
    StaticSite,
    /// Multiple types
    Hybrid,
    /// Project type could not be determined
    Unknown,
}
330
/// Represents build scripts and commands
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BuildScript {
    /// Script name
    pub name: String,
    /// Command line the script runs
    pub command: String,
    /// Optional free-form description of the script
    pub description: Option<String>,
    /// Whether this script is flagged as the default one
    pub is_default: bool,
}
339
/// Detected infrastructure files and configurations in the project
///
/// `analyze_project_with_config` obtains this from
/// `context::detect_infrastructure`. The derived `Default` is the
/// "nothing detected" value: every flag `false`, every path list empty and
/// no summary.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub struct InfrastructurePresence {
    /// Whether Kubernetes manifests were detected
    pub has_kubernetes: bool,
    /// Paths to directories or files containing K8s manifests
    pub kubernetes_paths: Vec<PathBuf>,
    /// Whether Helm charts were detected
    pub has_helm: bool,
    /// Paths to Helm chart directories (containing Chart.yaml)
    pub helm_chart_paths: Vec<PathBuf>,
    /// Whether docker-compose files were detected
    pub has_docker_compose: bool,
    /// Whether Terraform files were detected
    pub has_terraform: bool,
    /// Paths to directories containing .tf files
    pub terraform_paths: Vec<PathBuf>,
    /// Whether Syncable deployment config exists
    pub has_deployment_config: bool,
    /// Summary of what was detected for display purposes
    pub summary: Option<String>,
}
362
363impl InfrastructurePresence {
364    /// Returns true if any infrastructure was detected
365    pub fn has_any(&self) -> bool {
366        self.has_kubernetes || self.has_helm || self.has_docker_compose || self.has_terraform || self.has_deployment_config
367    }
368
369    /// Returns a list of detected infrastructure types
370    pub fn detected_types(&self) -> Vec<&'static str> {
371        let mut types = Vec::new();
372        if self.has_kubernetes { types.push("Kubernetes"); }
373        if self.has_helm { types.push("Helm"); }
374        if self.has_docker_compose { types.push("Docker Compose"); }
375        if self.has_terraform { types.push("Terraform"); }
376        if self.has_deployment_config { types.push("Syncable Config"); }
377        types
378    }
379}
380
/// Type alias for dependency maps
///
/// A string-to-string map used for `ProjectAnalysis::dependencies`
/// (presumably dependency name -> version — confirm against `dependency_parser`).
pub type DependencyMap = HashMap<String, String>;
383
/// Types of project architectures
///
/// Stored in `ProjectAnalysis::architecture_type`. `analyze_project_with_config`
/// currently always reports `Monolithic` (detection is marked TODO there).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ArchitectureType {
    /// Single application/service
    Monolithic,
    /// Multiple services in one repository
    Microservices,
    /// Mixed approach with both
    Hybrid,
}
394
/// Backward compatibility type alias
///
/// Legacy name for `DetectedTechnology`; still used by the deprecated
/// `ProjectAnalysis::frameworks` field and by `calculate_confidence_score`.
pub type DetectedFramework = DetectedTechnology;
397
/// Enhanced project analysis with proper technology classification and microservice support
///
/// This is the value returned by `analyze_project` and
/// `analyze_project_with_config`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ProjectAnalysis {
    /// Validated root directory of the analyzed project
    pub project_root: PathBuf,
    /// Languages detected in the project
    pub languages: Vec<DetectedLanguage>,
    /// All detected technologies (frameworks, libraries, tools) with proper classification
    pub technologies: Vec<DetectedTechnology>,
    /// Legacy field for backward compatibility - will be populated from technologies
    /// (`analyze_project_with_config` stores the same list in both fields)
    #[deprecated(note = "Use technologies field instead")]
    pub frameworks: Vec<DetectedFramework>,
    /// Dependencies parsed from the project
    pub dependencies: DependencyMap,
    /// Detected application entry points
    pub entry_points: Vec<EntryPoint>,
    /// Detected network ports
    pub ports: Vec<Port>,
    /// Detected health check endpoints
    /// (defaults to an empty list when the field is missing during deserialization)
    #[serde(default)]
    pub health_endpoints: Vec<HealthEndpoint>,
    /// Detected environment variables
    pub environment_variables: Vec<EnvVar>,
    /// Overall kind of project
    pub project_type: ProjectType,
    /// Detected build scripts
    pub build_scripts: Vec<BuildScript>,
    /// Individual service analyses for microservice architectures
    /// (`analyze_project_with_config` currently always leaves this empty)
    pub services: Vec<ServiceAnalysis>,
    /// Whether this is a monolithic project or microservice architecture
    pub architecture_type: ArchitectureType,
    /// Docker infrastructure analysis; `None` when that analysis failed
    pub docker_analysis: Option<DockerAnalysis>,
    /// Detected infrastructure (K8s, Helm, Terraform, etc.)
    /// (defaults to `None` when the field is missing during deserialization)
    #[serde(default)]
    pub infrastructure: Option<InfrastructurePresence>,
    /// Bookkeeping about the analysis run itself
    pub analysis_metadata: AnalysisMetadata,
}
428
/// Metadata about the analysis process
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AnalysisMetadata {
    /// When the analysis was produced; `analyze_project_with_config` writes
    /// the current UTC time formatted as RFC 3339
    pub timestamp: String,
    /// Version of the analyzer; `analyze_project_with_config` writes the
    /// crate's `CARGO_PKG_VERSION`
    pub analyzer_version: String,
    /// Wall-clock duration of the analysis in milliseconds
    pub analysis_duration_ms: u64,
    /// Number of project files that were collected for analysis
    pub files_analyzed: usize,
    /// Overall confidence; `analyze_project_with_config` writes the result of
    /// `calculate_confidence_score`
    pub confidence_score: f32,
}
438
/// Configuration for project analysis
///
/// The `Default` implementation supplies the values noted on each field.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// Whether development dependencies are included (default: `false`)
    pub include_dev_dependencies: bool,
    /// Whether deep analysis is enabled (default: `true`)
    pub deep_analysis: bool,
    /// Patterns to ignore (default: `node_modules`, `.git`, `target`, `build`,
    /// `.next`, `dist`); how a pattern is matched is decided by the consumers
    /// of this config, not here
    pub ignore_patterns: Vec<String>,
    /// Maximum file size to consider (default: `1024 * 1024`, presumably bytes);
    /// also handed to `context::detect_health_endpoints`
    pub max_file_size: usize,
}
447
448impl Default for AnalysisConfig {
449    fn default() -> Self {
450        Self {
451            include_dev_dependencies: false,
452            deep_analysis: true,
453            ignore_patterns: vec![
454                "node_modules".to_string(),
455                ".git".to_string(),
456                "target".to_string(),
457                "build".to_string(),
458                ".next".to_string(),
459                "dist".to_string(),
460            ],
461            max_file_size: 1024 * 1024, // 1MB
462        }
463    }
464}
465
/// Represents an individual project within a monorepo
///
/// Collected in `MonorepoAnalysis::projects`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ProjectInfo {
    /// Relative path from the monorepo root
    pub path: PathBuf,
    /// Display name for the project (derived from directory name or package name)
    pub name: String,
    /// Type of project (frontend, backend, service, etc.)
    pub project_category: ProjectCategory,
    /// Full analysis of this specific project
    pub analysis: ProjectAnalysis,
}
478
/// Category of project within a monorepo
///
/// Stored in `ProjectInfo::project_category`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ProjectCategory {
    /// Frontend project
    Frontend,
    /// Backend project
    Backend,
    /// API project
    Api,
    /// Service project
    Service,
    /// Library project
    Library,
    /// Tooling project
    Tool,
    /// Documentation project
    Documentation,
    /// Infrastructure project
    Infrastructure,
    /// Category could not be determined
    Unknown,
}
492
/// Represents the overall analysis of a monorepo or single project
///
/// The `monorepo` submodule's `analyze_monorepo` and
/// `analyze_monorepo_with_config` are re-exported alongside this type
/// (presumably they produce it — confirm in `monorepo`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct MonorepoAnalysis {
    /// Root path of the analysis
    pub root_path: PathBuf,
    /// Whether this is a monorepo (multiple projects) or single project
    pub is_monorepo: bool,
    /// List of detected projects (will have 1 item for single projects)
    pub projects: Vec<ProjectInfo>,
    /// Overall metadata for the entire analysis
    pub metadata: AnalysisMetadata,
    /// Summary of all technologies found across projects
    pub technology_summary: TechnologySummary,
}
507
/// Summary of technologies across all projects
///
/// Stored in `MonorepoAnalysis::technology_summary`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TechnologySummary {
    /// Names of the languages found
    pub languages: Vec<String>,
    /// Names of the frameworks found
    pub frameworks: Vec<String>,
    /// Names of the databases found
    pub databases: Vec<String>,
    /// Number of projects covered by the summary
    pub total_projects: usize,
    /// Architecture pattern detected for the repository as a whole
    pub architecture_pattern: ArchitecturePattern,
}
517
/// Detected architecture patterns
///
/// Stored in `TechnologySummary::architecture_pattern`; distinct from
/// `ArchitectureType`, which is recorded per `ProjectAnalysis`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ArchitecturePattern {
    /// Single application
    Monolithic,
    /// Frontend + Backend separation
    Fullstack,
    /// Multiple independent services
    Microservices,
    /// API-first architecture
    ApiFirst,
    /// Event-driven architecture
    EventDriven,
    /// Unknown or mixed pattern
    Mixed,
}
534
535/// Analyzes a project directory to detect languages, frameworks, and dependencies.
536///
537/// # Arguments
538/// * `path` - The root directory of the project to analyze
539///
540/// # Returns
541/// A `ProjectAnalysis` containing detected components or an error
542///
543/// # Examples
544/// ```no_run
545/// use syncable_cli::analyzer::analyze_project;
546/// use std::path::Path;
547///
548/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
549/// let analysis = analyze_project(Path::new("./my-project"))?;
550/// println!("Languages: {:?}", analysis.languages);
551/// # Ok(())
552/// # }
553/// ```
554pub fn analyze_project(path: &Path) -> Result<ProjectAnalysis> {
555    analyze_project_with_config(path, &AnalysisConfig::default())
556}
557
558/// Analyzes a project with custom configuration
559pub fn analyze_project_with_config(
560    path: &Path,
561    config: &AnalysisConfig,
562) -> Result<ProjectAnalysis> {
563    let start_time = std::time::Instant::now();
564
565    // Validate project path
566    let project_root = crate::common::file_utils::validate_project_path(path)?;
567
568    log::info!("Starting analysis of project: {}", project_root.display());
569
570    // Collect project files
571    let files = crate::common::file_utils::collect_project_files(&project_root, config)?;
572    log::debug!("Found {} files to analyze", files.len());
573
574    // Perform parallel analysis
575    let languages = language_detector::detect_languages(&files, config)?;
576    let frameworks = framework_detector::detect_frameworks(&project_root, &languages, config)?;
577    let dependencies = dependency_parser::parse_dependencies(&project_root, &languages, config)?;
578    let context = context::analyze_context(&project_root, &languages, &frameworks, config)?;
579
580    // Detect health check endpoints
581    let health_endpoints = context::detect_health_endpoints(&project_root, &frameworks, config.max_file_size);
582
583    // Detect infrastructure presence (K8s, Helm, Terraform, etc.)
584    let infrastructure = context::detect_infrastructure(&project_root);
585
586    // Analyze Docker infrastructure
587    let docker_analysis = analyze_docker_infrastructure(&project_root).ok();
588
589    let duration = start_time.elapsed();
590    let confidence = calculate_confidence_score(&languages, &frameworks);
591
592    #[allow(deprecated)]
593    let analysis = ProjectAnalysis {
594        project_root,
595        languages,
596        technologies: frameworks.clone(), // New field with proper technology classification
597        frameworks,                       // Backward compatibility
598        dependencies,
599        entry_points: context.entry_points,
600        ports: context.ports,
601        health_endpoints,
602        environment_variables: context.environment_variables,
603        project_type: context.project_type,
604        build_scripts: context.build_scripts,
605        services: vec![], // TODO: Implement microservice detection
606        architecture_type: ArchitectureType::Monolithic, // TODO: Detect architecture type
607        docker_analysis,
608        infrastructure: Some(infrastructure),
609        analysis_metadata: AnalysisMetadata {
610            timestamp: Utc::now().to_rfc3339(),
611            analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
612            analysis_duration_ms: duration.as_millis() as u64,
613            files_analyzed: files.len(),
614            confidence_score: confidence,
615        },
616    };
617
618    log::info!("Analysis completed in {}ms", duration.as_millis());
619    Ok(analysis)
620}
621
622/// Calculate overall confidence score based on detection results
623fn calculate_confidence_score(
624    languages: &[DetectedLanguage],
625    frameworks: &[DetectedFramework],
626) -> f32 {
627    if languages.is_empty() {
628        return 0.0;
629    }
630
631    let lang_confidence: f32 =
632        languages.iter().map(|l| l.confidence).sum::<f32>() / languages.len() as f32;
633    let framework_confidence: f32 = if frameworks.is_empty() {
634        0.5 // Neutral score if no frameworks detected
635    } else {
636        frameworks.iter().map(|f| f.confidence).sum::<f32>() / frameworks.len() as f32
637    };
638
639    (lang_confidence * 0.7 + framework_confidence * 0.3).min(1.0)
640}
641
#[cfg(test)]
mod tests {
    use super::*;

    /// One confident language plus one confident framework should blend to a
    /// score above 0.8 that never exceeds 1.0.
    #[test]
    fn test_confidence_calculation() {
        let rust = DetectedLanguage {
            name: "Rust".to_string(),
            version: Some("1.70.0".to_string()),
            confidence: 0.9,
            files: vec![],
            main_dependencies: vec!["serde".to_string(), "tokio".to_string()],
            dev_dependencies: vec!["assert_cmd".to_string()],
            package_manager: Some("cargo".to_string()),
        };

        let actix = DetectedTechnology {
            name: "Actix Web".to_string(),
            version: Some("4.0".to_string()),
            category: TechnologyCategory::BackendFramework,
            confidence: 0.8,
            requires: vec!["serde".to_string(), "tokio".to_string()],
            conflicts_with: vec![],
            is_primary: true,
            file_indicators: vec![],
        };

        // `DetectedFramework` is an alias of `DetectedTechnology`, so the
        // technology can be passed directly as a framework.
        let score = calculate_confidence_score(&[rust], &[actix]);
        assert!(score > 0.8);
        assert!(score <= 1.0);
    }

    /// With nothing detected at all the score is exactly zero.
    #[test]
    fn test_empty_analysis() {
        let no_languages: Vec<DetectedLanguage> = Vec::new();
        let no_frameworks: Vec<DetectedFramework> = Vec::new();
        let score = calculate_confidence_score(&no_languages, &no_frameworks);
        assert_eq!(score, 0.0);
    }
}
683}