Skip to main content

syncable_cli/analyzer/
mod.rs

1//! # Analyzer Module
2//!
3//! This module provides project analysis capabilities for detecting:
4//! - Programming languages and their versions
5//! - Frameworks and libraries
6//! - Dependencies and their versions
7//! - Entry points and exposed ports
8
9use crate::error::Result;
10use chrono::Utc;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14
15pub mod context;
16pub mod dclint;
17pub mod dependency_parser;
18pub mod display;
19pub mod docker_analyzer;
20pub mod framework_detector;
21pub mod frameworks;
22pub mod hadolint;
23pub mod helmlint;
24pub mod k8s_optimize;
25pub mod kubelint;
26pub mod language_detector;
27pub mod monorepo;
28pub mod runtime;
29pub mod security;
30pub mod security_analyzer;
31pub mod tool_management;
32pub mod vulnerability;
33
34// Re-export dependency analysis types
35pub use dependency_parser::{DependencyAnalysis, DependencyInfo, DetailedDependencyMap};
36
37// Re-export security analysis types
38pub use security_analyzer::{
39    ComplianceStatus, SecurityAnalysisConfig, SecurityAnalyzer, SecurityCategory, SecurityFinding,
40    SecurityReport, SecuritySeverity,
41};
42
// Re-export secret pattern and security configuration types
44pub use security::SecretPatternManager;
45pub use security::config::SecurityConfigPreset;
46
47// Re-export tool management types
48pub use tool_management::{InstallationSource, ToolDetector, ToolInstaller, ToolStatus};
49
50// Re-export runtime detection types
51pub use runtime::{
52    DetectionConfidence, JavaScriptRuntime, PackageManager, RuntimeDetectionResult, RuntimeDetector,
53};
54
55// Re-export vulnerability checking types
56pub use vulnerability::types::VulnerabilitySeverity as VulnSeverity;
57pub use vulnerability::{
58    VulnerabilityChecker, VulnerabilityInfo, VulnerabilityReport, VulnerableDependency,
59};
60
61// Re-export monorepo analysis types
62pub use monorepo::{MonorepoDetectionConfig, analyze_monorepo, analyze_monorepo_with_config};
63
64// Re-export Docker analysis types
65pub use docker_analyzer::{
66    ComposeFileInfo, DiscoveredDockerfile, DockerAnalysis, DockerEnvironment, DockerService,
67    DockerfileInfo, NetworkingConfig, OrchestrationPattern, analyze_docker_infrastructure,
68    discover_dockerfiles_for_deployment,
69};
70
/// Represents a detected programming language
///
/// Serializable record describing one language found in a project. Instances
/// appear in `ProjectAnalysis::languages` and `ServiceAnalysis::languages`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DetectedLanguage {
    /// Language name (the unit test in this module uses "Rust")
    pub name: String,
    /// Language version, when one is available
    pub version: Option<String>,
    /// Detection confidence; averaged across languages by `calculate_confidence_score`
    // NOTE(review): presumably expected to lie in 0.0-1.0 — confirm against the detector
    pub confidence: f32,
    /// Paths of files associated with this language
    pub files: Vec<PathBuf>,
    /// Names of main dependencies (the unit test uses crate names such as "serde")
    pub main_dependencies: Vec<String>,
    /// Names of development-only dependencies
    pub dev_dependencies: Vec<String>,
    /// Package manager name, when known (the unit test uses "cargo")
    pub package_manager: Option<String>,
}
82
/// Categories of detected technologies with proper classification
///
/// Stored in the `category` field of `DetectedTechnology`. Derives `Eq` and
/// `Hash`, so values can be used as hash-map or hash-set keys.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum TechnologyCategory {
    /// Full-stack meta-frameworks that provide complete application structure
    MetaFramework,
    /// Frontend frameworks that provide application structure (Angular, Svelte)
    FrontendFramework,
    /// Backend frameworks that provide server structure (Express, Django, Spring Boot)
    BackendFramework,
    /// Libraries that provide specific functionality (React, Tanstack Query, Axios);
    /// the payload narrows down the kind of library
    Library(LibraryType),
    /// Build and development tools (Vite, Webpack, Rollup)
    BuildTool,
    /// Database and ORM tools (Prisma, TypeORM, SQLAlchemy)
    Database,
    /// Testing frameworks and libraries (Jest, Vitest, Cypress)
    Testing,
    /// JavaScript/Python/etc runtimes (Node.js, Bun, Deno)
    Runtime,
    /// Package managers (npm, yarn, pnpm, pip, cargo)
    PackageManager,
}
105
/// Specific types of libraries for better classification
///
/// Carried as the payload of `TechnologyCategory::Library`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum LibraryType {
    /// UI libraries (React, Vue, Preact)
    UI,
    /// State management (Zustand, Redux, Pinia)
    StateManagement,
    /// Data fetching (Tanstack Query, Apollo, Relay)
    DataFetching,
    /// Routing (React Router, Vue Router - when not meta-framework)
    Routing,
    /// Styling (Styled Components, Emotion, Tailwind)
    Styling,
    /// Utilities (Lodash, Date-fns, Zod)
    Utility,
    /// HTTP clients (Axios, Fetch libraries)
    HttpClient,
    /// Authentication (Auth0, Firebase Auth)
    Authentication,
    /// CLI frameworks (clap, structopt, argh)
    CLI,
    /// Other specific types
    // NOTE(review): the String is presumably a free-form label for the library kind — confirm with producers
    Other(String),
}
130
/// Represents a detected technology (framework, library, or tool)
///
/// Also reachable under the legacy name `DetectedFramework`, which is a type
/// alias for this struct.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DetectedTechnology {
    /// Technology name (the unit test in this module uses "Actix Web")
    pub name: String,
    /// Technology version, when one is available
    pub version: Option<String>,
    /// Classification of this technology
    pub category: TechnologyCategory,
    /// Detection confidence; averaged across technologies by `calculate_confidence_score`
    // NOTE(review): presumably expected to lie in 0.0-1.0 — confirm against the detector
    pub confidence: f32,
    /// Dependencies this technology requires (e.g., Next.js requires React)
    pub requires: Vec<String>,
    /// Technologies that conflict with this one (e.g., Tanstack Start conflicts with React Router v7)
    pub conflicts_with: Vec<String>,
    /// Whether this is the primary technology driving the architecture
    pub is_primary: bool,
    /// File indicators that helped identify this technology
    pub file_indicators: Vec<String>,
}
147
/// Represents a service within a microservice architecture
///
/// Collected in `ProjectAnalysis::services`. Note that
/// `analyze_project_with_config` in this module currently leaves that list empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ServiceAnalysis {
    /// Service name
    pub name: String,
    /// Path of the service
    // NOTE(review): unclear from this file whether the path is absolute or relative to the project root
    pub path: PathBuf,
    /// Languages detected for this service
    pub languages: Vec<DetectedLanguage>,
    /// Technologies detected for this service
    pub technologies: Vec<DetectedTechnology>,
    /// Application entry points of this service
    pub entry_points: Vec<EntryPoint>,
    /// Network ports exposed by this service
    pub ports: Vec<Port>,
    /// Environment variables associated with this service
    pub environment_variables: Vec<EnvVar>,
    /// Build scripts and commands of this service
    pub build_scripts: Vec<BuildScript>,
    /// Project type classification of this service
    pub service_type: ProjectType,
}
161
/// Represents application entry points
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EntryPoint {
    /// File that contains the entry point
    pub file: PathBuf,
    /// Optional function name
    // NOTE(review): presumably the entry function inside `file` (e.g. `main`) — confirm with the context analyzer
    pub function: Option<String>,
    /// Optional command
    // NOTE(review): presumably the command used to launch this entry point — confirm with the context analyzer
    pub command: Option<String>,
}
169
/// Source of port detection - indicates where the port was discovered
///
/// Stored in the optional `source` field of `Port`. A short human-readable
/// label for each variant is available through `PortSource::description`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PortSource {
    /// Detected from Dockerfile EXPOSE directive
    Dockerfile,
    /// Detected from docker-compose.yml ports section
    DockerCompose,
    /// Detected from package.json scripts (Node.js)
    PackageJson,
    /// Inferred from framework defaults (e.g., Express=3000, FastAPI=8000)
    FrameworkDefault,
    /// Detected from environment variable reference (e.g., process.env.PORT)
    EnvVar,
    /// Detected from source code analysis (e.g., .listen(3000))
    SourceCode,
    /// Detected from configuration files (e.g., config.yaml, settings.py)
    ConfigFile,
}
188
189impl PortSource {
190    /// Returns a human-readable description of the port source
191    pub fn description(&self) -> &'static str {
192        match self {
193            PortSource::Dockerfile => "Dockerfile EXPOSE",
194            PortSource::DockerCompose => "docker-compose.yml",
195            PortSource::PackageJson => "package.json scripts",
196            PortSource::FrameworkDefault => "framework default",
197            PortSource::EnvVar => "environment variable",
198            PortSource::SourceCode => "source code",
199            PortSource::ConfigFile => "configuration file",
200        }
201    }
202}
203
/// Represents exposed network ports
///
/// Use `Port::with_source` or `Port::with_source_and_description` to build a
/// value that records where the port was detected.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Port {
    /// Port number
    pub number: u16,
    /// Protocol associated with the port
    pub protocol: Protocol,
    /// Optional free-form description of the port
    pub description: Option<String>,
    /// Source where this port was detected (optional for backward compatibility)
    // Omitted from serialized output entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<PortSource>,
}
214
215impl Port {
216    /// Create a new port with source information
217    pub fn with_source(number: u16, protocol: Protocol, source: PortSource) -> Self {
218        Self {
219            number,
220            protocol,
221            description: None,
222            source: Some(source),
223        }
224    }
225
226    /// Create a new port with source and description
227    pub fn with_source_and_description(
228        number: u16,
229        protocol: Protocol,
230        source: PortSource,
231        description: impl Into<String>,
232    ) -> Self {
233        Self {
234            number,
235            protocol,
236            description: Some(description.into()),
237            source: Some(source),
238        }
239    }
240}
241
/// Protocol associated with a detected `Port`
///
/// Derives `Eq` and `Hash` so that `Port`, which embeds it, can derive them too.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Protocol {
    /// TCP
    Tcp,
    /// UDP
    Udp,
    /// HTTP
    Http,
    /// HTTPS
    Https,
}
249
/// Source of health endpoint detection
///
/// Stored in the `source` field of `HealthEndpoint`. A short human-readable
/// label for each variant is available through `HealthEndpointSource::description`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum HealthEndpointSource {
    /// Found by analyzing source code patterns
    CodePattern,
    /// Known framework convention (e.g., Spring Actuator)
    FrameworkDefault,
    /// Found in configuration files (e.g., K8s manifests, docker-compose)
    ConfigFile,
}
260
261impl HealthEndpointSource {
262    /// Returns a human-readable description of the detection source
263    pub fn description(&self) -> &'static str {
264        match self {
265            HealthEndpointSource::CodePattern => "source code analysis",
266            HealthEndpointSource::FrameworkDefault => "framework convention",
267            HealthEndpointSource::ConfigFile => "configuration file",
268        }
269    }
270}
271
/// Represents a detected health check endpoint
///
/// Instances are collected in `ProjectAnalysis::health_endpoints`. The
/// constructors `HealthEndpoint::from_code` and `HealthEndpoint::from_framework`
/// fill in the `source` field for the two most common detection routes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct HealthEndpoint {
    /// The HTTP path for the health check (e.g., "/health", "/healthz")
    pub path: String,
    /// Confidence level (0.0-1.0) in this detection
    // The range is a convention only; nothing in this type enforces it.
    pub confidence: f32,
    /// Where this endpoint was detected from
    pub source: HealthEndpointSource,
    /// Optional description or context
    pub description: Option<String>,
}
284
285impl HealthEndpoint {
286    /// Create a new health endpoint with high confidence from code analysis
287    pub fn from_code(path: impl Into<String>, confidence: f32) -> Self {
288        Self {
289            path: path.into(),
290            confidence,
291            source: HealthEndpointSource::CodePattern,
292            description: None,
293        }
294    }
295
296    /// Create a health endpoint from a framework default
297    pub fn from_framework(path: impl Into<String>, framework: &str) -> Self {
298        Self {
299            path: path.into(),
300            confidence: 0.7, // Framework defaults have moderate confidence
301            source: HealthEndpointSource::FrameworkDefault,
302            description: Some(format!("{} default health endpoint", framework)),
303        }
304    }
305}
306
/// Represents environment variables
///
/// Instances appear in `ProjectAnalysis::environment_variables` and
/// `ServiceAnalysis::environment_variables`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnvVar {
    /// Variable name
    pub name: String,
    /// Default value, when one is known
    pub default_value: Option<String>,
    /// Whether the variable is flagged as required
    // NOTE(review): presumably "required" means the application cannot run without it — confirm with the context analyzer
    pub required: bool,
    /// Optional free-form description
    pub description: Option<String>,
}
315
/// Represents different project types
///
/// Used for `ProjectAnalysis::project_type` and `ServiceAnalysis::service_type`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ProjectType {
    WebApplication,
    ApiService,
    CliTool,
    Library,
    MobileApp,
    DesktopApp,
    Microservice,
    StaticSite,
    Hybrid, // Multiple types
    Unknown,
}
330
/// Represents build scripts and commands
///
/// Instances appear in `ProjectAnalysis::build_scripts` and
/// `ServiceAnalysis::build_scripts`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BuildScript {
    /// Script name
    pub name: String,
    /// Command associated with the script
    pub command: String,
    /// Optional free-form description
    pub description: Option<String>,
    /// Whether this script is flagged as the default one
    // NOTE(review): the criteria for "default" are decided by the producer, not visible in this file
    pub is_default: bool,
}
339
/// Detected infrastructure files and configurations in the project
///
/// Derives `Default`, so a default value has every flag `false`, every path
/// list empty and no summary. See `has_any` and `detected_types` for
/// convenience queries over the flags.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub struct InfrastructurePresence {
    /// Whether Kubernetes manifests were detected
    pub has_kubernetes: bool,
    /// Paths to directories or files containing K8s manifests
    pub kubernetes_paths: Vec<PathBuf>,
    /// Whether Helm charts were detected
    pub has_helm: bool,
    /// Paths to Helm chart directories (containing Chart.yaml)
    pub helm_chart_paths: Vec<PathBuf>,
    /// Whether docker-compose files were detected
    pub has_docker_compose: bool,
    /// Whether Terraform files were detected
    pub has_terraform: bool,
    /// Paths to directories containing .tf files
    pub terraform_paths: Vec<PathBuf>,
    /// Whether Syncable deployment config exists
    pub has_deployment_config: bool,
    /// Summary of what was detected for display purposes
    pub summary: Option<String>,
}
362
363impl InfrastructurePresence {
364    /// Returns true if any infrastructure was detected
365    pub fn has_any(&self) -> bool {
366        self.has_kubernetes
367            || self.has_helm
368            || self.has_docker_compose
369            || self.has_terraform
370            || self.has_deployment_config
371    }
372
373    /// Returns a list of detected infrastructure types
374    pub fn detected_types(&self) -> Vec<&'static str> {
375        let mut types = Vec::new();
376        if self.has_kubernetes {
377            types.push("Kubernetes");
378        }
379        if self.has_helm {
380            types.push("Helm");
381        }
382        if self.has_docker_compose {
383            types.push("Docker Compose");
384        }
385        if self.has_terraform {
386            types.push("Terraform");
387        }
388        if self.has_deployment_config {
389            types.push("Syncable Config");
390        }
391        types
392    }
393}
394
/// Type alias for dependency maps
///
/// Used for `ProjectAnalysis::dependencies`.
// NOTE(review): presumably maps dependency name -> version string — confirm against `dependency_parser::parse_dependencies`
pub type DependencyMap = HashMap<String, String>;
397
/// Types of project architectures
///
/// Stored in `ProjectAnalysis::architecture_type`. Note that
/// `analyze_project_with_config` in this module currently always reports
/// `Monolithic` (architecture detection is still a TODO there).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ArchitectureType {
    /// Single application/service
    Monolithic,
    /// Multiple services in one repository
    Microservices,
    /// Mixed approach with both
    Hybrid,
}
408
/// Backward compatibility type alias
///
/// `DetectedFramework` and `DetectedTechnology` are the same type; this name
/// is kept for the legacy `ProjectAnalysis::frameworks` field and for
/// `calculate_confidence_score`.
pub type DetectedFramework = DetectedTechnology;
411
/// Enhanced project analysis with proper technology classification and microservice support
///
/// Produced by `analyze_project` / `analyze_project_with_config`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ProjectAnalysis {
    /// Validated root directory of the analyzed project
    pub project_root: PathBuf,
    /// Languages detected in the project
    pub languages: Vec<DetectedLanguage>,
    /// All detected technologies (frameworks, libraries, tools) with proper classification
    pub technologies: Vec<DetectedTechnology>,
    /// Legacy field for backward compatibility - will be populated from technologies
    // `analyze_project_with_config` fills this with the same list as `technologies`.
    #[deprecated(note = "Use technologies field instead")]
    pub frameworks: Vec<DetectedFramework>,
    /// Dependencies parsed from the project
    pub dependencies: DependencyMap,
    /// Application entry points
    pub entry_points: Vec<EntryPoint>,
    /// Exposed network ports
    pub ports: Vec<Port>,
    /// Detected health check endpoints
    // `serde(default)`: an empty list is used when the field is absent on deserialization.
    #[serde(default)]
    pub health_endpoints: Vec<HealthEndpoint>,
    /// Environment variables associated with the project
    pub environment_variables: Vec<EnvVar>,
    /// Overall project type classification
    pub project_type: ProjectType,
    /// Build scripts and commands
    pub build_scripts: Vec<BuildScript>,
    /// Individual service analyses for microservice architectures
    pub services: Vec<ServiceAnalysis>,
    /// Whether this is a monolithic project or microservice architecture
    pub architecture_type: ArchitectureType,
    /// Docker infrastructure analysis
    // `None` when the Docker analysis step returned an error.
    pub docker_analysis: Option<DockerAnalysis>,
    /// Detected infrastructure (K8s, Helm, Terraform, etc.)
    // `serde(default)`: `None` is used when the field is absent on deserialization.
    #[serde(default)]
    pub infrastructure: Option<InfrastructurePresence>,
    /// Metadata about the analysis run (timestamp, duration, confidence, ...)
    pub analysis_metadata: AnalysisMetadata,
}
442
/// Metadata about the analysis process
///
/// The per-field notes below describe how `analyze_project_with_config` in
/// this module fills the struct; other producers may differ.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AnalysisMetadata {
    /// Time of the analysis as a string (RFC 3339, UTC)
    pub timestamp: String,
    /// Version of the analyzer (the crate's `CARGO_PKG_VERSION`)
    pub analyzer_version: String,
    /// Analysis duration in milliseconds
    pub analysis_duration_ms: u64,
    /// Number of files that were collected for analysis
    pub files_analyzed: usize,
    /// Overall confidence score (result of `calculate_confidence_score`)
    pub confidence_score: f32,
}
452
/// Configuration for project analysis
///
/// `AnalysisConfig::default()` gives the settings used by `analyze_project`.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// Whether development dependencies are included (off by default)
    pub include_dev_dependencies: bool,
    /// Whether deep analysis is enabled (on by default)
    pub deep_analysis: bool,
    /// Patterns to ignore; the defaults name common build and VCS directories
    pub ignore_patterns: Vec<String>,
    /// Maximum file size to consider (default is 1MB)
    pub max_file_size: usize,
}

impl Default for AnalysisConfig {
    /// Returns the default configuration: dev dependencies excluded, deep
    /// analysis enabled, common build/VCS directories ignored, 1MB size cap.
    fn default() -> Self {
        const ONE_MEGABYTE: usize = 1024 * 1024; // 1MB
        const DEFAULT_IGNORES: [&str; 6] = [
            "node_modules",
            ".git",
            "target",
            "build",
            ".next",
            "dist",
        ];

        let ignore_patterns = DEFAULT_IGNORES
            .iter()
            .map(|&pattern| String::from(pattern))
            .collect();

        Self {
            include_dev_dependencies: false,
            deep_analysis: true,
            ignore_patterns,
            max_file_size: ONE_MEGABYTE,
        }
    }
}
479
/// Represents an individual project within a monorepo
///
/// Collected in `MonorepoAnalysis::projects`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ProjectInfo {
    /// Relative path from the monorepo root
    pub path: PathBuf,
    /// Display name for the project (derived from directory name or package name)
    pub name: String,
    /// Type of project (frontend, backend, service, etc.)
    pub project_category: ProjectCategory,
    /// Full analysis of this specific project
    pub analysis: ProjectAnalysis,
}
492
/// Category of project within a monorepo
///
/// Stored in `ProjectInfo::project_category`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ProjectCategory {
    Frontend,
    Backend,
    Api,
    Service,
    Library,
    Tool,
    Documentation,
    Infrastructure,
    Unknown,
}
506
/// Represents the overall analysis of a monorepo or single project
///
/// A single-project repository is represented with `is_monorepo == false` and
/// exactly one entry in `projects`, per the field documentation below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct MonorepoAnalysis {
    /// Root path of the analysis
    pub root_path: PathBuf,
    /// Whether this is a monorepo (multiple projects) or single project
    pub is_monorepo: bool,
    /// List of detected projects (will have 1 item for single projects)
    pub projects: Vec<ProjectInfo>,
    /// Overall metadata for the entire analysis
    pub metadata: AnalysisMetadata,
    /// Summary of all technologies found across projects
    pub technology_summary: TechnologySummary,
}
521
/// Summary of technologies across all projects
///
/// Stored in `MonorepoAnalysis::technology_summary`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TechnologySummary {
    /// Names of languages found across projects
    pub languages: Vec<String>,
    /// Names of frameworks found across projects
    pub frameworks: Vec<String>,
    /// Names of databases found across projects
    pub databases: Vec<String>,
    /// Number of projects covered by the summary
    // NOTE(review): presumably equals `MonorepoAnalysis::projects.len()` — confirm in the monorepo module
    pub total_projects: usize,
    /// Architecture pattern detected for the repository as a whole
    pub architecture_pattern: ArchitecturePattern,
}
531
/// Detected architecture patterns
///
/// Stored in `TechnologySummary::architecture_pattern`. This is distinct from
/// `ArchitectureType`, which is the per-project classification on
/// `ProjectAnalysis`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ArchitecturePattern {
    /// Single application
    Monolithic,
    /// Frontend + Backend separation
    Fullstack,
    /// Multiple independent services
    Microservices,
    /// API-first architecture
    ApiFirst,
    /// Event-driven architecture
    EventDriven,
    /// Unknown or mixed pattern
    Mixed,
}
548
549/// Analyzes a project directory to detect languages, frameworks, and dependencies.
550///
551/// # Arguments
552/// * `path` - The root directory of the project to analyze
553///
554/// # Returns
555/// A `ProjectAnalysis` containing detected components or an error
556///
557/// # Examples
558/// ```no_run
559/// use syncable_cli::analyzer::analyze_project;
560/// use std::path::Path;
561///
562/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
563/// let analysis = analyze_project(Path::new("./my-project"))?;
564/// println!("Languages: {:?}", analysis.languages);
565/// # Ok(())
566/// # }
567/// ```
568pub fn analyze_project(path: &Path) -> Result<ProjectAnalysis> {
569    analyze_project_with_config(path, &AnalysisConfig::default())
570}
571
572/// Analyzes a project with custom configuration
573pub fn analyze_project_with_config(
574    path: &Path,
575    config: &AnalysisConfig,
576) -> Result<ProjectAnalysis> {
577    let start_time = std::time::Instant::now();
578
579    // Validate project path
580    let project_root = crate::common::file_utils::validate_project_path(path)?;
581
582    log::info!("Starting analysis of project: {}", project_root.display());
583
584    // Collect project files
585    let files = crate::common::file_utils::collect_project_files(&project_root, config)?;
586    log::debug!("Found {} files to analyze", files.len());
587
588    // Perform parallel analysis
589    let languages = language_detector::detect_languages(&files, config)?;
590    let frameworks = framework_detector::detect_frameworks(&project_root, &languages, config)?;
591    let dependencies = dependency_parser::parse_dependencies(&project_root, &languages, config)?;
592    let context = context::analyze_context(&project_root, &languages, &frameworks, config)?;
593
594    // Detect health check endpoints
595    let health_endpoints =
596        context::detect_health_endpoints(&project_root, &frameworks, config.max_file_size);
597
598    // Detect infrastructure presence (K8s, Helm, Terraform, etc.)
599    let infrastructure = context::detect_infrastructure(&project_root);
600
601    // Analyze Docker infrastructure
602    let docker_analysis = analyze_docker_infrastructure(&project_root).ok();
603
604    let duration = start_time.elapsed();
605    let confidence = calculate_confidence_score(&languages, &frameworks);
606
607    #[allow(deprecated)]
608    let analysis = ProjectAnalysis {
609        project_root,
610        languages,
611        technologies: frameworks.clone(), // New field with proper technology classification
612        frameworks,                       // Backward compatibility
613        dependencies,
614        entry_points: context.entry_points,
615        ports: context.ports,
616        health_endpoints,
617        environment_variables: context.environment_variables,
618        project_type: context.project_type,
619        build_scripts: context.build_scripts,
620        services: vec![], // TODO: Implement microservice detection
621        architecture_type: ArchitectureType::Monolithic, // TODO: Detect architecture type
622        docker_analysis,
623        infrastructure: Some(infrastructure),
624        analysis_metadata: AnalysisMetadata {
625            timestamp: Utc::now().to_rfc3339(),
626            analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
627            analysis_duration_ms: duration.as_millis() as u64,
628            files_analyzed: files.len(),
629            confidence_score: confidence,
630        },
631    };
632
633    log::info!("Analysis completed in {}ms", duration.as_millis());
634    Ok(analysis)
635}
636
637/// Calculate overall confidence score based on detection results
638fn calculate_confidence_score(
639    languages: &[DetectedLanguage],
640    frameworks: &[DetectedFramework],
641) -> f32 {
642    if languages.is_empty() {
643        return 0.0;
644    }
645
646    let lang_confidence: f32 =
647        languages.iter().map(|l| l.confidence).sum::<f32>() / languages.len() as f32;
648    let framework_confidence: f32 = if frameworks.is_empty() {
649        0.5 // Neutral score if no frameworks detected
650    } else {
651        frameworks.iter().map(|f| f.confidence).sum::<f32>() / frameworks.len() as f32
652    };
653
654    (lang_confidence * 0.7 + framework_confidence * 0.3).min(1.0)
655}
656
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a Rust language detection fixture with the given confidence.
    fn rust_language(confidence: f32) -> DetectedLanguage {
        DetectedLanguage {
            name: "Rust".to_string(),
            version: Some("1.70.0".to_string()),
            confidence,
            files: vec![],
            main_dependencies: vec!["serde".to_string(), "tokio".to_string()],
            dev_dependencies: vec!["assert_cmd".to_string()],
            package_manager: Some("cargo".to_string()),
        }
    }

    /// Builds an Actix Web technology fixture with the given confidence.
    fn actix_web(confidence: f32) -> DetectedTechnology {
        DetectedTechnology {
            name: "Actix Web".to_string(),
            version: Some("4.0".to_string()),
            category: TechnologyCategory::BackendFramework,
            confidence,
            requires: vec!["serde".to_string(), "tokio".to_string()],
            conflicts_with: vec![],
            is_primary: true,
            file_indicators: vec![],
        }
    }

    #[test]
    fn test_confidence_calculation() {
        let languages = vec![rust_language(0.9)];
        // `DetectedFramework` is the backward-compatible alias of `DetectedTechnology`.
        let frameworks: Vec<DetectedFramework> = vec![actix_web(0.8)];

        let score = calculate_confidence_score(&languages, &frameworks);
        assert!(score > 0.8);
        assert!(score <= 1.0);
    }

    #[test]
    fn test_empty_analysis() {
        // No languages at all must yield a zero score.
        let score = calculate_confidence_score(&[], &[]);
        assert_eq!(score, 0.0);
    }
}