Skip to main content

valknut_rs/api/
engine.rs

1//! Main analysis engine implementation.
2
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use tracing::info;
7
8use crate::api::config_types::AnalysisConfig as ApiAnalysisConfig;
9use crate::core::config::ValknutConfig;
10use crate::core::errors::{Result, ValknutError};
11use crate::core::featureset::FeatureVector;
12use crate::core::pipeline::AnalysisResults;
13use crate::core::pipeline::{AnalysisConfig as PipelineAnalysisConfig, AnalysisPipeline};
14
/// Compute the common root directory from a list of paths.
///
/// Each path is canonicalized when that succeeds; paths that cannot be
/// canonicalized (for example because they do not exist) are used as given.
///
/// The search starts from the first path itself when it is an existing
/// directory, and from its parent otherwise, so a directory input is treated
/// as its own root (the same convention `analyze_directory` uses for its
/// project root). The candidate is then widened one component at a time until
/// every remaining path lies beneath it.
///
/// Returns an empty `PathBuf` when `paths` is empty or when the paths share
/// no common ancestor (for example a mix of absolute and relative paths).
fn compute_common_root(paths: &[PathBuf]) -> PathBuf {
    // Canonicalize paths that exist, otherwise use them as-is.
    let mut resolved = paths
        .iter()
        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));

    let first = match resolved.next() {
        Some(path) => path,
        None => return PathBuf::new(),
    };

    // A directory is already a valid root; a file (or a path that does not
    // exist) contributes its parent directory instead.
    let mut common = if first.is_dir() {
        first
    } else {
        let parent = first.parent().map(Path::to_path_buf);
        parent.unwrap_or(first)
    };

    // Shrink the candidate until it is a prefix of every remaining path.
    for path in resolved {
        while !path.starts_with(&common) {
            // `pop` returns false once there is no parent left to fall back to.
            if !common.pop() {
                return PathBuf::new();
            }
        }
    }

    common
}
45
/// Main valknut analysis engine.
///
/// Owns the analysis pipeline together with a shared handle to the
/// configuration that the pipeline was constructed from.
pub struct ValknutEngine {
    /// Internal analysis pipeline that all `analyze_*` methods delegate to.
    pipeline: AnalysisPipeline,

    /// Engine configuration, exposed read-only through `config()`.
    config: Arc<ValknutConfig>,
}
54
55/// Factory and analysis methods for [`ValknutEngine`].
56impl ValknutEngine {
57    /// Create a new valknut engine with the given configuration
58    pub async fn new(config: ApiAnalysisConfig) -> Result<Self> {
59        info!("Initializing Valknut analysis engine");
60
61        // Convert high-level config to internal config
62        let internal_config = config.to_valknut_config();
63
64        // Validate configuration
65        internal_config.validate()?;
66
67        let config_arc = Arc::new(internal_config.clone());
68        let analysis_config = PipelineAnalysisConfig::from(internal_config.clone());
69        let pipeline = AnalysisPipeline::new_with_config(analysis_config, internal_config);
70
71        // TODO: Register feature extractors based on enabled languages
72        // For now, we'll create a minimal setup
73
74        // Check if pipeline needs fitting with training data
75        // For this initial implementation, we'll skip the training phase
76        // and rely on default configurations
77
78        info!("Valknut engine initialized successfully");
79
80        Ok(Self {
81            pipeline,
82            config: config_arc,
83        })
84    }
85
86    /// Create a new engine directly from a fully-populated ValknutConfig.
87    ///
88    /// This avoids lossy round-trips through the public API config when we need
89    /// to preserve advanced settings like denoising and dedupe thresholds.
90    pub async fn new_from_valknut_config(valknut_config: ValknutConfig) -> Result<Self> {
91        info!("Initializing Valknut analysis engine (direct config)");
92
93        valknut_config.validate()?;
94
95        let config_arc = Arc::new(valknut_config.clone());
96        let analysis_config = PipelineAnalysisConfig::from(valknut_config.clone());
97        let pipeline = AnalysisPipeline::new_with_config(analysis_config, valknut_config);
98
99        info!("Valknut engine initialized successfully");
100
101        Ok(Self {
102            pipeline,
103            config: config_arc,
104        })
105    }
106
107    /// Analyze a directory of code files
108    pub async fn analyze_directory<P: AsRef<Path>>(&mut self, path: P) -> Result<AnalysisResults> {
109        let path = path.as_ref();
110        info!("Starting directory analysis: {}", path.display());
111
112        // Verify path exists
113        if !path.exists() {
114            return Err(ValknutError::io(
115                format!("Path does not exist: {}", path.display()),
116                std::io::Error::new(std::io::ErrorKind::NotFound, "Path not found"),
117            ));
118        }
119
120        if !path.is_dir() {
121            return Err(ValknutError::validation(format!(
122                "Path is not a directory: {}",
123                path.display()
124            )));
125        }
126
127        // Run the pipeline
128        let pipeline_results = self.pipeline.analyze_directory(path).await?;
129
130        // Convert to public API format with the directory as project root
131        let project_root = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
132        let results = AnalysisResults::from_pipeline_results(pipeline_results, project_root);
133
134        info!(
135            "Directory analysis completed: {} files processed, {} entities analyzed",
136            results.files_analyzed(),
137            results.summary.entities_analyzed
138        );
139
140        Ok(results)
141    }
142
143    /// Analyze specific files
144    pub async fn analyze_files<P: AsRef<Path>>(&mut self, files: &[P]) -> Result<AnalysisResults> {
145        info!("Starting analysis of {} specific files", files.len());
146
147        if files.is_empty() {
148            return Ok(AnalysisResults::empty());
149        }
150
151        let paths: Vec<PathBuf> = files
152            .iter()
153            .map(|file| file.as_ref().to_path_buf())
154            .collect();
155
156        let comprehensive = self
157            .pipeline
158            .analyze_paths(&paths, None)
159            .await
160            .map_err(|err| {
161                ValknutError::pipeline("file_analysis", format!("File analysis failed: {}", err))
162            })?;
163
164        let pipeline_results = self.pipeline.wrap_results(comprehensive);
165
166        // Compute project root from common prefix of file paths
167        let project_root = compute_common_root(&paths);
168        Ok(AnalysisResults::from_pipeline_results(pipeline_results, project_root))
169    }
170
171    /// Analyze pre-extracted feature vectors (for testing and advanced usage)
172    pub async fn analyze_vectors(
173        &mut self,
174        vectors: Vec<FeatureVector>,
175    ) -> Result<AnalysisResults> {
176        info!("Analyzing {} pre-extracted feature vectors", vectors.len());
177
178        // Ensure pipeline is ready
179        if !vectors.is_empty() && !self.pipeline.is_ready() {
180            // Fit the pipeline with the provided vectors as training data
181            info!("Fitting pipeline with provided vectors");
182            self.pipeline.fit(&vectors).await?;
183        }
184
185        // Run analysis
186        let pipeline_results = self.pipeline.analyze_vectors(vectors).await?;
187
188        // Convert to public API format (no project root for vector-only analysis)
189        let results = AnalysisResults::from_pipeline_results(pipeline_results, PathBuf::new());
190
191        info!(
192            "Vector analysis completed: {} entities analyzed",
193            results.summary.entities_analyzed
194        );
195
196        Ok(results)
197    }
198
199    /// Get the current configuration
200    pub fn config(&self) -> &ValknutConfig {
201        &self.config
202    }
203
204    /// Get pipeline status information
205    pub fn get_status(&self) -> EngineStatus {
206        let pipeline_status = self.pipeline.get_status();
207
208        EngineStatus {
209            is_ready: pipeline_status.is_ready,
210            pipeline_fitted: self.pipeline.is_ready(),
211            configuration_valid: pipeline_status.config_valid,
212            issues: pipeline_status.issues,
213            supported_languages: self.get_supported_languages(),
214        }
215    }
216
217    /// Get list of supported languages based on configuration
218    fn get_supported_languages(&self) -> Vec<String> {
219        self.config
220            .languages
221            .iter()
222            .filter(|(_, config)| config.enabled)
223            .map(|(name, _)| name.clone())
224            .collect()
225    }
226
227    /// Check if the engine is ready for analysis
228    pub fn is_ready(&self) -> bool {
229        self.pipeline.is_ready()
230    }
231
232    /// Perform a health check of the engine
233    pub async fn health_check(&self) -> HealthCheckResult {
234        let mut checks = Vec::new();
235        let mut overall_status = true;
236
237        // Check configuration validity
238        let config_check = self.check_configuration();
239        if config_check.status == HealthCheckStatus::Failed {
240            overall_status = false;
241        }
242        checks.push(config_check);
243
244        // Check pipeline status
245        let pipeline_check = self.check_pipeline();
246        if pipeline_check.status == HealthCheckStatus::Failed {
247            overall_status = false;
248        }
249        checks.push(pipeline_check);
250
251        // Check feature extractors
252        checks.push(self.check_feature_extractors());
253
254        // Check supported languages
255        checks.push(self.check_language_support());
256
257        HealthCheckResult {
258            overall_status,
259            checks,
260            timestamp: chrono::Utc::now(),
261        }
262    }
263
264    /// Check configuration validity.
265    fn check_configuration(&self) -> HealthCheck {
266        match self.config.validate() {
267            Ok(_) => HealthCheck::passed("Configuration"),
268            Err(e) => HealthCheck::failed("Configuration", e.to_string()),
269        }
270    }
271
272    /// Check pipeline status.
273    fn check_pipeline(&self) -> HealthCheck {
274        let status = self.pipeline.get_status();
275        if status.ready {
276            HealthCheck::passed("Pipeline")
277        } else {
278            HealthCheck::failed("Pipeline", status.issues.join("; "))
279        }
280    }
281
282    /// Check feature extractors availability.
283    fn check_feature_extractors(&self) -> HealthCheck {
284        let count = self.pipeline.extractor_registry().get_all_extractors().count();
285        if count > 0 {
286            HealthCheck::passed_with_message("Feature Extractors", format!("{} extractors available", count))
287        } else {
288            HealthCheck::warning("Feature Extractors", "No feature extractors registered")
289        }
290    }
291
292    /// Check language support.
293    fn check_language_support(&self) -> HealthCheck {
294        let languages = self.get_supported_languages();
295        if languages.is_empty() {
296            HealthCheck::warning("Language Support", "No languages enabled")
297        } else {
298            HealthCheck::passed_with_message("Language Support", format!("Languages: {}", languages.join(", ")))
299        }
300    }
301}
302
/// Status information about the analysis engine.
///
/// A plain-data snapshot: it can be cloned and compared so callers can keep
/// a copy and detect changes between two status queries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EngineStatus {
    /// Whether the engine is ready for analysis
    pub is_ready: bool,

    /// Whether the pipeline has been fitted
    pub pipeline_fitted: bool,

    /// Whether the configuration is valid
    pub configuration_valid: bool,

    /// List of issues preventing readiness
    pub issues: Vec<String>,

    /// List of supported languages
    pub supported_languages: Vec<String>,
}
321
/// Result of an engine health check, as produced by `ValknutEngine::health_check`.
#[derive(Debug)]
pub struct HealthCheckResult {
    /// Overall health status; `health_check` sets this to `false` when the
    /// configuration check or the pipeline check failed
    pub overall_status: bool,

    /// Individual health checks, in the order they were run
    pub checks: Vec<HealthCheck>,

    /// Timestamp of the check (UTC)
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
334
/// Individual health check result for one engine component.
#[derive(Debug)]
pub struct HealthCheck {
    /// Name of the component being checked (e.g. "Configuration", "Pipeline")
    pub name: String,

    /// Status of this check
    pub status: HealthCheckStatus,

    /// Optional message with details; `None` for a plain pass
    pub message: Option<String>,
}
347
348/// Factory methods for [`HealthCheck`].
349impl HealthCheck {
350    /// Create a passed health check.
351    fn passed(name: &str) -> Self {
352        Self {
353            name: name.to_string(),
354            status: HealthCheckStatus::Passed,
355            message: None,
356        }
357    }
358
359    /// Create a passed health check with a message.
360    fn passed_with_message(name: &str, message: String) -> Self {
361        Self {
362            name: name.to_string(),
363            status: HealthCheckStatus::Passed,
364            message: Some(message),
365        }
366    }
367
368    /// Create a failed health check.
369    fn failed(name: &str, message: String) -> Self {
370        Self {
371            name: name.to_string(),
372            status: HealthCheckStatus::Failed,
373            message: Some(message),
374        }
375    }
376
377    /// Create a warning health check.
378    fn warning(name: &str, message: &str) -> Self {
379        Self {
380            name: name.to_string(),
381            status: HealthCheckStatus::Warning,
382            message: Some(message.to_string()),
383        }
384    }
385}
386
/// Health check status.
///
/// A field-less enum, so it is `Copy`: a status can be read out of a borrowed
/// [`HealthCheck`] without moving or borrowing the whole check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthCheckStatus {
    /// Check passed successfully
    Passed,

    /// Check failed
    Failed,

    /// Check passed with warnings
    Warning,
}
399
/// Unit tests for engine construction, the analysis entry points, and the
/// status / health-check reporting types.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::config_types::AnalysisConfig;
    use tempfile::TempDir;

    #[tokio::test]
    async fn test_engine_creation() {
        let config = AnalysisConfig::default();
        let result = ValknutEngine::new(config).await;
        assert!(result.is_ok());

        let engine = result.unwrap();
        assert!(!engine.get_supported_languages().is_empty());
    }

    #[tokio::test]
    async fn test_analyze_nonexistent_directory() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        let result = engine.analyze_directory("/nonexistent/path").await;

        // A missing path must be reported as an I/O error
        assert!(
            matches!(result, Err(ValknutError::Io { .. })),
            "Expected Io error"
        );
    }

    #[tokio::test]
    async fn test_analyze_empty_directory() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        // Create temporary empty directory
        let temp_dir = TempDir::new().unwrap();

        let result = engine.analyze_directory(temp_dir.path()).await;
        assert!(result.is_ok());

        let results = result.unwrap();
        println!(
            "Files processed: {}, entities analyzed: {}",
            results.summary.files_processed, results.summary.entities_analyzed
        );
        // The processed-file count is only printed, not asserted; an empty
        // directory must not produce any analyzed entities.
        assert_eq!(results.summary.entities_analyzed, 0);
    }

    #[tokio::test]
    async fn test_analyze_vectors() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        // Create test vectors
        let mut vectors = vec![FeatureVector::new("entity1"), FeatureVector::new("entity2")];

        vectors[0].add_feature("complexity", 2.0);
        vectors[1].add_feature("complexity", 8.0);

        let result = engine.analyze_vectors(vectors).await;
        assert!(result.is_ok());

        let results = result.unwrap();
        println!(
            "Vector test - entities analyzed: {}",
            results.summary.entities_analyzed
        );
        // The exact entity count depends on pipeline implementation details,
        // so this test only checks that the analysis succeeds.
    }

    #[tokio::test]
    async fn test_health_check() {
        let config = AnalysisConfig::default();
        let engine = ValknutEngine::new(config).await.unwrap();

        let health = engine.health_check().await;

        // Should have at least configuration and pipeline checks
        assert!(!health.checks.is_empty());

        // Find configuration check
        let config_check = health
            .checks
            .iter()
            .find(|c| c.name == "Configuration")
            .expect("Configuration check should be present");
        assert_eq!(config_check.status, HealthCheckStatus::Passed);
    }

    #[tokio::test]
    async fn test_engine_status() {
        let config = AnalysisConfig::default();
        let engine = ValknutEngine::new(config).await.unwrap();

        let status = engine.get_status();
        assert!(!status.supported_languages.is_empty());
        assert!(status.configuration_valid);
    }

    #[tokio::test]
    async fn test_analyze_file_not_directory() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        // Create temporary file (not directory)
        let temp_dir = TempDir::new().unwrap();
        let temp_file = temp_dir.path().join("test.txt");
        std::fs::write(&temp_file, "test content").unwrap();

        let result = engine.analyze_directory(&temp_file).await;

        // An existing path that is not a directory is a validation error
        assert!(
            matches!(result, Err(ValknutError::Validation { .. })),
            "Expected Validation error for non-directory path"
        );
    }

    #[tokio::test]
    async fn test_analyze_files_empty_list() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        let empty_files: Vec<&str> = vec![];
        let result = engine.analyze_files(&empty_files).await;
        assert!(result.is_ok());

        let results = result.unwrap();
        assert_eq!(results.summary.files_processed, 0);
        assert_eq!(results.summary.entities_analyzed, 0);
        assert_eq!(results.summary.refactoring_needed, 0);
        assert_eq!(results.summary.high_priority, 0);
        assert_eq!(results.summary.critical, 0);
        assert_eq!(results.summary.avg_refactoring_score, 0.0);
        assert_eq!(results.summary.code_health_score, 1.0);
        assert!(results.refactoring_candidates.is_empty());
        assert!(results.warnings.is_empty());
    }

    #[tokio::test]
    async fn test_analyze_files_with_parent_directory() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        // Create temporary file
        let temp_dir = TempDir::new().unwrap();
        let temp_file = temp_dir.path().join("test.py");
        std::fs::write(&temp_file, "def hello(): pass").unwrap();

        let files = vec![temp_file.as_path()];
        let result = engine.analyze_files(&files).await;
        // Analyzing an existing file inside a real directory should succeed
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_analyze_files_no_parent_directory() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        // Try to analyze a relative path with no parent directory
        let files = vec![std::path::Path::new("file_with_no_parent.rs")];
        let result = engine.analyze_files(&files).await;
        assert!(result.is_ok());

        let results = result.unwrap();
        assert_eq!(results.summary.files_processed, 0);
        assert_eq!(results.summary.entities_analyzed, 0);
    }

    #[tokio::test]
    async fn test_analyze_vectors_empty() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        let empty_vectors = vec![];
        let result = engine.analyze_vectors(empty_vectors).await;
        assert!(result.is_ok());

        let results = result.unwrap();
        assert_eq!(results.summary.entities_analyzed, 0);
    }

    #[tokio::test]
    async fn test_analyze_vectors_with_multiple_features() {
        let config = AnalysisConfig::default();
        let mut engine = ValknutEngine::new(config).await.unwrap();

        let mut vectors = vec![FeatureVector::new("complex_entity")];
        vectors[0].add_feature("complexity", 10.0);
        vectors[0].add_feature("maintainability", 0.3);
        vectors[0].add_feature("duplication", 5.0);

        let result = engine.analyze_vectors(vectors).await;
        // Only success is checked: the entity count is implementation-defined
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_config_access() {
        let original_config = AnalysisConfig::default()
            .with_confidence_threshold(0.85)
            .with_max_files(100);
        let engine = ValknutEngine::new(original_config).await.unwrap();

        let engine_config = engine.config();
        assert_eq!(engine_config.analysis.confidence_threshold, 0.85);
        assert_eq!(engine_config.analysis.max_files, 100);
    }

    #[tokio::test]
    async fn test_is_ready() {
        let config = AnalysisConfig::default();
        let engine = ValknutEngine::new(config).await.unwrap();

        // Readiness depends on the pipeline implementation, so this only
        // checks that the query itself does not panic.
        let _ = engine.is_ready();
    }

    #[tokio::test]
    async fn test_get_supported_languages() {
        let config = AnalysisConfig::default()
            .with_languages(vec!["python".to_string(), "javascript".to_string()]);
        let engine = ValknutEngine::new(config).await.unwrap();

        let languages = engine.get_supported_languages();
        // At least one language must be enabled after configuring an explicit list
        assert!(!languages.is_empty());
    }

    #[tokio::test]
    async fn test_health_check_comprehensive() {
        let config = AnalysisConfig::default();
        let engine = ValknutEngine::new(config).await.unwrap();

        let health = engine.health_check().await;

        // Should have several checks
        assert!(health.checks.len() >= 4);

        // Check for expected components
        let check_names: Vec<&str> = health.checks.iter().map(|c| c.name.as_str()).collect();
        assert!(check_names.contains(&"Configuration"));
        assert!(check_names.contains(&"Pipeline"));
        assert!(check_names.contains(&"Feature Extractors"));
        assert!(check_names.contains(&"Language Support"));

        // Timestamp should be recent
        let now = chrono::Utc::now();
        let check_time = health.timestamp;
        let diff = now - check_time;
        assert!(diff.num_seconds() < 10); // Should be within 10 seconds
    }

    #[test]
    fn test_engine_status_debug() {
        let status = EngineStatus {
            is_ready: true,
            pipeline_fitted: false,
            configuration_valid: true,
            issues: vec!["test issue".to_string()],
            supported_languages: vec!["python".to_string(), "rust".to_string()],
        };

        let debug_str = format!("{:?}", status);
        assert!(debug_str.contains("is_ready: true"));
        assert!(debug_str.contains("pipeline_fitted: false"));
        assert!(debug_str.contains("test issue"));
        assert!(debug_str.contains("python"));
        assert!(debug_str.contains("rust"));
    }

    #[test]
    fn test_health_check_result_debug() {
        let result = HealthCheckResult {
            overall_status: true,
            checks: vec![HealthCheck {
                name: "Test".to_string(),
                status: HealthCheckStatus::Passed,
                message: Some("All good".to_string()),
            }],
            timestamp: chrono::Utc::now(),
        };

        let debug_str = format!("{:?}", result);
        assert!(debug_str.contains("overall_status: true"));
        assert!(debug_str.contains("Test"));
        assert!(debug_str.contains("Passed"));
        assert!(debug_str.contains("All good"));
    }

    #[test]
    fn test_health_check_status_equality() {
        assert_eq!(HealthCheckStatus::Passed, HealthCheckStatus::Passed);
        assert_eq!(HealthCheckStatus::Failed, HealthCheckStatus::Failed);
        assert_eq!(HealthCheckStatus::Warning, HealthCheckStatus::Warning);
        assert_ne!(HealthCheckStatus::Passed, HealthCheckStatus::Failed);
        assert_ne!(HealthCheckStatus::Warning, HealthCheckStatus::Passed);
    }

    #[test]
    fn test_health_check_debug() {
        let check = HealthCheck {
            name: "Test Component".to_string(),
            status: HealthCheckStatus::Warning,
            message: Some("Minor issue detected".to_string()),
        };

        let debug_str = format!("{:?}", check);
        assert!(debug_str.contains("Test Component"));
        assert!(debug_str.contains("Warning"));
        assert!(debug_str.contains("Minor issue detected"));
    }

    #[test]
    fn test_health_check_no_message() {
        let check = HealthCheck {
            name: "Silent Check".to_string(),
            status: HealthCheckStatus::Passed,
            message: None,
        };

        let debug_str = format!("{:?}", check);
        assert!(debug_str.contains("Silent Check"));
        assert!(debug_str.contains("Passed"));
        assert!(debug_str.contains("None"));
    }
}