scribe_scanner/
lib.rs

1//! # Scribe Scanner
2//! 
3//! High-performance file system scanning and indexing capabilities for the Scribe library.
4//! This crate provides efficient tools for discovering, filtering, and analyzing files
5//! in large codebases with git integration and parallel processing.
6//!
7//! ## Features
8//!
9//! - **Fast Repository Traversal**: Efficient file discovery using `walkdir` and `ignore`
10//! - **Git Integration**: Prefer `git ls-files` when available, with fallback to filesystem walk
11//! - **Language Detection**: Automatic detection for 25+ programming languages
12//! - **Content Analysis**: Extract imports, documentation structure, and metadata
13//! - **Parallel Processing**: Memory-efficient parallel file processing using Rayon
14//! - **Binary Detection**: Smart binary file detection using content analysis
15//!
16//! ## Usage
17//!
18//! ```rust
19//! use scribe_scanner::{Scanner, ScanOptions};
20//! use std::path::Path;
21//!
22//! # async fn example() -> scribe_core::Result<()> {
23//! let scanner = Scanner::new();
24//! let options = ScanOptions::default()
25//!     .with_git_integration(true)
26//!     .with_parallel_processing(true);
27//!
28//! let results = scanner.scan(Path::new("."), options).await?;
29//! println!("Scanned {} files", results.len());
30//! # Ok(())
31//! # }
32//! ```
33
34// Core modules
35pub mod scanner;
36pub mod metadata;
37pub mod content;
38pub mod git_integration;
39pub mod language_detection;
40
41// Re-export main types for convenience
42pub use scanner::{Scanner, ScanOptions, ScanResult, ScanProgress};
43pub use metadata::{FileMetadata, MetadataExtractor, SizeStats};
44pub use content::{ContentAnalyzer, ImportInfo, DocumentationInfo, ContentStats};
45pub use git_integration::{GitIntegrator, GitFileInfo, GitCommitInfo};
46pub use language_detection::{LanguageDetector, LanguageHints, DetectionStrategy};
47
48use scribe_core::{Result, FileInfo};
49use std::path::Path;
50
/// Current version of the scanner crate.
///
/// The value is read from the `CARGO_PKG_VERSION` environment variable at
/// compile time via `env!`, so it is baked into the binary as a string literal.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
53
/// High-level scanner facade providing convenient access to all scanning functionality.
///
/// Bundles the individual scanning components behind a single type. Instances
/// are created with `FileScanner::new` and can optionally be given git support
/// through `FileScanner::with_git_integration`.
pub struct FileScanner {
    /// Underlying scanner; the `scan_*` methods delegate to it and `get_stats`
    /// reads its counters.
    scanner: Scanner,
    /// Metadata extractor created in `new`.
    // NOTE(review): not referenced by any method visible in this file — confirm it is still needed.
    metadata_extractor: MetadataExtractor,
    /// Content analyzer created in `new`.
    // NOTE(review): not referenced by any method visible in this file — confirm it is still needed.
    content_analyzer: ContentAnalyzer,
    /// Git integrator; `None` until `with_git_integration` succeeds.
    git_integrator: Option<GitIntegrator>,
    /// Language detector created in `new`.
    // NOTE(review): not referenced by any method visible in this file — confirm it is still needed.
    language_detector: LanguageDetector,
}
62
63impl FileScanner {
64    /// Create a new file scanner with default configuration
65    pub fn new() -> Self {
66        Self {
67            scanner: Scanner::new(),
68            metadata_extractor: MetadataExtractor::new(),
69            content_analyzer: ContentAnalyzer::new(),
70            git_integrator: None,
71            language_detector: LanguageDetector::new(),
72        }
73    }
74
75    /// Enable git integration for enhanced file discovery
76    pub fn with_git_integration(mut self, repo_path: &Path) -> Result<Self> {
77        self.git_integrator = Some(GitIntegrator::new(repo_path)?);
78        Ok(self)
79    }
80
81    /// Scan a directory with comprehensive analysis
82    pub async fn scan_comprehensive<P: AsRef<Path>>(&self, path: P) -> Result<Vec<FileInfo>> {
83        let options = ScanOptions::default()
84            .with_metadata_extraction(true)
85            .with_content_analysis(true)
86            .with_git_integration(self.git_integrator.is_some())
87            .with_parallel_processing(true);
88        
89        self.scanner.scan(path, options).await
90    }
91
92    /// Quick scan without full content analysis
93    pub async fn scan_fast<P: AsRef<Path>>(&self, path: P) -> Result<Vec<FileInfo>> {
94        let options = ScanOptions::default()
95            .with_metadata_extraction(true)
96            .with_parallel_processing(true);
97        
98        self.scanner.scan(path, options).await
99    }
100
101    /// Get detailed statistics about the scanning process
102    pub fn get_stats(&self) -> ScannerStats {
103        ScannerStats {
104            files_processed: self.scanner.files_processed(),
105            directories_traversed: self.scanner.directories_traversed(),
106            binary_files_skipped: self.scanner.binary_files_skipped(),
107            git_files_discovered: self.git_integrator.as_ref().map(|g| g.files_discovered()).unwrap_or(0),
108        }
109    }
110}
111
112impl Default for FileScanner {
113    fn default() -> Self {
114        Self::new()
115    }
116}
117
/// Statistics about the scanning process.
///
/// A plain snapshot of counters. It derives `Default` (all counters zero) and
/// `PartialEq`/`Eq` so snapshots can be created and compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ScannerStats {
    /// Number of files processed, as reported by the scanner.
    pub files_processed: usize,
    /// Number of directories traversed, as reported by the scanner.
    pub directories_traversed: usize,
    /// Number of binary files skipped, as reported by the scanner.
    pub binary_files_skipped: usize,
    /// Number of files discovered through git; `0` when git integration is not enabled.
    pub git_files_discovered: usize,
}
126
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A freshly constructed scanner reports zero processed files.
    #[tokio::test]
    async fn test_scanner_creation() {
        let file_scanner = FileScanner::new();
        assert_eq!(file_scanner.get_stats().files_processed, 0);
    }

    /// A fast scan of a temp directory holding one Rust file finds that file.
    #[tokio::test]
    async fn test_fast_scan() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let rust_file = root.join("test.rs");
        fs::write(&rust_file, "fn main() {}").unwrap();

        let file_scanner = FileScanner::new();
        let found = file_scanner.scan_fast(root).await.unwrap();

        assert!(!found.is_empty());
        let has_test_file = found
            .iter()
            .any(|entry| entry.path.file_name().unwrap() == "test.rs");
        assert!(has_test_file);
    }

    /// Every counter in the stats snapshot starts at zero.
    #[test]
    fn test_scanner_stats() {
        let file_scanner = FileScanner::new();
        let ScannerStats {
            files_processed,
            directories_traversed,
            binary_files_skipped,
            git_files_discovered,
        } = file_scanner.get_stats();

        assert_eq!(files_processed, 0);
        assert_eq!(directories_traversed, 0);
        assert_eq!(binary_files_skipped, 0);
        assert_eq!(git_files_discovered, 0);
    }
}
163}