scribe/lib.rs

#![cfg_attr(not(tarpaulin), warn(warnings))]
#![cfg_attr(tarpaulin, allow(warnings))]

//! # Scribe - Advanced Code Analysis Library
//!
//! Scribe is a comprehensive Rust library for code analysis, repository exploration,
//! and intelligent file processing. It provides powerful tools for understanding
//! codebases through heuristic scoring, graph analysis, and AI-powered insights.
//!
//! ## Features
//!
//! - **🔍 Intelligent File Analysis**: Multi-dimensional heuristic scoring system
//! - **📊 Dependency Graph Analysis**: PageRank centrality for code importance
//! - **⚡ High-Performance Scanning**: Parallel file system traversal with git integration
//! - **🎯 Advanced Pattern Matching**: Flexible glob and gitignore pattern support
//! - **🧠 Smart Code Selection**: Context-aware code bundling and relevance scoring
//! - **🛠️ Extensible Architecture**: Plugin system for custom analyzers and scorers
//!
//! ## Quick Start
//!
//! Add this to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! scribe = "0.1.0"
//! ```
//!
//! ### Basic Usage
//!
//! ```rust,no_run
//! use scribe_analyzer::prelude::*;
//! use std::path::Path;
//!
//! # async fn example() -> scribe_analyzer::Result<()> {
//! // Configure analysis
//! let config = Config::default();
//! let repo_path = Path::new(".");
//!
//! // Quick analysis - get most important files
//! let important_files = scribe_analyzer::analyze_repository(repo_path, &config).await?;
//!
//! println!("Top 10 most important files:");
//! for (file, score) in important_files.top_files(10) {
//!     println!("  {}: {:.3}", file, score);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ### Feature-Specific Usage
//!
//! ```rust,no_run
//! // For minimal installations with selective features
//! use scribe_analyzer::core::{Config, FileInfo};
//! use scribe_analyzer::scanner::{Scanner, ScanOptions};
//!
//! # async fn selective_example() -> scribe_analyzer::Result<()> {
//! let scanner = Scanner::new();
//! let options = ScanOptions::default();
//! let files = scanner.scan(".", options).await?;
//! println!("Found {} files", files.len());
//! # Ok(())
//! # }
//! ```
//!
//! ## Feature Flags
//!
//! Scribe uses feature flags to allow selective compilation:
//!
//! - **`default`**: Includes `core`, `analysis`, `graph`, `scanner`, `patterns`, `selection`
//! - **`core`**: Essential types, traits, and utilities (always recommended)
//! - **`analysis`**: Heuristic scoring and code analysis algorithms
//! - **`graph`**: PageRank centrality and dependency graph analysis
//! - **`scanner`**: High-performance file system scanning with git integration
//! - **`patterns`**: Flexible pattern matching (glob, gitignore)
//! - **`selection`**: Intelligent code selection and context extraction
//!
//! ### Feature Groups
//!
//! - **`minimal`**: Just `core` functionality
//! - **`fast`**: Core + scanning and patterns for quick file operations
//! - **`comprehensive`**: All features (same as default)
//! - **`full`**: Alias for default
//!
//! ### Selective Installation Examples
//!
//! ```toml
//! # Minimal installation
//! scribe = { version = "0.1.0", default-features = false, features = ["core"] }
//!
//! # Fast file operations only
//! scribe = { version = "0.1.0", default-features = false, features = ["fast"] }
//!
//! # Analysis without graph features
//! scribe = { version = "0.1.0", default-features = false, features = ["core", "analysis", "scanner"] }
//! ```
//!
//! ## Architecture
//!
//! Scribe is built with a modular architecture:
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────────────┐
//! │                              scribe                               │
//! │  ┌───────────────┐ ┌────────────────┐ ┌────────────────────────┐ │
//! │  │  scribe-core  │ │ scribe-scanner │ │    scribe-patterns     │ │
//! │  │ (types,       │ │ (file system   │ │ (glob, gitignore,      │ │
//! │  │  traits,      │ │  traversal,    │ │  pattern matching)     │ │
//! │  │  utilities)   │ │  git support)  │ │                        │ │
//! │  └───────────────┘ └────────────────┘ └────────────────────────┘ │
//! │  ┌───────────────┐ ┌────────────────┐ ┌────────────────────────┐ │
//! │  │scribe-analysis│ │  scribe-graph  │ │    scribe-selection    │ │
//! │  │ (heuristic    │ │  (PageRank     │ │ (intelligent bundling, │ │
//! │  │  scoring,     │ │   centrality,  │ │  context extraction,   │ │
//! │  │  code metrics)│ │   dependency   │ │  relevance scoring)    │ │
//! │  │               │ │   analysis)    │ │                        │ │
//! │  └───────────────┘ └────────────────┘ └────────────────────────┘ │
//! └──────────────────────────────────────────────────────────────────┘
//! ```
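//!
//! Each sub-crate is re-exported both as a module of this facade crate and, for its
//! main types, at the top level, so the following two paths name the same type. A
//! small sketch, assuming the `scanner` feature is enabled:
//!
//! ```rust,no_run
//! // Via the module re-export...
//! use scribe_analyzer::scanner::Scanner;
//!
//! let scanner: Scanner = Scanner::new();
//! // ...and the same type via the flattened top-level re-export.
//! let _same: scribe_analyzer::Scanner = scanner;
//! ```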

// Pipeline orchestration and report generation modules (always available)
pub mod pipeline;
pub mod report;

pub use pipeline::{
    analyze_and_select, select_from_analysis, AnalysisOutcome, SelectionOptions, SelectionOutcome,
};

pub use report::{
    format_bytes, format_number, format_timestamp, generate_cxml_output, generate_html_output,
    generate_json_output, generate_markdown_output, generate_repomix_output, generate_report,
    generate_text_output, generate_xml_output, get_file_icon, ReportFile, ReportFormat,
    SelectionMetrics,
};

#[cfg(feature = "core")]
pub use scribe_core as core;

#[cfg(feature = "core")]
pub use scribe_core::{
    // Essential types
    Config, FileInfo, FileType, HeuristicWeights, Language, Result, ScoreComponents, ScribeError,
    // Version and meta information
    meta,
    VERSION as CORE_VERSION,
};

// Analysis functionality
#[cfg(feature = "analysis")]
pub use scribe_analysis as analysis;

#[cfg(feature = "analysis")]
pub use scribe_analysis::{
    DocumentAnalysis, HeuristicScorer, HeuristicSystem, ImportGraph, ImportGraphBuilder,
    TemplateDetector,
};

// Graph analysis functionality
#[cfg(feature = "graph")]
pub use scribe_graph as graph;

#[cfg(feature = "graph")]
pub use scribe_graph::{
    CentralityCalculator,
    CentralityResults,
    DependencyGraph,
    GraphStatistics,
    PageRankAnalysis,
    PageRankAnalysis as GraphAnalysis, // Alias for convenience
    PageRankResults,
};

// Scanner functionality
#[cfg(feature = "scanner")]
pub use scribe_scanner as scanner;

#[cfg(feature = "scanner")]
pub use scribe_scanner::{
    FileScanner, LanguageDetector, ScanOptions, ScanResult, Scanner, ScannerStats,
};

// Pattern matching functionality
#[cfg(feature = "patterns")]
pub use scribe_patterns as patterns;

#[cfg(feature = "patterns")]
pub use scribe_patterns::{
    presets, GitignoreMatcher, GlobMatcher, PatternBuilder, PatternMatcher, PatternMatcherBuilder,
    QuickMatcher,
};

// Selection functionality
#[cfg(feature = "selection")]
pub use scribe_selection as selection;

#[cfg(feature = "selection")]
pub use scribe_selection::{
    apply_token_budget_selection, CodeBundle, CodeBundler, CodeContext, CodeSelector,
    ContextExtractor, ContextFile, QuotaManager, SelectionEngine, TwoPassSelector,
};

/// Current version of the main Scribe library
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

/// High-level repository analysis results
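///
/// A typical read-out of an analysis (a sketch; the score threshold used below is
/// illustrative):
///
/// ```rust,no_run
/// # async fn example() -> scribe_analyzer::Result<()> {
/// let config = scribe_analyzer::Config::default();
/// let analysis = scribe_analyzer::analyze_repository(".", &config).await?;
///
/// println!("{}", analysis.summary());
/// println!("{} files analyzed", analysis.file_count());
/// for (file, score) in analysis.files_above_threshold(0.5) {
///     println!("  {}: {:.3}", file, score);
/// }
/// # Ok(())
/// # }
/// ```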
#[cfg(all(feature = "analysis", feature = "scanner"))]
#[derive(Debug, Clone)]
pub struct RepositoryAnalysis {
    /// All scanned files with metadata
    pub files: Vec<FileInfo>,
    /// Heuristic scores for each file
    pub heuristic_scores: std::collections::HashMap<String, f64>,
    /// Graph centrality scores (if graph feature enabled)
    #[cfg(feature = "graph")]
    pub centrality_scores: Option<std::collections::HashMap<String, f64>>,
    /// Combined final scores
    pub final_scores: std::collections::HashMap<String, f64>,
    /// Analysis metadata
    pub metadata: AnalysisMetadata,
}

#[cfg(all(feature = "analysis", feature = "scanner"))]
impl RepositoryAnalysis {
    /// Get the top N files by score
    pub fn top_files(&self, n: usize) -> Vec<(&str, f64)> {
        let mut scored: Vec<_> = self
            .final_scores
            .iter()
            .map(|(path, score)| (path.as_str(), *score))
            .collect();

        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored.into_iter().take(n).collect()
    }

    /// Get files above a certain score threshold
    pub fn files_above_threshold(&self, threshold: f64) -> Vec<(&str, f64)> {
        self.final_scores
            .iter()
            .filter(|(_, score)| **score >= threshold)
            .map(|(path, score)| (path.as_str(), *score))
            .collect()
    }

    /// Get total number of analyzed files
    pub fn file_count(&self) -> usize {
        self.files.len()
    }

    /// Get analysis summary statistics
    pub fn summary(&self) -> String {
        // Guard against an empty score map so the summary never reports NaN.
        let avg_score = if self.final_scores.is_empty() {
            0.0
        } else {
            self.final_scores.values().sum::<f64>() / self.final_scores.len() as f64
        };
        let top_file = self
            .top_files(1)
            .get(0)
            .map(|(path, score)| format!("{} ({:.3})", path, score))
            .unwrap_or_else(|| "None".to_string());

        format!(
            "Repository Analysis Summary:\n\
             - Files analyzed: {}\n\
             - Average score: {:.3}\n\
             - Top file: {}\n\
             - Scribe version: {}",
            self.file_count(),
            avg_score,
            top_file,
            self.metadata.scribe_version
        )
    }
}

/// Convenience function for quick repository analysis
///
/// This function performs a complete repository analysis with the supplied
/// configuration (plus a few built-in performance optimizations) and returns
/// scored files ranked by importance.
///
/// # Example
///
/// ```rust,no_run
/// # async fn example() -> scribe_analyzer::Result<()> {
/// let config = scribe_analyzer::Config::default();
/// let analysis = scribe_analyzer::analyze_repository(".", &config).await?;
///
/// println!("Analysis: {}", analysis.summary());
/// for (file, score) in analysis.top_files(5) {
///     println!("  {}: {:.3}", file, score);
/// }
/// # Ok(())
/// # }
/// ```
#[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
pub async fn analyze_repository<P: AsRef<std::path::Path>>(
    path: P,
    config: &Config,
) -> Result<RepositoryAnalysis> {

    // Apply default performance tuning for faster analysis
    let mut optimized_config = config.clone();

    // Tune PerformanceConfig for maximum parallel throughput
    optimized_config.performance.batch_size = 20; // Smaller batches = faster tail latency
    optimized_config.performance.use_mmap = true; // Memory mapping for large files
    optimized_config.performance.io_buffer_size = 512 * 1024; // 512KB buffers

    // Enable caching and tuned scoring defaults
    optimized_config.analysis.enable_caching = true;

    // When available, leverage the scaling engine for large repositories
    #[cfg(feature = "scaling")]
    {
        use scribe_scaling::{create_scaling_engine, quick_scale_estimate};

        match quick_scale_estimate(path.as_ref()).await {
            Ok((file_count, estimated_duration, _memory_usage)) => {
                if std::env::var("SCRIBE_DEBUG").is_ok() {
                    eprintln!(
                        "Scaling estimate: {} files, {:?} duration",
                        file_count, estimated_duration
                    );
                }

                if (file_count > 50 || estimated_duration.as_secs() > 2)
                    && config.features.scaling_enabled
                {
                    match create_scaling_engine(path.as_ref()).await {
                        Ok(mut scaling_engine) => {
                            if std::env::var("SCRIBE_DEBUG").is_ok() {
                                eprintln!("Scaling engine created, processing repository...");
                            }

                            // Use scaling engine's optimized processing
                            match scaling_engine.process_repository(path.as_ref()).await {
                                Ok(processing_result) => {
                                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                                        eprintln!(
                                            "Scaling processing complete: {} files processed in {:?}",
                                            processing_result.total_files,
                                            processing_result.processing_time
                                        );
                                    }

                                    return convert_scaling_result_to_analysis(
                                        processing_result,
                                        optimized_config,
                                        path.as_ref(),
                                    )
                                    .await;
                                }
                                Err(e) => {
                                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                                        eprintln!(
                                            "Scaling engine processing failed: {}, falling back",
                                            e
                                        );
                                    }
                                }
                            }
                        }
                        Err(e) => {
                            if std::env::var("SCRIBE_DEBUG").is_ok() {
                                eprintln!("Failed to create scaling engine: {}, falling back", e);
                            }
                        }
                    }
                } else if file_count > 50 || estimated_duration.as_secs() > 2 {
                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                        eprintln!("Large repo detected but scaling disabled, using optimized basic scanner");
                    }
                } else {
                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                        eprintln!("Small repo detected, using optimized basic scanner");
                    }
                }
            }
            Err(e) => {
                if std::env::var("SCRIBE_DEBUG").is_ok() {
                    eprintln!("Scaling estimate failed: {}, falling back", e);
                }
            }
        }
    }

    // Fall back to the optimized scanning pipeline (small repositories, scaling
    // disabled or unavailable, or a scaling-engine failure).
    fallback_scan(path, &optimized_config).await
}

async fn fallback_scan<P: AsRef<std::path::Path>>(
    path: P,
    config: &Config,
) -> Result<RepositoryAnalysis> {
    let repo_path = path.as_ref();
    let start_time = std::time::Instant::now();

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!("🔄 Using fallback scanner with optimized config");
    }

    let scanner = Scanner::new();
    let scan_options = ScanOptions::default()
        .with_git_integration(true)
        .with_parallel_processing(true);

    let mut files = scanner.scan(repo_path, scan_options).await?;

    if config.features.auto_exclude_tests {
        let original_count = files.len();
        files.retain(|file| !scribe_core::file::is_test_path(&file.path));
        if std::env::var("SCRIBE_DEBUG").is_ok() && files.len() != original_count {
            eprintln!(
                "Auto-excluded {} test files, {} files remaining",
                original_count - files.len(),
                files.len()
            );
        }
    }

    if let Some(token_budget) = config.analysis.token_budget {
        if std::env::var("SCRIBE_DEBUG").is_ok() {
            eprintln!("🎯 Applying token budget: {} tokens", token_budget);
        }
        files = apply_token_budget_selection(files, token_budget, config).await?;
        if std::env::var("SCRIBE_DEBUG").is_ok() {
            eprintln!("✅ Token budget applied: {} files selected", files.len());
        }
    }

    let analysis = build_repository_analysis(files, config, &["optimized_scanner"])?;

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!(
            "📊 Completed fallback analysis in {:?} ({} files)",
            start_time.elapsed(),
            analysis.files.len()
        );
    }

    Ok(analysis)
}

#[derive(Debug, Clone)]
struct AnalyzerContext {
    imports: Vec<String>,
    doc_analysis: Option<DocumentAnalysis>,
    has_examples: bool,
    is_entrypoint: bool,
    priority_boost: f64,
    content: Option<String>,
}

#[derive(Debug, Clone)]
struct AnalyzerFile {
    path: String,
    relative_path: String,
    depth: usize,
    is_docs: bool,
    is_readme: bool,
    is_test: bool,
    is_entrypoint: bool,
    has_examples: bool,
    priority_boost: f64,
    churn_score: f64,
    centrality_score: f64,
    imports: Vec<String>,
    doc_analysis: Option<DocumentAnalysis>,
}

impl AnalyzerFile {
    fn from_file_info(file: &FileInfo, context: &AnalyzerContext) -> Self {
        let path_string = file.path.to_string_lossy().to_string();
        let relative = if file.relative_path.is_empty() {
            path_string.clone()
        } else {
            file.relative_path.clone()
        };
        let normalized_path = relative.replace('\\', "/");
        let depth = normalized_path.matches('/').count();
        let is_docs = matches!(file.file_type, FileType::Documentation { .. });
        let is_readme = normalized_path.to_lowercase().contains("readme");
        let is_test = matches!(file.file_type, FileType::Test { .. })
            || scribe_core::file::is_test_path(&file.path);

        Self {
            path: path_string,
            relative_path: normalized_path,
            depth,
            is_docs,
            is_readme,
            is_test,
            is_entrypoint: context.is_entrypoint,
            has_examples: context.has_examples,
            priority_boost: context.priority_boost.min(1.0),
            churn_score: compute_churn_score(file),
            centrality_score: 0.0,
            imports: context.imports.clone(),
            doc_analysis: context.doc_analysis.clone(),
        }
    }
}

impl scribe_analysis::heuristics::ScanResult for AnalyzerFile {
    fn path(&self) -> &str {
        &self.path
    }

    fn relative_path(&self) -> &str {
        &self.relative_path
    }

    fn depth(&self) -> usize {
        self.depth
    }

    fn is_docs(&self) -> bool {
        self.is_docs
    }

    fn is_readme(&self) -> bool {
        self.is_readme
    }

    fn is_test(&self) -> bool {
        self.is_test
    }

    fn is_entrypoint(&self) -> bool {
        self.is_entrypoint
    }

    fn has_examples(&self) -> bool {
        self.has_examples
    }

    fn priority_boost(&self) -> f64 {
        self.priority_boost
    }

    fn churn_score(&self) -> f64 {
        self.churn_score
    }

    fn centrality_in(&self) -> f64 {
        self.centrality_score
    }

    fn imports(&self) -> Option<&[String]> {
        if self.imports.is_empty() {
            None
        } else {
            Some(&self.imports)
        }
    }

    fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
        self.doc_analysis.as_ref()
    }
}

fn build_repository_analysis(
    mut files: Vec<FileInfo>,
    config: &Config,
    additional_features: &[&str],
) -> Result<RepositoryAnalysis> {
    use std::collections::{HashMap, HashSet};

    let contexts: Vec<AnalyzerContext> = files
        .iter()
        .map(|file| derive_file_context(file, config))
        .collect();

    let mut analyzer_files: Vec<AnalyzerFile> = files
        .iter()
        .zip(contexts.iter())
        .map(|(file, context)| AnalyzerFile::from_file_info(file, context))
        .collect();

    // Centrality is only computed when the `graph` feature is enabled; the
    // corresponding struct field is cfg-gated to match.
    #[cfg(feature = "graph")]
    let centrality_scores = compute_centrality_scores(&analyzer_files);

    #[cfg(feature = "graph")]
    if let Some(ref centrality) = centrality_scores {
        for analyzer in analyzer_files.iter_mut() {
            if let Some(score) = centrality.get(&analyzer.path) {
                analyzer.centrality_score = *score;
            }
        }

        for file in files.iter_mut() {
            let key = file.path.to_string_lossy().to_string();
            if let Some(score) = centrality.get(&key) {
                file.centrality_score = Some(*score);
            }
        }
    }

    let mut heuristic_scores = HashMap::with_capacity(analyzer_files.len());
    let mut scoring_system = HeuristicSystem::with_v2_features()?;
    let scored_files = scoring_system.score_all_files(&analyzer_files)?;
    for (idx, components) in scored_files {
        let key = analyzer_files[idx].path.clone();
        heuristic_scores.insert(key, components.final_score);
    }

    let final_scores = heuristic_scores.clone();

    let mut features: HashSet<String> = HashSet::new();
    features.insert("heuristic_scoring".to_string());
    #[cfg(feature = "graph")]
    if centrality_scores.is_some() {
        features.insert("centrality_analysis".to_string());
    }
    for feature in additional_features {
        features.insert((*feature).to_string());
    }

    let mut features_enabled: Vec<String> = features.into_iter().collect();
    features_enabled.sort();

    let metadata = AnalysisMetadata {
        timestamp: std::time::SystemTime::now(),
        scribe_version: VERSION.to_string(),
        config_hash: Some(config.compute_hash()),
        features_enabled,
    };

    Ok(RepositoryAnalysis {
        files,
        heuristic_scores,
        #[cfg(feature = "graph")]
        centrality_scores,
        final_scores,
        metadata,
    })
}

fn derive_file_context(file: &FileInfo, config: &Config) -> AnalyzerContext {
    let mut imports = Vec::new();
    let mut doc_analysis = None;
    let mut has_examples = file.relative_path.to_lowercase().contains("example");
    let mut is_entrypoint = scribe_core::file::is_entrypoint_path(&file.path, &file.language);
    let priority_boost = compute_priority_boost(file);
    let mut cached_content: Option<String> = None;

    if should_load_content(file, config) {
        if let Ok(content) = std::fs::read_to_string(&file.path) {
            if matches!(file.file_type, FileType::Documentation { .. }) {
                doc_analysis = Some(analyze_document_content(&content));
            }

            if !has_examples {
                has_examples = content.contains("Example") || content.contains("example");
            }

            if matches!(
                file.language,
                Language::Rust
                    | Language::Python
                    | Language::JavaScript
                    | Language::TypeScript
                    | Language::Go
            ) {
                imports = extract_imports(&content, &file.language);
            }

            if !is_entrypoint {
                is_entrypoint = detect_entrypoint_from_content(&content, &file.language);
            }

            cached_content = Some(content);
        }
    }

    AnalyzerContext {
        imports,
        doc_analysis,
        has_examples,
        is_entrypoint,
        priority_boost,
        content: cached_content,
    }
}

fn should_load_content(file: &FileInfo, config: &Config) -> bool {
    if !config.analysis.analyze_content || file.is_binary {
        return false;
    }

    let size_limit = std::cmp::max(config.performance.io_buffer_size as u64, 256 * 1024);
    file.size <= size_limit
}

fn compute_priority_boost(file: &FileInfo) -> f64 {
    let path_lower = file.relative_path.to_lowercase();
    let mut boost: f64 = 0.0;

    if path_lower.ends_with("readme.md") || path_lower.ends_with("readme") {
        boost += 0.4;
    }
    if path_lower.ends_with("cargo.toml")
        || path_lower.ends_with("package.json")
        || path_lower.ends_with("requirements.txt")
        || path_lower.ends_with("pyproject.toml")
    {
        boost += 0.25;
    }
    if path_lower.ends_with("main.rs")
        || path_lower.ends_with("main.py")
        || path_lower.ends_with("main.go")
        || path_lower.ends_with("index.js")
        || path_lower.ends_with("index.ts")
    {
        boost += 0.3;
    }
    if path_lower.ends_with("lib.rs") {
        boost += 0.2;
    }
    if path_lower.ends_with("build.rs") || path_lower.ends_with("setup.py") {
        boost += 0.15;
    }

    boost.min(1.0)
}

fn detect_entrypoint_from_content(content: &str, language: &Language) -> bool {
    match language {
        Language::Rust => content.contains("fn main("),
        Language::Python => content.contains("__name__ == \"__main__\""),
        Language::JavaScript | Language::TypeScript => {
            content.contains("module.exports") || content.contains("export default")
        }
        Language::Go => content.contains("func main("),
        Language::Java => content.contains("public static void main("),
        _ => false,
    }
}

fn extract_imports(content: &str, language: &Language) -> Vec<String> {
    use std::collections::HashSet;

    let mut imports = HashSet::new();

    match language {
        Language::Rust => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("use ") {
                    let statement = trimmed
                        .trim_start_matches("use ")
                        .trim_end_matches(';')
                        .split_whitespace()
                        .next()
                        .unwrap_or_default()
                        .trim_end_matches("::");
                    if !statement.is_empty() {
                        imports.insert(statement.to_string());
                    }
                } else if trimmed.starts_with("mod ") {
                    let module = trimmed
                        .trim_start_matches("mod ")
                        .trim_end_matches(';')
                        .trim();
                    if !module.is_empty() {
                        imports.insert(module.to_string());
                    }
                }
            }
        }
        Language::Python => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("import ") {
                    for module in trimmed.trim_start_matches("import ").split(',') {
                        let module = module.trim().split_whitespace().next().unwrap_or("");
                        if !module.is_empty() {
                            imports.insert(module.to_string());
                        }
                    }
                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
                    let module = trimmed
                        .trim_start_matches("from ")
                        .split(" import ")
                        .next()
                        .unwrap_or("")
                        .trim();
                    if !module.is_empty() {
                        imports.insert(module.to_string());
                    }
                }
            }
        }
        Language::JavaScript | Language::TypeScript => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("import ") {
                    if let Some(start) = trimmed.find("\"") {
                        if let Some(end) = trimmed[start + 1..].find('"') {
                            imports.insert(trimmed[start + 1..start + 1 + end].to_string());
                        }
                    } else if let Some(start) = trimmed.find('\'') {
                        if let Some(end) = trimmed[start + 1..].find('\'') {
                            imports.insert(trimmed[start + 1..start + 1 + end].to_string());
                        }
                    }
                } else if trimmed.contains("require(") {
                    if let Some(start) = trimmed.find("require(") {
                        let start = start + "require(".len();
                        let slice = &trimmed[start..];
                        if let Some(end_idx) = slice.find(')') {
                            let inner = &slice[..end_idx];
                            let inner = inner.trim_matches(&['\'', '"'][..]);
                            if !inner.is_empty() {
                                imports.insert(inner.to_string());
                            }
                        }
                    }
                }
            }
        }
        Language::Go => {
            let mut in_block = false;
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed == "import (" {
                    in_block = true;
                    continue;
                }
                if in_block {
                    if trimmed == ")" {
                        in_block = false;
                        continue;
                    }
                    let import_path = trimmed.trim_matches(&['"', '`'][..]);
                    if !import_path.is_empty() {
                        imports.insert(import_path.to_string());
                    }
                } else if trimmed.starts_with("import ") {
                    let import_path = trimmed
                        .trim_start_matches("import ")
                        .trim_matches(&['"', '`'][..]);
                    if !import_path.is_empty() {
                        imports.insert(import_path.to_string());
                    }
                }
            }
        }
        _ => {}
    }

    let mut ordered: Vec<String> = imports.into_iter().collect();
    ordered.sort();
    ordered.truncate(64);
    ordered
}

fn analyze_document_content(content: &str) -> DocumentAnalysis {
    let mut analysis = DocumentAnalysis::new();
    let mut in_code_block = false;

    for line in content.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("```") {
            if !in_code_block {
                analysis.code_block_count += 1;
            }
            in_code_block = !in_code_block;
            continue;
        }

        if trimmed.starts_with('#') {
            analysis.heading_count += 1;
            if trimmed.to_lowercase().contains("table of contents") {
                analysis.toc_indicators += 1;
            }
        }

        if trimmed.contains("](") {
            analysis.link_count += trimmed.matches("](").count();
        }
    }

    analysis.is_well_structured = analysis.heading_count > 0 && analysis.link_count > 0;
    analysis
}

fn compute_churn_score(file: &FileInfo) -> f64 {
    use scribe_core::GitFileStatus;

    match &file.git_status {
        Some(status) => match status.working_tree {
            GitFileStatus::Modified => 0.6,
            GitFileStatus::Added => 0.8,
            GitFileStatus::Deleted => 0.4,
            GitFileStatus::Renamed => 0.5,
            GitFileStatus::Copied => 0.45,
            GitFileStatus::Unmerged => 0.9,
            GitFileStatus::Untracked => 0.3,
            _ => 0.1,
        },
        None => 0.0,
    }
}

fn language_from_identifier(language: &str, path: &std::path::Path) -> Language {
    if !language.is_empty() {
        match language.to_lowercase().as_str() {
            "rust" => return Language::Rust,
            "python" => return Language::Python,
            "javascript" => return Language::JavaScript,
            "typescript" => return Language::TypeScript,
            "go" => return Language::Go,
            "java" => return Language::Java,
            "c" => return Language::C,
            "cpp" | "c++" => return Language::Cpp,
            "kotlin" => return Language::Kotlin,
            "swift" => return Language::Swift,
            "php" => return Language::PHP,
            "ruby" => return Language::Ruby,
            _ => {}
        }
    }

    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    Language::from_extension(extension)
}

#[cfg(feature = "graph")]
fn compute_centrality_scores(
    analyzer_files: &[AnalyzerFile],
) -> Option<std::collections::HashMap<String, f64>> {
    use scribe_graph::CentralityCalculator;

    if analyzer_files.is_empty() {
        return Some(std::collections::HashMap::new());
    }

    let calculator = CentralityCalculator::new().ok()?;
    let results = calculator.calculate_centrality(analyzer_files).ok()?;
    Some(results.pagerank_scores.into_iter().collect())
}

#[cfg(feature = "scaling")]
async fn convert_scaling_result_to_analysis(
    processing_result: scribe_scaling::ProcessingResult,
    config: Config,
    repo_root: &std::path::Path,
) -> Result<RepositoryAnalysis> {
    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!("🔄 Converting scaling result to repository analysis format");
    }

    let mut files: Vec<FileInfo> = Vec::with_capacity(processing_result.files.len());

    for file_meta in processing_result.files {
        let mut absolute_path = file_meta.path.clone();
        if !absolute_path.is_absolute() {
            absolute_path = repo_root.join(absolute_path);
        }

        let relative_path = absolute_path
            .strip_prefix(repo_root)
            .map(|p| p.to_string_lossy().replace('\\', "/"))
            .unwrap_or_else(|_| absolute_path.to_string_lossy().replace('\\', "/"));

        let extension = absolute_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");

        let language = language_from_identifier(&file_meta.language, &absolute_path);
        let file_type = FileInfo::classify_file_type(&relative_path, &language, extension);

        files.push(FileInfo {
            path: absolute_path,
            relative_path,
            size: file_meta.size,
            modified: Some(file_meta.modified),
            decision: scribe_core::RenderDecision::include("scaling_engine"),
            file_type,
            language,
            content: None,
            token_estimate: None,
            line_count: None,
            char_count: None,
            is_binary: false,
            git_status: None,
            centrality_score: None,
        });
    }

    let analysis = build_repository_analysis(
        files,
        &config,
        &[
            "scaling_engine",
            "progressive_loading",
            "optimized_processing",
        ],
    )?;

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!(
            "📈 Scaling analysis processed {} files in {:?} (cache hits {}, misses {})",
            analysis.files.len(),
            processing_result.processing_time,
            processing_result.cache_hits,
            processing_result.cache_misses
        );
    }

    Ok(analysis)
}

/// Convenience function for fast file scanning without deep analysis
///
/// This is useful when you just need to discover files quickly without
/// computing complex heuristic scores.
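///
/// A minimal usage sketch (the include/exclude patterns here are illustrative):
///
/// ```rust,no_run
/// # async fn example() -> scribe_analyzer::Result<()> {
/// let files = scribe_analyzer::scan_repository(
///     ".",
///     Some(&["**/*.rs"]),
///     Some(&["**/target/**"]),
/// )
/// .await?;
/// println!("Found {} files", files.len());
/// # Ok(())
/// # }
/// ```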
#[cfg(all(feature = "scanner", feature = "patterns"))]
pub async fn scan_repository<P: AsRef<std::path::Path>>(
    path: P,
    include_patterns: Option<&[&str]>,
    exclude_patterns: Option<&[&str]>,
) -> Result<Vec<FileInfo>> {
    let scanner = Scanner::new();
    let options = ScanOptions::default()
        .with_git_integration(true)
        .with_parallel_processing(true);

    // Validate patterns if provided; wiring the matcher into `ScanOptions` still
    // needs proper integration (see the commented-out call below).
    if let (Some(includes), Some(excludes)) = (include_patterns, exclude_patterns) {
        let _matcher = QuickMatcher::new(includes, excludes)?;
        // options = options.with_pattern_matcher(_matcher);
    }

    scanner.scan(path, options).await
}

/// Prelude module for convenient imports
///
/// This module re-exports the most commonly used types and functions
/// to provide a convenient single import for typical usage.
///
/// # Example
///
/// ```rust
/// use scribe_analyzer::prelude::*;
///
/// // Now you have access to:
/// // - Result, ScribeError
/// // - Config, FileInfo
/// // - analyze_repository function
/// // - Scanner, PatternMatcher
/// // - And other commonly used types
/// ```
pub mod prelude {
    //! Commonly used imports for Scribe applications

    #[cfg(feature = "core")]
    pub use crate::core::{
        Config, FileInfo, FileType, HeuristicWeights, Language, Result, ScoreComponents,
        ScribeError, VERSION as CORE_VERSION,
    };

    #[cfg(feature = "analysis")]
    pub use crate::analysis::{HeuristicScorer, HeuristicSystem};

    #[cfg(feature = "scanner")]
    pub use crate::scanner::{FileScanner, ScanOptions, Scanner};

    #[cfg(feature = "patterns")]
    pub use crate::patterns::{presets, PatternMatcher, PatternMatcherBuilder, QuickMatcher};

    #[cfg(feature = "graph")]
    pub use crate::graph::{CentralityCalculator, PageRankAnalysis};

    #[cfg(feature = "selection")]
    pub use crate::selection::{CodeSelector, SelectionEngine};

    // High-level functions
    #[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
    pub use crate::{analyze_repository, RepositoryAnalysis};

    #[cfg(all(feature = "scanner", feature = "patterns"))]
    pub use crate::scan_repository;

    pub use crate::VERSION;
}

/// Utility functions for common operations
pub mod utils {
    #[cfg(feature = "core")]
    pub use crate::core::utils::*;

    #[cfg(feature = "patterns")]
    pub use crate::patterns::utils as pattern_utils;

    #[cfg(feature = "graph")]
    pub use crate::graph::utils as graph_utils;
}

// Re-export the main AnalysisMetadata type if available
#[cfg(feature = "core")]
pub use crate::core::AnalysisMetadata;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_version() {
        assert!(!VERSION.is_empty());
    }

    #[cfg(feature = "core")]
    #[test]
    fn test_core_reexport() {
        let config = Config::default();
        assert!(config.validate().is_ok());
    }

    #[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
    #[tokio::test]
    async fn test_repository_analysis_interface() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.rs");
        fs::write(&test_file, "fn main() { println!(\"Hello world\"); }").unwrap();

        let config = Config::default();
        let result = analyze_repository(temp_dir.path(), &config).await;

        // Should succeed or fail gracefully
        match result {
            Ok(analysis) => {
                assert!(analysis.file_count() > 0);
                assert!(!analysis.summary().is_empty());
            }
            Err(_) => {
                // Analysis might fail in test environment, which is acceptable
                // as long as the interface compiles correctly
            }
        }
    }

    #[cfg(all(feature = "scanner", feature = "patterns"))]
    #[tokio::test]
    async fn test_scan_repository_interface() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.rs");
        fs::write(&test_file, "fn main() {}").unwrap();

        let result =
            scan_repository(temp_dir.path(), Some(&["**/*.rs"]), Some(&["**/target/**"])).await;

        // Should find the test file
        match result {
            Ok(files) => {
                assert!(!files.is_empty());
                assert!(files
                    .iter()
                    .any(|f| f.path.file_name().unwrap() == "test.rs"));
            }
            Err(_) => {
                // Scan might fail in test environment, which is acceptable
            }
        }
    }

    #[cfg(feature = "core")]
    #[test]
    fn test_prelude_imports() {
        use crate::prelude::*;

        // Test that basic types are available
        let config = Config::default();
        assert!(config.validate().is_ok());

        // Test that version is available
        assert!(!VERSION.is_empty());
    }
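
    // A small sanity check of the Rust branch of `extract_imports` above; the
    // expected entries follow directly from its parsing rules (trailing `;` and
    // `::` are trimmed, results are sorted). The sample snippet is illustrative.
    #[cfg(feature = "core")]
    #[test]
    fn test_extract_imports_rust() {
        let content = "use std::collections::HashMap;\nmod pipeline;\nfn main() {}\n";
        let imports = extract_imports(content, &Language::Rust);

        assert!(imports.contains(&"std::collections::HashMap".to_string()));
        assert!(imports.contains(&"pipeline".to_string()));
    }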
}