#![cfg_attr(not(tarpaulin), warn(warnings))]
#![cfg_attr(tarpaulin, allow(warnings))]

pub mod pipeline;
pub mod report;

pub use pipeline::{
    analyze_and_select, select_from_analysis, AnalysisOutcome, SelectionOptions, SelectionOutcome,
};

pub use report::{
    format_bytes, format_number, format_timestamp, generate_cxml_output, generate_html_output,
    generate_json_output, generate_markdown_output, generate_repomix_output, generate_report,
    generate_text_output, generate_xml_output, get_file_icon, ReportFile, ReportFormat,
    SelectionMetrics,
};

#[cfg(feature = "core")]
pub use scribe_core as core;

#[cfg(feature = "core")]
pub use scribe_core::{
    meta, Config, FileInfo, FileType, HeuristicWeights, Language, Result, ScoreComponents,
    ScribeError, VERSION as CORE_VERSION,
};

#[cfg(feature = "analysis")]
pub use scribe_analysis as analysis;

#[cfg(feature = "analysis")]
pub use scribe_analysis::{
    DocumentAnalysis, HeuristicScorer, HeuristicSystem, ImportGraph, ImportGraphBuilder,
    TemplateDetector,
};

#[cfg(feature = "graph")]
pub use scribe_graph as graph;

#[cfg(feature = "graph")]
pub use scribe_graph::{
    CentralityCalculator, CentralityResults, DependencyGraph, GraphStatistics, PageRankAnalysis,
    PageRankAnalysis as GraphAnalysis, PageRankResults,
};

#[cfg(feature = "scanner")]
pub use scribe_scanner as scanner;

#[cfg(feature = "scanner")]
pub use scribe_scanner::{
    FileScanner, LanguageDetector, ScanOptions, ScanResult, Scanner, ScannerStats,
};

#[cfg(feature = "patterns")]
pub use scribe_patterns as patterns;

#[cfg(feature = "patterns")]
pub use scribe_patterns::{
    presets, GitignoreMatcher, GlobMatcher, PatternBuilder, PatternMatcher, PatternMatcherBuilder,
    QuickMatcher,
};

#[cfg(feature = "selection")]
pub use scribe_selection as selection;

#[cfg(feature = "selection")]
pub use scribe_selection::{
    apply_token_budget_selection, CodeBundle, CodeBundler, CodeContext, CodeSelector,
    ContextExtractor, ContextFile, QuotaManager, SelectionEngine, TwoPassSelector,
};

pub const VERSION: &str = env!("CARGO_PKG_VERSION");

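/// Aggregated output of a full repository analysis: the scanned files plus
/// per-file heuristic scores, optional centrality scores (with the `graph`
/// feature), and the combined final scores, all keyed by file path.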
#[cfg(all(feature = "analysis", feature = "scanner"))]
#[derive(Debug, Clone)]
pub struct RepositoryAnalysis {
    pub files: Vec<FileInfo>,
    pub heuristic_scores: std::collections::HashMap<String, f64>,
    #[cfg(feature = "graph")]
    pub centrality_scores: Option<std::collections::HashMap<String, f64>>,
    pub final_scores: std::collections::HashMap<String, f64>,
    pub metadata: AnalysisMetadata,
}

#[cfg(all(feature = "analysis", feature = "scanner"))]
impl RepositoryAnalysis {
    pub fn top_files(&self, n: usize) -> Vec<(&str, f64)> {
        let mut scored: Vec<_> = self
            .final_scores
            .iter()
            .map(|(path, score)| (path.as_str(), *score))
            .collect();

        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored.into_iter().take(n).collect()
    }

    pub fn files_above_threshold(&self, threshold: f64) -> Vec<(&str, f64)> {
        self.final_scores
            .iter()
            .filter(|(_, score)| **score >= threshold)
            .map(|(path, score)| (path.as_str(), *score))
            .collect()
    }

    pub fn file_count(&self) -> usize {
        self.files.len()
    }

    pub fn summary(&self) -> String {
        // Guard against an empty score map so the average never becomes NaN.
        let avg_score = if self.final_scores.is_empty() {
            0.0
        } else {
            self.final_scores.values().sum::<f64>() / self.final_scores.len() as f64
        };
        let top_file = self
            .top_files(1)
            .first()
            .map(|(path, score)| format!("{} ({:.3})", path, score))
            .unwrap_or_else(|| "None".to_string());

        format!(
            "Repository Analysis Summary:\n\
             - Files analyzed: {}\n\
             - Average score: {:.3}\n\
             - Top file: {}\n\
             - Scribe version: {}",
            self.file_count(),
            avg_score,
            top_file,
            self.metadata.scribe_version
        )
    }
}

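/// Scans and scores a repository in one pass. Large repositories are routed
/// through the scaling engine when both the `scaling` feature and the
/// `scaling_enabled` config flag are on; otherwise the optimized basic
/// scanner is used.
///
/// A minimal usage sketch (assumes this facade crate is named `scribe` and a
/// Tokio runtime is available):
///
/// ```ignore
/// # async fn run() -> scribe::Result<()> {
/// let config = scribe::Config::default();
/// let analysis = scribe::analyze_repository(".", &config).await?;
/// println!("{}", analysis.summary());
/// # Ok(())
/// # }
/// ```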
#[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
pub async fn analyze_repository<P: AsRef<std::path::Path>>(
    path: P,
    config: &Config,
) -> Result<RepositoryAnalysis> {
    // Tune performance-related defaults for bulk scanning before analysis.
    let mut optimized_config = config.clone();
    optimized_config.performance.batch_size = 20;
    optimized_config.performance.use_mmap = true;
    optimized_config.performance.io_buffer_size = 512 * 1024;
    optimized_config.analysis.enable_caching = true;

    #[cfg(feature = "scaling")]
    {
        use scribe_scaling::{create_scaling_engine, quick_scale_estimate};

        match quick_scale_estimate(path.as_ref()).await {
            Ok((file_count, estimated_duration, _memory_usage)) => {
                if std::env::var("SCRIBE_DEBUG").is_ok() {
                    eprintln!(
                        "Scaling estimate: {} files, {:?} duration",
                        file_count, estimated_duration
                    );
                }

                let is_large_repo = file_count > 50 || estimated_duration.as_secs() > 2;

                if is_large_repo && config.features.scaling_enabled {
                    match create_scaling_engine(path.as_ref()).await {
                        Ok(mut scaling_engine) => {
                            if std::env::var("SCRIBE_DEBUG").is_ok() {
                                eprintln!("Scaling engine created, processing repository...");
                            }

                            match scaling_engine.process_repository(path.as_ref()).await {
                                Ok(processing_result) => {
                                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                                        eprintln!(
                                            "Scaling processing complete: {} files processed in {:?}",
                                            processing_result.total_files,
                                            processing_result.processing_time
                                        );
                                    }

                                    return convert_scaling_result_to_analysis(
                                        processing_result,
                                        optimized_config,
                                        path.as_ref(),
                                    )
                                    .await;
                                }
                                Err(e) => {
                                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                                        eprintln!(
                                            "Scaling engine processing failed: {}, falling back",
                                            e
                                        );
                                    }
                                }
                            }
                        }
                        Err(e) => {
                            if std::env::var("SCRIBE_DEBUG").is_ok() {
                                eprintln!("Failed to create scaling engine: {}, falling back", e);
                            }
                        }
                    }
                } else if is_large_repo {
                    if std::env::var("SCRIBE_DEBUG").is_ok() {
                        eprintln!(
                            "Large repo detected but scaling disabled, using optimized basic scanner"
                        );
                    }
                } else if std::env::var("SCRIBE_DEBUG").is_ok() {
                    eprintln!("Small repo detected, using optimized basic scanner");
                }
            }
            Err(e) => {
                if std::env::var("SCRIBE_DEBUG").is_ok() {
                    eprintln!("Scaling estimate failed: {}, falling back", e);
                }
            }
        }
    }

    fallback_scan(path, &optimized_config).await
}

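/// Fallback path used when the scaling engine is disabled, unavailable, or
/// fails: a plain parallel scan followed by optional test-file exclusion and
/// token-budget selection, then heuristic scoring.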
async fn fallback_scan<P: AsRef<std::path::Path>>(
    path: P,
    config: &Config,
) -> Result<RepositoryAnalysis> {
    let repo_path = path.as_ref();
    let start_time = std::time::Instant::now();

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!("Using fallback scanner with optimized config");
    }

    let scanner = Scanner::new();
    let scan_options = ScanOptions::default()
        .with_git_integration(true)
        .with_parallel_processing(true);

    let mut files = scanner.scan(repo_path, scan_options).await?;

    if config.features.auto_exclude_tests {
        let original_count = files.len();
        files.retain(|file| !scribe_core::file::is_test_path(&file.path));
        if std::env::var("SCRIBE_DEBUG").is_ok() && files.len() != original_count {
            eprintln!(
                "Auto-excluded {} test files, {} files remaining",
                original_count - files.len(),
                files.len()
            );
        }
    }

    if let Some(token_budget) = config.analysis.token_budget {
        if std::env::var("SCRIBE_DEBUG").is_ok() {
            eprintln!("Applying token budget: {} tokens", token_budget);
        }
        files = apply_token_budget_selection(files, token_budget, config).await?;
        if std::env::var("SCRIBE_DEBUG").is_ok() {
            eprintln!("Token budget applied: {} files selected", files.len());
        }
    }

    let analysis = build_repository_analysis(files, config, &["optimized_scanner"])?;

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!(
            "Completed fallback analysis in {:?} ({} files)",
            start_time.elapsed(),
            analysis.files.len()
        );
    }

    Ok(analysis)
}

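/// Content-derived context for a single file: extracted imports, optional
/// documentation analysis, and entrypoint/example flags.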
#[derive(Debug, Clone)]
struct AnalyzerContext {
    imports: Vec<String>,
    doc_analysis: Option<DocumentAnalysis>,
    has_examples: bool,
    is_entrypoint: bool,
    priority_boost: f64,
    // Cached file content; currently unread, but kept so later passes can
    // reuse it without another disk read.
    #[allow(dead_code)]
    content: Option<String>,
}

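/// Flattened view of a `FileInfo` plus its `AnalyzerContext`, shaped to
/// implement `scribe_analysis::heuristics::ScanResult` for the heuristic
/// scorer.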
#[derive(Debug, Clone)]
struct AnalyzerFile {
    path: String,
    relative_path: String,
    depth: usize,
    is_docs: bool,
    is_readme: bool,
    is_test: bool,
    is_entrypoint: bool,
    has_examples: bool,
    priority_boost: f64,
    churn_score: f64,
    centrality_score: f64,
    imports: Vec<String>,
    doc_analysis: Option<DocumentAnalysis>,
}

impl AnalyzerFile {
    fn from_file_info(file: &FileInfo, context: &AnalyzerContext) -> Self {
        let path_string = file.path.to_string_lossy().to_string();
        let relative = if file.relative_path.is_empty() {
            path_string.clone()
        } else {
            file.relative_path.clone()
        };
        let normalized_path = relative.replace('\\', "/");
        let depth = normalized_path.matches('/').count();
        let is_docs = matches!(file.file_type, FileType::Documentation { .. });
        let is_readme = normalized_path.to_lowercase().contains("readme");
        let is_test = matches!(file.file_type, FileType::Test { .. })
            || scribe_core::file::is_test_path(&file.path);

        Self {
            path: path_string,
            relative_path: normalized_path,
            depth,
            is_docs,
            is_readme,
            is_test,
            is_entrypoint: context.is_entrypoint,
            has_examples: context.has_examples,
            priority_boost: context.priority_boost.min(1.0),
            churn_score: compute_churn_score(file),
            centrality_score: 0.0,
            imports: context.imports.clone(),
            doc_analysis: context.doc_analysis.clone(),
        }
    }
}

impl scribe_analysis::heuristics::ScanResult for AnalyzerFile {
    fn path(&self) -> &str {
        &self.path
    }

    fn relative_path(&self) -> &str {
        &self.relative_path
    }

    fn depth(&self) -> usize {
        self.depth
    }

    fn is_docs(&self) -> bool {
        self.is_docs
    }

    fn is_readme(&self) -> bool {
        self.is_readme
    }

    fn is_test(&self) -> bool {
        self.is_test
    }

    fn is_entrypoint(&self) -> bool {
        self.is_entrypoint
    }

    fn has_examples(&self) -> bool {
        self.has_examples
    }

    fn priority_boost(&self) -> f64 {
        self.priority_boost
    }

    fn churn_score(&self) -> f64 {
        self.churn_score
    }

    fn centrality_in(&self) -> f64 {
        self.centrality_score
    }

    fn imports(&self) -> Option<&[String]> {
        if self.imports.is_empty() {
            None
        } else {
            Some(&self.imports)
        }
    }

    fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
        self.doc_analysis.as_ref()
    }
}

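/// Core assembly step shared by all scan paths: derives per-file context,
/// runs heuristic scoring, merges centrality scores when the `graph` feature
/// is enabled, and attaches analysis metadata.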
fn build_repository_analysis(
    mut files: Vec<FileInfo>,
    config: &Config,
    additional_features: &[&str],
) -> Result<RepositoryAnalysis> {
    use std::collections::{HashMap, HashSet};

    let contexts: Vec<AnalyzerContext> = files
        .iter()
        .map(|file| derive_file_context(file, config))
        .collect();

    let mut analyzer_files: Vec<AnalyzerFile> = files
        .iter()
        .zip(contexts.iter())
        .map(|(file, context)| AnalyzerFile::from_file_info(file, context))
        .collect();

    #[cfg(feature = "graph")]
    let centrality_scores = compute_centrality_scores(&analyzer_files);

    #[cfg(feature = "graph")]
    if let Some(ref centrality) = centrality_scores {
        for analyzer in analyzer_files.iter_mut() {
            if let Some(score) = centrality.get(&analyzer.path) {
                analyzer.centrality_score = *score;
            }
        }

        for file in files.iter_mut() {
            let key = file.path.to_string_lossy().to_string();
            if let Some(score) = centrality.get(&key) {
                file.centrality_score = Some(*score);
            }
        }
    }

    let mut heuristic_scores = HashMap::with_capacity(analyzer_files.len());
    let mut scoring_system = HeuristicSystem::with_v2_features()?;
    let scored_files = scoring_system.score_all_files(&analyzer_files)?;
    for (idx, components) in scored_files {
        let key = analyzer_files[idx].path.clone();
        heuristic_scores.insert(key, components.final_score);
    }

    let final_scores = heuristic_scores.clone();

    let mut features: HashSet<String> = HashSet::new();
    features.insert("heuristic_scoring".to_string());
    #[cfg(feature = "graph")]
    if centrality_scores.is_some() {
        features.insert("centrality_analysis".to_string());
    }
    for feature in additional_features {
        features.insert((*feature).to_string());
    }

    let mut features_enabled: Vec<String> = features.into_iter().collect();
    features_enabled.sort();

    let metadata = AnalysisMetadata {
        timestamp: std::time::SystemTime::now(),
        scribe_version: VERSION.to_string(),
        config_hash: Some(config.compute_hash()),
        features_enabled,
    };

    Ok(RepositoryAnalysis {
        files,
        heuristic_scores,
        #[cfg(feature = "graph")]
        centrality_scores,
        final_scores,
        metadata,
    })
}

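/// Builds the `AnalyzerContext` for one file, reading its content when
/// permitted by `should_load_content` to extract imports, detect entrypoints
/// and examples, and analyze documentation structure.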
fn derive_file_context(file: &FileInfo, config: &Config) -> AnalyzerContext {
    let mut imports = Vec::new();
    let mut doc_analysis = None;
    let mut has_examples = file.relative_path.to_lowercase().contains("example");
    let mut is_entrypoint = scribe_core::file::is_entrypoint_path(&file.path, &file.language);
    let priority_boost = compute_priority_boost(file);
    let mut cached_content: Option<String> = None;

    if should_load_content(file, config) {
        if let Ok(content) = std::fs::read_to_string(&file.path) {
            if matches!(file.file_type, FileType::Documentation { .. }) {
                doc_analysis = Some(analyze_document_content(&content));
            }

            if !has_examples {
                has_examples = content.contains("Example") || content.contains("example");
            }

            if matches!(
                file.language,
                Language::Rust
                    | Language::Python
                    | Language::JavaScript
                    | Language::TypeScript
                    | Language::Go
            ) {
                imports = extract_imports(&content, &file.language);
            }

            if !is_entrypoint {
                is_entrypoint = detect_entrypoint_from_content(&content, &file.language);
            }

            cached_content = Some(content);
        }
    }

    AnalyzerContext {
        imports,
        doc_analysis,
        has_examples,
        is_entrypoint,
        priority_boost,
        content: cached_content,
    }
}

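/// Content is only loaded for non-binary files whose size is at most the
/// configured I/O buffer size, floored at 256 KiB.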
fn should_load_content(file: &FileInfo, config: &Config) -> bool {
    if !config.analysis.analyze_content || file.is_binary {
        return false;
    }

    let size_limit = std::cmp::max(config.performance.io_buffer_size as u64, 256 * 1024);
    file.size <= size_limit
}

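/// Path-based boost for well-known high-signal files (READMEs, manifests,
/// entrypoints, build scripts), capped at 1.0.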
fn compute_priority_boost(file: &FileInfo) -> f64 {
    let path_lower = file.relative_path.to_lowercase();
    let mut boost: f64 = 0.0;

    if path_lower.ends_with("readme.md") || path_lower.ends_with("readme") {
        boost += 0.4;
    }
    if path_lower.ends_with("cargo.toml")
        || path_lower.ends_with("package.json")
        || path_lower.ends_with("requirements.txt")
        || path_lower.ends_with("pyproject.toml")
    {
        boost += 0.25;
    }
    if path_lower.ends_with("main.rs")
        || path_lower.ends_with("main.py")
        || path_lower.ends_with("main.go")
        || path_lower.ends_with("index.js")
        || path_lower.ends_with("index.ts")
    {
        boost += 0.3;
    }
    if path_lower.ends_with("lib.rs") {
        boost += 0.2;
    }
    if path_lower.ends_with("build.rs") || path_lower.ends_with("setup.py") {
        boost += 0.15;
    }

    boost.min(1.0)
}

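/// Cheap textual entrypoint detection for files whose path alone was not
/// conclusive.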
fn detect_entrypoint_from_content(content: &str, language: &Language) -> bool {
    match language {
        Language::Rust => content.contains("fn main("),
        Language::Python => content.contains("__name__ == \"__main__\""),
        Language::JavaScript | Language::TypeScript => {
            content.contains("module.exports") || content.contains("export default")
        }
        Language::Go => content.contains("func main("),
        Language::Java => content.contains("public static void main("),
        _ => false,
    }
}

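/// Extracts a sorted, deduplicated import list (capped at 64 entries) via
/// line-oriented string matching; this intentionally avoids a full parser,
/// so multi-line or macro-generated imports are not recognized.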
fn extract_imports(content: &str, language: &Language) -> Vec<String> {
    use std::collections::HashSet;

    let mut imports = HashSet::new();

    match language {
        Language::Rust => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("use ") {
                    let statement = trimmed
                        .trim_start_matches("use ")
                        .trim_end_matches(';')
                        .split_whitespace()
                        .next()
                        .unwrap_or_default()
                        .trim_end_matches("::");
                    if !statement.is_empty() {
                        imports.insert(statement.to_string());
                    }
                } else if trimmed.starts_with("mod ") {
                    let module = trimmed
                        .trim_start_matches("mod ")
                        .trim_end_matches(';')
                        .trim();
                    if !module.is_empty() {
                        imports.insert(module.to_string());
                    }
                }
            }
        }
        Language::Python => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("import ") {
                    for module in trimmed.trim_start_matches("import ").split(',') {
                        let module = module.trim().split_whitespace().next().unwrap_or("");
                        if !module.is_empty() {
                            imports.insert(module.to_string());
                        }
                    }
                } else if trimmed.starts_with("from ") && trimmed.contains(" import ") {
                    let module = trimmed
                        .trim_start_matches("from ")
                        .split(" import ")
                        .next()
                        .unwrap_or("")
                        .trim();
                    if !module.is_empty() {
                        imports.insert(module.to_string());
                    }
                }
            }
        }
        Language::JavaScript | Language::TypeScript => {
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed.starts_with("import ") {
                    if let Some(start) = trimmed.find('"') {
                        if let Some(end) = trimmed[start + 1..].find('"') {
                            imports.insert(trimmed[start + 1..start + 1 + end].to_string());
                        }
                    } else if let Some(start) = trimmed.find('\'') {
                        if let Some(end) = trimmed[start + 1..].find('\'') {
                            imports.insert(trimmed[start + 1..start + 1 + end].to_string());
                        }
                    }
                } else if trimmed.contains("require(") {
                    if let Some(start) = trimmed.find("require(") {
                        let start = start + "require(".len();
                        let slice = &trimmed[start..];
                        if let Some(end_idx) = slice.find(')') {
                            let inner = &slice[..end_idx];
                            let inner = inner.trim_matches(&['\'', '"'][..]);
                            if !inner.is_empty() {
                                imports.insert(inner.to_string());
                            }
                        }
                    }
                }
            }
        }
        Language::Go => {
            let mut in_block = false;
            for line in content.lines() {
                let trimmed = line.trim();
                if trimmed == "import (" {
                    in_block = true;
                    continue;
                }
                if in_block {
                    if trimmed == ")" {
                        in_block = false;
                        continue;
                    }
                    let import_path = trimmed.trim_matches(&['"', '`'][..]);
                    if !import_path.is_empty() {
                        imports.insert(import_path.to_string());
                    }
                } else if trimmed.starts_with("import ") {
                    let import_path = trimmed
                        .trim_start_matches("import ")
                        .trim_matches(&['"', '`'][..]);
                    if !import_path.is_empty() {
                        imports.insert(import_path.to_string());
                    }
                }
            }
        }
        _ => {}
    }

    let mut ordered: Vec<String> = imports.into_iter().collect();
    ordered.sort();
    ordered.truncate(64);
    ordered
}

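/// Lightweight Markdown structure analysis: counts headings, fenced code
/// blocks, links, and table-of-contents indicators.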
fn analyze_document_content(content: &str) -> DocumentAnalysis {
    let mut analysis = DocumentAnalysis::new();
    let mut in_code_block = false;

    for line in content.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("```") {
            // Count each fenced block once, on its opening fence.
            if !in_code_block {
                analysis.code_block_count += 1;
            }
            in_code_block = !in_code_block;
            continue;
        }

        if trimmed.starts_with('#') {
            analysis.heading_count += 1;
            if trimmed.to_lowercase().contains("table of contents") {
                analysis.toc_indicators += 1;
            }
        }

        if trimmed.contains("](") {
            analysis.link_count += trimmed.matches("](").count();
        }
    }

    analysis.is_well_structured = analysis.heading_count > 0 && analysis.link_count > 0;
    analysis
}

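/// Maps git working-tree status onto a rough churn score in `[0.0, 1.0]`;
/// files without git status score 0.0.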
fn compute_churn_score(file: &FileInfo) -> f64 {
    use scribe_core::GitFileStatus;

    match &file.git_status {
        Some(status) => match status.working_tree {
            GitFileStatus::Modified => 0.6,
            GitFileStatus::Added => 0.8,
            GitFileStatus::Deleted => 0.4,
            GitFileStatus::Renamed => 0.5,
            GitFileStatus::Copied => 0.45,
            GitFileStatus::Unmerged => 0.9,
            GitFileStatus::Untracked => 0.3,
            _ => 0.1,
        },
        None => 0.0,
    }
}

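/// Resolves a `Language` from a string identifier, falling back to
/// extension-based detection for empty or unrecognized identifiers.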
fn language_from_identifier(language: &str, path: &std::path::Path) -> Language {
    if !language.is_empty() {
        match language.to_lowercase().as_str() {
            "rust" => return Language::Rust,
            "python" => return Language::Python,
            "javascript" => return Language::JavaScript,
            "typescript" => return Language::TypeScript,
            "go" => return Language::Go,
            "java" => return Language::Java,
            "c" => return Language::C,
            "cpp" | "c++" => return Language::Cpp,
            "kotlin" => return Language::Kotlin,
            "swift" => return Language::Swift,
            "php" => return Language::PHP,
            "ruby" => return Language::Ruby,
            _ => {}
        }
    }

    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    Language::from_extension(extension)
}

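/// Computes PageRank-based centrality over the import graph, returning
/// `None` if the calculator cannot be built or the calculation fails.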
#[cfg(feature = "graph")]
fn compute_centrality_scores(
    analyzer_files: &[AnalyzerFile],
) -> Option<std::collections::HashMap<String, f64>> {
    use scribe_graph::CentralityCalculator;

    if analyzer_files.is_empty() {
        return Some(std::collections::HashMap::new());
    }

    let calculator = CentralityCalculator::new().ok()?;
    let results = calculator.calculate_centrality(analyzer_files).ok()?;
    Some(results.pagerank_scores.into_iter().collect())
}

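/// Converts the scaling engine's `ProcessingResult` into the common
/// `RepositoryAnalysis` shape, rebuilding `FileInfo` records relative to the
/// repository root before scoring.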
#[cfg(feature = "scaling")]
async fn convert_scaling_result_to_analysis(
    processing_result: scribe_scaling::ProcessingResult,
    config: Config,
    repo_root: &std::path::Path,
) -> Result<RepositoryAnalysis> {
    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!("Converting scaling result to repository analysis format");
    }

    let mut files: Vec<FileInfo> = Vec::with_capacity(processing_result.files.len());

    for file_meta in processing_result.files {
        let mut absolute_path = file_meta.path.clone();
        if !absolute_path.is_absolute() {
            absolute_path = repo_root.join(absolute_path);
        }

        let relative_path = absolute_path
            .strip_prefix(repo_root)
            .map(|p| p.to_string_lossy().replace('\\', "/"))
            .unwrap_or_else(|_| absolute_path.to_string_lossy().replace('\\', "/"));

        let extension = absolute_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");

        let language = language_from_identifier(&file_meta.language, &absolute_path);
        let file_type = FileInfo::classify_file_type(&relative_path, &language, extension);

        files.push(FileInfo {
            path: absolute_path,
            relative_path,
            size: file_meta.size,
            modified: Some(file_meta.modified),
            decision: scribe_core::RenderDecision::include("scaling_engine"),
            file_type,
            language,
            content: None,
            token_estimate: None,
            line_count: None,
            char_count: None,
            is_binary: false,
            git_status: None,
            centrality_score: None,
        });
    }

    let analysis = build_repository_analysis(
        files,
        &config,
        &[
            "scaling_engine",
            "progressive_loading",
            "optimized_processing",
        ],
    )?;

    if std::env::var("SCRIBE_DEBUG").is_ok() {
        eprintln!(
            "Scaling analysis processed {} files in {:?} (cache hits {}, misses {})",
            analysis.files.len(),
            processing_result.processing_time,
            processing_result.cache_hits,
            processing_result.cache_misses
        );
    }

    Ok(analysis)
}

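/// Scans a repository with git integration and parallel processing enabled.
/// Include/exclude patterns are validated eagerly; the matcher itself is not
/// applied at scan time.
///
/// A minimal usage sketch (assumes this facade crate is named `scribe`):
///
/// ```ignore
/// # async fn run() -> scribe::Result<()> {
/// let files =
///     scribe::scan_repository(".", Some(&["**/*.rs"]), Some(&["**/target/**"])).await?;
/// println!("{} files found", files.len());
/// # Ok(())
/// # }
/// ```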
#[cfg(all(feature = "scanner", feature = "patterns"))]
pub async fn scan_repository<P: AsRef<std::path::Path>>(
    path: P,
    include_patterns: Option<&[&str]>,
    exclude_patterns: Option<&[&str]>,
) -> Result<Vec<FileInfo>> {
    let scanner = Scanner::new();
    let options = ScanOptions::default()
        .with_git_integration(true)
        .with_parallel_processing(true);

    if let (Some(includes), Some(excludes)) = (include_patterns, exclude_patterns) {
        // Validate the patterns eagerly so malformed globs fail fast; the
        // matcher is not otherwise used during the scan itself.
        let _matcher = QuickMatcher::new(includes, excludes)?;
    }

    scanner.scan(path, options).await
}

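/// Convenience re-exports of the most commonly used types and functions, so
/// callers can bring the working set into scope with a single glob import
/// (e.g. `use scribe::prelude::*;`, assuming this crate is named `scribe`).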
pub mod prelude {
    #[cfg(feature = "core")]
    pub use crate::core::{
        Config, FileInfo, FileType, HeuristicWeights, Language, Result, ScoreComponents,
        ScribeError, VERSION as CORE_VERSION,
    };

    #[cfg(feature = "analysis")]
    pub use crate::analysis::{HeuristicScorer, HeuristicSystem};

    #[cfg(feature = "scanner")]
    pub use crate::scanner::{FileScanner, ScanOptions, Scanner};

    #[cfg(feature = "patterns")]
    pub use crate::patterns::{presets, PatternMatcher, PatternMatcherBuilder, QuickMatcher};

    #[cfg(feature = "graph")]
    pub use crate::graph::{CentralityCalculator, PageRankAnalysis};

    #[cfg(feature = "selection")]
    pub use crate::selection::{CodeSelector, SelectionEngine};

    #[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
    pub use crate::{analyze_repository, RepositoryAnalysis};

    #[cfg(all(feature = "scanner", feature = "patterns"))]
    pub use crate::scan_repository;

    pub use crate::VERSION;
}

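/// Feature-gated re-exports of utility helpers from the underlying crates.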
pub mod utils {
    #[cfg(feature = "core")]
    pub use crate::core::utils::*;

    #[cfg(feature = "patterns")]
    pub use crate::patterns::utils as pattern_utils;

    #[cfg(feature = "graph")]
    pub use crate::graph::utils as graph_utils;
}

#[cfg(feature = "core")]
pub use crate::core::AnalysisMetadata;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_version() {
        assert!(!VERSION.is_empty());
    }

    #[cfg(feature = "core")]
    #[test]
    fn test_core_reexport() {
        let config = Config::default();
        assert!(config.validate().is_ok());
    }

    #[cfg(all(feature = "analysis", feature = "scanner", feature = "patterns"))]
    #[tokio::test]
    async fn test_repository_analysis_interface() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.rs");
        fs::write(&test_file, "fn main() { println!(\"Hello world\"); }").unwrap();

        let config = Config::default();
        let result = analyze_repository(temp_dir.path(), &config).await;

        match result {
            Ok(analysis) => {
                assert!(analysis.file_count() > 0);
                assert!(!analysis.summary().is_empty());
            }
            Err(_) => {
                // Analysis may fail in constrained environments (e.g. no git);
                // this test only exercises the interface.
            }
        }
    }

    #[cfg(all(feature = "scanner", feature = "patterns"))]
    #[tokio::test]
    async fn test_scan_repository_interface() {
        use std::fs;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.rs");
        fs::write(&test_file, "fn main() {}").unwrap();

        let result =
            scan_repository(temp_dir.path(), Some(&["**/*.rs"]), Some(&["**/target/**"])).await;

        match result {
            Ok(files) => {
                assert!(!files.is_empty());
                assert!(files
                    .iter()
                    .any(|f| f.path.file_name().unwrap() == "test.rs"));
            }
            Err(_) => {
                // Scanning may fail in constrained environments; tolerated here.
            }
        }
    }

    #[cfg(feature = "core")]
    #[test]
    fn test_prelude_imports() {
        use crate::prelude::*;

        let config = Config::default();
        assert!(config.validate().is_ok());

        assert!(!VERSION.is_empty());
    }
}