pub mod import_analysis;
pub mod scoring;
pub mod template_detection;
pub use scoring::{
HeuristicScorer, HeuristicWeights, ScoreComponents, ScoringFeatures, WeightPreset,
};
pub use template_detection::{
get_template_score_boost, is_template_file, TemplateDetectionMethod, TemplateDetector,
TemplateEngine,
};
pub use import_analysis::{
import_matches_file, CentralityCalculator, ImportGraph, ImportGraphBuilder,
};
use scribe_core::Result;
use std::collections::HashMap;
/// Facade that pairs a [`HeuristicScorer`] with a [`TemplateDetector`].
///
/// Scoring calls are forwarded to the scorer and template-boost lookups to
/// the detector; this type adds no scoring logic of its own.
#[derive(Debug)]
pub struct HeuristicSystem {
    /// Scorer that every `score_*` method delegates to.
    scorer: HeuristicScorer,
    /// Detector backing [`HeuristicSystem::get_template_boost`].
    template_detector: TemplateDetector,
}

impl HeuristicSystem {
    /// Creates a system whose scorer uses `HeuristicWeights::default()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TemplateDetector::new`.
    pub fn new() -> Result<Self> {
        Self::with_weights(HeuristicWeights::default())
    }

    /// Creates a system whose scorer uses the supplied `weights`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TemplateDetector::new`.
    pub fn with_weights(weights: HeuristicWeights) -> Result<Self> {
        // The scorer is built before the detector, so a detector failure
        // surfaces only after the scorer has been constructed.
        let scorer = HeuristicScorer::new(weights);
        let template_detector = TemplateDetector::new()?;
        Ok(Self {
            scorer,
            template_detector,
        })
    }

    /// Creates a system whose scorer uses `HeuristicWeights::with_v2_features()`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `TemplateDetector::new`.
    pub fn with_v2_features() -> Result<Self> {
        Self::with_weights(HeuristicWeights::with_v2_features())
    }

    /// Scores `file` in the context of `all_files` by delegating to the scorer.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying scorer reports.
    pub fn score_file<T: ScanResult>(
        &mut self,
        file: &T,
        all_files: &[T],
    ) -> Result<ScoreComponents> {
        self.scorer.score_file(file, all_files)
    }

    /// Scores every entry of `files` by delegating to the scorer.
    ///
    /// Each returned pair holds a `usize` and the computed [`ScoreComponents`].
    /// NOTE(review): the `usize` is presumably the file's index in `files`;
    /// confirm against `HeuristicScorer::score_all_files`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying scorer reports.
    pub fn score_all_files<T: ScanResult>(
        &mut self,
        files: &[T],
    ) -> Result<Vec<(usize, ScoreComponents)>> {
        self.scorer.score_all_files(files)
    }

    /// Returns at most the first `top_k` entries of [`Self::score_all_files`].
    ///
    /// NOTE(review): this yields the *best* files only if `score_all_files`
    /// returns its results ordered best-first; no sorting happens here.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::score_all_files`] reports.
    pub fn get_top_files<T: ScanResult>(
        &mut self,
        files: &[T],
        top_k: usize,
    ) -> Result<Vec<(usize, ScoreComponents)>> {
        let mut ranked = self.score_all_files(files)?;
        // Keep only the leading `top_k` results; shorter lists are left as-is.
        ranked.truncate(top_k);
        Ok(ranked)
    }

    /// Looks up the score boost the template detector assigns to `file_path`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `TemplateDetector::get_score_boost` reports.
    pub fn get_template_boost(&self, file_path: &str) -> Result<f64> {
        self.template_detector.get_score_boost(file_path)
    }

    /// Forwards to `import_analysis::import_matches_file`.
    ///
    /// No state of `self` is consulted.
    pub fn import_matches(&self, import_name: &str, file_path: &str) -> bool {
        import_analysis::import_matches_file(import_name, file_path)
    }
}

impl Default for HeuristicSystem {
    /// Equivalent to [`HeuristicSystem::new`], unwrapping the result.
    ///
    /// # Panics
    ///
    /// Panics if [`HeuristicSystem::new`] returns an error.
    fn default() -> Self {
        HeuristicSystem::new().expect("Failed to create HeuristicSystem")
    }
}
/// Interface a file record must implement to be scored by [`HeuristicSystem`].
///
/// The scoring methods on `HeuristicSystem` are generic over `T: ScanResult`.
/// How each accessor contributes to a score is decided by the scorer in the
/// `scoring` module, not by this trait.
pub trait ScanResult {
/// Path of the file, as a string slice.
fn path(&self) -> &str;
/// Relative form of the file's path.
/// NOTE(review): the base it is relative to is not stated here; confirm with implementors.
fn relative_path(&self) -> &str;
/// Depth of the file.
/// NOTE(review): presumably the directory nesting level; confirm with implementors.
fn depth(&self) -> usize;
/// Whether the file is classified as documentation.
fn is_docs(&self) -> bool;
/// Whether the file is classified as a README.
fn is_readme(&self) -> bool;
/// Whether the file is classified as a test.
fn is_test(&self) -> bool;
/// Whether the file is classified as an entrypoint.
fn is_entrypoint(&self) -> bool;
/// Whether the file is flagged as having examples.
fn has_examples(&self) -> bool;
/// Priority boost value associated with the file.
/// NOTE(review): range and how it is combined with other terms are defined by the scorer.
fn priority_boost(&self) -> f64;
/// Churn score associated with the file.
/// NOTE(review): presumably derived from change history; source and range not visible here.
fn churn_score(&self) -> f64;
/// Incoming-centrality value associated with the file.
/// NOTE(review): presumably computed from the import graph; confirm against `import_analysis`.
fn centrality_in(&self) -> f64;
/// Import strings recorded for the file, or `None` when the implementor has none to offer.
fn imports(&self) -> Option<&[String]>;
/// Document-structure analysis for the file, or `None` when the implementor has none to offer.
fn doc_analysis(&self) -> Option<&DocumentAnalysis>;
}
/// Structural counters for a document, plus a derived structure score.
///
/// The derived `Default` yields all counters at zero and the flag unset,
/// which is exactly what [`DocumentAnalysis::new`] returns.
#[derive(Debug, Clone, Default)]
pub struct DocumentAnalysis {
    /// Heading count; contributes `count / 10`, capped at 0.5, to the score.
    pub heading_count: usize,
    /// Table-of-contents indicator count; any non-zero value adds a flat 0.3.
    pub toc_indicators: usize,
    /// Link count; contributes `count / 20`, capped at 0.3, to the score.
    pub link_count: usize,
    /// Code block count; contributes `count / 10`, capped at 0.2, to the score.
    pub code_block_count: usize,
    /// Flag stored alongside the counters; `structure_score` does not read it.
    pub is_well_structured: bool,
}

impl DocumentAnalysis {
    /// Creates an analysis with every counter at zero and the flag unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Computes a structure score in the range `0.0..=1.3`.
    ///
    /// The score is the sum of four independently capped terms: headings
    /// (up to 0.5), table-of-contents presence (0.3), links (up to 0.3) and
    /// code blocks (up to 0.2). A counter of zero contributes nothing.
    pub fn structure_score(&self) -> f64 {
        let headings = (self.heading_count as f64 / 10.0).min(0.5);
        // Presence-only term: the number of indicators does not matter.
        let toc = if self.toc_indicators == 0 { 0.0 } else { 0.3 };
        let links = (self.link_count as f64 / 20.0).min(0.3);
        let code_blocks = (self.code_block_count as f64 / 10.0).min(0.2);

        // Summed left to right in the same order as the terms are listed.
        headings + toc + links + code_blocks
    }
}
/// Timing and throughput counters for a heuristic run.
///
/// All fields are public and are filled in by the caller; only
/// `avg_time_per_file_ms` is derived, via [`HeuristicMetrics::finalize`].
#[derive(Debug, Clone)]
pub struct HeuristicMetrics {
    /// Number of files processed; the divisor used by `finalize`.
    pub files_processed: usize,
    /// Total processing time in milliseconds; the dividend used by `finalize`.
    pub processing_time_ms: u64,
    /// Milliseconds attributed to the import graph.
    /// NOTE(review): not read in this file; presumably set by the caller.
    pub import_graph_time_ms: u64,
    /// Milliseconds attributed to template detection.
    /// NOTE(review): not read in this file; presumably set by the caller.
    pub template_detection_time_ms: u64,
    /// `processing_time_ms / files_processed`, refreshed by `finalize`.
    pub avg_time_per_file_ms: f64,
    /// Hit rates keyed by a string label.
    /// NOTE(review): key naming and value range are not established in this file.
    pub cache_hit_rates: HashMap<String, f64>,
}

impl HeuristicMetrics {
    /// Creates metrics with every counter at zero and no cache entries.
    pub fn new() -> Self {
        Self {
            files_processed: 0,
            processing_time_ms: 0,
            import_graph_time_ms: 0,
            template_detection_time_ms: 0,
            avg_time_per_file_ms: 0.0,
            cache_hit_rates: HashMap::new(),
        }
    }

    /// Recomputes `avg_time_per_file_ms` from the current counters.
    ///
    /// With no processed files the average is set to `0.0`. This avoids a
    /// division by zero and also clears any value left from an earlier call,
    /// so the derived field never disagrees with its inputs.
    pub fn finalize(&mut self) {
        self.avg_time_per_file_ms = if self.files_processed == 0 {
            0.0
        } else {
            self.processing_time_ms as f64 / self.files_processed as f64
        };
    }
}

impl Default for HeuristicMetrics {
    /// Same as [`HeuristicMetrics::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the default and the v2 constructors must succeed.
    #[test]
    fn test_heuristic_system_creation() {
        let default_system = HeuristicSystem::new();
        let v2_system = HeuristicSystem::with_v2_features();

        assert!(default_system.is_ok());
        assert!(v2_system.is_ok());
    }

    /// A document with headings, links and code blocks scores strictly
    /// between 0 and 2.
    #[test]
    fn test_document_analysis() {
        let doc = DocumentAnalysis {
            heading_count: 5,
            link_count: 10,
            code_block_count: 3,
            ..DocumentAnalysis::new()
        };

        let score = doc.structure_score();
        assert!(0.0 < score);
        assert!(2.0 > score);
    }

    /// 500 ms over 100 files averages to 5 ms per file.
    #[test]
    fn test_metrics() {
        let mut metrics = HeuristicMetrics {
            files_processed: 100,
            processing_time_ms: 500,
            ..HeuristicMetrics::new()
        };

        metrics.finalize();
        assert_eq!(metrics.avg_time_per_file_ms, 5.0);
    }
}