use crate::parse::prelude::{score_languages, LanguageCompleteness};
use crate::phase::context::PhaseExecutionContext;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Summary of phase 1 as assembled by `run` in this module.
///
/// Serializable with serde so it can be written out and read back; `Default`
/// yields zero counts, empty collections and `cache_hit == false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Phase1Summary {
/// Number of entries in the execution context's file inventory.
pub total_files: usize,
/// Number of parse results held by the context. Results that report a
/// failure are included here and are additionally counted in `parse_failures`.
pub parsed_files: usize,
/// Number of parse results for which `is_failure()` returned true.
pub parse_failures: usize,
/// Total number of signatures summed over all parse results.
pub signatures: usize,
/// PDG node count keyed by each node's `language` string.
pub language_distribution: HashMap<String, usize>,
/// One completeness entry per reported language.
pub parser_completeness: Vec<LanguageCompleteness>,
/// True when `run` saw no parse results, a non-empty file inventory, and no
/// changed or deleted files. `#[serde(default)]` makes the field fall back to
/// `false` when it is missing from the input being deserialized.
#[serde(default)]
pub cache_hit: bool,
}
/// Builds the [`Phase1Summary`] for an already-populated execution context.
///
/// The function only reads from `context`:
/// - `language_distribution` is a per-language tally of the nodes in
///   `context.pdg`, keyed by each node's `language`.
/// - `cache_hit` is set when there are no parse results even though the file
///   inventory is non-empty and no files are listed as changed or deleted.
/// - `parser_completeness` comes from `score_languages` merged with the PDG
///   tally when parse results exist, and from the PDG tally alone otherwise.
pub fn run(context: &PhaseExecutionContext) -> Phase1Summary {
    let results = &context.parse_results;
    let nothing_parsed = results.is_empty();

    // Tally PDG nodes per language; indices that resolve to no node are skipped.
    let mut language_distribution: HashMap<String, usize> = HashMap::new();
    for idx in context.pdg.node_indices() {
        if let Some(node) = context.pdg.get_node(idx) {
            let tally = language_distribution
                .entry(node.language.clone())
                .or_insert(0);
            *tally += 1;
        }
    }

    let cache_hit = nothing_parsed
        && !context.file_inventory.is_empty()
        && context.changed_files.is_empty()
        && context.deleted_files.is_empty();

    // Without parse results there is nothing to score, so fall back to the PDG.
    let parser_completeness = if nothing_parsed {
        completeness_from_pdg(&language_distribution)
    } else {
        merge_completeness_with_pdg(score_languages(results), &language_distribution)
    };

    let parse_failures = results
        .iter()
        .filter(|outcome| outcome.is_failure())
        .count();
    let signatures: usize = results
        .iter()
        .map(|outcome| outcome.signatures.len())
        .sum();

    Phase1Summary {
        total_files: context.file_inventory.len(),
        parsed_files: results.len(),
        parse_failures,
        signatures,
        language_distribution,
        parser_completeness,
        cache_hit,
    }
}
/// Builds placeholder completeness entries from the PDG language tally alone.
///
/// Used when there are no parse results to score. Every reported language gets
/// `signatures` set to its PDG node count and all ratios and the score set to
/// `1.0`.
///
/// The output follows the same rules as `merge_completeness_with_pdg`, so both
/// code paths report the same set of languages in the same order:
/// - the `"external"` bucket and the empty language name are skipped;
/// - entries are sorted by language name, which also makes the result
///   independent of `HashMap` iteration order.
fn completeness_from_pdg(
    language_distribution: &HashMap<String, usize>,
) -> Vec<LanguageCompleteness> {
    let mut result: Vec<LanguageCompleteness> = language_distribution
        .iter()
        // Not real source languages; the merged path skips them as well.
        .filter(|(language, _)| !language.is_empty() && language.as_str() != "external")
        .map(|(language, &count)| LanguageCompleteness {
            language: language.clone(),
            signatures: count,
            calls_ratio: 1.0,
            imports_ratio: 1.0,
            byte_range_ratio: 1.0,
            score: 1.0,
        })
        .collect();
    // HashMap iteration order is arbitrary; sort for a stable, reproducible result.
    result.sort_by(|a, b| a.language.cmp(&b.language));
    result
}
/// Extends parser-derived completeness scores with languages seen only in the PDG.
///
/// Entries in `parse_scores` are kept untouched. Each language in
/// `language_distribution` that has no entry in `parse_scores` gets a
/// placeholder entry whose `signatures` is the PDG node count and whose ratios
/// and score are all `1.0`. The `"external"` bucket and the empty language
/// name are never added. The combined list is returned sorted by language name.
fn merge_completeness_with_pdg(
    parse_scores: Vec<LanguageCompleteness>,
    language_distribution: &HashMap<String, usize>,
) -> Vec<LanguageCompleteness> {
    // Collect the additions first so the borrow of `parse_scores` ends before
    // the vector is extended.
    let pdg_only: Vec<LanguageCompleteness> = {
        let already_scored: std::collections::HashSet<&str> = parse_scores
            .iter()
            .map(|entry| entry.language.as_str())
            .collect();

        language_distribution
            .iter()
            .filter(|(name, _)| !(name.as_str() == "external" || name.is_empty()))
            .filter(|(name, _)| !already_scored.contains(name.as_str()))
            .map(|(name, &node_count)| LanguageCompleteness {
                language: name.clone(),
                signatures: node_count,
                calls_ratio: 1.0,
                imports_ratio: 1.0,
                byte_range_ratio: 1.0,
                score: 1.0,
            })
            .collect()
    };

    let mut merged = parse_scores;
    merged.extend(pdg_only);
    merged.sort_by(|lhs, rhs| lhs.language.cmp(&rhs.language));
    merged
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Language names of `entries`, in the order they appear.
    fn languages(entries: &[LanguageCompleteness]) -> Vec<&str> {
        entries.iter().map(|lc| lc.language.as_str()).collect()
    }

    /// PDG-only languages are appended, parser scores are kept as-is, the
    /// "external" bucket is dropped, and the result is sorted by language.
    #[test]
    fn merge_completeness_adds_missing_languages() {
        let parse_scores = vec![LanguageCompleteness {
            language: "rust".to_string(),
            signatures: 50,
            calls_ratio: 0.8,
            imports_ratio: 0.9,
            byte_range_ratio: 1.0,
            score: 0.9,
        }];
        let mut pdg_dist = HashMap::new();
        pdg_dist.insert("rust".to_string(), 50);
        pdg_dist.insert("javascript".to_string(), 15);
        pdg_dist.insert("python".to_string(), 10);
        pdg_dist.insert("external".to_string(), 5);

        let merged = merge_completeness_with_pdg(parse_scores, &pdg_dist);

        // Exact, ordered comparison: covers membership, the absence of
        // "external", and the sort by language name in one assertion.
        assert_eq!(languages(&merged), vec!["javascript", "python", "rust"]);

        let rust = merged.iter().find(|lc| lc.language == "rust").unwrap();
        assert_eq!(rust.signatures, 50);
        assert!((rust.score - 0.9).abs() < 0.001);

        let js = merged
            .iter()
            .find(|lc| lc.language == "javascript")
            .unwrap();
        assert_eq!(js.signatures, 15);
        assert!((js.score - 1.0).abs() < 0.001);
    }

    /// An empty PDG distribution leaves the parser scores unchanged.
    #[test]
    fn merge_completeness_empty_pdg() {
        let parse_scores = vec![LanguageCompleteness {
            language: "rust".to_string(),
            signatures: 10,
            calls_ratio: 1.0,
            imports_ratio: 1.0,
            byte_range_ratio: 1.0,
            score: 1.0,
        }];
        let pdg_dist = HashMap::new();

        let merged = merge_completeness_with_pdg(parse_scores, &pdg_dist);

        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].language, "rust");
        assert_eq!(merged[0].signatures, 10);
    }

    /// With no parser scores, every entry comes from the PDG distribution.
    #[test]
    fn merge_completeness_all_from_pdg() {
        let parse_scores = vec![];
        let mut pdg_dist = HashMap::new();
        pdg_dist.insert("javascript".to_string(), 20);
        pdg_dist.insert("typescript".to_string(), 30);

        let merged = merge_completeness_with_pdg(parse_scores, &pdg_dist);

        assert_eq!(languages(&merged), vec!["javascript", "typescript"]);
        let js = merged
            .iter()
            .find(|lc| lc.language == "javascript")
            .unwrap();
        assert_eq!(js.signatures, 20);
    }

    /// Nodes without a language name must not produce a completeness entry.
    #[test]
    fn merge_completeness_skips_empty_language() {
        let mut pdg_dist = HashMap::new();
        pdg_dist.insert(String::new(), 7);
        pdg_dist.insert("go".to_string(), 4);

        let merged = merge_completeness_with_pdg(Vec::new(), &pdg_dist);

        assert_eq!(languages(&merged), vec!["go"]);
        assert_eq!(merged[0].signatures, 4);
    }
}