use crate::types::{AcbResult, CodeUnitType};
use super::resolver::ResolvedUnit;
/// Groups code units under a fixed catalogue of keyword-defined concepts.
///
/// Every unit passed to `extract` is scored against each entry in `concepts`;
/// units that score above the match threshold are reported under that concept.
///
/// Derives `Debug` and `Clone` like the other types in this module, so the
/// extractor can be logged and duplicated alongside the values it produces.
#[derive(Debug, Clone)]
pub struct ConceptExtractor {
    /// Concept definitions, in the order their results are emitted.
    concepts: Vec<ConceptDefinition>,
}
/// Description of one concept that code units are scored against.
#[derive(Debug, Clone)]
struct ConceptDefinition {
/// Concept label; copied into `ExtractedConcept::name` when the concept matches.
name: String,
/// Substrings searched for in a unit's lowercased name, qualified name and doc text.
keywords: Vec<String>,
/// Unit types that earn a small score bonus when a unit is scored against this concept.
typical_types: Vec<CodeUnitType>,
}
/// A concept that at least one code unit matched, as returned by `ConceptExtractor::extract`.
#[derive(Debug, Clone)]
pub struct ExtractedConcept {
/// Name of the matched concept (taken from its definition).
pub name: String,
/// Units whose score for this concept exceeded the match threshold, in input order.
pub units: Vec<ConceptUnit>,
/// Arithmetic mean of the `score` of every entry in `units`.
pub confidence: f32,
}
/// One code unit's membership in an extracted concept.
#[derive(Debug, Clone)]
pub struct ConceptUnit {
/// The `temp_id` of the matched unit.
pub unit_id: u64,
/// Role the unit plays, derived from its unit type.
pub role: ConceptRole,
/// Match score for the concept: greater than 0.3 and capped at 1.0.
pub score: f32,
}
/// Role a code unit plays within a concept, chosen from the unit's `CodeUnitType`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConceptRole {
/// Assigned to `Type` and `Trait` units.
Definition,
/// Assigned to `Function` and `Impl` units.
Implementation,
/// Fallback for every unit type not covered by the other roles.
Usage,
/// Assigned to `Test` units.
Test,
}
impl ConceptExtractor {
pub fn new() -> Self {
let concepts = vec![
ConceptDefinition {
name: "Authentication".to_string(),
keywords: vec![
"auth",
"login",
"logout",
"session",
"token",
"jwt",
"oauth",
"password",
"credential",
"authenticate",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
},
ConceptDefinition {
name: "Payment".to_string(),
keywords: vec![
"payment",
"charge",
"refund",
"transaction",
"stripe",
"paypal",
"billing",
"invoice",
"checkout",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
},
ConceptDefinition {
name: "UserManagement".to_string(),
keywords: vec![
"user",
"account",
"profile",
"registration",
"signup",
"settings",
"preferences",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
},
ConceptDefinition {
name: "Database".to_string(),
keywords: vec![
"database",
"db",
"query",
"sql",
"migration",
"schema",
"repository",
"model",
"entity",
"table",
"record",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
},
ConceptDefinition {
name: "API".to_string(),
keywords: vec![
"api",
"endpoint",
"route",
"handler",
"controller",
"request",
"response",
"middleware",
"rest",
"graphql",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
},
ConceptDefinition {
name: "Logging".to_string(),
keywords: vec![
"log",
"logger",
"logging",
"trace",
"debug",
"info",
"warn",
"error",
"metric",
"telemetry",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
},
ConceptDefinition {
name: "Configuration".to_string(),
keywords: vec![
"config",
"configuration",
"setting",
"env",
"environment",
"option",
"preference",
"feature_flag",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
},
ConceptDefinition {
name: "Testing".to_string(),
keywords: vec![
"test",
"mock",
"stub",
"fixture",
"assert",
"expect",
"spec",
"bench",
"benchmark",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Test, CodeUnitType::Function],
},
ConceptDefinition {
name: "ErrorHandling".to_string(),
keywords: vec![
"error",
"exception",
"fault",
"retry",
"fallback",
"recovery",
"panic",
"catch",
"throw",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
},
ConceptDefinition {
name: "Caching".to_string(),
keywords: vec![
"cache",
"memoize",
"lru",
"ttl",
"invalidate",
"redis",
"memcached",
]
.into_iter()
.map(String::from)
.collect(),
typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
},
];
Self { concepts }
}
pub fn extract(&self, units: &[ResolvedUnit]) -> AcbResult<Vec<ExtractedConcept>> {
let mut extracted = Vec::new();
for concept_def in &self.concepts {
let mut concept_units = Vec::new();
for unit in units {
let score = self.score_unit(unit, concept_def);
if score > 0.3 {
concept_units.push(ConceptUnit {
unit_id: unit.unit.temp_id,
role: self.determine_role(unit),
score,
});
}
}
if !concept_units.is_empty() {
let avg_score =
concept_units.iter().map(|u| u.score).sum::<f32>() / concept_units.len() as f32;
extracted.push(ExtractedConcept {
name: concept_def.name.clone(),
units: concept_units,
confidence: avg_score,
});
}
}
Ok(extracted)
}
fn score_unit(&self, unit: &ResolvedUnit, concept: &ConceptDefinition) -> f32 {
let mut score = 0.0f32;
let name_lower = unit.unit.name.to_lowercase();
let qname_lower = unit.unit.qualified_name.to_lowercase();
for keyword in &concept.keywords {
if name_lower.contains(keyword.as_str()) {
score += 0.4;
} else if qname_lower.contains(keyword.as_str()) {
score += 0.2;
}
}
if let Some(ref doc) = unit.unit.doc {
let doc_lower = doc.to_lowercase();
for keyword in &concept.keywords {
if doc_lower.contains(keyword.as_str()) {
score += 0.15;
}
}
}
if concept.typical_types.contains(&unit.unit.unit_type) {
score += 0.1;
}
score.min(1.0)
}
fn determine_role(&self, unit: &ResolvedUnit) -> ConceptRole {
match unit.unit.unit_type {
CodeUnitType::Type | CodeUnitType::Trait => ConceptRole::Definition,
CodeUnitType::Test => ConceptRole::Test,
CodeUnitType::Function | CodeUnitType::Impl => ConceptRole::Implementation,
_ => ConceptRole::Usage,
}
}
}
impl Default for ConceptExtractor {
fn default() -> Self {
Self::new()
}
}