Skip to main content

agentic_codebase/semantic/
concept_extractor.rs

//! High-level concept extraction.
//!
//! Extracts concepts like "authentication", "payments", "user management"
//! by analyzing symbol names, docstrings, and usage patterns.
5
6use crate::types::{AcbResult, CodeUnitType};
7
8use super::resolver::ResolvedUnit;
9
10/// Extracts high-level concepts from code.
11pub struct ConceptExtractor {
12    /// Concept definitions with keywords.
13    concepts: Vec<ConceptDefinition>,
14}
15
16/// Definition of a concept to detect.
17#[derive(Debug, Clone)]
18struct ConceptDefinition {
19    /// Concept name.
20    name: String,
21    /// Keywords that indicate this concept.
22    keywords: Vec<String>,
23    /// Typical code unit types for this concept.
24    typical_types: Vec<CodeUnitType>,
25}
26
27/// An extracted concept grouping related code units.
28#[derive(Debug, Clone)]
29pub struct ExtractedConcept {
30    /// Concept name.
31    pub name: String,
32    /// Code units belonging to this concept.
33    pub units: Vec<ConceptUnit>,
34    /// Overall confidence.
35    pub confidence: f32,
36}
37
38/// A code unit's membership in a concept.
39#[derive(Debug, Clone)]
40pub struct ConceptUnit {
41    /// Unit temp_id.
42    pub unit_id: u64,
43    /// Role in the concept.
44    pub role: ConceptRole,
45    /// Score (0.0 to 1.0).
46    pub score: f32,
47}
48
/// The role a code unit plays in a concept.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConceptRole {
    /// Defines the concept (interface, base class).
    Definition,
    /// Implements the concept.
    Implementation,
    /// Uses the concept.
    Usage,
    /// Tests the concept.
    Test,
}
61
62impl ConceptExtractor {
63    /// Create a new concept extractor with built-in concept definitions.
64    pub fn new() -> Self {
65        let concepts = vec![
66            ConceptDefinition {
67                name: "Authentication".to_string(),
68                keywords: vec![
69                    "auth",
70                    "login",
71                    "logout",
72                    "session",
73                    "token",
74                    "jwt",
75                    "oauth",
76                    "password",
77                    "credential",
78                    "authenticate",
79                ]
80                .into_iter()
81                .map(String::from)
82                .collect(),
83                typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
84            },
85            ConceptDefinition {
86                name: "Payment".to_string(),
87                keywords: vec![
88                    "payment",
89                    "charge",
90                    "refund",
91                    "transaction",
92                    "stripe",
93                    "paypal",
94                    "billing",
95                    "invoice",
96                    "checkout",
97                ]
98                .into_iter()
99                .map(String::from)
100                .collect(),
101                typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
102            },
103            ConceptDefinition {
104                name: "UserManagement".to_string(),
105                keywords: vec![
106                    "user",
107                    "account",
108                    "profile",
109                    "registration",
110                    "signup",
111                    "settings",
112                    "preferences",
113                ]
114                .into_iter()
115                .map(String::from)
116                .collect(),
117                typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
118            },
119            ConceptDefinition {
120                name: "Database".to_string(),
121                keywords: vec![
122                    "database",
123                    "db",
124                    "query",
125                    "sql",
126                    "migration",
127                    "schema",
128                    "repository",
129                    "model",
130                    "entity",
131                    "table",
132                    "record",
133                ]
134                .into_iter()
135                .map(String::from)
136                .collect(),
137                typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
138            },
139            ConceptDefinition {
140                name: "API".to_string(),
141                keywords: vec![
142                    "api",
143                    "endpoint",
144                    "route",
145                    "handler",
146                    "controller",
147                    "request",
148                    "response",
149                    "middleware",
150                    "rest",
151                    "graphql",
152                ]
153                .into_iter()
154                .map(String::from)
155                .collect(),
156                typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
157            },
158            ConceptDefinition {
159                name: "Logging".to_string(),
160                keywords: vec![
161                    "log",
162                    "logger",
163                    "logging",
164                    "trace",
165                    "debug",
166                    "info",
167                    "warn",
168                    "error",
169                    "metric",
170                    "telemetry",
171                ]
172                .into_iter()
173                .map(String::from)
174                .collect(),
175                typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
176            },
177            ConceptDefinition {
178                name: "Configuration".to_string(),
179                keywords: vec![
180                    "config",
181                    "configuration",
182                    "setting",
183                    "env",
184                    "environment",
185                    "option",
186                    "preference",
187                    "feature_flag",
188                ]
189                .into_iter()
190                .map(String::from)
191                .collect(),
192                typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
193            },
194            ConceptDefinition {
195                name: "Testing".to_string(),
196                keywords: vec![
197                    "test",
198                    "mock",
199                    "stub",
200                    "fixture",
201                    "assert",
202                    "expect",
203                    "spec",
204                    "bench",
205                    "benchmark",
206                ]
207                .into_iter()
208                .map(String::from)
209                .collect(),
210                typical_types: vec![CodeUnitType::Test, CodeUnitType::Function],
211            },
212            ConceptDefinition {
213                name: "ErrorHandling".to_string(),
214                keywords: vec![
215                    "error",
216                    "exception",
217                    "fault",
218                    "retry",
219                    "fallback",
220                    "recovery",
221                    "panic",
222                    "catch",
223                    "throw",
224                ]
225                .into_iter()
226                .map(String::from)
227                .collect(),
228                typical_types: vec![CodeUnitType::Type, CodeUnitType::Function],
229            },
230            ConceptDefinition {
231                name: "Caching".to_string(),
232                keywords: vec![
233                    "cache",
234                    "memoize",
235                    "lru",
236                    "ttl",
237                    "invalidate",
238                    "redis",
239                    "memcached",
240                ]
241                .into_iter()
242                .map(String::from)
243                .collect(),
244                typical_types: vec![CodeUnitType::Function, CodeUnitType::Type],
245            },
246        ];
247
248        Self { concepts }
249    }
250
251    /// Extract concepts from the resolved units.
252    pub fn extract(&self, units: &[ResolvedUnit]) -> AcbResult<Vec<ExtractedConcept>> {
253        let mut extracted = Vec::new();
254
255        for concept_def in &self.concepts {
256            let mut concept_units = Vec::new();
257
258            for unit in units {
259                let score = self.score_unit(unit, concept_def);
260                if score > 0.3 {
261                    concept_units.push(ConceptUnit {
262                        unit_id: unit.unit.temp_id,
263                        role: self.determine_role(unit),
264                        score,
265                    });
266                }
267            }
268
269            if !concept_units.is_empty() {
270                let avg_score =
271                    concept_units.iter().map(|u| u.score).sum::<f32>() / concept_units.len() as f32;
272
273                extracted.push(ExtractedConcept {
274                    name: concept_def.name.clone(),
275                    units: concept_units,
276                    confidence: avg_score,
277                });
278            }
279        }
280
281        Ok(extracted)
282    }
283
284    fn score_unit(&self, unit: &ResolvedUnit, concept: &ConceptDefinition) -> f32 {
285        let mut score = 0.0f32;
286
287        let name_lower = unit.unit.name.to_lowercase();
288        let qname_lower = unit.unit.qualified_name.to_lowercase();
289
290        // Keyword matching in name
291        for keyword in &concept.keywords {
292            if name_lower.contains(keyword.as_str()) {
293                score += 0.4;
294            } else if qname_lower.contains(keyword.as_str()) {
295                score += 0.2;
296            }
297        }
298
299        // Doc matching
300        if let Some(ref doc) = unit.unit.doc {
301            let doc_lower = doc.to_lowercase();
302            for keyword in &concept.keywords {
303                if doc_lower.contains(keyword.as_str()) {
304                    score += 0.15;
305                }
306            }
307        }
308
309        // Type bonus
310        if concept.typical_types.contains(&unit.unit.unit_type) {
311            score += 0.1;
312        }
313
314        score.min(1.0)
315    }
316
317    fn determine_role(&self, unit: &ResolvedUnit) -> ConceptRole {
318        match unit.unit.unit_type {
319            CodeUnitType::Type | CodeUnitType::Trait => ConceptRole::Definition,
320            CodeUnitType::Test => ConceptRole::Test,
321            CodeUnitType::Function | CodeUnitType::Impl => ConceptRole::Implementation,
322            _ => ConceptRole::Usage,
323        }
324    }
325}
326
327impl Default for ConceptExtractor {
328    fn default() -> Self {
329        Self::new()
330    }
331}