Skip to main content

agentic_codebase/semantic/
architecture.rs

1//! Architecture Inference — Invention 8.
2//!
3//! Infer architecture from the code structure itself. No documentation needed.
4//! Detects patterns like Layered, MVC, Microservices, etc. from module structure
5//! and dependency directions.
6
7use std::collections::{HashMap, HashSet};
8
9use serde::{Deserialize, Serialize};
10
11use crate::graph::CodeGraph;
12use crate::types::{CodeUnitType, EdgeType};
13
14// ── Types ────────────────────────────────────────────────────────────────────
15
/// Inferred architecture.
///
/// This is the value returned by `ArchitectureInferrer::infer`; all fields are
/// derived from the units and edges of the analysed code graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferredArchitecture {
    /// Overall pattern detected.
    pub pattern: ArchitecturePattern,
    /// Layers/tiers.
    pub layers: Vec<ArchitectureLayer>,
    /// Key components (one per detected role).
    pub components: Vec<ArchitectureComponent>,
    /// Data flows (call relationships between components of different roles).
    pub flows: Vec<DataFlow>,
    /// Confidence in inference, between 0.0 and 1.0 inclusive.
    pub confidence: f64,
    /// Anomalies (violations of pattern).
    pub anomalies: Vec<ArchitectureAnomaly>,
}
32
/// Detected architecture pattern.
///
/// The classifier in this file (`ArchitectureInferrer::classify_pattern`) only
/// ever produces `MVC`, `Layered`, `Hexagonal`, `Monolith` or `Unknown`; the
/// remaining variants can still be passed to `ArchitectureInferrer::validate`
/// as an expected pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ArchitecturePattern {
    Monolith,
    Microservices,
    Layered,
    Hexagonal,
    EventDriven,
    CQRS,
    Serverless,
    MVC,
    /// No rule of the classifier matched.
    Unknown,
}
46
/// An architectural layer.
///
/// Layers are produced by `ArchitectureInferrer::detect_layers`, one per
/// component role that maps to a layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitectureLayer {
    /// Display name of the layer, e.g. `"Presentation"`.
    pub name: String,
    /// Short human-readable description of what the layer is for.
    pub purpose: String,
    /// Module paths of the units in this layer (each unit's qualified name
    /// with its last `::`- or `.`-separated segment removed).
    pub modules: Vec<String>,
    /// Names of the layers this layer is expected to depend on.
    pub depends_on: Vec<String>,
}
55
/// An architectural component.
///
/// `ArchitectureInferrer::detect_components` builds one component per role,
/// containing every code unit that was assigned that role.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitectureComponent {
    /// Component name; set to the `Debug` name of `role` by the inferrer.
    pub name: String,
    /// Role shared by every unit in this component.
    pub role: ComponentRole,
    /// Ids of the code units that belong to this component.
    pub node_ids: Vec<u64>,
    /// Qualified names of units outside this component that its units import.
    pub external_deps: Vec<String>,
}
64
/// Role of an architectural component.
///
/// Roles are assigned by `ArchitectureInferrer::detect_components` from keyword
/// matches on a unit's name, qualified name and file path. `Test` is assigned
/// from the unit's type rather than from keywords, and `Utility` is the
/// fallback when nothing else matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ComponentRole {
    Entrypoint,
    Controller,
    Service,
    Repository,
    Model,
    /// Fallback role for units that match no other rule.
    Utility,
    Configuration,
    Test,
}
77
/// A data flow between components.
///
/// Flows produced by `ArchitectureInferrer::detect_flows` describe call edges
/// that cross from one component role to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataFlow {
    /// Flow label; the inferrer uses `"<source role> -> <destination role>"`.
    pub name: String,
    /// Name of the component the flow starts from.
    pub source: String,
    /// Name of the component the flow ends at.
    pub destination: String,
    /// Intermediate hops; the inferrer in this file always leaves this empty.
    pub via: Vec<String>,
    /// Kind of data or interaction; the inferrer sets `"function call"`.
    pub data_type: String,
}
87
/// An architecture anomaly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchitectureAnomaly {
    /// Human-readable summary of the problem.
    pub description: String,
    /// Id of the code unit the anomaly is reported against.
    /// `ArchitectureInferrer::validate` uses `0` for a whole-graph pattern
    /// mismatch.
    pub node_id: u64,
    /// What was expected.
    pub expected: String,
    /// What was actually found.
    pub actual: String,
    /// How serious the anomaly is.
    pub severity: AnomalySeverity,
}
97
/// Severity of an anomaly.
///
/// Within this file, layer violations are reported as `Error` and an
/// expected/detected pattern mismatch as `Warning`; `Info` and `Critical` are
/// not produced here. No ordering trait is derived, so severities can only be
/// compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnomalySeverity {
    Info,
    Warning,
    Error,
    Critical,
}
106
107// ── ArchitectureInferrer ─────────────────────────────────────────────────────
108
/// Infers architecture from code structure.
///
/// Holds only a shared borrow of the graph for the lifetime `'g`; every
/// analysis method reads from that graph.
pub struct ArchitectureInferrer<'g> {
    /// Code graph whose units and edges are analysed.
    graph: &'g CodeGraph,
}
113
114impl<'g> ArchitectureInferrer<'g> {
115    pub fn new(graph: &'g CodeGraph) -> Self {
116        Self { graph }
117    }
118
119    /// Infer the architecture from the code graph.
120    pub fn infer(&self) -> InferredArchitecture {
121        let components = self.detect_components();
122        let layers = self.detect_layers(&components);
123        let flows = self.detect_flows(&components);
124        let pattern = self.classify_pattern(&components, &layers);
125        let anomalies = self.detect_anomalies(&components, &pattern);
126        let confidence = self.compute_confidence(&components, &layers);
127
128        InferredArchitecture {
129            pattern,
130            layers,
131            components,
132            flows,
133            confidence,
134            anomalies,
135        }
136    }
137
138    /// Generate diagram-ready data.
139    pub fn diagram(&self, arch: &InferredArchitecture) -> serde_json::Value {
140        serde_json::json!({
141            "pattern": format!("{:?}", arch.pattern),
142            "layers": arch.layers.iter().map(|l| serde_json::json!({
143                "name": l.name,
144                "purpose": l.purpose,
145                "modules": l.modules,
146                "depends_on": l.depends_on,
147            })).collect::<Vec<_>>(),
148            "components": arch.components.iter().map(|c| serde_json::json!({
149                "name": c.name,
150                "role": format!("{:?}", c.role),
151                "size": c.node_ids.len(),
152            })).collect::<Vec<_>>(),
153            "flows": arch.flows.iter().map(|f| serde_json::json!({
154                "from": f.source,
155                "to": f.destination,
156                "via": f.via,
157            })).collect::<Vec<_>>(),
158        })
159    }
160
161    /// Validate code against an expected architecture pattern.
162    pub fn validate(&self, expected: ArchitecturePattern) -> Vec<ArchitectureAnomaly> {
163        let inferred = self.infer();
164        let mut anomalies = inferred.anomalies;
165
166        if inferred.pattern != expected {
167            anomalies.push(ArchitectureAnomaly {
168                description: format!(
169                    "Expected {:?} architecture but detected {:?}",
170                    expected, inferred.pattern
171                ),
172                node_id: 0,
173                expected: format!("{:?}", expected),
174                actual: format!("{:?}", inferred.pattern),
175                severity: AnomalySeverity::Warning,
176            });
177        }
178
179        anomalies
180    }
181
182    // ── Internal ─────────────────────────────────────────────────────────
183
184    fn detect_components(&self) -> Vec<ArchitectureComponent> {
185        let mut role_map: HashMap<ComponentRole, Vec<u64>> = HashMap::new();
186
187        for unit in self.graph.units() {
188            let name_lower = unit.name.to_lowercase();
189            let qname_lower = unit.qualified_name.to_lowercase();
190            let path_lower = unit.file_path.display().to_string().to_lowercase();
191
192            let role = if Self::matches_any(
193                &[&name_lower, &qname_lower, &path_lower],
194                &["controller", "handler", "view", "endpoint"],
195            ) {
196                ComponentRole::Controller
197            } else if Self::matches_any(
198                &[&name_lower, &qname_lower, &path_lower],
199                &["service", "usecase", "interactor"],
200            ) {
201                ComponentRole::Service
202            } else if Self::matches_any(
203                &[&name_lower, &qname_lower, &path_lower],
204                &["repository", "repo", "dao", "store", "adapter"],
205            ) {
206                ComponentRole::Repository
207            } else if Self::matches_any(
208                &[&name_lower, &qname_lower, &path_lower],
209                &["model", "entity", "schema", "dto"],
210            ) {
211                ComponentRole::Model
212            } else if Self::matches_any(
213                &[&name_lower, &qname_lower, &path_lower],
214                &["config", "setting", "env"],
215            ) {
216                ComponentRole::Configuration
217            } else if unit.unit_type == CodeUnitType::Test {
218                ComponentRole::Test
219            } else if Self::matches_any(
220                &[&name_lower, &qname_lower, &path_lower],
221                &["main", "app", "server", "cli", "entry"],
222            ) {
223                ComponentRole::Entrypoint
224            } else {
225                ComponentRole::Utility
226            };
227
228            role_map.entry(role).or_default().push(unit.id);
229        }
230
231        role_map
232            .into_iter()
233            .map(|(role, ids)| {
234                let name = format!("{:?}", role);
235                let external_deps = self.find_external_deps(&ids);
236                ArchitectureComponent {
237                    name,
238                    role,
239                    node_ids: ids,
240                    external_deps,
241                }
242            })
243            .collect()
244    }
245
246    fn detect_layers(&self, components: &[ArchitectureComponent]) -> Vec<ArchitectureLayer> {
247        let mut layers = Vec::new();
248
249        let has_controllers = components
250            .iter()
251            .any(|c| c.role == ComponentRole::Controller);
252        let has_services = components.iter().any(|c| c.role == ComponentRole::Service);
253        let has_repos = components
254            .iter()
255            .any(|c| c.role == ComponentRole::Repository);
256
257        if has_controllers {
258            layers.push(ArchitectureLayer {
259                name: "Presentation".to_string(),
260                purpose: "Handle external requests and responses".to_string(),
261                modules: self.modules_for_role(components, ComponentRole::Controller),
262                depends_on: vec!["Business Logic".to_string()],
263            });
264        }
265
266        if has_services {
267            layers.push(ArchitectureLayer {
268                name: "Business Logic".to_string(),
269                purpose: "Core business rules and workflows".to_string(),
270                modules: self.modules_for_role(components, ComponentRole::Service),
271                depends_on: vec!["Data Access".to_string()],
272            });
273        }
274
275        if has_repos {
276            layers.push(ArchitectureLayer {
277                name: "Data Access".to_string(),
278                purpose: "Data persistence and retrieval".to_string(),
279                modules: self.modules_for_role(components, ComponentRole::Repository),
280                depends_on: Vec::new(),
281            });
282        }
283
284        layers
285    }
286
287    fn detect_flows(&self, components: &[ArchitectureComponent]) -> Vec<DataFlow> {
288        let mut flows = Vec::new();
289
290        // Detect flows from call edges between component roles
291        let role_names: Vec<(ComponentRole, &str)> = components
292            .iter()
293            .map(|c| (c.role, c.name.as_str()))
294            .collect();
295
296        for comp in components {
297            for &node_id in &comp.node_ids {
298                for edge in self.graph.edges_from(node_id) {
299                    if edge.edge_type != EdgeType::Calls {
300                        continue;
301                    }
302                    // Find which component the target belongs to
303                    for other in components {
304                        if other.role != comp.role && other.node_ids.contains(&edge.target_id) {
305                            let flow_name = format!("{:?} -> {:?}", comp.role, other.role);
306                            if !flows.iter().any(|f: &DataFlow| f.name == flow_name) {
307                                flows.push(DataFlow {
308                                    name: flow_name,
309                                    source: format!("{:?}", comp.role),
310                                    destination: format!("{:?}", other.role),
311                                    via: Vec::new(),
312                                    data_type: "function call".to_string(),
313                                });
314                            }
315                            break;
316                        }
317                    }
318                }
319            }
320        }
321
322        let _ = role_names; // suppress unused warning
323        flows
324    }
325
326    fn classify_pattern(
327        &self,
328        components: &[ArchitectureComponent],
329        layers: &[ArchitectureLayer],
330    ) -> ArchitecturePattern {
331        let has_controllers = components
332            .iter()
333            .any(|c| c.role == ComponentRole::Controller);
334        let has_services = components.iter().any(|c| c.role == ComponentRole::Service);
335        let has_repos = components
336            .iter()
337            .any(|c| c.role == ComponentRole::Repository);
338        let has_models = components.iter().any(|c| c.role == ComponentRole::Model);
339
340        // MVC: controllers + models + views
341        if has_controllers && has_models && !has_repos {
342            return ArchitecturePattern::MVC;
343        }
344
345        // Layered: clear layer separation
346        if layers.len() >= 3 && has_controllers && has_services && has_repos {
347            return ArchitecturePattern::Layered;
348        }
349
350        // Hexagonal: services + repositories with clear interfaces
351        if has_services && has_repos && !has_controllers {
352            return ArchitecturePattern::Hexagonal;
353        }
354
355        // If only utilities and entrypoints, likely monolith
356        let non_utility = components
357            .iter()
358            .filter(|c| c.role != ComponentRole::Utility && c.role != ComponentRole::Test)
359            .count();
360        if non_utility <= 2 {
361            return ArchitecturePattern::Monolith;
362        }
363
364        ArchitecturePattern::Unknown
365    }
366
367    fn detect_anomalies(
368        &self,
369        components: &[ArchitectureComponent],
370        _pattern: &ArchitecturePattern,
371    ) -> Vec<ArchitectureAnomaly> {
372        let mut anomalies = Vec::new();
373
374        // Check for bidirectional dependencies between layers (layer violation)
375        let controller_ids: HashSet<u64> = components
376            .iter()
377            .filter(|c| c.role == ComponentRole::Controller)
378            .flat_map(|c| c.node_ids.iter().copied())
379            .collect();
380
381        let repo_ids: HashSet<u64> = components
382            .iter()
383            .filter(|c| c.role == ComponentRole::Repository)
384            .flat_map(|c| c.node_ids.iter().copied())
385            .collect();
386
387        // Repos should not call controllers
388        for &repo_id in &repo_ids {
389            for edge in self.graph.edges_from(repo_id) {
390                if edge.edge_type == EdgeType::Calls && controller_ids.contains(&edge.target_id) {
391                    anomalies.push(ArchitectureAnomaly {
392                        description: "Repository layer calls presentation layer (layer violation)"
393                            .to_string(),
394                        node_id: repo_id,
395                        expected: "Data Access should not depend on Presentation".to_string(),
396                        actual: "Upward dependency detected".to_string(),
397                        severity: AnomalySeverity::Error,
398                    });
399                }
400            }
401        }
402
403        anomalies
404    }
405
406    fn compute_confidence(
407        &self,
408        components: &[ArchitectureComponent],
409        layers: &[ArchitectureLayer],
410    ) -> f64 {
411        let total_units = self.graph.unit_count();
412        if total_units == 0 {
413            return 0.0;
414        }
415
416        let classified = components
417            .iter()
418            .filter(|c| c.role != ComponentRole::Utility)
419            .map(|c| c.node_ids.len())
420            .sum::<usize>();
421
422        let classification_ratio = classified as f64 / total_units as f64;
423        let layer_bonus = (layers.len() as f64 * 0.1).min(0.3);
424
425        (classification_ratio * 0.7 + layer_bonus).min(1.0)
426    }
427
428    fn find_external_deps(&self, ids: &[u64]) -> Vec<String> {
429        let id_set: HashSet<u64> = ids.iter().copied().collect();
430        let mut external = HashSet::new();
431
432        for &id in ids {
433            for edge in self.graph.edges_from(id) {
434                if edge.edge_type == EdgeType::Imports && !id_set.contains(&edge.target_id) {
435                    if let Some(unit) = self.graph.get_unit(edge.target_id) {
436                        external.insert(unit.qualified_name.clone());
437                    }
438                }
439            }
440        }
441
442        external.into_iter().collect()
443    }
444
445    fn modules_for_role(
446        &self,
447        components: &[ArchitectureComponent],
448        role: ComponentRole,
449    ) -> Vec<String> {
450        let mut modules = HashSet::new();
451        for comp in components {
452            if comp.role == role {
453                for &id in &comp.node_ids {
454                    if let Some(unit) = self.graph.get_unit(id) {
455                        if let Some(last_sep) = unit
456                            .qualified_name
457                            .rfind("::")
458                            .or_else(|| unit.qualified_name.rfind('.'))
459                        {
460                            modules.insert(unit.qualified_name[..last_sep].to_string());
461                        }
462                    }
463                }
464            }
465        }
466        modules.into_iter().collect()
467    }
468
469    fn matches_any(targets: &[&str], keywords: &[&str]) -> bool {
470        targets
471            .iter()
472            .any(|t| keywords.iter().any(|k| t.contains(k)))
473    }
474}
475
476// ── Tests ────────────────────────────────────────────────────────────────────
477
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Build a graph with one controller, one service and one repository
    /// function and no edges between them.
    fn test_graph() -> CodeGraph {
        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "user_controller".to_string(),
            "app.controllers.user_controller".to_string(),
            PathBuf::from("src/controllers/user.py"),
            Span::new(1, 0, 30, 0),
        ));
        graph.add_unit(CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "user_service".to_string(),
            "app.services.user_service".to_string(),
            PathBuf::from("src/services/user.py"),
            Span::new(1, 0, 40, 0),
        ));
        graph.add_unit(CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "user_repository".to_string(),
            "app.repos.user_repository".to_string(),
            PathBuf::from("src/repos/user.py"),
            Span::new(1, 0, 25, 0),
        ));
        graph
    }

    #[test]
    fn infer_detects_components() {
        let graph = test_graph();
        let inferrer = ArchitectureInferrer::new(&graph);
        let arch = inferrer.infer();

        // One component per role present in the fixture.
        assert_eq!(arch.components.len(), 3);
        for role in [
            ComponentRole::Controller,
            ComponentRole::Service,
            ComponentRole::Repository,
        ] {
            assert!(
                arch.components.iter().any(|c| c.role == role),
                "missing component for role {:?}",
                role
            );
        }
    }

    #[test]
    fn infer_detects_layered_pattern() {
        let graph = test_graph();
        let inferrer = ArchitectureInferrer::new(&graph);
        let arch = inferrer.infer();

        assert_eq!(arch.pattern, ArchitecturePattern::Layered);
        let layer_names: Vec<&str> = arch.layers.iter().map(|l| l.name.as_str()).collect();
        assert_eq!(
            layer_names,
            vec!["Presentation", "Business Logic", "Data Access"]
        );
        assert!(arch.confidence > 0.0 && arch.confidence <= 1.0);
    }

    #[test]
    fn diagram_produces_json() {
        let graph = test_graph();
        let inferrer = ArchitectureInferrer::new(&graph);
        let arch = inferrer.infer();
        let diagram = inferrer.diagram(&arch);

        assert_eq!(
            diagram.get("pattern").and_then(|v| v.as_str()),
            Some("Layered")
        );
        for key in ["layers", "components", "flows"] {
            assert!(
                diagram.get(key).map_or(false, |v| v.is_array()),
                "diagram key {:?} should be an array",
                key
            );
        }
    }

    #[test]
    fn validate_reports_pattern_mismatch() {
        let graph = test_graph();
        let inferrer = ArchitectureInferrer::new(&graph);

        // The fixture has no edges, so a matching pattern yields no anomalies.
        assert!(inferrer.validate(ArchitecturePattern::Layered).is_empty());

        let anomalies = inferrer.validate(ArchitecturePattern::MVC);
        assert_eq!(anomalies.len(), 1);
        assert_eq!(anomalies[0].severity, AnomalySeverity::Warning);
        assert_eq!(anomalies[0].expected, "MVC");
        assert_eq!(anomalies[0].actual, "Layered");
    }

    #[test]
    fn matches_any_is_substring_based() {
        assert!(ArchitectureInferrer::matches_any(
            &["user_controller"],
            &["controller"]
        ));
        assert!(!ArchitectureInferrer::matches_any(
            &["user_service"],
            &["controller"]
        ));
        assert!(!ArchitectureInferrer::matches_any(&[], &["controller"]));
        assert!(!ArchitectureInferrer::matches_any(&["anything"], &[]));
    }
}