Skip to main content

gid_core/
semantify.rs

//! Semantify module for upgrading file-level graphs to semantic graphs.
//!
//! Generates prompts and parses LLM responses. Does NOT call the LLM directly.
4
5use anyhow::{Context, Result, bail};
6use serde::{Deserialize, Serialize};
7use crate::graph::{Graph, Node, Edge};
8use std::collections::HashMap;
9
10/// A proposed semantic enhancement.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12#[serde(tag = "type", rename_all = "snake_case")]
13pub enum SemanticProposal {
14    /// Assign a layer to a node
15    AssignLayer {
16        node_id: String,
17        layer: String,
18        reason: String,
19        #[serde(default)]
20        confidence: f32,
21    },
22    /// Upgrade a file node to a component
23    UpgradeToComponent {
24        node_id: String,
25        component_name: String,
26        description: String,
27        #[serde(default)]
28        confidence: f32,
29    },
30    /// Add a feature node
31    AddFeature {
32        name: String,
33        description: String,
34        implementing_nodes: Vec<String>,
35        #[serde(default)]
36        confidence: f32,
37    },
38    /// Add description to a node
39    AddDescription {
40        node_id: String,
41        description: String,
42        #[serde(default)]
43        confidence: f32,
44    },
45    /// Group nodes into a module
46    GroupIntoModule {
47        module_name: String,
48        node_ids: Vec<String>,
49        #[serde(default)]
50        confidence: f32,
51    },
52}
53
54/// Result from parsing LLM semantify response.
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct SemantifyResult {
57    pub proposals: Vec<SemanticProposal>,
58    /// Optional: the full transformed graph
59    pub graph: Option<Graph>,
60}
61
62/// Generate a prompt to upgrade a file-level graph to a semantic graph.
63pub fn generate_semantify_prompt(graph: &Graph) -> String {
64    // Build context from graph
65    let node_summary = build_node_summary(graph);
66    let edge_summary = build_edge_summary(graph);
67    
68    format!(r#"You are a software architect. Analyze this code graph and suggest semantic enhancements.
69
70CURRENT GRAPH:
71
72Nodes ({} total):
73{}
74
75Edges ({} total):
76{}
77
78TASK:
791. Assign architectural layers to nodes (interface, application, domain, infrastructure)
802. Identify features that nodes implement
813. Add meaningful descriptions to important nodes
824. Group related files into logical components
83
84LAYER DEFINITIONS:
85- interface: User-facing (CLI commands, API routes, UI components, handlers)
86- application: Use cases, services, orchestration
87- domain: Core business logic, types, entities
88- infrastructure: External integrations (DB, filesystem, parsers, adapters)
89
90Respond with a JSON object:
91```json
92{{
93  "proposals": [
94    {{
95      "type": "assign_layer",
96      "node_id": "src/commands/init.ts",
97      "layer": "interface",
98      "reason": "CLI command handler",
99      "confidence": 0.9
100    }},
101    {{
102      "type": "add_feature",
103      "name": "graph_visualization",
104      "description": "Visualize the graph in various formats",
105      "implementing_nodes": ["src/visual.ts", "src/render.ts"],
106      "confidence": 0.85
107    }},
108    {{
109      "type": "add_description",
110      "node_id": "src/core/query.ts",
111      "description": "Graph traversal and query engine",
112      "confidence": 0.8
113    }}
114  ]
115}}
116```
117
118Only output valid JSON. No explanation before or after."#,
119        graph.nodes.len(),
120        node_summary,
121        graph.edges.len(),
122        edge_summary
123    )
124}
125
126/// Generate a prompt for full graph transformation.
127pub fn generate_full_transform_prompt(graph: &Graph) -> String {
128    let yaml = serde_yaml::to_string(graph).unwrap_or_default();
129    
130    format!(r#"You are a software architect. Transform this file-level graph into a semantic architecture graph.
131
132CURRENT GRAPH (YAML):
133```yaml
134{}
135```
136
137Transform the graph by:
1381. Adding a `layer` field to each node (interface, application, domain, infrastructure)
1392. Adding meaningful `description` fields
1403. Creating Feature nodes for logical feature groupings
1414. Adding `implements` edges from components to features
1425. Keeping all existing `depends_on` edges
143
144Output the complete transformed graph as YAML:
145```yaml
146project:
147  name: project-name
148  description: Semantic architecture graph
149
150nodes:
151  - id: feature_visualization
152    title: Graph Visualization
153    type: feature
154    description: Visualize graphs in multiple formats
155    
156  - id: src/visual.ts
157    title: Visual Renderer
158    type: component
159    layer: interface
160    description: Renders graph in ASCII, DOT, and Mermaid formats
161    
162edges:
163  - from: src/visual.ts
164    to: feature_visualization
165    relation: implements
166    
167  - from: src/visual.ts
168    to: src/core/graph.ts
169    relation: depends_on
170```
171
172Only output valid YAML. Start with "```yaml" and end with "```"."#, yaml)
173}
174
175/// Parse an LLM response containing semantic proposals.
176pub fn parse_semantify_response(response: &str) -> Result<SemantifyResult> {
177    let json_str = extract_json(response)?;
178    
179    #[derive(Deserialize)]
180    struct ProposalsResponse {
181        proposals: Vec<SemanticProposal>,
182    }
183    
184    let parsed: ProposalsResponse = serde_json::from_str(&json_str)
185        .context("Failed to parse proposals JSON")?;
186    
187    Ok(SemantifyResult {
188        proposals: parsed.proposals,
189        graph: None,
190    })
191}
192
193/// Parse an LLM response containing a full transformed graph.
194pub fn parse_full_transform_response(response: &str) -> Result<Graph> {
195    let yaml_str = extract_yaml(response)?;
196    
197    let graph: Graph = serde_yaml::from_str(&yaml_str)
198        .context("Failed to parse graph YAML")?;
199    
200    Ok(graph)
201}
202
203/// Apply semantic proposals to a graph.
204pub fn apply_proposals(graph: &mut Graph, proposals: &[SemanticProposal]) -> usize {
205    let mut applied_count = 0;
206    
207    for proposal in proposals {
208        match proposal {
209            SemanticProposal::AssignLayer { node_id, layer, .. } => {
210                if let Some(node) = graph.get_node_mut(node_id) {
211                    node.metadata.insert("layer".to_string(), serde_json::json!(layer));
212                    applied_count += 1;
213                }
214            }
215            
216            SemanticProposal::UpgradeToComponent { node_id, component_name, description, .. } => {
217                if let Some(node) = graph.get_node_mut(node_id) {
218                    node.node_type = Some("component".to_string());
219                    node.title = component_name.clone();
220                    node.description = Some(description.clone());
221                    applied_count += 1;
222                }
223            }
224            
225            SemanticProposal::AddFeature { name, description, implementing_nodes, .. } => {
226                // Create feature node
227                let feature_id = format!("feature_{}", name.to_lowercase().replace(' ', "_"));
228                let mut feature_node = Node::new(&feature_id, name);
229                feature_node.node_type = Some("feature".to_string());
230                feature_node.description = Some(description.clone());
231                graph.add_node(feature_node);
232                applied_count += 1;
233                
234                // Add implements edges
235                for impl_node in implementing_nodes {
236                    if graph.get_node(impl_node).is_some() {
237                        graph.add_edge(Edge::new(impl_node, &feature_id, "implements"));
238                        applied_count += 1;
239                    }
240                }
241            }
242            
243            SemanticProposal::AddDescription { node_id, description, .. } => {
244                if let Some(node) = graph.get_node_mut(node_id) {
245                    if node.description.is_none() {
246                        node.description = Some(description.clone());
247                        applied_count += 1;
248                    }
249                }
250            }
251            
252            SemanticProposal::GroupIntoModule { module_name, node_ids, .. } => {
253                // Create module node
254                let module_id = format!("module_{}", module_name.to_lowercase().replace(' ', "_"));
255                let mut module_node = Node::new(&module_id, module_name);
256                module_node.node_type = Some("module".to_string());
257                graph.add_node(module_node);
258                applied_count += 1;
259                
260                // Add contains edges
261                for node_id in node_ids {
262                    if graph.get_node(node_id).is_some() {
263                        graph.add_edge(Edge::new(&module_id, node_id, "contains"));
264                        applied_count += 1;
265                    }
266                }
267            }
268        }
269    }
270    
271    applied_count
272}
273
274/// Build a summary of nodes for the prompt.
275fn build_node_summary(graph: &Graph) -> String {
276    let mut lines = Vec::new();
277    
278    for node in &graph.nodes {
279        let node_type = node.node_type.as_deref().unwrap_or("unknown");
280        let desc = node.description.as_deref().unwrap_or("");
281        let layer = node.metadata.get("layer")
282            .and_then(|v| v.as_str())
283            .unwrap_or("none");
284        
285        lines.push(format!(
286            "  - {} (type: {}, layer: {}) {}",
287            node.id,
288            node_type,
289            layer,
290            if desc.is_empty() { String::new() } else { format!("// {}", desc) }
291        ));
292    }
293    
294    lines.join("\n")
295}
296
297/// Build a summary of edges for the prompt.
298fn build_edge_summary(graph: &Graph) -> String {
299    // Group edges by relation
300    let mut by_relation: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
301    
302    for edge in &graph.edges {
303        by_relation.entry(&edge.relation)
304            .or_default()
305            .push((&edge.from, &edge.to));
306    }
307    
308    let mut lines = Vec::new();
309    
310    for (relation, edges) in &by_relation {
311        lines.push(format!("  {} edges ({}):", relation, edges.len()));
312        for (from, to) in edges.iter().take(10) {
313            lines.push(format!("    {} -> {}", from, to));
314        }
315        if edges.len() > 10 {
316            lines.push(format!("    ... and {} more", edges.len() - 10));
317        }
318    }
319    
320    lines.join("\n")
321}
322
323/// Extract JSON from response with markdown code blocks.
324fn extract_json(response: &str) -> Result<String> {
325    // Try to find JSON in code block
326    if let Some(start) = response.find("```json") {
327        let content = &response[start + 7..];
328        if let Some(end) = content.find("```") {
329            return Ok(content[..end].trim().to_string());
330        }
331    }
332    
333    // Try plain code block
334    if let Some(start) = response.find("```") {
335        let content = &response[start + 3..];
336        if let Some(end) = content.find("```") {
337            let inner = content[..end].trim();
338            if let Some(newline) = inner.find('\n') {
339                let first_line = &inner[..newline];
340                if !first_line.starts_with('{') && !first_line.starts_with('[') {
341                    return Ok(inner[newline..].trim().to_string());
342                }
343            }
344            return Ok(inner.to_string());
345        }
346    }
347    
348    // Try raw JSON
349    let trimmed = response.trim();
350    if trimmed.starts_with('{') || trimmed.starts_with('[') {
351        return Ok(trimmed.to_string());
352    }
353    
354    bail!("No JSON found in response")
355}
356
357/// Extract YAML from response.
358fn extract_yaml(response: &str) -> Result<String> {
359    if let Some(start) = response.find("```yaml") {
360        let content = &response[start + 7..];
361        if let Some(end) = content.find("```") {
362            return Ok(content[..end].trim().to_string());
363        }
364    }
365    
366    if let Some(start) = response.find("```yml") {
367        let content = &response[start + 6..];
368        if let Some(end) = content.find("```") {
369            return Ok(content[..end].trim().to_string());
370        }
371    }
372    
373    // Assume raw YAML
374    let trimmed = response.trim();
375    if trimmed.contains(':') {
376        return Ok(trimmed.to_string());
377    }
378    
379    bail!("No YAML found in response")
380}
381
/// Heuristic layer assignment based on file paths.
///
/// The path is lower-cased, backslashes are treated as `/`, and the path is
/// anchored with a leading `/` so that a well-known directory is recognised
/// wherever it appears, including as the first path segment
/// (`commands/init.ts`) and in Windows-style paths (`src\services\auth.ts`).
///
/// Layers are checked in this order and the first match wins:
/// `interface`, `application`, `domain`, `infrastructure`. The `domain` layer
/// additionally matches any path ending in `types.ts` or `types.rs`.
///
/// Returns `None` when no pattern matches.
pub fn heuristic_assign_layer(file_path: &str) -> Option<&'static str> {
    const INTERFACE_DIRS: &[&str] = &[
        "/commands/",
        "/cmd/",
        "/api/",
        "/routes/",
        "/controllers/",
        "/handlers/",
        "/web/",
        "/ui/",
        "/views/",
        "/pages/",
        "/components/",
    ];
    const APPLICATION_DIRS: &[&str] = &[
        "/services/",
        "/usecases/",
        "/use_cases/",
        "/orchestrators/",
        "/workflows/",
        "/ai/",
        "/llm/",
    ];
    const DOMAIN_DIRS: &[&str] = &[
        "/core/",
        "/domain/",
        "/entities/",
        "/models/",
        "/types/",
        "/lib/",
    ];
    const DOMAIN_FILE_SUFFIXES: &[&str] = &["types.ts", "types.rs"];
    const INFRASTRUCTURE_DIRS: &[&str] = &[
        "/infrastructure/",
        "/db/",
        "/database/",
        "/repositories/",
        "/adapters/",
        "/clients/",
        "/extractors/",
        "/parsers/",
        "/config/",
    ];

    // Normalise separators and anchor the path so that every directory
    // segment, including the first one, is surrounded by `/`.
    let normalized = format!("/{}", file_path.to_lowercase().replace('\\', "/"));

    let in_any_dir = |dirs: &[&str]| dirs.iter().any(|dir| normalized.contains(*dir));

    if in_any_dir(INTERFACE_DIRS) {
        return Some("interface");
    }

    if in_any_dir(APPLICATION_DIRS) {
        return Some("application");
    }

    if in_any_dir(DOMAIN_DIRS)
        || DOMAIN_FILE_SUFFIXES
            .iter()
            .any(|suffix| normalized.ends_with(*suffix))
    {
        return Some("domain");
    }

    if in_any_dir(INFRASTRUCTURE_DIRS) {
        return Some("infrastructure");
    }

    None
}
443
444/// Apply heuristic layer assignments to a graph.
445pub fn apply_heuristic_layers(graph: &mut Graph) -> usize {
446    let mut assigned = 0;
447    
448    for node in &mut graph.nodes {
449        // Skip if already has layer
450        if node.metadata.contains_key("layer") {
451            continue;
452        }
453        
454        // Try to infer from path (stored in id for file nodes)
455        if let Some(layer) = heuristic_assign_layer(&node.id) {
456            node.metadata.insert("layer".to_string(), serde_json::json!(layer));
457            assigned += 1;
458        }
459    }
460    
461    assigned
462}
463
#[cfg(test)]
mod tests {
    use super::*;

    /// Representative paths map to their expected layer; an unmatched path maps to `None`.
    #[test]
    fn test_heuristic_layer_assignment() {
        let cases: [(&str, Option<&str>); 5] = [
            ("src/commands/init.ts", Some("interface")),
            ("src/services/auth.ts", Some("application")),
            ("src/core/graph.ts", Some("domain")),
            ("src/extractors/typescript.ts", Some("infrastructure")),
            ("src/utils.ts", None),
        ];

        for (path, expected) in cases.iter() {
            assert_eq!(heuristic_assign_layer(path), *expected);
        }
    }

    /// A fenced JSON reply with a single proposal is decoded into one proposal.
    #[test]
    fn test_parse_proposals() {
        let reply = r#"```json
{
  "proposals": [
    {
      "type": "assign_layer",
      "node_id": "src/cli.ts",
      "layer": "interface",
      "reason": "CLI entry point",
      "confidence": 0.9
    }
  ]
}
```"#;

        let parsed = parse_semantify_response(reply).unwrap();
        assert_eq!(parsed.proposals.len(), 1);
    }

    /// An `AssignLayer` proposal for an existing node stores the layer in its metadata.
    #[test]
    fn test_apply_proposals() {
        let mut graph = Graph::new();
        graph.add_node(Node::new("src/cli.ts", "CLI"));

        let assign_interface = SemanticProposal::AssignLayer {
            node_id: String::from("src/cli.ts"),
            layer: String::from("interface"),
            reason: String::from("CLI"),
            confidence: 0.9,
        };

        let change_count = apply_proposals(&mut graph, &[assign_interface]);
        assert_eq!(change_count, 1);

        let cli_node = graph.get_node("src/cli.ts").unwrap();
        let stored_layer = cli_node.metadata.get("layer").and_then(|value| value.as_str());
        assert_eq!(stored_layer, Some("interface"));
    }
}