cadi_core/rehydration/
assembler.rs

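//! Code Assembler
//!
//! Assembles atoms (chunks of source plus their graph metadata) into a single
//! view, optionally sorting, separating, minimizing, or reducing them to
//! signatures along the way.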
4
5use super::config::{ViewConfig, ViewFormat};
6use super::view::{ViewFragment, InclusionReason};
7use crate::graph::GraphNode;
8
9/// Assembler for creating virtual views from atoms
10pub struct Assembler {
11    config: ViewConfig,
12}
13
14impl Assembler {
15    pub fn new(config: ViewConfig) -> Self {
16        Self { config }
17    }
18
19    /// Assemble atoms into a single coherent source
20    pub fn assemble(
21        &self,
22        atoms: Vec<(GraphNode, String)>,
23        language: &str,
24    ) -> AssemblyResult {
25        let mut output = String::new();
26        let mut fragments = Vec::new();
27        let mut symbol_locations = std::collections::HashMap::new();
28        let mut current_line = 1;
29        let mut total_tokens = 0;
30
31        // Sort atoms by priority if configured
32        let sorted_atoms = if self.config.sort_by_type {
33            self.sort_by_type(atoms)
34        } else {
35            atoms
36        };
37
38        for (node, content) in sorted_atoms {
39            // Check token limit
40            let atom_tokens = content.len() / 4;
41            if total_tokens + atom_tokens > self.config.max_tokens {
42                break;
43            }
44
45            // Add separator if configured
46            if self.config.add_separators && !output.is_empty() {
47                let separator = self.create_separator(&node, language);
48                output.push_str(&separator);
49                current_line += separator.lines().count();
50            }
51
52            // Track symbol locations
53            for symbol in &node.symbols_defined {
54                symbol_locations.insert(symbol.clone(), current_line);
55            }
56
57            // Add content
58            let formatted_content = self.format_content(&content, language);
59            let content_lines = formatted_content.lines().count();
60
61            // Create fragment
62            fragments.push(ViewFragment {
63                chunk_id: node.chunk_id.clone(),
64                alias: node.primary_alias.clone(),
65                start_line: current_line,
66                end_line: current_line + content_lines - 1,
67                token_count: atom_tokens,
68                inclusion_reason: InclusionReason::Requested,
69                defines: node.symbols_defined.clone(),
70            });
71
72            output.push_str(&formatted_content);
73            if !formatted_content.ends_with('\n') {
74                output.push('\n');
75            }
76            output.push('\n');
77
78            current_line += content_lines + 1;
79            total_tokens += atom_tokens;
80        }
81
82        AssemblyResult {
83            source: output,
84            fragments,
85            symbol_locations,
86            total_tokens,
87            truncated: total_tokens >= self.config.max_tokens,
88        }
89    }
90
91    /// Sort atoms by type priority
92    fn sort_by_type(&self, mut atoms: Vec<(GraphNode, String)>) -> Vec<(GraphNode, String)> {
93        atoms.sort_by_key(|(node, _)| {
94            match node.granularity.as_str() {
95                "import" => 0,
96                "type" | "struct" | "interface" | "enum" => 1,
97                "trait" => 2,
98                "constant" => 3,
99                "function" => 4,
100                "async_function" => 4,
101                "class" => 5,
102                "module" => 6,
103                _ => 10,
104            }
105        });
106        atoms
107    }
108
109    /// Create a separator comment
110    fn create_separator(&self, node: &GraphNode, language: &str) -> String {
111        let label = node.primary_alias.as_ref()
112            .unwrap_or(&node.chunk_id);
113        
114        let comment_style = match language {
115            "python" => "#",
116            "rust" | "typescript" | "javascript" | "go" | "java" | "c" | "cpp" => "//",
117            _ => "//",
118        };
119        
120        format!("{} --- {} ---\n", comment_style, label)
121    }
122
123    /// Format content based on view format
124    fn format_content(&self, content: &str, language: &str) -> String {
125        match self.config.format {
126            ViewFormat::Source => content.to_string(),
127            ViewFormat::Minimal => self.minimize(content),
128            ViewFormat::Documented => content.to_string(),
129            ViewFormat::Signatures => self.extract_signatures(content, language),
130            ViewFormat::Json => content.to_string(),
131        }
132    }
133
134    /// Minimize content (remove comments, compact whitespace)
135    fn minimize(&self, content: &str) -> String {
136        let mut result = String::new();
137        let mut in_block_comment = false;
138
139        for line in content.lines() {
140            let trimmed = line.trim();
141            
142            // Skip empty lines
143            if trimmed.is_empty() {
144                continue;
145            }
146            
147            // Skip single-line comments
148            if trimmed.starts_with("//") || trimmed.starts_with("#") {
149                continue;
150            }
151            
152            // Handle block comments (simplified)
153            if trimmed.contains("/*") {
154                in_block_comment = true;
155            }
156            if in_block_comment {
157                if trimmed.contains("*/") {
158                    in_block_comment = false;
159                }
160                continue;
161            }
162            
163            result.push_str(line);
164            result.push('\n');
165        }
166
167        result
168    }
169
170    /// Extract just the signatures (no bodies)
171    fn extract_signatures(&self, content: &str, language: &str) -> String {
172        match language {
173            "rust" => self.extract_rust_signatures(content),
174            "typescript" => self.extract_ts_signatures(content),
175            "python" => self.extract_python_signatures(content),
176            _ => content.to_string(),
177        }
178    }
179
180    fn extract_rust_signatures(&self, content: &str) -> String {
181        let mut result = String::new();
182        
183        // Match function signatures
184        let fn_regex = regex::Regex::new(
185            r"(?m)^(\s*)(?:pub(?:\([^)]*\))?\s+)?(async\s+)?fn\s+\w+[^{]+\{"
186        ).unwrap();
187        
188        for cap in fn_regex.captures_iter(content) {
189            let sig = cap.get(0).unwrap().as_str();
190            let sig = sig.trim_end_matches('{').trim();
191            result.push_str(sig);
192            result.push_str(";\n");
193        }
194        
195        // Match struct definitions
196        let struct_regex = regex::Regex::new(
197            r"(?m)^(?:pub(?:\([^)]*\))?\s+)?struct\s+\w+[^{]*\{[^}]+\}"
198        ).unwrap();
199        
200        for cap in struct_regex.find_iter(content) {
201            result.push_str(cap.as_str());
202            result.push('\n');
203        }
204        
205        result
206    }
207
208    fn extract_ts_signatures(&self, content: &str) -> String {
209        let mut result = String::new();
210        
211        // Match function signatures
212        let fn_regex = regex::Regex::new(
213            r"(?m)^(?:export\s+)?(?:async\s+)?function\s+\w+\([^)]*\)[^{]*"
214        ).unwrap();
215        
216        for cap in fn_regex.find_iter(content) {
217            result.push_str(cap.as_str().trim());
218            result.push_str(";\n");
219        }
220        
221        // Match interface definitions
222        let interface_regex = regex::Regex::new(
223            r"(?m)^(?:export\s+)?interface\s+\w+[^{]*\{[^}]+\}"
224        ).unwrap();
225        
226        for cap in interface_regex.find_iter(content) {
227            result.push_str(cap.as_str());
228            result.push('\n');
229        }
230        
231        result
232    }
233
234    fn extract_python_signatures(&self, content: &str) -> String {
235        let mut result = String::new();
236        
237        // Match function definitions
238        let fn_regex = regex::Regex::new(
239            r"(?m)^(\s*)(?:async\s+)?def\s+\w+\([^)]*\)(?:\s*->\s*[^:]+)?:"
240        ).unwrap();
241        
242        for cap in fn_regex.find_iter(content) {
243            result.push_str(cap.as_str().trim_end_matches(':'));
244            result.push_str(": ...\n");
245        }
246        
247        result
248    }
249}
250
251/// Result of assembly operation
252pub struct AssemblyResult {
253    pub source: String,
254    pub fragments: Vec<ViewFragment>,
255    pub symbol_locations: std::collections::HashMap<String, usize>,
256    pub total_tokens: usize,
257    pub truncated: bool,
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    /// Whole-line comments and blank lines are removed; code lines keep
    /// their original indentation.
    #[test]
    fn test_minimize() {
        let assembler = Assembler::new(ViewConfig::minimal());

        let content = r#"
// This is a comment
fn hello() {
    // Another comment
    println!("hello");
}
"#;

        let minimized = assembler.minimize(content);
        assert!(!minimized.contains("// This is a comment"));
        assert!(!minimized.contains("// Another comment"));
        assert!(minimized.contains("fn hello()"));
        assert_eq!(minimized, "fn hello() {\n    println!(\"hello\");\n}\n");
    }

    /// Function bodies are replaced by `;`, brace-bodied structs are kept
    /// whole, and functions are listed before structs.
    #[test]
    fn test_rust_signatures() {
        let assembler = Assembler::new(ViewConfig::default());

        let content = r#"
pub fn hello(name: &str) -> String {
    format!("Hello, {}", name)
}

pub struct Person {
    name: String,
    age: u32,
}
"#;

        let signatures = assembler.extract_rust_signatures(content);
        assert!(signatures.contains("pub fn hello(name: &str) -> String;"));
        assert!(signatures.contains("pub struct Person"));
        assert!(!signatures.contains("format!"));
        assert_eq!(
            signatures,
            "pub fn hello(name: &str) -> String;\npub struct Person {\n    name: String,\n    age: u32,\n}\n"
        );
    }

    /// TypeScript functions lose their bodies; interfaces are kept whole.
    #[test]
    fn test_ts_signatures() {
        let assembler = Assembler::new(ViewConfig::default());

        let content = "export function add(a: number, b: number): number {\n    return a + b;\n}\n\nexport interface Point {\n    x: number;\n    y: number;\n}\n";

        let signatures = assembler.extract_ts_signatures(content);
        assert!(signatures.contains("export function add(a: number, b: number): number;"));
        assert!(signatures.contains("export interface Point {"));
        assert!(!signatures.contains("return a + b"));
    }

    /// Python `def` headers are kept and given a `...` body.
    #[test]
    fn test_python_signatures() {
        let assembler = Assembler::new(ViewConfig::default());

        let content = "def greet(name: str) -> str:\n    return name\n";

        let signatures = assembler.extract_python_signatures(content);
        assert_eq!(signatures, "def greet(name: str) -> str: ...\n");
    }
}