helix_core/compiler/
optimizer.rs

1use crate::codegen::{HelixIR, Instruction};
2use std::collections::{HashMap, HashSet};
3#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4pub enum OptimizationLevel {
5    Zero,
6    One,
7    Two,
8    Three,
9}
10impl Default for OptimizationLevel {
11    fn default() -> Self {
12        Self::Two
13    }
14}
15impl From<u8> for OptimizationLevel {
16    fn from(level: u8) -> Self {
17        match level {
18            0 => Self::Zero,
19            1 => Self::One,
20            2 => Self::Two,
21            3 | _ => Self::Three,
22        }
23    }
24}
25pub struct Optimizer {
26    level: OptimizationLevel,
27    stats: OptimizationStats,
28}
29impl Optimizer {
30    pub fn new(level: OptimizationLevel) -> Self {
31        Self {
32            level,
33            stats: OptimizationStats::default(),
34        }
35    }
36    pub fn optimize(&mut self, ir: &mut HelixIR) {
37        match self.level {
38            OptimizationLevel::Zero => {}
39            OptimizationLevel::One => {
40                self.apply_basic_optimizations(ir);
41            }
42            OptimizationLevel::Two => {
43                self.apply_basic_optimizations(ir);
44                self.apply_standard_optimizations(ir);
45            }
46            OptimizationLevel::Three => {
47                self.apply_basic_optimizations(ir);
48                self.apply_standard_optimizations(ir);
49                self.apply_aggressive_optimizations(ir);
50            }
51        }
52    }
53    fn apply_basic_optimizations(&mut self, ir: &mut HelixIR) {
54        self.deduplicate_strings(ir);
55        self.remove_dead_code(ir);
56        self.optimize_string_pool(ir);
57    }
58    fn apply_standard_optimizations(&mut self, ir: &mut HelixIR) {
59        self.fold_constants(ir);
60        self.inline_small_functions(ir);
61        self.optimize_instruction_sequence(ir);
62        self.merge_duplicate_sections(ir);
63    }
64    fn apply_aggressive_optimizations(&mut self, ir: &mut HelixIR) {
65        self.eliminate_cross_references(ir);
66        self.optimize_pipelines(ir);
67        self.compress_data_sections(ir);
68        self.reorder_for_cache_locality(ir);
69    }
70    fn deduplicate_strings(&mut self, ir: &mut HelixIR) {
71        let mut seen = HashMap::new();
72        let mut new_strings = Vec::new();
73        let mut remap = HashMap::new();
74        for (idx, string) in ir.string_pool.strings.iter().enumerate() {
75            if let Some(&existing_idx) = seen.get(string) {
76                remap.insert(idx as u32, existing_idx);
77                self.stats.strings_deduplicated += 1;
78            } else {
79                let new_idx = new_strings.len() as u32;
80                seen.insert(string.clone(), new_idx);
81                new_strings.push(string.clone());
82                remap.insert(idx as u32, new_idx);
83            }
84        }
85        let original_size = ir.string_pool.strings.len();
86        ir.string_pool.strings = new_strings;
87        self.stats.strings_removed = original_size - ir.string_pool.strings.len();
88        for instruction in &mut ir.instructions {
89            self.remap_instruction_strings(instruction, &remap);
90        }
91    }
92    fn remove_dead_code(&mut self, ir: &mut HelixIR) {
93        let mut reachable = HashSet::new();
94        let mut work_list = vec![0];
95        while let Some(idx) = work_list.pop() {
96            if idx >= ir.instructions.len() || !reachable.insert(idx) {
97                continue;
98            }
99            work_list.push(idx + 1);
100        }
101        let mut new_instructions = Vec::new();
102        let mut remap = HashMap::new();
103        for (idx, instruction) in ir.instructions.iter().enumerate() {
104            if reachable.contains(&idx) {
105                remap.insert(idx, new_instructions.len());
106                new_instructions.push(instruction.clone());
107            } else {
108                self.stats.instructions_removed += 1;
109            }
110        }
111        for instruction in &mut new_instructions {
112            self.remap_jump_targets(instruction, &remap);
113        }
114        ir.instructions = new_instructions;
115    }
116    fn optimize_string_pool(&mut self, ir: &mut HelixIR) {
117        let mut frequency = HashMap::new();
118        for instruction in &ir.instructions {
119            self.count_string_usage(instruction, &mut frequency);
120        }
121        let mut indexed_strings: Vec<(u32, String)> = ir
122            .string_pool
123            .strings
124            .iter()
125            .enumerate()
126            .map(|(i, s)| (i as u32, s.clone()))
127            .collect();
128        indexed_strings
129            .sort_by_key(|(idx, _)| {
130                std::cmp::Reverse(frequency.get(idx).copied().unwrap_or(0))
131            });
132        let mut remap = HashMap::new();
133        let mut new_strings = Vec::new();
134        for (old_idx, string) in indexed_strings {
135            let new_idx = new_strings.len() as u32;
136            remap.insert(old_idx, new_idx);
137            new_strings.push(string);
138        }
139        ir.string_pool.strings = new_strings;
140        for instruction in &mut ir.instructions {
141            self.remap_instruction_strings(instruction, &remap);
142        }
143    }
144    fn fold_constants(&mut self, _ir: &mut HelixIR) {}
145    fn inline_small_functions(&mut self, ir: &mut HelixIR) {
146        let mut reference_count = std::collections::HashMap::new();
147        for instruction in &ir.instructions {
148            if let Instruction::ResolveReference { index, .. } = instruction {
149                *reference_count.entry(*index).or_insert(0) += 1;
150            }
151        }
152        self.stats.functions_inlined = 0;
153    }
154    fn optimize_instruction_sequence(&mut self, ir: &mut HelixIR) {
155        let mut seen_properties: std::collections::HashSet<(u32, u32)> = std::collections::HashSet::new();
156        let mut i = 0;
157        while i < ir.instructions.len() {
158            match &ir.instructions[i] {
159                Instruction::SetProperty { target, key, .. } => {
160                    let prop_key = (*target, *key);
161                    if seen_properties.contains(&prop_key) {
162                        ir.instructions.remove(i);
163                        self.stats.instructions_removed += 1;
164                        continue;
165                    } else {
166                        seen_properties.insert(prop_key);
167                    }
168                }
169                Instruction::SetCapability { agent, capability } => {
170                    let cap_key = (*agent, *capability);
171                    if seen_properties.contains(&cap_key) {
172                        ir.instructions.remove(i);
173                        self.stats.instructions_removed += 1;
174                        continue;
175                    } else {
176                        seen_properties.insert(cap_key);
177                    }
178                }
179                _ => {}
180            }
181            i += 1;
182        }
183    }
184    fn merge_duplicate_sections(&mut self, ir: &mut HelixIR) {
185        use std::collections::HashMap;
186        let mut agent_signatures: HashMap<String, Vec<u32>> = HashMap::new();
187        for (id, agent) in &ir.symbol_table.agents {
188            let signature = format!(
189                "{}-{}-{:?}-{:?}", agent.model_idx, agent.role_idx, agent.temperature,
190                agent.max_tokens
191            );
192            agent_signatures.entry(signature).or_insert_with(Vec::new).push(*id);
193        }
194        for (_, agents) in &agent_signatures {
195            if agents.len() > 1 {
196                self.stats.sections_merged += agents.len() - 1;
197            }
198        }
199        let mut workflow_signatures: HashMap<String, Vec<u32>> = HashMap::new();
200        for (id, workflow) in &ir.symbol_table.workflows {
201            let signature = format!("{:?}", workflow.trigger_type);
202            workflow_signatures.entry(signature).or_insert_with(Vec::new).push(*id);
203        }
204    }
205    fn eliminate_cross_references(&mut self, ir: &mut HelixIR) {
206        use std::collections::HashSet;
207        let mut referenced_agents: HashSet<u32> = HashSet::new();
208        let mut referenced_workflows: HashSet<u32> = HashSet::new();
209        for crew in ir.symbol_table.crews.values() {
210            for agent_id in &crew.agent_ids {
211                referenced_agents.insert(*agent_id);
212            }
213        }
214        for workflow in ir.symbol_table.workflows.values() {
215            if let Some(pipeline) = &workflow.pipeline {
216                for node_id in pipeline {
217                    referenced_workflows.insert(*node_id);
218                }
219            }
220        }
221        for instruction in &ir.instructions {
222            match instruction {
223                Instruction::ResolveReference { ref_type: _, index: _ } => {}
224                _ => {}
225            }
226        }
227        let unreferenced_agents: Vec<u32> = ir
228            .symbol_table
229            .agents
230            .keys()
231            .filter(|id| !referenced_agents.contains(id))
232            .cloned()
233            .collect();
234        for agent_id in unreferenced_agents {
235            ir.symbol_table.agents.remove(&agent_id);
236            self.stats.instructions_removed += 1;
237        }
238        let unreferenced_workflows: Vec<u32> = ir
239            .symbol_table
240            .workflows
241            .keys()
242            .filter(|id| !referenced_workflows.contains(id))
243            .cloned()
244            .collect();
245        for workflow_id in unreferenced_workflows {
246            ir.symbol_table.workflows.remove(&workflow_id);
247            self.stats.instructions_removed += 1;
248        }
249    }
250    fn optimize_pipelines(&mut self, ir: &mut HelixIR) {
251        for i in 0..ir.instructions.len() {
252            if let Instruction::DefinePipeline { .. } = &ir.instructions[i] {
253                self.stats.pipelines_optimized += 1;
254            }
255        }
256    }
257    fn compress_data_sections(&mut self, ir: &mut HelixIR) {
258        let mut string_frequency: HashMap<u32, usize> = HashMap::new();
259        for instruction in &ir.instructions {
260            match instruction {
261                Instruction::SetProperty { key, .. } => {
262                    *string_frequency.entry(*key).or_insert(0) += 1;
263                }
264                Instruction::SetCapability { capability, .. } => {
265                    *string_frequency.entry(*capability).or_insert(0) += 1;
266                }
267                Instruction::SetMetadata { key, value } => {
268                    *string_frequency.entry(*key).or_insert(0) += 1;
269                    *string_frequency.entry(*value).or_insert(0) += 1;
270                }
271                _ => {}
272            }
273        }
274        for agent in ir.symbol_table.agents.values() {
275            *string_frequency.entry(agent.name_idx).or_insert(0) += 1;
276            *string_frequency.entry(agent.model_idx).or_insert(0) += 1;
277            *string_frequency.entry(agent.role_idx).or_insert(0) += 1;
278        }
279        let _total_strings = ir.string_pool.strings.len();
280        let frequently_used = string_frequency
281            .iter()
282            .filter(|(_, count)| **count > 1)
283            .count();
284        if frequently_used > 0 {
285            self.stats.bytes_saved += frequently_used * 8;
286        }
287    }
288    fn reorder_for_cache_locality(&mut self, ir: &mut HelixIR) {
289        let mut reordered = Vec::new();
290        let mut agent_instructions = Vec::new();
291        let mut workflow_instructions = Vec::new();
292        let mut other_instructions = Vec::new();
293        for instruction in ir.instructions.drain(..) {
294            match &instruction {
295                Instruction::DeclareAgent(_) => agent_instructions.push(instruction),
296                Instruction::DeclareWorkflow(_) => {
297                    workflow_instructions.push(instruction)
298                }
299                Instruction::DefinePipeline { .. } => {
300                    workflow_instructions.push(instruction)
301                }
302                _ => other_instructions.push(instruction),
303            }
304        }
305        reordered.extend(agent_instructions);
306        reordered.extend(workflow_instructions);
307        reordered.extend(other_instructions);
308        ir.instructions = reordered;
309    }
310    fn remap_instruction_strings(
311        &self,
312        instruction: &mut Instruction,
313        remap: &HashMap<u32, u32>,
314    ) {
315        match instruction {
316            Instruction::SetProperty { key, value, .. } => {
317                if let Some(&new_idx) = remap.get(key) {
318                    *key = new_idx;
319                }
320                if let crate::codegen::ConstantValue::String(idx) = value {
321                    if let Some(&new_idx) = remap.get(idx) {
322                        *idx = new_idx;
323                    }
324                }
325            }
326            Instruction::SetCapability { capability, .. } => {
327                if let Some(&new_idx) = remap.get(capability) {
328                    *capability = new_idx;
329                }
330            }
331            Instruction::SetMetadata { key, value } => {
332                if let Some(&new_idx) = remap.get(key) {
333                    *key = new_idx;
334                }
335                if let Some(&new_idx) = remap.get(value) {
336                    *value = new_idx;
337                }
338            }
339            _ => {}
340        }
341    }
342    fn remap_jump_targets(
343        &self,
344        _instruction: &mut Instruction,
345        _remap: &HashMap<usize, usize>,
346    ) {}
347    fn count_string_usage(
348        &self,
349        instruction: &Instruction,
350        frequency: &mut HashMap<u32, usize>,
351    ) {
352        match instruction {
353            Instruction::SetProperty { key, value, .. } => {
354                *frequency.entry(*key).or_insert(0) += 1;
355                if let crate::codegen::ConstantValue::String(idx) = value {
356                    *frequency.entry(*idx).or_insert(0) += 1;
357                }
358            }
359            Instruction::SetCapability { capability, .. } => {
360                *frequency.entry(*capability).or_insert(0) += 1;
361            }
362            Instruction::SetMetadata { key, value } => {
363                *frequency.entry(*key).or_insert(0) += 1;
364                *frequency.entry(*value).or_insert(0) += 1;
365            }
366            _ => {}
367        }
368    }
369    pub fn stats(&self) -> &OptimizationStats {
370        &self.stats
371    }
372}
373#[derive(Debug, Default)]
374pub struct OptimizationStats {
375    pub strings_deduplicated: usize,
376    pub strings_removed: usize,
377    pub instructions_removed: usize,
378    pub constants_folded: usize,
379    pub functions_inlined: usize,
380    pub pipelines_optimized: usize,
381    pub sections_merged: usize,
382    pub bytes_saved: usize,
383}
384impl OptimizationStats {
385    pub fn report(&self) -> String {
386        format!(
387            "Optimization Results:\n\
388             - Strings deduplicated: {}\n\
389             - Strings removed: {}\n\
390             - Instructions removed: {}\n\
391             - Constants folded: {}\n\
392             - Functions inlined: {}\n\
393             - Pipelines optimized: {}\n\
394             - Sections merged: {}\n\
395             - Total bytes saved: {}",
396            self.strings_deduplicated, self.strings_removed, self.instructions_removed,
397            self.constants_folded, self.functions_inlined, self.pipelines_optimized, self
398            .sections_merged, self.bytes_saved
399        )
400    }
401}
402#[cfg(test)]
403mod tests {
404    use super::*;
405    use crate::codegen::{StringPool, SymbolTable, Metadata, ConstantPool};
406    #[test]
407    fn test_optimization_levels() {
408        assert_eq!(OptimizationLevel::from(0), OptimizationLevel::Zero);
409        assert_eq!(OptimizationLevel::from(1), OptimizationLevel::One);
410        assert_eq!(OptimizationLevel::from(2), OptimizationLevel::Two);
411        assert_eq!(OptimizationLevel::from(3), OptimizationLevel::Three);
412        assert_eq!(OptimizationLevel::from(99), OptimizationLevel::Three);
413    }
414    #[test]
415    fn test_string_deduplication() {
416        let mut ir = HelixIR {
417            version: 1,
418            metadata: Metadata::default(),
419            symbol_table: SymbolTable::default(),
420            instructions: vec![
421                Instruction::DeclareAgent(0), Instruction::DeclareWorkflow(1),
422                Instruction::DeclareContext(2),
423            ],
424            string_pool: StringPool {
425                strings: vec![
426                    "hello".to_string(), "world".to_string(), "hello".to_string(),
427                ],
428                index: std::collections::HashMap::new(),
429            },
430            constants: ConstantPool::default(),
431        };
432        let mut optimizer = Optimizer::new(OptimizationLevel::One);
433        optimizer.deduplicate_strings(&mut ir);
434        assert_eq!(ir.string_pool.strings.len(), 2);
435        assert_eq!(optimizer.stats.strings_deduplicated, 1);
436    }
437    #[test]
438    fn test_constant_folding() {
439        let mut ir = HelixIR {
440            version: 1,
441            metadata: Metadata::default(),
442            symbol_table: SymbolTable::default(),
443            instructions: vec![Instruction::DeclareAgent(0),],
444            string_pool: StringPool::default(),
445            constants: ConstantPool::default(),
446        };
447        let mut optimizer = Optimizer::new(OptimizationLevel::Two);
448        optimizer.fold_constants(&mut ir);
449        assert_eq!(ir.instructions.len(), 1);
450        match &ir.instructions[0] {
451            Instruction::DeclareAgent(0) => {}
452            _ => panic!("Expected DeclareAgent(0)"),
453        }
454    }
455}