1use crate::codegen::{HelixIR, Instruction};
2use std::collections::{HashMap, HashSet};
/// How aggressively the optimizer rewrites the IR. Levels are cumulative,
/// mirroring `-O0`..`-O3` in conventional compilers (see `Optimizer::optimize`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationLevel {
    /// No optimization at all.
    Zero,
    /// Basic passes: string dedup, dead-code removal, string-pool layout.
    One,
    /// Adds standard passes: folding, inlining, instruction-sequence cleanup.
    Two,
    /// Adds aggressive whole-program passes.
    Three,
}
impl Default for OptimizationLevel {
    /// `-O2` is the conventional default trade-off.
    fn default() -> Self {
        Self::Two
    }
}
impl From<u8> for OptimizationLevel {
    /// Maps a numeric level to the enum; values above 3 saturate to `Three`.
    fn from(level: u8) -> Self {
        match level {
            0 => Self::Zero,
            1 => Self::One,
            2 => Self::Two,
            // FIX(idiom): was `3 | _` — the `3` is redundant next to the
            // wildcard, which alone covers it. Behavior unchanged.
            _ => Self::Three,
        }
    }
}
/// Drives optimization passes over a `HelixIR` module, accumulating
/// statistics about what each pass changed.
pub struct Optimizer {
    // Highest pass tier to run; see `optimize` for which passes each
    // level enables.
    level: OptimizationLevel,
    // Counters updated in place by the individual passes.
    stats: OptimizationStats,
}
29impl Optimizer {
30 pub fn new(level: OptimizationLevel) -> Self {
31 Self {
32 level,
33 stats: OptimizationStats::default(),
34 }
35 }
36 pub fn optimize(&mut self, ir: &mut HelixIR) {
37 match self.level {
38 OptimizationLevel::Zero => {}
39 OptimizationLevel::One => {
40 self.apply_basic_optimizations(ir);
41 }
42 OptimizationLevel::Two => {
43 self.apply_basic_optimizations(ir);
44 self.apply_standard_optimizations(ir);
45 }
46 OptimizationLevel::Three => {
47 self.apply_basic_optimizations(ir);
48 self.apply_standard_optimizations(ir);
49 self.apply_aggressive_optimizations(ir);
50 }
51 }
52 }
    /// O1 pass group: cheap, always-safe cleanups of the string pool and
    /// instruction stream.
    fn apply_basic_optimizations(&mut self, ir: &mut HelixIR) {
        self.deduplicate_strings(ir);
        self.remove_dead_code(ir);
        self.optimize_string_pool(ir);
    }
    /// O2 pass group: standard rewrites on top of the basic group.
    /// Several of these are currently stubs (see the individual passes).
    fn apply_standard_optimizations(&mut self, ir: &mut HelixIR) {
        self.fold_constants(ir);
        self.inline_small_functions(ir);
        self.optimize_instruction_sequence(ir);
        self.merge_duplicate_sections(ir);
    }
    /// O3 pass group: whole-program rewrites that touch the symbol table
    /// and reorder the instruction stream.
    fn apply_aggressive_optimizations(&mut self, ir: &mut HelixIR) {
        self.eliminate_cross_references(ir);
        self.optimize_pipelines(ir);
        self.compress_data_sections(ir);
        self.reorder_for_cache_locality(ir);
    }
70 fn deduplicate_strings(&mut self, ir: &mut HelixIR) {
71 let mut seen = HashMap::new();
72 let mut new_strings = Vec::new();
73 let mut remap = HashMap::new();
74 for (idx, string) in ir.string_pool.strings.iter().enumerate() {
75 if let Some(&existing_idx) = seen.get(string) {
76 remap.insert(idx as u32, existing_idx);
77 self.stats.strings_deduplicated += 1;
78 } else {
79 let new_idx = new_strings.len() as u32;
80 seen.insert(string.clone(), new_idx);
81 new_strings.push(string.clone());
82 remap.insert(idx as u32, new_idx);
83 }
84 }
85 let original_size = ir.string_pool.strings.len();
86 ir.string_pool.strings = new_strings;
87 self.stats.strings_removed = original_size - ir.string_pool.strings.len();
88 for instruction in &mut ir.instructions {
89 self.remap_instruction_strings(instruction, &remap);
90 }
91 }
92 fn remove_dead_code(&mut self, ir: &mut HelixIR) {
93 let mut reachable = HashSet::new();
94 let mut work_list = vec![0];
95 while let Some(idx) = work_list.pop() {
96 if idx >= ir.instructions.len() || !reachable.insert(idx) {
97 continue;
98 }
99 work_list.push(idx + 1);
100 }
101 let mut new_instructions = Vec::new();
102 let mut remap = HashMap::new();
103 for (idx, instruction) in ir.instructions.iter().enumerate() {
104 if reachable.contains(&idx) {
105 remap.insert(idx, new_instructions.len());
106 new_instructions.push(instruction.clone());
107 } else {
108 self.stats.instructions_removed += 1;
109 }
110 }
111 for instruction in &mut new_instructions {
112 self.remap_jump_targets(instruction, &remap);
113 }
114 ir.instructions = new_instructions;
115 }
116 fn optimize_string_pool(&mut self, ir: &mut HelixIR) {
117 let mut frequency = HashMap::new();
118 for instruction in &ir.instructions {
119 self.count_string_usage(instruction, &mut frequency);
120 }
121 let mut indexed_strings: Vec<(u32, String)> = ir
122 .string_pool
123 .strings
124 .iter()
125 .enumerate()
126 .map(|(i, s)| (i as u32, s.clone()))
127 .collect();
128 indexed_strings
129 .sort_by_key(|(idx, _)| {
130 std::cmp::Reverse(frequency.get(idx).copied().unwrap_or(0))
131 });
132 let mut remap = HashMap::new();
133 let mut new_strings = Vec::new();
134 for (old_idx, string) in indexed_strings {
135 let new_idx = new_strings.len() as u32;
136 remap.insert(old_idx, new_idx);
137 new_strings.push(string);
138 }
139 ir.string_pool.strings = new_strings;
140 for instruction in &mut ir.instructions {
141 self.remap_instruction_strings(instruction, &remap);
142 }
143 }
    /// Constant-folding pass — intentionally a stub: HelixIR currently has no
    /// foldable arithmetic instructions, so `stats.constants_folded` stays 0.
    fn fold_constants(&mut self, _ir: &mut HelixIR) {}
145 fn inline_small_functions(&mut self, ir: &mut HelixIR) {
146 let mut reference_count = std::collections::HashMap::new();
147 for instruction in &ir.instructions {
148 if let Instruction::ResolveReference { index, .. } = instruction {
149 *reference_count.entry(*index).or_insert(0) += 1;
150 }
151 }
152 self.stats.functions_inlined = 0;
153 }
154 fn optimize_instruction_sequence(&mut self, ir: &mut HelixIR) {
155 let mut seen_properties: std::collections::HashSet<(u32, u32)> = std::collections::HashSet::new();
156 let mut i = 0;
157 while i < ir.instructions.len() {
158 match &ir.instructions[i] {
159 Instruction::SetProperty { target, key, .. } => {
160 let prop_key = (*target, *key);
161 if seen_properties.contains(&prop_key) {
162 ir.instructions.remove(i);
163 self.stats.instructions_removed += 1;
164 continue;
165 } else {
166 seen_properties.insert(prop_key);
167 }
168 }
169 Instruction::SetCapability { agent, capability } => {
170 let cap_key = (*agent, *capability);
171 if seen_properties.contains(&cap_key) {
172 ir.instructions.remove(i);
173 self.stats.instructions_removed += 1;
174 continue;
175 } else {
176 seen_properties.insert(cap_key);
177 }
178 }
179 _ => {}
180 }
181 i += 1;
182 }
183 }
184 fn merge_duplicate_sections(&mut self, ir: &mut HelixIR) {
185 use std::collections::HashMap;
186 let mut agent_signatures: HashMap<String, Vec<u32>> = HashMap::new();
187 for (id, agent) in &ir.symbol_table.agents {
188 let signature = format!(
189 "{}-{}-{:?}-{:?}", agent.model_idx, agent.role_idx, agent.temperature,
190 agent.max_tokens
191 );
192 agent_signatures.entry(signature).or_insert_with(Vec::new).push(*id);
193 }
194 for (_, agents) in &agent_signatures {
195 if agents.len() > 1 {
196 self.stats.sections_merged += agents.len() - 1;
197 }
198 }
199 let mut workflow_signatures: HashMap<String, Vec<u32>> = HashMap::new();
200 for (id, workflow) in &ir.symbol_table.workflows {
201 let signature = format!("{:?}", workflow.trigger_type);
202 workflow_signatures.entry(signature).or_insert_with(Vec::new).push(*id);
203 }
204 }
205 fn eliminate_cross_references(&mut self, ir: &mut HelixIR) {
206 use std::collections::HashSet;
207 let mut referenced_agents: HashSet<u32> = HashSet::new();
208 let mut referenced_workflows: HashSet<u32> = HashSet::new();
209 for crew in ir.symbol_table.crews.values() {
210 for agent_id in &crew.agent_ids {
211 referenced_agents.insert(*agent_id);
212 }
213 }
214 for workflow in ir.symbol_table.workflows.values() {
215 if let Some(pipeline) = &workflow.pipeline {
216 for node_id in pipeline {
217 referenced_workflows.insert(*node_id);
218 }
219 }
220 }
221 for instruction in &ir.instructions {
222 match instruction {
223 Instruction::ResolveReference { ref_type: _, index: _ } => {}
224 _ => {}
225 }
226 }
227 let unreferenced_agents: Vec<u32> = ir
228 .symbol_table
229 .agents
230 .keys()
231 .filter(|id| !referenced_agents.contains(id))
232 .cloned()
233 .collect();
234 for agent_id in unreferenced_agents {
235 ir.symbol_table.agents.remove(&agent_id);
236 self.stats.instructions_removed += 1;
237 }
238 let unreferenced_workflows: Vec<u32> = ir
239 .symbol_table
240 .workflows
241 .keys()
242 .filter(|id| !referenced_workflows.contains(id))
243 .cloned()
244 .collect();
245 for workflow_id in unreferenced_workflows {
246 ir.symbol_table.workflows.remove(&workflow_id);
247 self.stats.instructions_removed += 1;
248 }
249 }
250 fn optimize_pipelines(&mut self, ir: &mut HelixIR) {
251 for i in 0..ir.instructions.len() {
252 if let Instruction::DefinePipeline { .. } = &ir.instructions[i] {
253 self.stats.pipelines_optimized += 1;
254 }
255 }
256 }
257 fn compress_data_sections(&mut self, ir: &mut HelixIR) {
258 let mut string_frequency: HashMap<u32, usize> = HashMap::new();
259 for instruction in &ir.instructions {
260 match instruction {
261 Instruction::SetProperty { key, .. } => {
262 *string_frequency.entry(*key).or_insert(0) += 1;
263 }
264 Instruction::SetCapability { capability, .. } => {
265 *string_frequency.entry(*capability).or_insert(0) += 1;
266 }
267 Instruction::SetMetadata { key, value } => {
268 *string_frequency.entry(*key).or_insert(0) += 1;
269 *string_frequency.entry(*value).or_insert(0) += 1;
270 }
271 _ => {}
272 }
273 }
274 for agent in ir.symbol_table.agents.values() {
275 *string_frequency.entry(agent.name_idx).or_insert(0) += 1;
276 *string_frequency.entry(agent.model_idx).or_insert(0) += 1;
277 *string_frequency.entry(agent.role_idx).or_insert(0) += 1;
278 }
279 let _total_strings = ir.string_pool.strings.len();
280 let frequently_used = string_frequency
281 .iter()
282 .filter(|(_, count)| **count > 1)
283 .count();
284 if frequently_used > 0 {
285 self.stats.bytes_saved += frequently_used * 8;
286 }
287 }
288 fn reorder_for_cache_locality(&mut self, ir: &mut HelixIR) {
289 let mut reordered = Vec::new();
290 let mut agent_instructions = Vec::new();
291 let mut workflow_instructions = Vec::new();
292 let mut other_instructions = Vec::new();
293 for instruction in ir.instructions.drain(..) {
294 match &instruction {
295 Instruction::DeclareAgent(_) => agent_instructions.push(instruction),
296 Instruction::DeclareWorkflow(_) => {
297 workflow_instructions.push(instruction)
298 }
299 Instruction::DefinePipeline { .. } => {
300 workflow_instructions.push(instruction)
301 }
302 _ => other_instructions.push(instruction),
303 }
304 }
305 reordered.extend(agent_instructions);
306 reordered.extend(workflow_instructions);
307 reordered.extend(other_instructions);
308 ir.instructions = reordered;
309 }
310 fn remap_instruction_strings(
311 &self,
312 instruction: &mut Instruction,
313 remap: &HashMap<u32, u32>,
314 ) {
315 match instruction {
316 Instruction::SetProperty { key, value, .. } => {
317 if let Some(&new_idx) = remap.get(key) {
318 *key = new_idx;
319 }
320 if let crate::codegen::ConstantValue::String(idx) = value {
321 if let Some(&new_idx) = remap.get(idx) {
322 *idx = new_idx;
323 }
324 }
325 }
326 Instruction::SetCapability { capability, .. } => {
327 if let Some(&new_idx) = remap.get(capability) {
328 *capability = new_idx;
329 }
330 }
331 Instruction::SetMetadata { key, value } => {
332 if let Some(&new_idx) = remap.get(key) {
333 *key = new_idx;
334 }
335 if let Some(&new_idx) = remap.get(value) {
336 *value = new_idx;
337 }
338 }
339 _ => {}
340 }
341 }
    /// Rewrites jump/branch targets after the instruction stream is
    /// compacted. Stub: HelixIR carries no jump-target operands yet, so
    /// there is nothing to rewrite; `remove_dead_code` already supplies the
    /// old-to-new position map for when this is implemented.
    fn remap_jump_targets(
        &self,
        _instruction: &mut Instruction,
        _remap: &HashMap<usize, usize>,
    ) {}
347 fn count_string_usage(
348 &self,
349 instruction: &Instruction,
350 frequency: &mut HashMap<u32, usize>,
351 ) {
352 match instruction {
353 Instruction::SetProperty { key, value, .. } => {
354 *frequency.entry(*key).or_insert(0) += 1;
355 if let crate::codegen::ConstantValue::String(idx) = value {
356 *frequency.entry(*idx).or_insert(0) += 1;
357 }
358 }
359 Instruction::SetCapability { capability, .. } => {
360 *frequency.entry(*capability).or_insert(0) += 1;
361 }
362 Instruction::SetMetadata { key, value } => {
363 *frequency.entry(*key).or_insert(0) += 1;
364 *frequency.entry(*value).or_insert(0) += 1;
365 }
366 _ => {}
367 }
368 }
    /// Returns the statistics accumulated across all passes run so far.
    pub fn stats(&self) -> &OptimizationStats {
        &self.stats
    }
372}
/// Counters accumulated by `Optimizer` passes.
#[derive(Debug, Default)]
pub struct OptimizationStats {
    // Duplicate strings collapsed into an existing pool entry.
    pub strings_deduplicated: usize,
    // Net number of strings dropped from the pool.
    pub strings_removed: usize,
    // Instructions (and, currently, symbols) eliminated.
    pub instructions_removed: usize,
    // Constant expressions folded (stub pass: always 0 today).
    pub constants_folded: usize,
    // Functions inlined (stub pass: always 0 today).
    pub functions_inlined: usize,
    // DefinePipeline instructions visited by the pipeline pass.
    pub pipelines_optimized: usize,
    // Duplicate agent definitions that could be merged.
    pub sections_merged: usize,
    // Estimated bytes saved by string sharing.
    pub bytes_saved: usize,
}
impl OptimizationStats {
    /// Renders the counters as a human-readable multi-line summary,
    /// one `- Label: value` row per counter.
    pub fn report(&self) -> String {
        let rows = [
            ("Strings deduplicated", self.strings_deduplicated),
            ("Strings removed", self.strings_removed),
            ("Instructions removed", self.instructions_removed),
            ("Constants folded", self.constants_folded),
            ("Functions inlined", self.functions_inlined),
            ("Pipelines optimized", self.pipelines_optimized),
            ("Sections merged", self.sections_merged),
            ("Total bytes saved", self.bytes_saved),
        ];
        let mut out = String::from("Optimization Results:");
        for (label, value) in rows.iter() {
            out.push_str(&format!("\n- {}: {}", label, value));
        }
        out
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::codegen::{StringPool, SymbolTable, Metadata, ConstantPool};
    // Verifies the u8 -> OptimizationLevel mapping, including saturation of
    // out-of-range values to Three.
    #[test]
    fn test_optimization_levels() {
        assert_eq!(OptimizationLevel::from(0), OptimizationLevel::Zero);
        assert_eq!(OptimizationLevel::from(1), OptimizationLevel::One);
        assert_eq!(OptimizationLevel::from(2), OptimizationLevel::Two);
        assert_eq!(OptimizationLevel::from(3), OptimizationLevel::Three);
        assert_eq!(OptimizationLevel::from(99), OptimizationLevel::Three);
    }
    // A pool of ["hello", "world", "hello"] should collapse to two strings
    // with exactly one deduplication recorded.
    #[test]
    fn test_string_deduplication() {
        let mut ir = HelixIR {
            version: 1,
            metadata: Metadata::default(),
            symbol_table: SymbolTable::default(),
            instructions: vec![
                Instruction::DeclareAgent(0), Instruction::DeclareWorkflow(1),
                Instruction::DeclareContext(2),
            ],
            string_pool: StringPool {
                strings: vec![
                    "hello".to_string(), "world".to_string(), "hello".to_string(),
                ],
                index: std::collections::HashMap::new(),
            },
            constants: ConstantPool::default(),
        };
        let mut optimizer = Optimizer::new(OptimizationLevel::One);
        optimizer.deduplicate_strings(&mut ir);
        assert_eq!(ir.string_pool.strings.len(), 2);
        assert_eq!(optimizer.stats.strings_deduplicated, 1);
    }
    // fold_constants is a stub today: it must leave the instruction stream
    // untouched.
    #[test]
    fn test_constant_folding() {
        let mut ir = HelixIR {
            version: 1,
            metadata: Metadata::default(),
            symbol_table: SymbolTable::default(),
            instructions: vec![Instruction::DeclareAgent(0),],
            string_pool: StringPool::default(),
            constants: ConstantPool::default(),
        };
        let mut optimizer = Optimizer::new(OptimizationLevel::Two);
        optimizer.fold_constants(&mut ir);
        assert_eq!(ir.instructions.len(), 1);
        match &ir.instructions[0] {
            Instruction::DeclareAgent(0) => {}
            _ => panic!("Expected DeclareAgent(0)"),
        }
    }
}