1use crate::{Node, NodeKind, Workflow};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Result of running [`VariableOptimizer::analyze`] over a workflow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableOptimization {
    /// One entry per distinct template variable found in the workflow's nodes.
    pub flows: Vec<VariableFlow>,

    /// Read/write bookkeeping for each variable, keyed by variable name.
    pub usage_stats: HashMap<String, VariableUsage>,

    /// Suggestions derived from `flows` and `usage_stats`.
    pub suggestions: Vec<OptimizationSuggestion>,

    /// Sum of `estimated_benefit.memory_bytes` over all `suggestions`, in bytes.
    pub estimated_memory_savings: usize,

    /// Number of `suggestions` whose type is [`OptimizationType::AvoidCopy`].
    pub unnecessary_copies: usize,
}
28
/// Where a single template variable is referenced across the workflow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableFlow {
    /// Variable name as written between `{{` and `}}`, with surrounding whitespace trimmed.
    pub variable_name: String,

    /// Name of the first node (in `workflow.nodes` order) that references the variable.
    ///
    /// NOTE(review): this is the first *reference*, not necessarily the node that
    /// produces the value; no producer analysis is performed.
    pub source_node: String,

    /// Names of the nodes whose templates reference the variable, in `workflow.nodes` order.
    pub consumer_nodes: Vec<String>,

    /// Name of the last node (in `workflow.nodes` order) that references the variable.
    pub last_usage: String,

    /// Set when `consumer_nodes` has more than two entries.
    ///
    /// This is a count-based stand-in; the optimizer does not inspect branch structure.
    pub cross_branch: bool,

    /// Heuristic size guess in bytes, derived from keywords in the variable name.
    pub estimated_size_bytes: usize,
}
50
/// Read/write statistics for a single variable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableUsage {
    /// Number of reader entries (populated from the flow's `consumer_nodes` length).
    pub read_count: usize,

    /// Number of writes; the optimizer currently always stores `1`.
    pub write_count: usize,

    /// Names of the nodes that read the variable (a copy of the flow's `consumer_nodes`).
    pub readers: Vec<String>,

    /// Names of the nodes that write the variable; currently just the flow's `source_node`.
    pub writers: Vec<String>,

    /// Dead-usage flag; the optimizer currently always stores `false`.
    pub has_dead_usage: bool,
}
69
/// A single optimization opportunity reported by the optimizer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationSuggestion {
    /// Category of the suggested optimization.
    pub optimization_type: OptimizationType,

    /// Names of the variables the suggestion applies to.
    pub variables: Vec<String>,

    /// Names of the nodes the suggestion applies to.
    pub nodes: Vec<String>,

    /// Human-readable explanation of the suggestion.
    pub description: String,

    /// Estimated payoff of applying the suggestion.
    pub estimated_benefit: Benefit,
}
88
/// Category of an [`OptimizationSuggestion`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum OptimizationType {
    /// Emitted for variables referenced by at most one consumer.
    RemoveUnused,

    /// Emitted for variables estimated above 10,000 bytes with exactly two consumers.
    UseMove,

    /// Emitted for variables with more than two consumers that are not flagged cross-branch.
    EarlyRelease,

    /// Emitted for variables with exactly one reader and one writer.
    ReduceScope,

    /// Counted by `VariableOptimization::unnecessary_copies`.
    ///
    /// NOTE(review): no rule visible in `VariableOptimizer` emits this variant — confirm.
    AvoidCopy,

    /// NOTE(review): no rule visible in `VariableOptimizer` emits this variant — confirm.
    InlineVariable,
}
110
/// Estimated payoff attached to an [`OptimizationSuggestion`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Benefit {
    /// Estimated memory saved, in bytes.
    pub memory_bytes: usize,

    /// Estimated performance gain; the optimizer uses small constants (0.05 to 0.2).
    ///
    /// NOTE(review): presumably a fraction of 1.0 — confirm the intended unit.
    pub performance_gain: f64,

    /// Estimated complexity reduction; the optimizer uses small constants (0.0 to 0.2).
    ///
    /// NOTE(review): presumably a fraction of 1.0 — confirm the intended unit.
    pub complexity_reduction: f64,
}
123
/// Stateless entry point for variable-flow analysis; all functionality is exposed
/// through associated functions such as `VariableOptimizer::analyze`.
pub struct VariableOptimizer;
126
127impl VariableOptimizer {
128 pub fn analyze(workflow: &Workflow) -> VariableOptimization {
130 let flows = Self::extract_variable_flows(workflow);
132
133 let usage_stats = Self::calculate_usage_stats(&flows, workflow);
135
136 let suggestions = Self::generate_suggestions(&flows, &usage_stats, workflow);
138
139 let estimated_memory_savings = suggestions
141 .iter()
142 .map(|s| s.estimated_benefit.memory_bytes)
143 .sum();
144
145 let unnecessary_copies = suggestions
146 .iter()
147 .filter(|s| s.optimization_type == OptimizationType::AvoidCopy)
148 .count();
149
150 VariableOptimization {
151 flows,
152 usage_stats,
153 suggestions,
154 estimated_memory_savings,
155 unnecessary_copies,
156 }
157 }
158
159 fn extract_variable_flows(workflow: &Workflow) -> Vec<VariableFlow> {
161 let mut flows = Vec::new();
162 let mut variables_seen: HashMap<String, Vec<String>> = HashMap::new();
163
164 for node in &workflow.nodes {
166 let var_refs = Self::extract_variable_references(node);
167
168 for var_name in var_refs {
169 variables_seen
170 .entry(var_name.clone())
171 .or_default()
172 .push(node.name.clone());
173 }
174 }
175
176 for (var_name, consumer_nodes) in variables_seen {
178 if !consumer_nodes.is_empty() {
179 let source_node = consumer_nodes.first().unwrap().clone();
180 let last_usage = consumer_nodes.last().unwrap().clone();
181
182 flows.push(VariableFlow {
183 variable_name: var_name.clone(),
184 source_node,
185 consumer_nodes: consumer_nodes.clone(),
186 last_usage,
187 cross_branch: consumer_nodes.len() > 2, estimated_size_bytes: Self::estimate_variable_size(&var_name),
189 });
190 }
191 }
192
193 flows
194 }
195
196 fn extract_variable_references(node: &Node) -> Vec<String> {
198 let mut refs = Vec::new();
199
200 match &node.kind {
201 NodeKind::LLM(config) => {
202 refs.extend(Self::extract_template_vars(&config.prompt_template));
204 if let Some(system_prompt) = &config.system_prompt {
205 refs.extend(Self::extract_template_vars(system_prompt));
206 }
207 }
208 NodeKind::Retriever(config) => {
209 refs.extend(Self::extract_template_vars(&config.query));
210 }
211 NodeKind::IfElse(condition) => {
212 refs.extend(Self::extract_template_vars(&condition.expression));
213 }
214 NodeKind::Switch(switch) => {
215 refs.extend(Self::extract_template_vars(&switch.switch_on));
216 }
217 NodeKind::Loop(loop_config) => {
218 match &loop_config.loop_type {
220 crate::LoopType::ForEach {
221 collection_path,
222 body_expression,
223 ..
224 } => {
225 refs.extend(Self::extract_template_vars(collection_path));
226 refs.extend(Self::extract_template_vars(body_expression));
227 }
228 crate::LoopType::While { condition, .. } => {
229 refs.extend(Self::extract_template_vars(condition));
230 }
231 crate::LoopType::Repeat { .. } => {
232 }
234 }
235 }
236 _ => {}
237 }
238
239 refs
240 }
241
242 fn extract_template_vars(text: &str) -> Vec<String> {
244 let mut vars = Vec::new();
245 let mut chars = text.chars().peekable();
246
247 while let Some(c) = chars.next() {
248 if c == '{' {
249 if let Some(&next) = chars.peek() {
250 if next == '{' {
251 chars.next(); let mut var_name = String::new();
253
254 while let Some(c) = chars.next() {
256 if c == '}' {
257 if let Some(&next) = chars.peek() {
258 if next == '}' {
259 chars.next(); vars.push(var_name.trim().to_string());
261 break;
262 }
263 }
264 }
265 var_name.push(c);
266 }
267 }
268 }
269 }
270 }
271
272 vars
273 }
274
275 fn estimate_variable_size(var_name: &str) -> usize {
277 if var_name.contains("embedding") || var_name.contains("vector") {
279 1536 * 4 } else if var_name.contains("image") {
281 1024 * 1024 } else if var_name.contains("document") || var_name.contains("text") {
283 10_000 } else {
285 1000 }
287 }
288
289 fn calculate_usage_stats(
291 flows: &[VariableFlow],
292 _workflow: &Workflow,
293 ) -> HashMap<String, VariableUsage> {
294 let mut stats = HashMap::new();
295
296 for flow in flows {
297 let usage = VariableUsage {
298 read_count: flow.consumer_nodes.len(),
299 write_count: 1, readers: flow.consumer_nodes.clone(),
301 writers: vec![flow.source_node.clone()],
302 has_dead_usage: false, };
304
305 stats.insert(flow.variable_name.clone(), usage);
306 }
307
308 stats
309 }
310
311 fn generate_suggestions(
313 flows: &[VariableFlow],
314 usage_stats: &HashMap<String, VariableUsage>,
315 _workflow: &Workflow,
316 ) -> Vec<OptimizationSuggestion> {
317 let mut suggestions = Vec::new();
318
319 for flow in flows {
321 if flow.consumer_nodes.len() <= 1 {
322 suggestions.push(OptimizationSuggestion {
323 optimization_type: OptimizationType::RemoveUnused,
324 variables: vec![flow.variable_name.clone()],
325 nodes: flow.consumer_nodes.clone(),
326 description: format!(
327 "Variable '{}' is only used once and could be inlined",
328 flow.variable_name
329 ),
330 estimated_benefit: Benefit {
331 memory_bytes: flow.estimated_size_bytes,
332 performance_gain: 0.1,
333 complexity_reduction: 0.15,
334 },
335 });
336 }
337 }
338
339 for flow in flows {
341 if flow.estimated_size_bytes > 10_000 && flow.consumer_nodes.len() == 2 {
342 suggestions.push(OptimizationSuggestion {
343 optimization_type: OptimizationType::UseMove,
344 variables: vec![flow.variable_name.clone()],
345 nodes: flow.consumer_nodes.clone(),
346 description: format!(
347 "Variable '{}' is large ({} bytes) and could use move semantics",
348 flow.variable_name, flow.estimated_size_bytes
349 ),
350 estimated_benefit: Benefit {
351 memory_bytes: flow.estimated_size_bytes / 2,
352 performance_gain: 0.2,
353 complexity_reduction: 0.0,
354 },
355 });
356 }
357 }
358
359 for flow in flows {
361 if flow.consumer_nodes.len() > 2 && !flow.cross_branch {
362 let last_node = flow.last_usage.clone();
363 suggestions.push(OptimizationSuggestion {
364 optimization_type: OptimizationType::EarlyRelease,
365 variables: vec![flow.variable_name.clone()],
366 nodes: vec![last_node.clone()],
367 description: format!(
368 "Variable '{}' can be released after node '{}'",
369 flow.variable_name, last_node
370 ),
371 estimated_benefit: Benefit {
372 memory_bytes: flow.estimated_size_bytes,
373 performance_gain: 0.05,
374 complexity_reduction: 0.1,
375 },
376 });
377 }
378 }
379
380 for (var_name, usage) in usage_stats {
382 if usage.readers.len() == 1 && usage.writers.len() == 1 {
383 suggestions.push(OptimizationSuggestion {
384 optimization_type: OptimizationType::ReduceScope,
385 variables: vec![var_name.clone()],
386 nodes: usage.readers.clone(),
387 description: format!(
388 "Variable '{}' is only used in one node and could have reduced scope",
389 var_name
390 ),
391 estimated_benefit: Benefit {
392 memory_bytes: 0,
393 performance_gain: 0.05,
394 complexity_reduction: 0.2,
395 },
396 });
397 }
398 }
399
400 suggestions
401 }
402
403 pub fn find_early_release_candidates(workflow: &Workflow) -> Vec<String> {
405 let analysis = Self::analyze(workflow);
406 analysis
407 .suggestions
408 .iter()
409 .filter(|s| s.optimization_type == OptimizationType::EarlyRelease)
410 .flat_map(|s| s.variables.clone())
411 .collect()
412 }
413
414 pub fn find_unnecessary_copies(workflow: &Workflow) -> Vec<String> {
416 let analysis = Self::analyze(workflow);
417 analysis
418 .suggestions
419 .iter()
420 .filter(|s| {
421 s.optimization_type == OptimizationType::AvoidCopy
422 || s.optimization_type == OptimizationType::UseMove
423 })
424 .flat_map(|s| s.variables.clone())
425 .collect()
426 }
427}
428
429impl VariableOptimization {
430 pub fn format_summary(&self) -> String {
432 format!(
433 "Variable Optimization Analysis:\n\
434 Total Variable Flows: {} | Tracked Variables: {}\n\
435 Optimization Opportunities: {} | Unnecessary Copies: {}\n\
436 Estimated Memory Savings: {} KB\n",
437 self.flows.len(),
438 self.usage_stats.len(),
439 self.suggestions.len(),
440 self.unnecessary_copies,
441 self.estimated_memory_savings / 1024
442 )
443 }
444
445 pub fn high_impact_optimizations(&self) -> Vec<&OptimizationSuggestion> {
447 self.suggestions
448 .iter()
449 .filter(|s| s.estimated_benefit.memory_bytes > 10_000)
450 .collect()
451 }
452
453 pub fn optimizations_by_type(
455 &self,
456 opt_type: OptimizationType,
457 ) -> Vec<&OptimizationSuggestion> {
458 self.suggestions
459 .iter()
460 .filter(|s| s.optimization_type == opt_type)
461 .collect()
462 }
463}
464
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{LlmConfig, WorkflowBuilder};

    /// Builds an LLM config in which only the prompt template varies between tests.
    fn llm_config(prompt_template: &str) -> LlmConfig {
        LlmConfig {
            provider: "openai".to_string(),
            model: "gpt-4".to_string(),
            system_prompt: None,
            prompt_template: prompt_template.to_string(),
            temperature: None,
            max_tokens: Some(100),
            tools: vec![],
            images: vec![],
            extra_params: serde_json::Value::Null,
        }
    }

    #[test]
    fn test_extract_template_vars() {
        let text = "Process {{input}} and {{query}} to get {{output}}";
        let vars = VariableOptimizer::extract_template_vars(text);

        assert_eq!(vars.len(), 3);
        assert!(vars.contains(&"input".to_string()));
        assert!(vars.contains(&"query".to_string()));
        assert!(vars.contains(&"output".to_string()));
    }

    #[test]
    fn test_extract_template_vars_trims_and_ignores_unterminated() {
        let vars = VariableOptimizer::extract_template_vars("{{ spaced }} and {{unterminated");

        assert_eq!(vars, vec!["spaced".to_string()]);
    }

    #[test]
    fn test_variable_analysis() {
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .llm("LLM1", llm_config("Use {{input}} to generate output"))
            .llm("LLM2", llm_config("Process {{input}} again"))
            .end("End")
            .build();

        let analysis = VariableOptimizer::analyze(&workflow);

        assert!(!analysis.flows.is_empty());
        assert!(analysis.usage_stats.contains_key("input"));
        // Two distinct LLM nodes reference `input` once each.
        assert_eq!(analysis.usage_stats["input"].read_count, 2);
    }

    #[test]
    fn test_optimization_suggestions() {
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .llm("LLM", llm_config("Use {{large_embedding}} once"))
            .end("End")
            .build();

        let analysis = VariableOptimizer::analyze(&workflow);

        assert!(!analysis.suggestions.is_empty());
        assert!(analysis
            .suggestions
            .iter()
            .any(|s| s.variables.contains(&"large_embedding".to_string())));
    }

    #[test]
    fn test_early_release_candidates() {
        // Start/End nodes carry no templates, so there is nothing to release.
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .end("End")
            .build();

        let candidates = VariableOptimizer::find_early_release_candidates(&workflow);

        assert!(candidates.is_empty());
    }

    #[test]
    fn test_format_summary() {
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .end("End")
            .build();

        let analysis = VariableOptimizer::analyze(&workflow);
        let summary = analysis.format_summary();

        assert!(summary.contains("Variable Optimization Analysis"));
        assert!(summary.contains("Total Variable Flows"));
    }

    #[test]
    fn test_high_impact_optimizations() {
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .llm("LLM", llm_config("Process {{embedding}}"))
            .end("End")
            .build();

        let analysis = VariableOptimizer::analyze(&workflow);
        let high_impact = analysis.high_impact_optimizations();

        // An embedding is estimated at 1536 * 4 = 6144 bytes, which is below the
        // 10,000-byte high-impact threshold.
        assert!(high_impact.is_empty());
    }

    #[test]
    fn test_optimizations_by_type() {
        let workflow = WorkflowBuilder::new("Test")
            .start("Start")
            .llm("LLM", llm_config("Use {{data}} once"))
            .end("End")
            .build();

        let analysis = VariableOptimizer::analyze(&workflow);
        let remove_unused = analysis.optimizations_by_type(OptimizationType::RemoveUnused);

        // `data` has a single consumer, so exactly one RemoveUnused suggestion is expected.
        assert_eq!(remove_unused.len(), 1);
        assert_eq!(remove_unused[0].variables, vec!["data".to_string()]);
    }

    #[test]
    fn test_variable_size_estimation() {
        assert!(VariableOptimizer::estimate_variable_size("embedding") > 1000);
        assert!(VariableOptimizer::estimate_variable_size("image") > 100_000);
        assert!(VariableOptimizer::estimate_variable_size("text") > 100);
    }
}