Skip to main content

agentic_evolve_core/crystallization/
extractor.rs

1//! PatternExtractor — extracts patterns from successful code.
2
3use crate::types::error::{EvolveError, EvolveResult};
4use crate::types::pattern::{FunctionSignature, Language, ParamSignature, Pattern, Visibility};
5use crate::types::skill::SuccessfulExecution;
6
7use super::confidence::ConfidenceCalculator;
8use super::template_generator::TemplateGenerator;
9use super::variable_detector::VariableDetector;
10
11/// Extracts reusable patterns from successfully executed code.
12#[derive(Debug, Default)]
13pub struct PatternExtractor {
14    variable_detector: VariableDetector,
15    template_generator: TemplateGenerator,
16    confidence_calculator: ConfidenceCalculator,
17}
18
19impl PatternExtractor {
20    pub fn new() -> Self {
21        Self::default()
22    }
23
24    pub fn extract(&self, execution: &SuccessfulExecution) -> EvolveResult<Vec<Pattern>> {
25        let functions = self.extract_functions(&execution.code, &execution.language)?;
26        let mut patterns = Vec::new();
27
28        for func in functions {
29            let variables = self
30                .variable_detector
31                .detect(&func.body, &execution.language);
32            let template = self.template_generator.generate(&func.body, &variables);
33            let confidence = self.confidence_calculator.calculate(execution);
34
35            if confidence >= 0.5 {
36                let pattern = Pattern::new(
37                    &func.name,
38                    &execution.domain,
39                    execution.language.clone(),
40                    FunctionSignature {
41                        name: func.name.clone(),
42                        params: func.params.clone(),
43                        return_type: func.return_type.clone(),
44                        language: execution.language.clone(),
45                        is_async: func.is_async,
46                        visibility: func.visibility.clone(),
47                    },
48                    &template,
49                    variables,
50                    confidence,
51                );
52                patterns.push(pattern);
53            }
54        }
55
56        Ok(patterns)
57    }
58
59    fn extract_functions(
60        &self,
61        code: &str,
62        language: &Language,
63    ) -> EvolveResult<Vec<ExtractedFunction>> {
64        match language {
65            Language::Rust => self.extract_rust_functions(code),
66            Language::Python => self.extract_python_functions(code),
67            _ => self.extract_generic_functions(code),
68        }
69    }
70
71    fn extract_rust_functions(&self, code: &str) -> EvolveResult<Vec<ExtractedFunction>> {
72        let mut functions = Vec::new();
73        let re = regex::Regex::new(
74            r"(?m)^(\s*)(pub\s+)?(async\s+)?fn\s+(\w+)\s*(\([^)]*\))\s*(->\s*[^{]+)?\s*\{",
75        )
76        .map_err(|e| EvolveError::CrystallizationError(e.to_string()))?;
77
78        for cap in re.captures_iter(code) {
79            let is_pub = cap.get(2).is_some();
80            let is_async = cap.get(3).is_some();
81            let name = cap[4].to_string();
82            let params_str = &cap[5];
83            let return_type = cap
84                .get(6)
85                .map(|m| m.as_str().trim_start_matches("->").trim().to_string());
86
87            // Extract body (simple brace counting)
88            let fn_start = cap.get(0).map(|m| m.end()).unwrap_or(0);
89            let body = extract_braced_body(code, fn_start);
90
91            let params = parse_rust_params(params_str);
92
93            functions.push(ExtractedFunction {
94                name,
95                params,
96                return_type,
97                body,
98                is_async,
99                visibility: if is_pub {
100                    Visibility::Public
101                } else {
102                    Visibility::Private
103                },
104            });
105        }
106
107        Ok(functions)
108    }
109
110    fn extract_python_functions(&self, code: &str) -> EvolveResult<Vec<ExtractedFunction>> {
111        let mut functions = Vec::new();
112        let re = regex::Regex::new(
113            r"(?m)^(\s*)(async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*([^:]+))?\s*:",
114        )
115        .map_err(|e| EvolveError::CrystallizationError(e.to_string()))?;
116
117        for cap in re.captures_iter(code) {
118            let indent = cap[1].len();
119            let is_async = cap.get(2).is_some();
120            let name = cap[3].to_string();
121            let params_str = &cap[4];
122            let return_type = cap.get(5).map(|m| m.as_str().trim().to_string());
123
124            let fn_end = cap.get(0).map(|m| m.end()).unwrap_or(0);
125            let body = extract_indented_body(code, fn_end, indent);
126
127            let params = parse_python_params(params_str);
128
129            functions.push(ExtractedFunction {
130                name,
131                params,
132                return_type,
133                body,
134                is_async,
135                visibility: Visibility::Public,
136            });
137        }
138
139        Ok(functions)
140    }
141
142    fn extract_generic_functions(&self, code: &str) -> EvolveResult<Vec<ExtractedFunction>> {
143        // Generic: treat entire code as one function body
144        Ok(vec![ExtractedFunction {
145            name: "main".to_string(),
146            params: Vec::new(),
147            return_type: None,
148            body: code.to_string(),
149            is_async: false,
150            visibility: Visibility::Public,
151        }])
152    }
153}
154
155#[derive(Debug, Clone)]
156struct ExtractedFunction {
157    name: String,
158    params: Vec<ParamSignature>,
159    return_type: Option<String>,
160    body: String,
161    is_async: bool,
162    visibility: Visibility,
163}
164
/// Returns the trimmed text of a brace-delimited body.
///
/// `start` is the byte offset just past the body's opening `{`. The function
/// scans forward for the matching `}` and returns everything in between,
/// trimmed. Braces that appear inside string literals, raw string literals,
/// character literals, line comments and (nested) block comments are not
/// counted, so code such as `let s = "}";` does not end the body early.
///
/// Returns an empty string when no matching closing brace exists or when
/// `start` is not a valid offset into `code`.
fn extract_braced_body(code: &str, start: usize) -> String {
    let rest = match code.get(start..) {
        Some(rest) => rest,
        None => return String::new(),
    };
    // Every delimiter of interest is ASCII, and ASCII bytes never occur
    // inside a multi-byte UTF-8 sequence, so scanning bytes is safe.
    let bytes = rest.as_bytes();
    // The opening brace before `start` is already open.
    let mut depth: usize = 1;
    let mut pos = 0;

    while pos < bytes.len() {
        pos = match bytes[pos] {
            b'{' => {
                depth += 1;
                pos + 1
            }
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return rest[..pos].trim().to_string();
                }
                pos + 1
            }
            b'/' if bytes.get(pos + 1) == Some(&b'/') => braced_skip_line_comment(bytes, pos),
            b'/' if bytes.get(pos + 1) == Some(&b'*') => braced_skip_block_comment(bytes, pos),
            b'"' => braced_skip_string(bytes, pos),
            b'r' => braced_skip_raw_string(bytes, pos).unwrap_or(pos + 1),
            b'\'' => braced_skip_char_literal(rest, pos).unwrap_or(pos + 1),
            _ => pos + 1,
        };
    }

    String::new()
}

/// Returns the offset just past the line comment starting at `from`.
fn braced_skip_line_comment(bytes: &[u8], from: usize) -> usize {
    bytes[from..]
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |offset| from + offset + 1)
}

/// Returns the offset just past the block comment starting at `from`,
/// honouring nested `/* ... */` pairs.
fn braced_skip_block_comment(bytes: &[u8], from: usize) -> usize {
    let mut nesting = 0usize;
    let mut pos = from;
    while pos < bytes.len() {
        if bytes[pos] == b'/' && bytes.get(pos + 1) == Some(&b'*') {
            nesting += 1;
            pos += 2;
        } else if bytes[pos] == b'*' && bytes.get(pos + 1) == Some(&b'/') {
            nesting -= 1;
            pos += 2;
            if nesting == 0 {
                return pos;
            }
        } else {
            pos += 1;
        }
    }
    bytes.len()
}

/// Returns the offset just past the `"..."` literal whose opening quote is
/// at `from`, stepping over backslash escapes.
fn braced_skip_string(bytes: &[u8], from: usize) -> usize {
    let mut pos = from + 1;
    while pos < bytes.len() {
        match bytes[pos] {
            b'\\' => pos += 2,
            b'"' => return pos + 1,
            _ => pos += 1,
        }
    }
    bytes.len()
}

/// If a raw string literal (`r"..."`, `r#"..."#`, optionally `b`-prefixed)
/// starts at the `r` at `from`, returns the offset just past it.
fn braced_skip_raw_string(bytes: &[u8], from: usize) -> Option<usize> {
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_' || b >= 0x80;
    // The `r` must begin a token; the only allowed predecessor that is an
    // identifier character is the `b` of a `br"..."` literal.
    let starts_token = match from.checked_sub(1).map(|idx| bytes[idx]) {
        None => true,
        Some(b'b') => from < 2 || !is_ident(bytes[from - 2]),
        Some(prev) => !is_ident(prev),
    };
    if !starts_token {
        return None;
    }

    let hashes = bytes[from + 1..].iter().take_while(|&&b| b == b'#').count();
    let open = from + 1 + hashes;
    if bytes.get(open) != Some(&b'"') {
        return None;
    }

    let mut pos = open + 1;
    while pos < bytes.len() {
        if bytes[pos] == b'"' {
            let closing = bytes[pos + 1..]
                .iter()
                .take(hashes)
                .take_while(|&&b| b == b'#')
                .count();
            if closing == hashes {
                return Some(pos + 1 + hashes);
            }
        }
        pos += 1;
    }
    Some(bytes.len())
}

/// If a character literal starts at the `'` at `from`, returns the offset
/// just past it; returns `None` for lifetimes and loop labels.
fn braced_skip_char_literal(text: &str, from: usize) -> Option<usize> {
    let bytes = text.as_bytes();
    if bytes.get(from + 1) == Some(&b'\\') {
        // Escaped literal: skip the character after the backslash (it may be
        // a quote itself), then the next `'` closes the literal.
        return bytes
            .get(from + 3..)?
            .iter()
            .position(|&b| b == b'\'')
            .map(|offset| from + 3 + offset + 1);
    }
    let ch = text.get(from + 1..)?.chars().next()?;
    let close = from + 1 + ch.len_utf8();
    if ch != '\'' && bytes.get(close) == Some(&b'\'') {
        Some(close + 1)
    } else {
        None
    }
}
183
/// Returns the trimmed text of an indentation-delimited (Python-style) body.
///
/// `start` is the byte offset just past the `:` that ends the block header
/// and `base_indent` is the header's indentation width in bytes.
///
/// * If the rest of the header line holds code (`def f(): return 1`), that
///   inline statement is the body.
/// * Otherwise the body is every following line indented deeper than
///   `base_indent`, up to the first non-blank line that is not. Blank lines
///   inside the body are kept as empty lines; a trailing `# comment` on the
///   header line is not part of the body.
///
/// The joined result is trimmed, so the first body line loses its leading
/// indentation while later lines keep theirs. Returns an empty string when
/// `start` is not a valid offset into `code`.
fn extract_indented_body(code: &str, start: usize, base_indent: usize) -> String {
    let rest = match code.get(start..) {
        Some(rest) => rest,
        None => return String::new(),
    };
    let mut remaining = rest.lines();

    // The first "line" is whatever follows the colon on the header line.
    // Handling it separately keeps a one-line definition from being skipped
    // and the body of the *next* definition from being picked up instead.
    let header_tail = remaining.next().unwrap_or("").trim();
    if !header_tail.is_empty() && !header_tail.starts_with('#') {
        return header_tail.to_string();
    }

    let mut body: Vec<&str> = Vec::new();
    for line in remaining {
        if line.trim().is_empty() {
            body.push("");
            continue;
        }
        let indent = line.len() - line.trim_start().len();
        if indent <= base_indent {
            // Dedent back to (or past) the header level ends the block.
            break;
        }
        body.push(line);
    }
    body.join("\n").trim().to_string()
}
200
201fn parse_rust_params(params_str: &str) -> Vec<ParamSignature> {
202    let inner = params_str.trim_start_matches('(').trim_end_matches(')');
203    inner
204        .split(',')
205        .filter_map(|p| {
206            let p = p.trim();
207            if p.is_empty() || p == "&self" || p == "&mut self" || p == "self" {
208                return None;
209            }
210            let parts: Vec<&str> = p.splitn(2, ':').collect();
211            if parts.len() == 2 {
212                Some(ParamSignature {
213                    name: parts[0].trim().to_string(),
214                    param_type: parts[1].trim().to_string(),
215                    is_optional: parts[1].contains("Option"),
216                })
217            } else {
218                None
219            }
220        })
221        .collect()
222}
223
224fn parse_python_params(params_str: &str) -> Vec<ParamSignature> {
225    params_str
226        .split(',')
227        .filter_map(|p| {
228            let p = p.trim();
229            if p.is_empty() || p == "self" || p == "cls" {
230                return None;
231            }
232            let parts: Vec<&str> = p.splitn(2, ':').collect();
233            let name = parts[0].trim().to_string();
234            let param_type = if parts.len() > 1 {
235                parts[1]
236                    .split('=')
237                    .next()
238                    .unwrap_or("Any")
239                    .trim()
240                    .to_string()
241            } else {
242                "Any".to_string()
243            };
244            let is_optional = p.contains('=');
245            Some(ParamSignature {
246                name,
247                param_type,
248                is_optional,
249            })
250        })
251        .collect()
252}