Skip to main content

hydra_compiler/
normalizer.rs

1//! SequenceNormalizer — extracts variables and normalizes action sequences.
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
/// A normalized action sequence with variables extracted.
///
/// Produced by `SequenceNormalizer::normalize` from several recorded instances of
/// the same tool sequence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSequence {
    /// Template actions, one per step, with varying parameters replaced by
    /// variable placeholders and constant parameters kept as literals.
    pub actions: Vec<NormalizedAction>,
    /// Extracted variables keyed by variable name, each with its sample values.
    pub variables: HashMap<String, VariableInfo>,
    /// Signature for deduplication: the tool names of the sequence, in order,
    /// joined with `→`. Parameters do not contribute to the signature.
    pub signature: String,
}
17
/// A single normalized action: one step of a `NormalizedSequence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedAction {
    /// Name of the tool invoked at this step.
    pub tool: String,
    /// Parameters of the call keyed by parameter name; each one is either a
    /// literal value or a reference to an extracted variable.
    pub params: HashMap<String, NormalizedParam>,
}
24
25/// A parameter that may be a literal or a variable
26#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
27#[serde(tag = "type", rename_all = "snake_case")]
28pub enum NormalizedParam {
29    Literal(serde_json::Value),
30    Variable { name: String },
31}
32
/// Info about an extracted variable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableInfo {
    /// Variable name; the same string that `NormalizedParam::Variable` refers to.
    pub name: String,
    /// Values observed for this parameter across the recorded instances.
    pub sample_values: Vec<serde_json::Value>,
    /// Coarse type guessed from the observed values.
    pub inferred_type: InferredType,
}
40
/// Coarse type classification assigned to an extracted variable.
///
/// Serialized as a snake_case string (`"string"`, `"number"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InferredType {
    /// A JSON string that does not look like a path.
    String,
    /// A JSON number.
    Number,
    /// A JSON boolean.
    Boolean,
    /// A JSON string that looks like a filesystem path.
    Path,
    /// Any other JSON value.
    Unknown,
}
50
51/// Normalizes action sequences by extracting varying parameters as variables
52pub struct SequenceNormalizer;
53
54impl SequenceNormalizer {
55    /// Normalize multiple instances of the same action sequence.
56    /// Parameters that vary across instances become variables.
57    pub fn normalize(instances: &[Vec<RawAction>]) -> Option<NormalizedSequence> {
58        if instances.is_empty() {
59            return None;
60        }
61
62        let first = &instances[0];
63        if first.is_empty() {
64            return None;
65        }
66
67        // All instances must have the same number of actions
68        if !instances.iter().all(|i| i.len() == first.len()) {
69            return None;
70        }
71
72        // All instances must use the same tools in the same order
73        if !instances
74            .iter()
75            .all(|i| i.iter().zip(first.iter()).all(|(a, b)| a.tool == b.tool))
76        {
77            return None;
78        }
79
80        let mut variables: HashMap<String, VariableInfo> = HashMap::new();
81        let mut normalized_actions = Vec::new();
82        let mut sig_parts = Vec::new();
83        let mut var_counter = 0u32;
84
85        for (step_idx, template_action) in first.iter().enumerate() {
86            let mut norm_params = HashMap::new();
87            sig_parts.push(template_action.tool.clone());
88
89            for (key, _) in &template_action.params {
90                // Collect all values for this param across instances
91                let values: Vec<&serde_json::Value> = instances
92                    .iter()
93                    .filter_map(|inst| inst.get(step_idx)?.params.get(key))
94                    .collect();
95
96                if values.is_empty() {
97                    continue;
98                }
99
100                // Check if all values are the same
101                let all_same = values.windows(2).all(|w| w[0] == w[1]);
102
103                if all_same {
104                    norm_params.insert(key.clone(), NormalizedParam::Literal(values[0].clone()));
105                } else {
106                    let var_name = format!("var_{}_{}", step_idx, var_counter);
107                    var_counter += 1;
108
109                    let inferred = infer_type(values[0]);
110                    variables.insert(
111                        var_name.clone(),
112                        VariableInfo {
113                            name: var_name.clone(),
114                            sample_values: values.into_iter().cloned().collect(),
115                            inferred_type: inferred,
116                        },
117                    );
118
119                    norm_params.insert(key.clone(), NormalizedParam::Variable { name: var_name });
120                }
121            }
122
123            normalized_actions.push(NormalizedAction {
124                tool: template_action.tool.clone(),
125                params: norm_params,
126            });
127        }
128
129        let signature = sig_parts.join("→");
130
131        Some(NormalizedSequence {
132            actions: normalized_actions,
133            variables,
134            signature,
135        })
136    }
137}
138
/// A raw action from execution history: one tool call with its concrete parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawAction {
    /// Name of the tool that was invoked.
    pub tool: String,
    /// Parameters of the call, keyed by parameter name, as JSON values.
    pub params: HashMap<String, serde_json::Value>,
}
145
146fn infer_type(value: &serde_json::Value) -> InferredType {
147    match value {
148        serde_json::Value::String(s) => {
149            if s.contains('/') || s.contains('\\') || s.ends_with(".rs") || s.ends_with(".ts") {
150                InferredType::Path
151            } else {
152                InferredType::String
153            }
154        }
155        serde_json::Value::Number(_) => InferredType::Number,
156        serde_json::Value::Bool(_) => InferredType::Boolean,
157        _ => InferredType::Unknown,
158    }
159}
160
// Unit tests for `SequenceNormalizer::normalize`, type inference, and the serde
// round-trips of the public types.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `RawAction` for `tool` from a slice of `(key, value)` pairs.
    fn make_action(tool: &str, params: &[(&str, serde_json::Value)]) -> RawAction {
        RawAction {
            tool: tool.into(),
            params: params
                .iter()
                .map(|(k, v)| (k.to_string(), v.clone()))
                .collect(),
        }
    }

    // A parameter with the same value in every instance stays a literal.
    #[test]
    fn test_normalize_constant_params() {
        let instances = vec![
            vec![make_action("git_add", &[("path", serde_json::json!("."))])],
            vec![make_action("git_add", &[("path", serde_json::json!("."))])],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        assert_eq!(norm.actions.len(), 1);
        assert_eq!(
            norm.actions[0].params["path"],
            NormalizedParam::Literal(serde_json::json!("."))
        );
        assert!(norm.variables.is_empty());
    }

    // A parameter whose value differs between instances becomes one variable
    // carrying one sample per instance.
    #[test]
    fn test_normalize_variable_extraction() {
        let instances = vec![
            vec![make_action(
                "git_commit",
                &[("message", serde_json::json!("fix: bug A"))],
            )],
            vec![make_action(
                "git_commit",
                &[("message", serde_json::json!("feat: feature B"))],
            )],
            vec![make_action(
                "git_commit",
                &[("message", serde_json::json!("chore: cleanup"))],
            )],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        assert_eq!(norm.variables.len(), 1);
        let var = norm.variables.values().next().unwrap();
        assert_eq!(var.sample_values.len(), 3);
        assert_eq!(var.inferred_type, InferredType::String);
    }

    // Instances that use different tools at the same step cannot be normalized.
    #[test]
    fn test_normalize_mismatched_tools() {
        let instances = vec![
            vec![make_action("git_add", &[])],
            vec![make_action("git_commit", &[])],
        ];
        assert!(SequenceNormalizer::normalize(&instances).is_none());
    }

    // The signature is the ordered tool names joined with "→".
    #[test]
    fn test_normalize_signature() {
        let instances = vec![
            vec![
                make_action("git_add", &[("path", serde_json::json!("."))]),
                make_action("git_commit", &[("msg", serde_json::json!("a"))]),
            ],
            vec![
                make_action("git_add", &[("path", serde_json::json!("."))]),
                make_action("git_commit", &[("msg", serde_json::json!("b"))]),
            ],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        assert_eq!(norm.signature, "git_add→git_commit");
    }

    // Strings that look like file paths are inferred as `Path`.
    #[test]
    fn test_path_type_inference() {
        let instances = vec![
            vec![make_action(
                "lint",
                &[("path", serde_json::json!("src/main.rs"))],
            )],
            vec![make_action(
                "lint",
                &[("path", serde_json::json!("src/lib.rs"))],
            )],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        let var = norm.variables.values().next().unwrap();
        assert_eq!(var.inferred_type, InferredType::Path);
    }

    // No instances at all yields `None`.
    #[test]
    fn test_normalize_empty() {
        assert!(SequenceNormalizer::normalize(&[]).is_none());
    }

    // A single instance with zero actions yields `None`.
    #[test]
    fn test_normalize_empty_actions() {
        let instances = vec![vec![]];
        assert!(SequenceNormalizer::normalize(&instances).is_none());
    }

    // Instances with differing numbers of actions yield `None`.
    #[test]
    fn test_normalize_different_lengths() {
        let instances = vec![
            vec![make_action("a", &[])],
            vec![make_action("a", &[]), make_action("b", &[])],
        ];
        assert!(SequenceNormalizer::normalize(&instances).is_none());
    }

    // Varying JSON numbers are inferred as `Number`.
    #[test]
    fn test_infer_type_number() {
        let instances = vec![
            vec![make_action("set", &[("count", serde_json::json!(1))])],
            vec![make_action("set", &[("count", serde_json::json!(2))])],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        let var = norm.variables.values().next().unwrap();
        assert_eq!(var.inferred_type, InferredType::Number);
    }

    // Varying JSON booleans are inferred as `Boolean`.
    #[test]
    fn test_infer_type_boolean() {
        let instances = vec![
            vec![make_action("flag", &[("enabled", serde_json::json!(true))])],
            vec![make_action("flag", &[("enabled", serde_json::json!(false))])],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        let var = norm.variables.values().next().unwrap();
        assert_eq!(var.inferred_type, InferredType::Boolean);
    }

    // Values that are neither string, number nor boolean (here: null and an
    // array) are inferred as `Unknown`.
    #[test]
    fn test_infer_type_unknown() {
        let instances = vec![
            vec![make_action("set", &[("val", serde_json::json!(null))])],
            vec![make_action("set", &[("val", serde_json::json!([1,2]))])],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        let var = norm.variables.values().next().unwrap();
        assert_eq!(var.inferred_type, InferredType::Unknown);
    }

    // `NormalizedParam::Variable` survives a JSON round-trip.
    #[test]
    fn test_normalized_param_variable_serde() {
        let var = NormalizedParam::Variable { name: "x".into() };
        let json = serde_json::to_string(&var).unwrap();
        let restored: NormalizedParam = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, var);
    }

    // Every `InferredType` variant survives a JSON round-trip.
    #[test]
    fn test_inferred_type_serde() {
        for t in [InferredType::String, InferredType::Number, InferredType::Boolean, InferredType::Path, InferredType::Unknown] {
            let json = serde_json::to_string(&t).unwrap();
            let restored: InferredType = serde_json::from_str(&json).unwrap();
            assert_eq!(restored, t);
        }
    }

    // `RawAction` survives a JSON round-trip (checked via its tool name).
    #[test]
    fn test_raw_action_serde() {
        let action = make_action("test", &[("key", serde_json::json!("val"))]);
        let json = serde_json::to_string(&action).unwrap();
        let restored: RawAction = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.tool, "test");
    }

    // With a single instance nothing can vary, so every parameter is a literal.
    #[test]
    fn test_single_instance_all_constant() {
        let instances = vec![
            vec![make_action("deploy", &[("env", serde_json::json!("prod"))])],
        ];
        let norm = SequenceNormalizer::normalize(&instances).unwrap();
        assert!(norm.variables.is_empty());
        assert_eq!(norm.actions[0].params["env"], NormalizedParam::Literal(serde_json::json!("prod")));
    }
}
338        let norm = SequenceNormalizer::normalize(&instances).unwrap();
339        assert!(norm.variables.is_empty());
340        assert_eq!(norm.actions[0].params["env"], NormalizedParam::Literal(serde_json::json!("prod")));
341    }
342}