1use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct NormalizedSequence {
10 pub actions: Vec<NormalizedAction>,
12 pub variables: HashMap<String, VariableInfo>,
14 pub signature: String,
16}
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct NormalizedAction {
21 pub tool: String,
22 pub params: HashMap<String, NormalizedParam>,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
27#[serde(tag = "type", rename_all = "snake_case")]
28pub enum NormalizedParam {
29 Literal(serde_json::Value),
30 Variable { name: String },
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct VariableInfo {
36 pub name: String,
37 pub sample_values: Vec<serde_json::Value>,
38 pub inferred_type: InferredType,
39}
40
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
42#[serde(rename_all = "snake_case")]
43pub enum InferredType {
44 String,
45 Number,
46 Boolean,
47 Path,
48 Unknown,
49}
50
51pub struct SequenceNormalizer;
53
54impl SequenceNormalizer {
55 pub fn normalize(instances: &[Vec<RawAction>]) -> Option<NormalizedSequence> {
58 if instances.is_empty() {
59 return None;
60 }
61
62 let first = &instances[0];
63 if first.is_empty() {
64 return None;
65 }
66
67 if !instances.iter().all(|i| i.len() == first.len()) {
69 return None;
70 }
71
72 if !instances
74 .iter()
75 .all(|i| i.iter().zip(first.iter()).all(|(a, b)| a.tool == b.tool))
76 {
77 return None;
78 }
79
80 let mut variables: HashMap<String, VariableInfo> = HashMap::new();
81 let mut normalized_actions = Vec::new();
82 let mut sig_parts = Vec::new();
83 let mut var_counter = 0u32;
84
85 for (step_idx, template_action) in first.iter().enumerate() {
86 let mut norm_params = HashMap::new();
87 sig_parts.push(template_action.tool.clone());
88
89 for (key, _) in &template_action.params {
90 let values: Vec<&serde_json::Value> = instances
92 .iter()
93 .filter_map(|inst| inst.get(step_idx)?.params.get(key))
94 .collect();
95
96 if values.is_empty() {
97 continue;
98 }
99
100 let all_same = values.windows(2).all(|w| w[0] == w[1]);
102
103 if all_same {
104 norm_params.insert(key.clone(), NormalizedParam::Literal(values[0].clone()));
105 } else {
106 let var_name = format!("var_{}_{}", step_idx, var_counter);
107 var_counter += 1;
108
109 let inferred = infer_type(values[0]);
110 variables.insert(
111 var_name.clone(),
112 VariableInfo {
113 name: var_name.clone(),
114 sample_values: values.into_iter().cloned().collect(),
115 inferred_type: inferred,
116 },
117 );
118
119 norm_params.insert(key.clone(), NormalizedParam::Variable { name: var_name });
120 }
121 }
122
123 normalized_actions.push(NormalizedAction {
124 tool: template_action.tool.clone(),
125 params: norm_params,
126 });
127 }
128
129 let signature = sig_parts.join("→");
130
131 Some(NormalizedSequence {
132 actions: normalized_actions,
133 variables,
134 signature,
135 })
136 }
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct RawAction {
142 pub tool: String,
143 pub params: HashMap<String, serde_json::Value>,
144}
145
146fn infer_type(value: &serde_json::Value) -> InferredType {
147 match value {
148 serde_json::Value::String(s) => {
149 if s.contains('/') || s.contains('\\') || s.ends_with(".rs") || s.ends_with(".ts") {
150 InferredType::Path
151 } else {
152 InferredType::String
153 }
154 }
155 serde_json::Value::Number(_) => InferredType::Number,
156 serde_json::Value::Bool(_) => InferredType::Boolean,
157 _ => InferredType::Unknown,
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a `RawAction` for `tool` from borrowed `(key, value)` pairs.
    fn make_action(tool: &str, params: &[(&str, serde_json::Value)]) -> RawAction {
        RawAction {
            tool: tool.to_owned(),
            params: params
                .iter()
                .cloned()
                .map(|(key, value)| (key.to_owned(), value))
                .collect(),
        }
    }

    /// Builds one single-step instance per entry of `values`, each calling
    /// `tool` with `key` set to that value.
    fn single_step_runs(
        tool: &str,
        key: &str,
        values: &[serde_json::Value],
    ) -> Vec<Vec<RawAction>> {
        values
            .iter()
            .map(|value| vec![make_action(tool, &[(key, value.clone())])])
            .collect()
    }

    /// Returns some variable of `norm`; panics when there is none.
    fn any_variable(norm: &NormalizedSequence) -> &VariableInfo {
        norm.variables.values().next().unwrap()
    }

    #[test]
    fn test_normalize_constant_params() {
        let runs = single_step_runs("git_add", "path", &[json!("."), json!(".")]);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(norm.actions.len(), 1);
        assert_eq!(
            norm.actions[0].params["path"],
            NormalizedParam::Literal(json!("."))
        );
        assert!(norm.variables.is_empty());
    }

    #[test]
    fn test_normalize_variable_extraction() {
        let messages = [
            json!("fix: bug A"),
            json!("feat: feature B"),
            json!("chore: cleanup"),
        ];
        let runs = single_step_runs("git_commit", "message", &messages);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(norm.variables.len(), 1);
        let var = any_variable(&norm);
        assert_eq!(var.sample_values.len(), 3);
        assert_eq!(var.inferred_type, InferredType::String);
    }

    #[test]
    fn test_normalize_mismatched_tools() {
        let runs = vec![
            vec![make_action("git_add", &[])],
            vec![make_action("git_commit", &[])],
        ];

        assert!(SequenceNormalizer::normalize(&runs).is_none());
    }

    #[test]
    fn test_normalize_signature() {
        // Two runs of the same two-step flow; only the commit message differs.
        let runs: Vec<Vec<RawAction>> = ["a", "b"]
            .iter()
            .map(|msg| {
                vec![
                    make_action("git_add", &[("path", json!("."))]),
                    make_action("git_commit", &[("msg", json!(msg))]),
                ]
            })
            .collect();

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(norm.signature, "git_add→git_commit");
    }

    #[test]
    fn test_path_type_inference() {
        let runs = single_step_runs(
            "lint",
            "path",
            &[json!("src/main.rs"), json!("src/lib.rs")],
        );

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(any_variable(&norm).inferred_type, InferredType::Path);
    }

    #[test]
    fn test_normalize_empty() {
        assert!(SequenceNormalizer::normalize(&[]).is_none());
    }

    #[test]
    fn test_normalize_empty_actions() {
        let runs: Vec<Vec<RawAction>> = vec![Vec::new()];

        assert!(SequenceNormalizer::normalize(&runs).is_none());
    }

    #[test]
    fn test_normalize_different_lengths() {
        let short = vec![make_action("a", &[])];
        let long = vec![make_action("a", &[]), make_action("b", &[])];

        assert!(SequenceNormalizer::normalize(&[short, long]).is_none());
    }

    #[test]
    fn test_infer_type_number() {
        let runs = single_step_runs("set", "count", &[json!(1), json!(2)]);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(any_variable(&norm).inferred_type, InferredType::Number);
    }

    #[test]
    fn test_infer_type_boolean() {
        let runs = single_step_runs("flag", "enabled", &[json!(true), json!(false)]);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(any_variable(&norm).inferred_type, InferredType::Boolean);
    }

    #[test]
    fn test_infer_type_unknown() {
        let runs = single_step_runs("set", "val", &[json!(null), json!([1, 2])]);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert_eq!(any_variable(&norm).inferred_type, InferredType::Unknown);
    }

    #[test]
    fn test_normalized_param_variable_serde() {
        let var = NormalizedParam::Variable { name: "x".into() };

        let encoded = serde_json::to_string(&var).unwrap();
        let decoded: NormalizedParam = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded, var);
    }

    #[test]
    fn test_inferred_type_serde() {
        let all_kinds = [
            InferredType::String,
            InferredType::Number,
            InferredType::Boolean,
            InferredType::Path,
            InferredType::Unknown,
        ];

        for kind in all_kinds {
            let encoded = serde_json::to_string(&kind).unwrap();
            let decoded: InferredType = serde_json::from_str(&encoded).unwrap();
            assert_eq!(decoded, kind);
        }
    }

    #[test]
    fn test_raw_action_serde() {
        let action = make_action("test", &[("key", json!("val"))]);

        let encoded = serde_json::to_string(&action).unwrap();
        let decoded: RawAction = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.tool, "test");
    }

    #[test]
    fn test_single_instance_all_constant() {
        // With a single run there is nothing to compare against, so every
        // parameter stays a literal.
        let runs = single_step_runs("deploy", "env", &[json!("prod")]);

        let norm = SequenceNormalizer::normalize(&runs).unwrap();

        assert!(norm.variables.is_empty());
        assert_eq!(
            norm.actions[0].params["env"],
            NormalizedParam::Literal(json!("prod"))
        );
    }
}