use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Parameterized template built by `SequenceNormalizer::normalize` from several
/// recordings of the same tool sequence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedSequence {
/// One normalized action per step, in the same order as the recorded sequences.
pub actions: Vec<NormalizedAction>,
/// Extracted variables, keyed by variable name.
pub variables: HashMap<String, VariableInfo>,
/// Tool names of every step joined with `→`.
pub signature: String,
}
/// A single step of a normalized sequence: the tool plus its parameters, each
/// either a fixed literal or a reference to an extracted variable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedAction {
/// Name of the tool invoked at this step.
pub tool: String,
/// Parameters keyed by parameter name.
pub params: HashMap<String, NormalizedParam>,
}
/// Value of one parameter in a normalized action.
///
/// Serialized with an internal `"type"` tag whose value is the snake_case
/// variant name (`"literal"` / `"variable"`).
///
/// NOTE(review): serde's internally tagged representation only supports newtype
/// variants that wrap a struct or map. `Literal` wrapping a scalar or array
/// `serde_json::Value` is therefore expected to fail at serialization time —
/// confirm, and consider an adjacently tagged layout or a struct variant.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum NormalizedParam {
/// The parameter had the same value in every recording that supplied it.
Literal(serde_json::Value),
/// The parameter varied between recordings; `name` is the key into
/// `NormalizedSequence::variables`.
Variable { name: String },
}
/// Description of a parameter that differed between recordings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableInfo {
/// Generated variable name; identical to this entry's key in the variables map.
pub name: String,
/// Observed values, one per recording that supplied the parameter, in recording order.
pub sample_values: Vec<serde_json::Value>,
/// Type guessed from the first sample value.
pub inferred_type: InferredType,
}
/// Coarse type classification assigned to an extracted variable.
///
/// Serialized as the snake_case variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InferredType {
/// A JSON string that does not look like a path.
String,
/// A JSON number.
Number,
/// A JSON boolean.
Boolean,
/// A JSON string containing `/` or `\`, or ending in `.rs` / `.ts`.
Path,
/// Any other JSON value (null, array, object).
Unknown,
}
/// Stateless helper that collapses several recordings of the same tool sequence
/// into a single parameterized template.
pub struct SequenceNormalizer;

impl SequenceNormalizer {
    /// Builds a [`NormalizedSequence`] from `instances`, where each instance is one
    /// recorded run of the sequence.
    ///
    /// The first instance acts as the template: only its parameter keys are
    /// considered. For each key, the values found at the same step of every
    /// instance are compared (instances lacking the key are skipped). If they are
    /// all equal the parameter becomes a [`NormalizedParam::Literal`]; otherwise it
    /// becomes a [`NormalizedParam::Variable`] named `var_<step>_<n>`, where `<n>`
    /// is a counter shared by the whole sequence.
    ///
    /// Parameters are visited in sorted key order so that the generated variable
    /// names are reproducible; `HashMap` iteration order is arbitrary and would
    /// otherwise make the names differ between runs for identical input.
    ///
    /// Returns `None` when `instances` is empty, when the first instance has no
    /// actions, or when the instances differ in length or in the tool used at any
    /// step.
    pub fn normalize(instances: &[Vec<RawAction>]) -> Option<NormalizedSequence> {
        let (first, rest) = instances.split_first()?;
        if first.is_empty() {
            return None;
        }

        // Every other recording must have the same number of steps and the same
        // tool at each step as the template.
        let same_shape = rest.iter().all(|inst| {
            inst.len() == first.len()
                && inst.iter().zip(first.iter()).all(|(a, b)| a.tool == b.tool)
        });
        if !same_shape {
            return None;
        }

        let mut variables: HashMap<String, VariableInfo> = HashMap::new();
        let mut normalized_actions = Vec::with_capacity(first.len());
        // Deliberately not reset per step: the counter is unique across the sequence.
        let mut var_counter = 0u32;

        for (step_idx, template_action) in first.iter().enumerate() {
            // Sort by key so the counter is assigned deterministically.
            let mut entries: Vec<(&String, &serde_json::Value)> =
                template_action.params.iter().collect();
            entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

            let mut norm_params = HashMap::with_capacity(entries.len());
            for (key, template_value) in entries {
                // Values of this parameter in the remaining recordings, in order.
                let others: Vec<&serde_json::Value> = rest
                    .iter()
                    .filter_map(|inst| inst.get(step_idx)?.params.get(key))
                    .collect();

                let param = if others.iter().all(|v| *v == template_value) {
                    NormalizedParam::Literal(template_value.clone())
                } else {
                    let var_name = format!("var_{}_{}", step_idx, var_counter);
                    var_counter += 1;
                    let sample_values: Vec<serde_json::Value> = std::iter::once(template_value)
                        .chain(others)
                        .cloned()
                        .collect();
                    variables.insert(
                        var_name.clone(),
                        VariableInfo {
                            name: var_name.clone(),
                            sample_values,
                            // The type is guessed from the template's value only.
                            inferred_type: infer_type(template_value),
                        },
                    );
                    NormalizedParam::Variable { name: var_name }
                };
                norm_params.insert(key.clone(), param);
            }

            normalized_actions.push(NormalizedAction {
                tool: template_action.tool.clone(),
                params: norm_params,
            });
        }

        let signature = first
            .iter()
            .map(|action| action.tool.as_str())
            .collect::<Vec<_>>()
            .join("→");

        Some(NormalizedSequence {
            actions: normalized_actions,
            variables,
            signature,
        })
    }
}
/// One recorded tool invocation, as fed into `SequenceNormalizer::normalize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawAction {
/// Name of the tool that was invoked.
pub tool: String,
/// Concrete parameter values keyed by parameter name.
pub params: HashMap<String, serde_json::Value>,
}
fn infer_type(value: &serde_json::Value) -> InferredType {
match value {
serde_json::Value::String(s) => {
if s.contains('/') || s.contains('\\') || s.ends_with(".rs") || s.ends_with(".ts") {
InferredType::Path
} else {
InferredType::String
}
}
serde_json::Value::Number(_) => InferredType::Number,
serde_json::Value::Bool(_) => InferredType::Boolean,
_ => InferredType::Unknown,
}
}
// Unit tests for sequence normalization, type inference and serde round-trips.
#[cfg(test)]
mod tests {
use super::*;
// Builds a `RawAction` for `tool` from a slice of (parameter name, value) pairs.
fn make_action(tool: &str, params: &[(&str, serde_json::Value)]) -> RawAction {
RawAction {
tool: tool.into(),
params: params
.iter()
.map(|(k, v)| (k.to_string(), v.clone()))
.collect(),
}
}
// A parameter with the same value in every instance stays a literal and
// produces no variables.
#[test]
fn test_normalize_constant_params() {
let instances = vec![
vec![make_action("git_add", &[("path", serde_json::json!("."))])],
vec![make_action("git_add", &[("path", serde_json::json!("."))])],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
assert_eq!(norm.actions.len(), 1);
assert_eq!(
norm.actions[0].params["path"],
NormalizedParam::Literal(serde_json::json!("."))
);
assert!(norm.variables.is_empty());
}
// A parameter that differs between instances becomes one variable carrying a
// sample value per instance.
#[test]
fn test_normalize_variable_extraction() {
let instances = vec![
vec![make_action(
"git_commit",
&[("message", serde_json::json!("fix: bug A"))],
)],
vec![make_action(
"git_commit",
&[("message", serde_json::json!("feat: feature B"))],
)],
vec![make_action(
"git_commit",
&[("message", serde_json::json!("chore: cleanup"))],
)],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
assert_eq!(norm.variables.len(), 1);
let var = norm.variables.values().next().unwrap();
assert_eq!(var.sample_values.len(), 3);
assert_eq!(var.inferred_type, InferredType::String);
}
// Instances whose tools differ at the same step cannot be normalized.
#[test]
fn test_normalize_mismatched_tools() {
let instances = vec![
vec![make_action("git_add", &[])],
vec![make_action("git_commit", &[])],
];
assert!(SequenceNormalizer::normalize(&instances).is_none());
}
// The signature is the step tool names joined with `→`.
#[test]
fn test_normalize_signature() {
let instances = vec![
vec![
make_action("git_add", &[("path", serde_json::json!("."))]),
make_action("git_commit", &[("msg", serde_json::json!("a"))]),
],
vec![
make_action("git_add", &[("path", serde_json::json!("."))]),
make_action("git_commit", &[("msg", serde_json::json!("b"))]),
],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
assert_eq!(norm.signature, "git_add→git_commit");
}
// String values containing `/` are inferred as paths.
#[test]
fn test_path_type_inference() {
let instances = vec![
vec![make_action(
"lint",
&[("path", serde_json::json!("src/main.rs"))],
)],
vec![make_action(
"lint",
&[("path", serde_json::json!("src/lib.rs"))],
)],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
let var = norm.variables.values().next().unwrap();
assert_eq!(var.inferred_type, InferredType::Path);
}
// No instances at all yields `None`.
#[test]
fn test_normalize_empty() {
assert!(SequenceNormalizer::normalize(&[]).is_none());
}
// A first instance with no actions yields `None`.
#[test]
fn test_normalize_empty_actions() {
let instances = vec![vec![]];
assert!(SequenceNormalizer::normalize(&instances).is_none());
}
// Instances with different step counts yield `None`.
#[test]
fn test_normalize_different_lengths() {
let instances = vec![
vec![make_action("a", &[])],
vec![make_action("a", &[]), make_action("b", &[])],
];
assert!(SequenceNormalizer::normalize(&instances).is_none());
}
// Varying numeric values are inferred as `Number`.
#[test]
fn test_infer_type_number() {
let instances = vec![
vec![make_action("set", &[("count", serde_json::json!(1))])],
vec![make_action("set", &[("count", serde_json::json!(2))])],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
let var = norm.variables.values().next().unwrap();
assert_eq!(var.inferred_type, InferredType::Number);
}
// Varying boolean values are inferred as `Boolean`.
#[test]
fn test_infer_type_boolean() {
let instances = vec![
vec![make_action("flag", &[("enabled", serde_json::json!(true))])],
vec![make_action("flag", &[("enabled", serde_json::json!(false))])],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
let var = norm.variables.values().next().unwrap();
assert_eq!(var.inferred_type, InferredType::Boolean);
}
// The type comes from the first sample; a leading null is `Unknown`.
#[test]
fn test_infer_type_unknown() {
let instances = vec![
vec![make_action("set", &[("val", serde_json::json!(null))])],
vec![make_action("set", &[("val", serde_json::json!([1,2]))])],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
let var = norm.variables.values().next().unwrap();
assert_eq!(var.inferred_type, InferredType::Unknown);
}
// `NormalizedParam::Variable` survives a JSON round-trip.
#[test]
fn test_normalized_param_variable_serde() {
let var = NormalizedParam::Variable { name: "x".into() };
let json = serde_json::to_string(&var).unwrap();
let restored: NormalizedParam = serde_json::from_str(&json).unwrap();
assert_eq!(restored, var);
}
// Every `InferredType` variant survives a JSON round-trip.
#[test]
fn test_inferred_type_serde() {
for t in [InferredType::String, InferredType::Number, InferredType::Boolean, InferredType::Path, InferredType::Unknown] {
let json = serde_json::to_string(&t).unwrap();
let restored: InferredType = serde_json::from_str(&json).unwrap();
assert_eq!(restored, t);
}
}
// `RawAction` survives a JSON round-trip (only the tool name is checked).
#[test]
fn test_raw_action_serde() {
let action = make_action("test", &[("key", serde_json::json!("val"))]);
let json = serde_json::to_string(&action).unwrap();
let restored: RawAction = serde_json::from_str(&json).unwrap();
assert_eq!(restored.tool, "test");
}
// With a single instance nothing can vary, so every parameter is a literal.
#[test]
fn test_single_instance_all_constant() {
let instances = vec![
vec![make_action("deploy", &[("env", serde_json::json!("prod"))])],
];
let norm = SequenceNormalizer::normalize(&instances).unwrap();
assert!(norm.variables.is_empty());
assert_eq!(norm.actions[0].params["env"], NormalizedParam::Literal(serde_json::json!("prod")));
}
}