use std::collections::{HashMap, HashSet};
use crate::parser::types::{Command, Job, ParameterType};
/// Maps each removed job to the new job that appears to have absorbed it.
///
/// For every key of `removed_jobs`, `find_absorbing_job` is asked for a job in `new_jobs`
/// that gained a boolean parameter named after the removed job (`run_<name>`,
/// `include_<name>` or `<name>_step`) which its counterpart in `old_jobs` did not have.
///
/// Returns a map from removed job name to absorbing job name. Removed jobs without an
/// absorbing job are left out, so the result may be empty.
pub fn detect_absorbed_jobs(
    removed_jobs: &HashMap<String, &Job>,
    new_jobs: &HashMap<String, &Job>,
    old_jobs: &HashMap<String, &Job>,
) -> HashMap<String, String> {
    removed_jobs
        .keys()
        .filter_map(|removed_name| {
            find_absorbing_job(removed_name, new_jobs, old_jobs)
                .map(|absorbing| (removed_name.clone(), absorbing))
        })
        .collect()
}
fn find_absorbing_job(
removed_name: &str,
new_jobs: &HashMap<String, &Job>,
old_jobs: &HashMap<String, &Job>,
) -> Option<String> {
let candidate_param_names = absorption_candidate_params(removed_name);
for (job_name, new_job) in new_jobs {
let old_params: HashSet<&str> = old_jobs
.get(job_name.as_str())
.map(|j| j.parameters.keys().map(|s| s.as_str()).collect())
.unwrap_or_default();
for param_name in &candidate_param_names {
if let Some(param) = new_job.parameters.get(param_name.as_str()) {
if param.param_type == ParameterType::Boolean
&& !old_params.contains(param_name.as_str())
{
return Some(job_name.clone());
}
}
}
}
None
}
/// Returns the parameter names checked when looking for a job that absorbed `job_name`:
/// `run_<job_name>`, `include_<job_name>` and `<job_name>_step`, in that order.
fn absorption_candidate_params(job_name: &str) -> Vec<String> {
    // (prefix, suffix) pairs wrapped around the job name.
    const AFFIXES: [(&str, &str); 3] = [("run_", ""), ("include_", ""), ("", "_step")];

    AFFIXES
        .iter()
        .map(|(prefix, suffix)| format!("{prefix}{job_name}{suffix}"))
        .collect()
}
/// Detects removed jobs that were most likely renamed rather than deleted.
///
/// Detection runs in two passes:
/// 1. `git_hints` (removed name -> new name) are applied first through `apply_git_hints`;
///    a hint is honoured whenever its target exists in `new_jobs`.
/// 2. Every removed job not covered by a hint is compared, by the Jaccard similarity of
///    its parameter names, against the jobs that exist in `new_jobs` but not in
///    `old_jobs`. The best candidate whose similarity is at least `threshold` wins.
///
/// Removed names with no entry in `old_jobs` are skipped in the second pass.
///
/// Returns a map from removed job name to the name it was renamed to.
pub fn detect_renamed_jobs(
    removed_names: &HashSet<String>,
    new_jobs: &HashMap<String, &Job>,
    old_jobs: &HashMap<String, &Job>,
    threshold: f64,
    git_hints: &HashMap<String, String>,
) -> HashMap<String, String> {
    let mut renamed = HashMap::new();
    let covered_by_hint = apply_git_hints(removed_names, new_jobs, git_hints, &mut renamed);

    // Only jobs that did not exist before can be the target of a similarity-based rename.
    let mut added_jobs: HashMap<&str, &Job> = HashMap::new();
    for (name, job) in new_jobs {
        if !old_jobs.contains_key(name) {
            added_jobs.insert(name.as_str(), *job);
        }
    }

    let inferred = removed_names
        .iter()
        .filter(|removed_name| !covered_by_hint.contains(*removed_name))
        .filter_map(|removed_name| {
            let old_job = old_jobs.get(removed_name)?;
            let old_params: HashSet<&str> =
                old_job.parameters.keys().map(String::as_str).collect();
            let new_name = best_jaccard_match(&old_params, &added_jobs, threshold)?;
            Some((removed_name.clone(), new_name.to_string()))
        });
    renamed.extend(inferred);

    renamed
}
/// Records the renames given by `git_hints` and reports which removed jobs they cover.
///
/// For each name in `removed_names` that has a hint whose target is a key of `new_jobs`,
/// the pair (removed name, hinted name) is inserted into `renamed`. Hints for names that
/// are not in `removed_names`, or whose target is missing from `new_jobs`, are ignored.
///
/// Returns the set of removed names that were covered by a hint, borrowed from
/// `removed_names`.
fn apply_git_hints<'a>(
    removed_names: &'a HashSet<String>,
    new_jobs: &HashMap<String, &Job>,
    git_hints: &HashMap<String, String>,
    renamed: &mut HashMap<String, String>,
) -> HashSet<&'a String> {
    removed_names
        .iter()
        .filter_map(|removed_name| {
            let target = git_hints
                .get(removed_name)
                .filter(|target| new_jobs.contains_key(target.as_str()))?;
            renamed.insert(removed_name.clone(), target.clone());
            Some(removed_name)
        })
        .collect()
}
/// Picks the added job whose parameter names are most similar to `old_params`.
///
/// Similarity is the Jaccard index computed by `jaccard_similarity` over parameter names.
/// Candidates scoring below `threshold` are ignored; a score equal to `threshold` is
/// accepted.
///
/// Returns the name of the highest-scoring candidate, or `None` when no candidate reaches
/// `threshold`. Candidates with equal scores are resolved in favour of the
/// lexicographically smallest name, so the result does not depend on `HashMap` iteration
/// order (which differs between runs).
fn best_jaccard_match<'a>(
    old_params: &HashSet<&str>,
    added_jobs: &HashMap<&'a str, &Job>,
    threshold: f64,
) -> Option<&'a str> {
    let mut best: Option<(&'a str, f64)> = None;

    for (&candidate, new_job) in added_jobs {
        let new_params: HashSet<&str> = new_job.parameters.keys().map(String::as_str).collect();
        let sim = jaccard_similarity(old_params, &new_params);
        if sim >= threshold {
            let improves = match best {
                None => true,
                // Scores are ratios of small integers computed the same way, so exact
                // equality reliably identifies a tie.
                Some((best_name, best_sim)) => {
                    sim > best_sim || (sim == best_sim && candidate < best_name)
                }
            };
            if improves {
                best = Some((candidate, sim));
            }
        }
    }

    best.map(|(name, _)| name)
}
/// Detects removed commands that were most likely renamed rather than deleted.
///
/// Each name in `removed_names` that has an entry in `old_commands` is compared, by the
/// Jaccard similarity of its parameter names, against the commands that exist in
/// `new_commands` but not in `old_commands`. The highest-scoring candidate whose
/// similarity is at least `threshold` is recorded as the rename target.
///
/// Candidates with equal scores are resolved in favour of the lexicographically smallest
/// name, so the result does not depend on `HashMap` iteration order (which differs
/// between runs).
///
/// Returns a map from removed command name to the name it was renamed to; removed
/// commands without a sufficiently similar candidate are left out.
pub fn detect_renamed_commands(
    removed_names: &HashSet<String>,
    new_commands: &HashMap<String, &Command>,
    old_commands: &HashMap<String, &Command>,
    threshold: f64,
) -> HashMap<String, String> {
    // Candidate targets and their parameter-name sets, built once rather than once per
    // removed command.
    let added_commands: Vec<(&str, HashSet<&str>)> = new_commands
        .iter()
        .filter(|(name, _)| !old_commands.contains_key(name.as_str()))
        .map(|(name, cmd)| {
            let params = cmd
                .parameters
                .keys()
                .map(|key| key.as_str())
                .collect::<HashSet<&str>>();
            (name.as_str(), params)
        })
        .collect();

    let mut renamed = HashMap::new();
    for removed_name in removed_names {
        let Some(old_cmd) = old_commands.get(removed_name.as_str()) else {
            continue;
        };
        let old_params: HashSet<&str> = old_cmd.parameters.keys().map(|key| key.as_str()).collect();

        let mut best: Option<(&str, f64)> = None;
        for (candidate, new_params) in &added_commands {
            let sim = jaccard_similarity(&old_params, new_params);
            if sim >= threshold {
                let improves = match best {
                    None => true,
                    // Scores are ratios of small integers computed the same way, so exact
                    // equality reliably identifies a tie.
                    Some((best_name, best_sim)) => {
                        sim > best_sim || (sim == best_sim && *candidate < best_name)
                    }
                };
                if improves {
                    best = Some((*candidate, sim));
                }
            }
        }

        if let Some((new_name, _)) = best {
            renamed.insert(removed_name.clone(), new_name.to_string());
        }
    }
    renamed
}
/// Computes the Jaccard index of two sets: the number of shared elements divided by the
/// number of distinct elements across both.
///
/// Returns a value in `0.0..=1.0`. Two empty sets yield `0.0` rather than dividing by zero.
fn jaccard_similarity(a: &HashSet<&str>, b: &HashSet<&str>) -> f64 {
    let shared = a.intersection(b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = a.len() + b.len() - shared;
    match combined {
        0 => 0.0,
        _ => shared as f64 / combined as f64,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::types::{Parameter, ParameterType};

    /// Builds a `Job` with one bare parameter (no description, default or enum values)
    /// per `(name, type)` pair; every other field keeps its `Default` value.
    fn job_with_params(param_names: &[(&str, ParameterType)]) -> Job {
        let parameters = param_names
            .iter()
            .map(|&(name, param_type)| {
                let parameter = Parameter {
                    param_type,
                    description: None,
                    default: None,
                    enum_values: None,
                };
                (name.to_string(), parameter)
            })
            .collect();
        Job {
            parameters,
            ..Default::default()
        }
    }

    /// Indexes borrowed jobs by owned name, the map shape the detectors take as input.
    fn jobs_by_name<'a>(entries: &[(&str, &'a Job)]) -> HashMap<String, &'a Job> {
        entries
            .iter()
            .map(|&(name, job)| (name.to_string(), job))
            .collect()
    }

    /// Builds an owned set of names.
    fn name_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    #[test]
    fn test_absorption_candidate_params() {
        let candidates = absorption_candidate_params("label");
        for expected in ["run_label", "include_label", "label_step"] {
            assert!(candidates.iter().any(|candidate| candidate == expected));
        }
    }

    /// A removed `label` job is absorbed by `update_prlog`, which gained `run_label`.
    #[test]
    fn test_detect_absorbed_jobs_toolkit_case() {
        let old_label = job_with_params(&[("context", ParameterType::String)]);
        let old_update_prlog = job_with_params(&[("min_rust_version", ParameterType::String)]);
        let new_update_prlog = job_with_params(&[
            ("context", ParameterType::String),
            ("run_label", ParameterType::Boolean),
        ]);

        let removed_jobs = jobs_by_name(&[("label", &old_label)]);
        let new_jobs = jobs_by_name(&[("update_prlog", &new_update_prlog)]);
        let old_jobs = jobs_by_name(&[
            ("label", &old_label),
            ("update_prlog", &old_update_prlog),
        ]);

        let absorbed = detect_absorbed_jobs(&removed_jobs, &new_jobs, &old_jobs);

        assert_eq!(
            absorbed.get("label").map(String::as_str),
            Some("update_prlog")
        );
    }

    /// No new job carries a boolean flag named after the removed job.
    #[test]
    fn test_detect_absorbed_jobs_no_match() {
        let old_label = job_with_params(&[]);
        let new_other = job_with_params(&[("some_string", ParameterType::String)]);

        let removed_jobs = jobs_by_name(&[("label", &old_label)]);
        let new_jobs = jobs_by_name(&[("other", &new_other)]);
        let old_jobs = jobs_by_name(&[("label", &old_label)]);

        let absorbed = detect_absorbed_jobs(&removed_jobs, &new_jobs, &old_jobs);

        assert!(absorbed.is_empty());
    }

    #[test]
    fn test_jaccard_similarity_identical() {
        let names = HashSet::from(["foo", "bar", "baz"]);
        let same = names.clone();
        let sim = jaccard_similarity(&names, &same);
        assert!((sim - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_jaccard_similarity_disjoint() {
        let left = HashSet::from(["foo"]);
        let right = HashSet::from(["bar"]);
        let sim = jaccard_similarity(&left, &right);
        assert!(sim.abs() < f64::EPSILON);
    }

    #[test]
    fn test_jaccard_similarity_partial() {
        let left = HashSet::from(["foo", "bar", "baz"]);
        let right = HashSet::from(["foo", "bar", "qux"]);
        // Two shared names out of four distinct names.
        let expected = 2.0 / 4.0;
        let sim = jaccard_similarity(&left, &right);
        assert!((sim - expected).abs() < 1e-9);
    }

    /// Identical parameter sets under a new name are reported as a rename.
    #[test]
    fn test_detect_renamed_jobs() {
        let shared_params = [
            ("context", ParameterType::String),
            ("cargo_all_features", ParameterType::Boolean),
            ("cache_version", ParameterType::String),
        ];
        let old_job = job_with_params(&shared_params);
        let new_job = job_with_params(&shared_params);

        let removed = name_set(&["idiomatic_rust"]);
        let new_jobs = jobs_by_name(&[("idiomatic_rust_rolling", &new_job)]);
        let old_jobs = jobs_by_name(&[("idiomatic_rust", &old_job)]);
        let no_hints: HashMap<String, String> = HashMap::new();

        let renamed = detect_renamed_jobs(&removed, &new_jobs, &old_jobs, 0.7, &no_hints);

        assert_eq!(
            renamed.get("idiomatic_rust").map(String::as_str),
            Some("idiomatic_rust_rolling")
        );
    }

    /// A rename onto a name that already existed is only found through a hint, because
    /// similarity matching considers newly added jobs only.
    #[test]
    fn test_detect_renamed_jobs_with_git_hint_when_target_existed() {
        let shared = [
            ("min_rust_version", ParameterType::String),
            ("cargo_all_features", ParameterType::Boolean),
            ("cache_version", ParameterType::String),
        ];
        let old_rolling = job_with_params(&shared);
        let old_pinned = job_with_params(&shared);
        let new_standard = job_with_params(&shared);

        let removed = name_set(&["required_builds_rolling"]);
        let new_jobs = jobs_by_name(&[("required_builds", &new_standard)]);
        let old_jobs = jobs_by_name(&[
            ("required_builds_rolling", &old_rolling),
            ("required_builds", &old_pinned),
        ]);

        let no_hints: HashMap<String, String> = HashMap::new();
        let without_hint = detect_renamed_jobs(&removed, &new_jobs, &old_jobs, 0.7, &no_hints);
        assert!(
            without_hint.is_empty(),
            "Without git hint, Jaccard should NOT detect rename when target existed before"
        );

        let hints = HashMap::from([(
            "required_builds_rolling".to_string(),
            "required_builds".to_string(),
        )]);
        let with_hint = detect_renamed_jobs(&removed, &new_jobs, &old_jobs, 0.7, &hints);
        assert_eq!(
            with_hint.get("required_builds_rolling"),
            Some(&"required_builds".to_string()),
            "With git hint, rename must be detected even when target existed before"
        );
    }

    /// Disjoint parameter sets score below the threshold, so nothing is reported.
    #[test]
    fn test_detect_renamed_jobs_below_threshold() {
        let old_job =
            job_with_params(&[("a", ParameterType::String), ("b", ParameterType::String)]);
        let new_job =
            job_with_params(&[("x", ParameterType::String), ("y", ParameterType::String)]);

        let removed = name_set(&["old_job"]);
        let new_jobs = jobs_by_name(&[("completely_different", &new_job)]);
        let old_jobs = jobs_by_name(&[("old_job", &old_job)]);
        let no_hints: HashMap<String, String> = HashMap::new();

        let renamed = detect_renamed_jobs(&removed, &new_jobs, &old_jobs, 0.7, &no_hints);

        assert!(renamed.is_empty());
    }
}