use serde::{Deserialize, Serialize};
/// An association between a failure signal and a transpiler decision suspected
/// of causing it.
///
/// Within this file, instances are built by `localize_signal_patterns` and
/// selected for a single entry by `suggest_fixes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShellFixPattern {
/// Failure signal label this pattern is attached to, e.g. `"B1_containment_fail"`
/// (one of the strings produced by `classify_failure_signals`).
pub error_signal: String,
/// The suspect decision, keyed as `"<decision_type>:<choice>"`.
pub causal_decision: String,
/// Fix category computed from the decision type by `derive_fix_type`,
/// e.g. `"quoting_strategy"`.
pub fix_type: String,
/// Score reported by fault localization for `causal_decision`.
/// Patterns mined in this file always carry a score greater than zero.
pub confidence: f64,
/// Ids of failing corpus entries whose decision trace contains `causal_decision`.
pub evidence_ids: Vec<String>,
}
/// The result of a mining run: the discovered patterns plus summary counters.
///
/// Built by `mine_patterns` and consumed by `suggest_fixes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatternStore {
/// Mined patterns; `mine_patterns` stores them sorted by descending confidence.
pub patterns: Vec<ShellFixPattern>,
/// Number of corpus entries that were run during mining.
pub total_entries: usize,
/// Number of distinct entry ids that produced at least one failure signal.
pub total_failures: usize,
/// Version string for the store; `mine_patterns` writes `"1.0.0"`.
pub version: String,
}
/// Translates the pass/fail flags of a corpus result into failure signal labels.
///
/// A transpilation failure short-circuits everything else: the returned list
/// then contains only `"A_transpile_fail"`. Otherwise one label is emitted for
/// every check that did not pass, in the fixed order B1, B2, B3, D, G.
/// An empty list means no check failed.
pub fn classify_failure_signals(result: &super::runner::CorpusResult) -> Vec<String> {
    if !result.transpiled {
        return vec!["A_transpile_fail".to_string()];
    }

    // (check passed?, label reported when it did not) — order defines output order.
    let checks = [
        (result.output_contains, "B1_containment_fail"),
        (result.output_exact, "B2_exact_fail"),
        (result.output_behavioral, "B3_behavioral_fail"),
        (result.lint_clean, "D_lint_fail"),
        (result.cross_shell_agree, "G_cross_shell_fail"),
    ];

    checks
        .iter()
        .filter(|check| !check.0)
        .map(|check| check.1.to_string())
        .collect()
}
/// Maps a decision key (`"<decision_type>:<choice>"`) to a fix category name.
///
/// Only the part before the first `:` is inspected. Recognised decision types
/// map to a fixed category; anything else falls back to
/// `"<decision_type>_strategy"`.
fn derive_fix_type(decision_key: &str) -> String {
    // `split` always yields at least one piece, so the fallback is defensive only.
    let kind = decision_key.split(':').next().unwrap_or(decision_key);

    let known_category: Option<&'static str> = match kind {
        "assignment_value" => Some("quoting_strategy"),
        "ir_dispatch" => Some("ir_node_handling"),
        "string_emit" | "string_interpolation" => Some("string_handling"),
        "variable_expansion" => Some("expansion_strategy"),
        "command_substitution" => Some("substitution_strategy"),
        "redirect" | "redirect_emit" => Some("redirect_handling"),
        "pipe_emit" => Some("pipe_handling"),
        "arithmetic" | "arithmetic_emit" => Some("arithmetic_strategy"),
        "conditional" | "if_emit" => Some("conditional_handling"),
        "loop_emit" | "for_emit" | "while_emit" => Some("loop_handling"),
        "function_emit" => Some("function_handling"),
        _ => None,
    };

    match known_category {
        Some(category) => category.to_string(),
        None => format!("{kind}_strategy"),
    }
}
/// Builds a map from entry id to the decision keys recorded in that entry's trace.
///
/// Each key has the form `"<decision_type>:<choice>"`, in trace order. Entries
/// without a decision trace map to an empty list. If the same id occurs more
/// than once, the last occurrence wins.
fn build_entry_locations(
    entry_results: &[(String, super::runner::CorpusResult)],
) -> std::collections::HashMap<String, Vec<String>> {
    let mut locations = std::collections::HashMap::new();

    for (entry_id, outcome) in entry_results {
        let decision_keys: Vec<String> = match outcome.decision_trace.as_ref() {
            Some(trace) => trace
                .iter()
                .map(|step| format!("{}:{}", step.decision_type, step.choice))
                .collect(),
            None => Vec::new(),
        };
        locations.insert(entry_id.clone(), decision_keys);
    }

    locations
}
/// Groups failing entry ids by the failure signals they exhibit.
///
/// Returns the signal -> entry-ids map together with the number of distinct
/// entry ids that produced at least one signal. An entry that fails several
/// checks appears under each of its signals but is counted once.
fn group_failures_by_signal(
    entry_results: &[(String, super::runner::CorpusResult)],
) -> (std::collections::HashMap<String, Vec<String>>, usize) {
    use std::collections::{HashMap, HashSet};

    let mut by_signal: HashMap<String, Vec<String>> = HashMap::new();
    let mut failing_entries: HashSet<String> = HashSet::new();

    for (entry_id, outcome) in entry_results {
        let labels = classify_failure_signals(outcome);
        if labels.is_empty() {
            // Entry passed every check; nothing to record.
            continue;
        }

        failing_entries.insert(entry_id.clone());
        for label in labels {
            by_signal.entry(label).or_default().push(entry_id.clone());
        }
    }

    (by_signal, failing_entries.len())
}
fn localize_signal_patterns(
signal: &str,
failing_ids: &[String],
entry_results: &[(String, super::runner::CorpusResult)],
entry_locations: &std::collections::HashMap<String, Vec<String>>,
) -> Vec<ShellFixPattern> {
use crate::quality::sbfl::{localize_faults, SbflFormula};
let failing_set: std::collections::HashSet<&String> = failing_ids.iter().collect();
let coverage_data: Vec<(String, bool, Vec<String>)> = entry_results
.iter()
.filter_map(|(id, _)| {
let locs = entry_locations.get(id)?;
if locs.is_empty() {
return None;
}
let passed_for_signal = !failing_set.contains(id);
Some((id.clone(), passed_for_signal, locs.clone()))
})
.collect();
if coverage_data.is_empty() {
return Vec::new();
}
let rankings = localize_faults(&coverage_data, SbflFormula::Tarantula);
rankings
.iter()
.take(5)
.filter(|r| r.score > 0.0)
.filter_map(|ranking| {
let evidence: Vec<String> = failing_ids
.iter()
.filter(|id| {
entry_locations
.get(*id)
.is_some_and(|locs| locs.contains(&ranking.location))
})
.cloned()
.collect();
if evidence.is_empty() {
return None;
}
Some(ShellFixPattern {
error_signal: signal.to_string(),
causal_decision: ranking.location.clone(),
fix_type: derive_fix_type(&ranking.location),
confidence: ranking.score,
evidence_ids: evidence,
})
})
.collect()
}
/// Runs every registry entry with tracing and mines fix patterns from the failures.
///
/// Each entry is executed through `runner.run_entry_with_trace`. Failures are
/// grouped by signal, and for each signal the suspect decisions are turned into
/// [`ShellFixPattern`]s by `localize_signal_patterns`.
///
/// The returned store lists patterns by descending confidence. Ties are broken
/// by `error_signal` and then `causal_decision`, so the result does not depend
/// on the iteration order of the intermediate hash map and is reproducible
/// across runs. When no entry fails, the store has no patterns and
/// `total_failures` is zero.
pub fn mine_patterns(
    registry: &super::registry::CorpusRegistry,
    runner: &super::runner::CorpusRunner,
) -> PatternStore {
    const STORE_VERSION: &str = "1.0.0";

    let entry_results: Vec<(String, super::runner::CorpusResult)> = registry
        .entries
        .iter()
        .map(|entry| (entry.id.clone(), runner.run_entry_with_trace(entry)))
        .collect();
    let total_entries = entry_results.len();
    let (signal_failures, total_failures) = group_failures_by_signal(&entry_results);

    let mut patterns: Vec<ShellFixPattern> = if total_failures == 0 {
        // Nothing failed: skip building the location map entirely.
        Vec::new()
    } else {
        let entry_locations = build_entry_locations(&entry_results);
        signal_failures
            .iter()
            .flat_map(|(signal, failing_ids)| {
                localize_signal_patterns(signal, failing_ids, &entry_results, &entry_locations)
            })
            .collect()
    };

    // `signal_failures` is a HashMap, so the collection order above varies from
    // run to run. Sorting on confidence alone would leak that order into ties;
    // the secondary keys make the final order fully determined.
    patterns.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| a.error_signal.cmp(&b.error_signal))
            .then_with(|| a.causal_decision.cmp(&b.causal_decision))
    });

    PatternStore {
        patterns,
        total_entries,
        total_failures,
        version: STORE_VERSION.to_string(),
    }
}
/// Suggests fix patterns from `store` that apply to a single corpus entry.
///
/// The entry is looked up by `entry_id` and re-run with tracing. A stored
/// pattern is suggested when its `error_signal` is among the entry's failure
/// signals and its `causal_decision` appears in the entry's decision trace.
///
/// Suggestions are ordered by descending confidence and contain at most one
/// pattern per `causal_decision` (the highest-confidence one; among equal
/// confidences, the one that comes first in `store.patterns`).
///
/// Returns an empty list when the entry is unknown, when it has no failure
/// signals, or when no stored pattern matches.
pub fn suggest_fixes(
    entry_id: &str,
    registry: &super::registry::CorpusRegistry,
    runner: &super::runner::CorpusRunner,
    store: &PatternStore,
) -> Vec<ShellFixPattern> {
    let Some(entry) = registry.entries.iter().find(|e| e.id == entry_id) else {
        return Vec::new();
    };

    let result = runner.run_entry_with_trace(entry);
    let signals = classify_failure_signals(&result);
    if signals.is_empty() {
        return Vec::new();
    }

    let trace_decisions: std::collections::HashSet<String> = result
        .decision_trace
        .as_ref()
        .map(|t| {
            t.iter()
                .map(|d| format!("{}:{}", d.decision_type, d.choice))
                .collect()
        })
        .unwrap_or_default();

    // Work on references until the final list is known, so only the surviving
    // patterns are cloned.
    let mut matching: Vec<&ShellFixPattern> = store
        .patterns
        .iter()
        .filter(|p| {
            signals.contains(&p.error_signal) && trace_decisions.contains(&p.causal_decision)
        })
        .collect();

    // Stable sort: equal confidences keep their relative order from the store.
    matching.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // The list is ordered by confidence, so patterns sharing a decision are not
    // necessarily adjacent. A seen-set removes every later duplicate, which
    // `Vec::dedup_by` (consecutive elements only) would miss.
    let mut seen_decisions: std::collections::HashSet<&str> = std::collections::HashSet::new();
    matching
        .into_iter()
        .filter(|&p| seen_decisions.insert(p.causal_decision.as_str()))
        .cloned()
        .collect()
}
// Unit tests for this module live in a separate file that is compiled only
// for test builds.
#[cfg(test)]
#[path = "pattern_store_tests_classify_fai.rs"]
mod tests_extracted;