use crate::parser::{SarifError, SarifResult as ParseResult};
use crate::types::{Level, Result as SarifResult, Run, SarifLog};
use crate::utils::indexing::{ResultLocation, SarifIndex};
use std::collections::{HashMap, HashSet};
/// Options controlling how `SarifMerger` combines several SARIF logs into one.
#[derive(Debug, Clone)]
pub struct MergeConfig {
/// When `true`, runs that share the same tool driver name are folded into a single run.
pub consolidate_runs: bool,
/// When `true`, results with an identical computed fingerprint are dropped within each run.
pub deduplicate_results: bool,
/// How the `version` of the merged log is chosen.
pub version_strategy: VersionMergeStrategy,
/// NOTE(review): not read by any code visible in this file; presumably requests
/// path normalization during the merge — confirm before relying on it.
pub normalize_paths: bool,
/// Upper bound on the number of runs kept in the output; surplus runs are dropped
/// and a warning is recorded. `None` means no limit.
pub max_runs: Option<usize>,
/// Filters deciding which input runs take part in the merge.
pub filters: MergeFilters,
}
/// Policy for picking the `version` field of a merged SARIF log.
#[derive(Debug, Clone, PartialEq)]
pub enum VersionMergeStrategy {
/// Use the greatest version found among the input logs.
UseLatest,
/// Use the version of the first input log.
UseFirst,
/// Require every input log to carry the same version; a mismatch is an error.
RequireMatch,
/// Always use the given version string, regardless of the inputs.
UseSpecific(String),
}
/// Filters applied while collecting runs for a merge.
///
/// NOTE(review): in the code visible here only `include_tools` and `exclude_tools`
/// are consulted; the remaining fields appear to be reserved for filtering that is
/// not implemented in this file — confirm.
#[derive(Debug, Clone, Default)]
pub struct MergeFilters {
/// When `Some`, only runs whose tool driver name is in the set are kept.
pub include_tools: Option<HashSet<String>>,
/// Runs whose tool driver name is in this set are dropped.
pub exclude_tools: HashSet<String>,
/// Presumably an allow-list of rule ids (not consulted by the visible merge code).
pub include_rules: Option<HashSet<String>>,
/// Presumably a deny-list of rule ids (not consulted by the visible merge code).
pub exclude_rules: HashSet<String>,
/// Presumably a minimum severity threshold (not consulted by the visible merge code).
pub min_level: Option<Level>,
/// Presumably file path patterns to include (not consulted by the visible merge code).
pub include_file_patterns: Vec<String>,
}
/// Options controlling how `SarifDiffer` compares two SARIF logs.
#[derive(Debug, Clone)]
pub struct DiffConfig {
/// How a result in one log is paired with its counterpart in the other.
pub matching_strategy: ResultMatchingStrategy,
/// When `false`, results found only in the baseline are left out of the output.
pub include_removed: bool,
/// When `false`, results found only in the comparison log are left out of the output.
pub include_added: bool,
/// When `false`, matched results that differ are left out of the output.
pub include_modified: bool,
/// When `true`, a differing message text is not reported as a change.
pub ignore_message_changes: bool,
/// When `true`, a differing location is not reported as a change.
pub ignore_location_changes: bool,
}
/// Ways of deciding that two results from different logs are "the same" finding.
#[derive(Debug, Clone, PartialEq)]
pub enum ResultMatchingStrategy {
/// Match results stored under the same index key.
/// NOTE(review): the key is produced by `SarifIndex`; presumably GUID-based — confirm.
ByGuid,
/// Match on rule id plus the first location's URI and start line/column.
ByRuleAndLocation,
/// Match on rule id plus message text.
ByRuleAndMessage,
/// Match when both results share at least one fingerprint entry with equal key and value.
ByFingerprint,
}
/// Outcome of a merge: the combined log plus bookkeeping about what happened.
#[derive(Debug, Clone)]
pub struct MergeResult {
/// The combined SARIF log.
pub merged_log: SarifLog,
/// Counters describing the inputs and outputs of the merge.
pub stats: MergeStats,
/// Human-readable notes about non-fatal events (for example run truncation).
pub warnings: Vec<String>,
}
/// Counters collected while merging.
#[derive(Debug, Clone, Default)]
pub struct MergeStats {
/// Number of logs passed to the merge.
pub input_logs: usize,
/// Number of input runs that passed the tool filters.
pub input_runs: usize,
/// Number of runs in the merged log.
pub output_runs: usize,
/// Number of results in the accepted input runs.
pub input_results: usize,
/// Number of results in the merged log.
pub output_results: usize,
/// Number of results dropped as duplicates.
pub deduplicated_results: usize,
/// Number of runs that were folded into another run of the same tool.
pub consolidated_runs: usize,
}
/// Outcome of comparing a baseline log with a comparison log.
#[derive(Debug, Clone)]
pub struct DiffResult {
/// Results present in the comparison log with no match in the baseline.
pub added: Vec<(SarifResult, ResultLocation)>,
/// Results present in the baseline with no match in the comparison log.
pub removed: Vec<(SarifResult, ResultLocation)>,
/// Matched pairs for which at least one change was detected.
pub modified: Vec<ResultDiff>,
/// Matched results with no detected change (the baseline side is stored).
pub unchanged: Vec<(SarifResult, ResultLocation)>,
/// Summary counters for the lists above.
pub stats: DiffStats,
}
/// A matched pair of results together with the differences found between them.
#[derive(Debug, Clone)]
pub struct ResultDiff {
/// The result (and its index location) as found in the baseline log.
pub baseline: (SarifResult, ResultLocation),
/// The matching result (and its index location) from the comparison log.
pub comparison: (SarifResult, ResultLocation),
/// Every difference detected between the two results.
pub changes: Vec<ResultChange>,
}
/// One detected difference between a baseline result and its matched counterpart.
#[derive(Debug, Clone, PartialEq)]
pub enum ResultChange {
/// The message text differs; a missing text is reported as an empty string.
MessageChanged { old: String, new: String },
/// The severity level differs.
LevelChanged {
old: Option<Level>,
new: Option<Level>,
},
/// The rendered location string of the first location differs.
LocationChanged { old: String, new: String },
/// The rule id differs.
RuleChanged {
old: Option<String>,
new: Option<String>,
},
/// A named property differs.
/// NOTE(review): never constructed by the code visible in this file — confirm usage.
PropertyChanged {
key: String,
old: Option<String>,
new: Option<String>,
},
}
/// Summary counters for a diff.
///
/// The `added`, `removed` and `modified` counts reflect the lists actually returned,
/// i.e. after the `include_*` switches of `DiffConfig` have been applied.
#[derive(Debug, Clone, Default)]
pub struct DiffStats {
/// Number of indexed results in the baseline log.
pub baseline_count: usize,
/// Number of indexed results in the comparison log.
pub comparison_count: usize,
/// Length of the returned `added` list.
pub added_count: usize,
/// Length of the returned `removed` list.
pub removed_count: usize,
/// Length of the returned `modified` list.
pub modified_count: usize,
/// Length of the returned `unchanged` list.
pub unchanged_count: usize,
}
/// Merges several SARIF logs into one according to a `MergeConfig`.
pub struct SarifMerger {
// Settings applied by every call to `merge`.
config: MergeConfig,
}
impl SarifMerger {
pub fn new() -> Self {
Self {
config: MergeConfig::default(),
}
}
/// Creates a merger that uses the supplied configuration.
pub fn with_config(config: MergeConfig) -> Self {
Self { config }
}
pub fn merge(&self, logs: &[SarifLog]) -> ParseResult<MergeResult> {
if logs.is_empty() {
return Err(SarifError::custom("Cannot merge empty list of SARIF logs"));
}
let mut stats = MergeStats {
input_logs: logs.len(),
..Default::default()
};
let mut warnings = Vec::new();
let target_version = self.determine_target_version(logs, &mut warnings)?;
let mut all_runs = Vec::new();
for log in logs {
if self.should_include_log(log) {
for run in &log.runs {
if self.should_include_run(run) {
all_runs.push(run.clone());
stats.input_runs += 1;
if let Some(results) = &run.results {
stats.input_results += results.len();
}
}
}
}
}
let final_runs = if self.config.consolidate_runs {
self.consolidate_runs(all_runs, &mut stats)
} else {
all_runs
};
let final_runs = if self.config.deduplicate_results {
self.deduplicate_results(final_runs, &mut stats)
} else {
final_runs
};
let final_runs = if let Some(max_runs) = self.config.max_runs {
if final_runs.len() > max_runs {
warnings.push(format!(
"Truncated output to {} runs (was {})",
max_runs,
final_runs.len()
));
final_runs.into_iter().take(max_runs).collect()
} else {
final_runs
}
} else {
final_runs
};
stats.output_runs = final_runs.len();
for run in &final_runs {
if let Some(results) = &run.results {
stats.output_results += results.len();
}
}
let merged_log = SarifLog {
version: target_version,
schema: logs.first().and_then(|log| log.schema.clone()),
runs: final_runs,
inline_external_properties: None,
properties: None,
};
Ok(MergeResult {
merged_log,
stats,
warnings,
})
}
fn determine_target_version(
&self,
logs: &[SarifLog],
_warnings: &mut Vec<String>,
) -> ParseResult<String> {
match &self.config.version_strategy {
VersionMergeStrategy::UseSpecific(version) => Ok(version.clone()),
VersionMergeStrategy::UseFirst => Ok(logs[0].version.clone()),
VersionMergeStrategy::UseLatest => {
let mut versions: Vec<_> = logs.iter().map(|log| &log.version).collect();
versions.sort();
Ok(versions.last().unwrap().to_string())
}
VersionMergeStrategy::RequireMatch => {
let first_version = &logs[0].version;
for log in &logs[1..] {
if log.version != *first_version {
return Err(SarifError::custom(format!(
"Version mismatch: {} vs {}",
first_version, log.version
)));
}
}
Ok(first_version.clone())
}
}
}
/// Decides whether a whole log takes part in the merge.
///
/// Currently every log is accepted; the argument is unused.
fn should_include_log(&self, _log: &SarifLog) -> bool {
true
}
fn should_include_run(&self, run: &Run) -> bool {
let tool_name = &run.tool.driver.name;
if let Some(ref include_tools) = self.config.filters.include_tools
&& !include_tools.contains(tool_name)
{
return false;
}
if self.config.filters.exclude_tools.contains(tool_name) {
return false;
}
true
}
/// Folds all runs that share a tool driver name into a single run per tool.
///
/// The first run seen for a tool becomes the base run; the `results`,
/// `artifacts` and `invocations` of every later run of that tool are appended
/// to it, and the other fields of those later runs are discarded.
///
/// The output keeps the order in which each tool first appears in `runs`, so
/// the merged log (and any later `max_runs` truncation) is deterministic.
/// `stats.consolidated_runs` is incremented once for every run folded into
/// another one.
fn consolidate_runs(&self, runs: Vec<Run>, stats: &mut MergeStats) -> Vec<Run> {
    // Maps a tool name to the position of its base run in `consolidated`.
    let mut slot_by_tool: HashMap<String, usize> = HashMap::new();
    let mut consolidated: Vec<Run> = Vec::new();

    for run in runs {
        let existing_slot = slot_by_tool.get(&run.tool.driver.name).copied();
        match existing_slot {
            Some(slot) => {
                stats.consolidated_runs += 1;
                let base_run = &mut consolidated[slot];
                if let Some(additional_results) = run.results {
                    base_run
                        .results
                        .get_or_insert_with(Vec::new)
                        .extend(additional_results);
                }
                if let Some(additional_artifacts) = run.artifacts {
                    base_run
                        .artifacts
                        .get_or_insert_with(Vec::new)
                        .extend(additional_artifacts);
                }
                if let Some(additional_invocations) = run.invocations {
                    base_run
                        .invocations
                        .get_or_insert_with(Vec::new)
                        .extend(additional_invocations);
                }
            }
            None => {
                slot_by_tool.insert(run.tool.driver.name.clone(), consolidated.len());
                consolidated.push(run);
            }
        }
    }

    consolidated
}
fn deduplicate_results(&self, mut runs: Vec<Run>, stats: &mut MergeStats) -> Vec<Run> {
for run in &mut runs {
if let Some(ref mut results) = run.results {
let original_count = results.len();
let mut seen_fingerprints = HashSet::new();
results.retain(|result| {
let fingerprint = self.compute_result_fingerprint(result);
if seen_fingerprints.contains(&fingerprint) {
false
} else {
seen_fingerprints.insert(fingerprint);
true
}
});
stats.deduplicated_results += original_count - results.len();
}
}
runs
}
fn compute_result_fingerprint(&self, result: &SarifResult) -> String {
let mut parts = Vec::new();
if let Some(ref rule_id) = result.rule_id {
parts.push(rule_id.clone());
}
if let Some(ref message) = result.message.text {
parts.push(message.clone());
}
if let Some(ref locations) = result.locations {
for location in locations {
if let Some(ref physical_location) = location.physical_location {
if let Some(ref artifact_location) = physical_location.artifact_location
&& let Some(ref uri) = artifact_location.uri
{
parts.push(uri.clone());
}
if let Some(ref region) = physical_location.region {
parts.push(format!(
"{}:{}",
region.start_line.unwrap_or(0),
region.start_column.unwrap_or(0)
));
}
}
}
}
parts.join("|")
}
}
/// Compares two SARIF logs according to a `DiffConfig`.
pub struct SarifDiffer {
// Settings applied by every call to `diff`.
config: DiffConfig,
}
impl SarifDiffer {
pub fn new() -> Self {
Self {
config: DiffConfig::default(),
}
}
/// Creates a differ that uses the supplied configuration.
pub fn with_config(config: DiffConfig) -> Self {
Self { config }
}
/// Compares `baseline` with `comparison` and classifies every indexed result.
///
/// * A comparison result with no baseline match is *added*.
/// * A baseline result with no comparison match is *removed*.
/// * A matched pair is *modified* when `compute_result_changes` reports at
///   least one change, otherwise *unchanged* (the baseline side is stored).
///
/// The `include_added`, `include_removed` and `include_modified` switches
/// suppress the corresponding list; the counters in the returned stats are
/// the lengths of the lists actually returned. Results are visited in the
/// iteration order of the index maps, so the order inside each list is not
/// specified.
///
/// # Errors
/// The current implementation always returns `Ok`.
pub fn diff(&self, baseline: &SarifLog, comparison: &SarifLog) -> ParseResult<DiffResult> {
    let baseline_index = SarifIndex::from_sarif_log(baseline);
    let comparison_index = SarifIndex::from_sarif_log(comparison);

    // Borrow the indexed results instead of deep-cloning both maps.
    let baseline_results: &HashMap<String, (SarifResult, ResultLocation)> =
        &baseline_index.results;
    let comparison_results: &HashMap<String, (SarifResult, ResultLocation)> =
        &comparison_index.results;

    let mut added = Vec::new();
    let mut removed = Vec::new();
    let mut modified = Vec::new();
    let mut unchanged = Vec::new();

    // Pass 1: comparison-side results without a baseline counterpart.
    if self.config.include_added {
        for (key, (result, location)) in comparison_results {
            if self
                .find_matching_result(key, result, baseline_results)
                .is_none()
            {
                added.push((result.clone(), location.clone()));
            }
        }
    }

    // Pass 2: a single walk over the baseline settles removed / modified / unchanged,
    // so each baseline result is matched only once.
    for (key, (baseline_result, baseline_location)) in baseline_results {
        match self.find_matching_result(key, baseline_result, comparison_results) {
            Some((_, (comparison_result, comparison_location))) => {
                let changes = self.compute_result_changes(baseline_result, &comparison_result);
                if changes.is_empty() {
                    unchanged.push((baseline_result.clone(), baseline_location.clone()));
                } else if self.config.include_modified {
                    modified.push(ResultDiff {
                        baseline: (baseline_result.clone(), baseline_location.clone()),
                        // The match is already an owned copy; move it instead of cloning again.
                        comparison: (comparison_result, comparison_location),
                        changes,
                    });
                }
            }
            None => {
                if self.config.include_removed {
                    removed.push((baseline_result.clone(), baseline_location.clone()));
                }
            }
        }
    }

    let stats = DiffStats {
        baseline_count: baseline_results.len(),
        comparison_count: comparison_results.len(),
        added_count: added.len(),
        removed_count: removed.len(),
        modified_count: modified.len(),
        unchanged_count: unchanged.len(),
    };

    Ok(DiffResult {
        added,
        removed,
        modified,
        unchanged,
        stats,
    })
}
fn find_matching_result(
&self,
key: &str,
result: &SarifResult,
results: &HashMap<String, (SarifResult, ResultLocation)>,
) -> Option<(String, (SarifResult, ResultLocation))> {
match self.config.matching_strategy {
ResultMatchingStrategy::ByGuid => results
.get(key)
.map(|(r, l)| (key.to_string(), (r.clone(), l.clone()))),
ResultMatchingStrategy::ByRuleAndLocation => {
let target_signature = self.compute_rule_location_signature(result);
for (other_key, (other_result, other_location)) in results {
let other_signature = self.compute_rule_location_signature(other_result);
if target_signature == other_signature {
return Some((
other_key.clone(),
(other_result.clone(), other_location.clone()),
));
}
}
None
}
ResultMatchingStrategy::ByRuleAndMessage => {
let target_signature = self.compute_rule_message_signature(result);
for (other_key, (other_result, other_location)) in results {
let other_signature = self.compute_rule_message_signature(other_result);
if target_signature == other_signature {
return Some((
other_key.clone(),
(other_result.clone(), other_location.clone()),
));
}
}
None
}
ResultMatchingStrategy::ByFingerprint => {
if let Some(ref fingerprints) = result.fingerprints {
for (other_key, (other_result, other_location)) in results {
if let Some(ref other_fingerprints) = other_result.fingerprints {
for (fp_key, fp_value) in fingerprints {
if let Some(other_fp_value) = other_fingerprints.get(fp_key)
&& fp_value == other_fp_value
{
return Some((
other_key.clone(),
(other_result.clone(), other_location.clone()),
));
}
}
}
}
}
None
}
}
}
fn compute_rule_location_signature(&self, result: &SarifResult) -> String {
let mut parts = Vec::new();
if let Some(ref rule_id) = result.rule_id {
parts.push(rule_id.clone());
}
if let Some(ref locations) = result.locations
&& let Some(location) = locations.first()
&& let Some(ref physical_location) = location.physical_location
{
if let Some(ref artifact_location) = physical_location.artifact_location
&& let Some(ref uri) = artifact_location.uri
{
parts.push(uri.clone());
}
if let Some(ref region) = physical_location.region {
parts.push(format!(
"{}:{}",
region.start_line.unwrap_or(0),
region.start_column.unwrap_or(0)
));
}
}
parts.join("|")
}
fn compute_rule_message_signature(&self, result: &SarifResult) -> String {
let mut parts = Vec::new();
if let Some(ref rule_id) = result.rule_id {
parts.push(rule_id.clone());
}
if let Some(ref message) = result.message.text {
parts.push(message.clone());
}
parts.join("|")
}
/// Lists the differences between a baseline result and its matched counterpart.
///
/// Changes are reported in a fixed order: message, level, rule, location.
/// Message and location differences are skipped when the corresponding
/// `ignore_*` switch is set; level and rule differences are always reported.
fn compute_result_changes(
    &self,
    baseline: &SarifResult,
    comparison: &SarifResult,
) -> Vec<ResultChange> {
    let settings = &self.config;
    let mut detected = Vec::new();

    let (old_text, new_text) = (&baseline.message.text, &comparison.message.text);
    if old_text != new_text && !settings.ignore_message_changes {
        detected.push(ResultChange::MessageChanged {
            old: old_text.clone().unwrap_or_default(),
            new: new_text.clone().unwrap_or_default(),
        });
    }

    if baseline.level != comparison.level {
        detected.push(ResultChange::LevelChanged {
            old: baseline.level.clone(),
            new: comparison.level.clone(),
        });
    }

    if baseline.rule_id != comparison.rule_id {
        detected.push(ResultChange::RuleChanged {
            old: baseline.rule_id.clone(),
            new: comparison.rule_id.clone(),
        });
    }

    if !settings.ignore_location_changes {
        let old = self.extract_location_string(baseline);
        let new = self.extract_location_string(comparison);
        if old != new {
            detected.push(ResultChange::LocationChanged { old, new });
        }
    }

    detected
}
fn extract_location_string(&self, result: &SarifResult) -> String {
if let Some(ref locations) = result.locations
&& let Some(location) = locations.first()
&& let Some(ref physical_location) = location.physical_location
{
let mut parts = Vec::new();
if let Some(ref artifact_location) = physical_location.artifact_location
&& let Some(ref uri) = artifact_location.uri
{
parts.push(uri.clone());
}
if let Some(ref region) = physical_location.region {
parts.push(format!(
"{}:{}",
region.start_line.unwrap_or(0),
region.start_column.unwrap_or(0)
));
}
return parts.join(":");
}
"unknown".to_string()
}
}
/// Default merge settings: keep runs separate, drop duplicate results, use the
/// latest version, no path normalization, no run limit and no filters.
impl Default for MergeConfig {
fn default() -> Self {
Self {
consolidate_runs: false,
// Deduplication is the only transformation enabled by default.
deduplicate_results: true,
version_strategy: VersionMergeStrategy::UseLatest,
normalize_paths: false,
max_runs: None,
filters: MergeFilters::default(),
}
}
}
/// Default diff settings: match by index key (`ByGuid`), report added, removed
/// and modified results, and treat message and location differences as changes.
impl Default for DiffConfig {
fn default() -> Self {
Self {
matching_strategy: ResultMatchingStrategy::ByGuid,
include_removed: true,
include_added: true,
include_modified: true,
ignore_message_changes: false,
ignore_location_changes: false,
}
}
}
/// Equivalent to `SarifMerger::new()`.
impl Default for SarifMerger {
fn default() -> Self {
Self::new()
}
}
/// Equivalent to `SarifDiffer::new()`.
impl Default for SarifDiffer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::SarifLogBuilder;
// Merging two logs from different tools with the default configuration is
// expected to keep both runs side by side.
#[test]
fn test_simple_merge() {
let log1 =
SarifLogBuilder::single_error("tool1", "Error 1", "file1.rs", 10).build_unchecked();
let log2 =
SarifLogBuilder::single_warning("tool2", "Warning 1", "file2.rs", 20).build_unchecked();
let merger = SarifMerger::new();
let result = merger.merge(&[log1, log2]).unwrap();
// One run per input log in, the same two runs out.
assert_eq!(result.stats.input_logs, 2);
assert_eq!(result.stats.input_runs, 2);
assert_eq!(result.stats.output_runs, 2);
assert_eq!(result.merged_log.runs.len(), 2);
}
// With `consolidate_runs` enabled, two runs of the same tool are expected to
// collapse into one run that carries the results of both.
#[test]
fn test_merge_with_consolidation() {
let log1 =
SarifLogBuilder::single_error("same-tool", "Error 1", "file1.rs", 10).build_unchecked();
let log2 = SarifLogBuilder::single_warning("same-tool", "Warning 1", "file2.rs", 20)
.build_unchecked();
let config = MergeConfig {
consolidate_runs: true,
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1, log2]).unwrap();
assert_eq!(result.stats.input_runs, 2);
assert_eq!(result.stats.output_runs, 1);
// Exactly one run was folded into the other.
assert_eq!(result.stats.consolidated_runs, 1);
let merged_run = &result.merged_log.runs[0];
assert_eq!(merged_run.tool.driver.name, "same-tool");
assert_eq!(merged_run.results.as_ref().unwrap().len(), 2);
}
// Two logs built from identical arguments are consolidated into one run, after
// which deduplication is expected to keep a single copy of the finding.
#[test]
fn test_merge_with_deduplication() {
let log1 = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Duplicate error",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
let log2 = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Duplicate error",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
// Consolidation is required here: duplicates are only detected within one run.
let config = MergeConfig {
consolidate_runs: true,
deduplicate_results: true,
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1, log2]).unwrap();
assert_eq!(result.stats.input_results, 2);
assert_eq!(result.stats.output_results, 1);
assert_eq!(result.stats.deduplicated_results, 1);
}
// An `include_tools` filter naming only "tool1" is expected to drop the run
// produced by "tool2".
#[test]
fn test_merge_with_filters() {
let log1 =
SarifLogBuilder::single_error("tool1", "Error 1", "file1.rs", 10).build_unchecked();
let log2 =
SarifLogBuilder::single_warning("tool2", "Warning 1", "file2.rs", 20).build_unchecked();
let mut include_tools = HashSet::new();
include_tools.insert("tool1".to_string());
let config = MergeConfig {
filters: MergeFilters {
include_tools: Some(include_tools),
..Default::default()
},
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1, log2]).unwrap();
assert_eq!(result.stats.output_runs, 1);
assert_eq!(result.merged_log.runs[0].tool.driver.name, "tool1");
}
// Two unrelated findings are expected not to match under rule+location
// matching, so one is reported as removed and the other as added.
#[test]
fn test_simple_diff() {
let baseline =
SarifLogBuilder::single_error("tool", "Error 1", "file.rs", 10).build_unchecked();
let comparison =
SarifLogBuilder::single_warning("tool", "Warning 1", "file.rs", 20).build_unchecked();
let config = DiffConfig {
matching_strategy: ResultMatchingStrategy::ByRuleAndLocation,
..Default::default()
};
let differ = SarifDiffer::with_config(config);
let result = differ.diff(&baseline, &comparison).unwrap();
assert_eq!(result.stats.baseline_count, 1);
assert_eq!(result.stats.comparison_count, 1);
assert_eq!(result.stats.added_count, 1);
assert_eq!(result.stats.removed_count, 1);
assert_eq!(result.stats.unchanged_count, 0);
}
// Findings built with the same rule and position arguments but a different
// message are expected to be paired up and reported as a single modification.
#[test]
fn test_diff_with_rule_location_matching() {
let baseline = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Original message",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
let comparison = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Updated message",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
let config = DiffConfig {
matching_strategy: ResultMatchingStrategy::ByRuleAndLocation,
..Default::default()
};
let differ = SarifDiffer::with_config(config);
let result = differ.diff(&baseline, &comparison).unwrap();
assert_eq!(result.stats.modified_count, 1);
assert_eq!(result.stats.added_count, 0);
assert_eq!(result.stats.removed_count, 0);
// The only difference between the pair is the message text.
let modified = &result.modified[0];
assert_eq!(modified.changes.len(), 1);
assert!(matches!(
modified.changes[0],
ResultChange::MessageChanged { .. }
));
}
// With `ignore_message_changes` set, a pair that differs only in its message
// is expected to be classified as unchanged.
#[test]
fn test_diff_ignore_message_changes() {
let baseline = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Original message",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
let comparison = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Updated message",
"file.rs",
10,
5,
10,
15,
)
.build_unchecked();
let config = DiffConfig {
matching_strategy: ResultMatchingStrategy::ByRuleAndLocation,
ignore_message_changes: true,
..Default::default()
};
let differ = SarifDiffer::with_config(config);
let result = differ.diff(&baseline, &comparison).unwrap();
assert_eq!(result.stats.unchanged_count, 1);
assert_eq!(result.stats.modified_count, 0);
}
// Exercises three version strategies against logs versioned 2.0.0 and 2.1.0.
#[test]
fn test_version_merge_strategies() {
let mut log1 =
SarifLogBuilder::single_error("tool", "Error 1", "file.rs", 10).build_unchecked();
log1.version = "2.0.0".to_string();
let mut log2 =
SarifLogBuilder::single_error("tool", "Error 2", "file.rs", 20).build_unchecked();
log2.version = "2.1.0".to_string();
// UseLatest: the higher of the two versions wins.
let config = MergeConfig {
version_strategy: VersionMergeStrategy::UseLatest,
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1.clone(), log2.clone()]).unwrap();
assert_eq!(result.merged_log.version, "2.1.0");
// UseFirst: the version of the first log in the slice wins.
let config = MergeConfig {
version_strategy: VersionMergeStrategy::UseFirst,
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1.clone(), log2.clone()]).unwrap();
assert_eq!(result.merged_log.version, "2.0.0");
// RequireMatch: differing versions make the merge fail.
let config = MergeConfig {
version_strategy: VersionMergeStrategy::RequireMatch,
..Default::default()
};
let merger = SarifMerger::with_config(config);
let result = merger.merge(&[log1, log2]);
assert!(result.is_err());
}
}