use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismConfig {
pub canon_mode: CanonMode,
pub sort_strategy: SortStrategy,
pub custom_sort_order: Option<IndexMap<String, Vec<String>>>,
pub namespace_strategy: NamespaceStrategy,
pub locked_prefixes: IndexMap<String, String>,
pub output_mode: OutputMode,
pub line_ending: LineEnding,
pub indent_char: IndentChar,
pub indent_width: usize,
pub unicode_normalization: UnicodeNormalization,
pub xml_character_policy: XmlCharacterPolicy,
pub quote_style: QuoteStyle,
pub time_zone_policy: TimeZonePolicy,
pub date_time_format: DateTimeFormat,
pub emit_reproducibility_banner: bool,
pub verify_determinism: Option<usize>,
}
impl Default for DeterminismConfig {
fn default() -> Self {
Self {
canon_mode: CanonMode::DbC14n,
sort_strategy: SortStrategy::Canonical,
custom_sort_order: None,
namespace_strategy: NamespaceStrategy::Locked,
locked_prefixes: Self::default_namespace_prefixes(),
output_mode: OutputMode::DbC14n,
line_ending: LineEnding::LF,
indent_char: IndentChar::Space,
indent_width: 2,
unicode_normalization: UnicodeNormalization::NFC,
xml_character_policy: XmlCharacterPolicy::Escape,
quote_style: QuoteStyle::Double,
time_zone_policy: TimeZonePolicy::UTC,
date_time_format: DateTimeFormat::ISO8601Z,
emit_reproducibility_banner: false,
verify_determinism: None,
}
}
}
impl DeterminismConfig {
fn default_namespace_prefixes() -> IndexMap<String, String> {
let mut prefixes = IndexMap::new();
prefixes.insert("http://ddex.net/xml/ern/43".to_string(), "ern".to_string());
prefixes.insert("http://ddex.net/xml/ern/42".to_string(), "ern".to_string());
prefixes.insert("http://ddex.net/xml/ern/382".to_string(), "ern".to_string());
prefixes.insert("http://ddex.net/xml/avs".to_string(), "avs".to_string());
prefixes.insert(
"http://www.w3.org/2001/XMLSchema-instance".to_string(),
"xsi".to_string(),
);
prefixes
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CanonMode {
DbC14n,
Pretty,
Compact,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SortStrategy {
Canonical,
InputOrder,
Custom,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum NamespaceStrategy {
Locked,
Inherit,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OutputMode {
DbC14n,
Pretty,
Compact,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LineEnding {
LF,
CRLF,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndentChar {
Space,
Tab,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnicodeNormalization {
NFC,
NFD,
NFKC,
NFKD,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum XmlCharacterPolicy {
Escape,
CData,
Reject,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuoteStyle {
Double,
Single,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeZonePolicy {
UTC,
Preserve,
Local,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DateTimeFormat {
ISO8601Z,
ISO8601,
Custom,
}
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismResult {
pub is_deterministic: bool,
pub iterations: usize,
pub outputs: Vec<String>,
pub hashes: Vec<String>,
pub differences: Vec<DeterminismDifference>,
pub runtime_stats: DeterminismStats,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeterminismDifference {
pub iteration1: usize,
pub iteration2: usize,
pub first_difference_byte: Option<usize>,
pub hash_difference: HashDifference,
pub length_difference: LengthDifference,
pub context: Option<DifferenceContext>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashDifference {
pub sha256_1: String,
pub sha256_2: String,
pub blake3_1: String,
pub blake3_2: String,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LengthDifference {
pub length_1: usize,
pub length_2: usize,
pub diff: i64,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DifferenceContext {
pub position: usize,
pub before: String,
pub after_1: String,
pub after_2: String,
pub line_number: Option<usize>,
pub column_number: Option<usize>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismStats {
pub total_time_ms: u64,
pub avg_build_time_ms: u64,
pub min_build_time_ms: u64,
pub max_build_time_ms: u64,
pub overhead_percentage: f64,
}
pub struct DeterminismVerifier {
config: DeterminismConfig,
include_outputs: bool,
context_chars: usize,
}
impl DeterminismVerifier {
pub fn new(config: DeterminismConfig) -> Self {
Self {
config,
include_outputs: false,
context_chars: 100,
}
}
pub fn with_outputs_retained(mut self) -> Self {
self.include_outputs = true;
self
}
pub fn with_context_chars(mut self, chars: usize) -> Self {
self.context_chars = chars;
self
}
pub fn verify(
&self,
request: &super::builder::BuildRequest,
iterations: usize,
) -> Result<DeterminismResult, super::error::BuildError> {
if iterations < 2 {
return Ok(DeterminismResult {
is_deterministic: true,
iterations: 1,
outputs: vec![],
hashes: vec![],
differences: vec![],
runtime_stats: DeterminismStats {
total_time_ms: 0,
avg_build_time_ms: 0,
min_build_time_ms: 0,
max_build_time_ms: 0,
overhead_percentage: 0.0,
},
});
}
let start_time = std::time::Instant::now();
let mut results = Vec::with_capacity(iterations);
let mut hashes = Vec::with_capacity(iterations);
let mut build_times = Vec::with_capacity(iterations);
for _ in 0..iterations {
let build_start = std::time::Instant::now();
let builder = super::Builder::with_config(self.config.clone());
let result = builder.build_internal(request)?;
let build_time = build_start.elapsed();
build_times.push(build_time.as_millis() as u64);
let sha256_hash = self.calculate_sha256(&result.xml);
let blake3_hash = self.calculate_blake3(&result.xml);
results.push(result.xml);
hashes.push((sha256_hash, blake3_hash));
}
let total_time = start_time.elapsed().as_millis() as u64;
let mut differences = Vec::new();
let first_output = &results[0];
let first_hashes = &hashes[0];
for (i, (output, hash_pair)) in results[1..].iter().zip(hashes[1..].iter()).enumerate() {
if output != first_output || hash_pair != first_hashes {
let diff = self.analyze_difference(
first_output,
output,
&first_hashes,
hash_pair,
0,
i + 1,
);
differences.push(diff);
}
}
let min_time = *build_times.iter().min().unwrap_or(&0);
let max_time = *build_times.iter().max().unwrap_or(&0);
let avg_time = if !build_times.is_empty() {
build_times.iter().sum::<u64>() / build_times.len() as u64
} else {
0
};
let overhead = if iterations > 1 && min_time > 0 {
((total_time - min_time) as f64 / min_time as f64) * 100.0
} else {
0.0
};
let outputs = if self.include_outputs {
results
} else {
vec![]
};
let final_hashes = hashes.into_iter().map(|(sha256, _)| sha256).collect();
Ok(DeterminismResult {
is_deterministic: differences.is_empty(),
iterations,
outputs,
hashes: final_hashes,
differences,
runtime_stats: DeterminismStats {
total_time_ms: total_time,
avg_build_time_ms: avg_time,
min_build_time_ms: min_time,
max_build_time_ms: max_time,
overhead_percentage: overhead,
},
})
}
pub fn verify_legacy(
request: &super::builder::BuildRequest,
config: &DeterminismConfig,
iterations: usize,
) -> Result<bool, super::error::BuildError> {
let verifier = Self::new(config.clone());
let result = verifier.verify(request, iterations)?;
Ok(result.is_deterministic)
}
pub fn verify_with_hashmap_stress(
&self,
request: &super::builder::BuildRequest,
iterations: usize,
) -> Result<DeterminismResult, super::error::BuildError> {
use std::collections::HashMap;
for i in 0..iterations {
let mut dummy_map = HashMap::new();
for j in 0..(i % 10 + 1) {
dummy_map.insert(format!("key_{}", j), format!("value_{}", j));
}
let _: Vec<_> = dummy_map.iter().collect();
}
self.verify(request, iterations)
}
pub fn verify_with_threading_stress(
&self,
request: &super::builder::BuildRequest,
iterations: usize,
) -> Result<DeterminismResult, super::error::BuildError> {
use std::sync::Arc;
use std::sync::Mutex;
use std::thread;
let results = Arc::new(Mutex::new(Vec::new()));
let mut handles = vec![];
for _ in 0..iterations {
let results_clone = Arc::clone(&results);
let request_clone = request.clone();
let config = self.config.clone();
let handle = thread::spawn(move || {
let builder = super::Builder::with_config(config);
let result = builder.build_internal(&request_clone);
results_clone.lock().unwrap().push(result);
});
handles.push(handle);
}
for handle in handles {
handle.join().unwrap();
}
let _thread_results = results.lock().unwrap();
self.verify(request, iterations)
}
fn calculate_sha256(&self, data: &str) -> String {
use sha2::{Digest, Sha256};
let mut hasher = Sha256::new();
hasher.update(data.as_bytes());
format!("{:x}", hasher.finalize())
}
fn calculate_blake3(&self, data: &str) -> String {
let hash = blake3::hash(data.as_bytes());
hash.to_hex().to_string()
}
fn analyze_difference(
&self,
output1: &str,
output2: &str,
hashes1: &(String, String),
hashes2: &(String, String),
iter1: usize,
iter2: usize,
) -> DeterminismDifference {
let first_diff_byte = self.find_first_difference(output1, output2);
let context =
first_diff_byte.map(|pos| self.create_difference_context(output1, output2, pos));
DeterminismDifference {
iteration1: iter1,
iteration2: iter2,
first_difference_byte: first_diff_byte,
hash_difference: HashDifference {
sha256_1: hashes1.0.clone(),
sha256_2: hashes2.0.clone(),
blake3_1: hashes1.1.clone(),
blake3_2: hashes2.1.clone(),
},
length_difference: LengthDifference {
length_1: output1.len(),
length_2: output2.len(),
diff: output2.len() as i64 - output1.len() as i64,
},
context,
}
}
fn find_first_difference(&self, a: &str, b: &str) -> Option<usize> {
a.bytes()
.zip(b.bytes())
.position(|(x, y)| x != y)
.or_else(|| {
if a.len() != b.len() {
Some(std::cmp::min(a.len(), b.len()))
} else {
None
}
})
}
fn create_difference_context(
&self,
output1: &str,
output2: &str,
pos: usize,
) -> DifferenceContext {
let start = pos.saturating_sub(self.context_chars / 2);
let end1 = std::cmp::min(pos + self.context_chars / 2, output1.len());
let end2 = std::cmp::min(pos + self.context_chars / 2, output2.len());
let (line, col) = self.calculate_line_col(output1, pos);
DifferenceContext {
position: pos,
before: output1[start..pos].to_string(),
after_1: output1[pos..end1].to_string(),
after_2: output2[pos..end2].to_string(),
line_number: line,
column_number: col,
}
}
fn calculate_line_col(&self, text: &str, pos: usize) -> (Option<usize>, Option<usize>) {
if pos >= text.len() {
return (None, None);
}
let before_pos = &text[..pos];
let line_num = before_pos.lines().count();
let last_line_start = before_pos.rfind('\n').map(|i| i + 1).unwrap_or(0);
let col_num = pos - last_line_start + 1;
(Some(line_num), Some(col_num))
}
}
impl DeterminismVerifier {
pub fn quick_check(
request: &super::builder::BuildRequest,
) -> Result<bool, super::error::BuildError> {
let config = DeterminismConfig::default();
let verifier = Self::new(config);
let result = verifier.verify(request, 3)?;
Ok(result.is_deterministic)
}
pub fn thorough_check(
request: &super::builder::BuildRequest,
iterations: usize,
) -> Result<DeterminismResult, super::error::BuildError> {
let config = DeterminismConfig::default();
let verifier = Self::new(config).with_outputs_retained();
let standard_result = verifier.verify(request, iterations)?;
if !standard_result.is_deterministic {
return Ok(standard_result);
}
let hashmap_result = verifier.verify_with_hashmap_stress(request, iterations)?;
if !hashmap_result.is_deterministic {
return Ok(hashmap_result);
}
Ok(standard_result)
}
}