1use indexmap::IndexMap;
113use serde::{Deserialize, Serialize};
114
/// Settings that control how output is canonicalized so that repeated builds
/// of the same request can be compared byte for byte.
///
/// The values produced by `DeterminismConfig::default()` are noted per field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeterminismConfig {
    /// Canonicalization mode. Default: `CanonMode::DbC14n`.
    pub canon_mode: CanonMode,

    /// Element ordering strategy. Default: `SortStrategy::Canonical`.
    pub sort_strategy: SortStrategy,

    /// Optional ordering table. Default: `None`.
    /// NOTE(review): presumably consulted only when `sort_strategy` is
    /// `SortStrategy::Custom`, mapping a parent name to the desired child
    /// order — confirm against the builder.
    pub custom_sort_order: Option<IndexMap<String, Vec<String>>>,

    /// Namespace prefix strategy. Default: `NamespaceStrategy::Locked`.
    pub namespace_strategy: NamespaceStrategy,

    /// Namespace URI -> prefix table. The default table covers the DDEX ERN
    /// 4.3 / 4.2 / 3.8.2 namespaces (`ern`), AVS (`avs`) and XML Schema
    /// instance (`xsi`).
    pub locked_prefixes: IndexMap<String, String>,

    /// Output formatting mode. Default: `OutputMode::DbC14n`.
    pub output_mode: OutputMode,
    /// Line terminator. Default: `LineEnding::LF`.
    pub line_ending: LineEnding,
    /// Indentation character. Default: `IndentChar::Space`.
    pub indent_char: IndentChar,
    /// Number of `indent_char` repetitions per level. Default: `2`.
    pub indent_width: usize,

    /// Unicode normalization form. Default: `UnicodeNormalization::NFC`.
    pub unicode_normalization: UnicodeNormalization,
    /// Policy for XML special characters. Default: `XmlCharacterPolicy::Escape`.
    pub xml_character_policy: XmlCharacterPolicy,
    /// Attribute quoting style. Default: `QuoteStyle::Double`.
    pub quote_style: QuoteStyle,

    /// Time zone handling. Default: `TimeZonePolicy::UTC`.
    pub time_zone_policy: TimeZonePolicy,
    /// Date/time rendering. Default: `DateTimeFormat::ISO8601Z`.
    pub date_time_format: DateTimeFormat,

    /// Whether to emit a reproducibility banner. Default: `false`.
    pub emit_reproducibility_banner: bool,
    /// Optional determinism self-check setting. Default: `None`.
    /// NOTE(review): presumably the number of verification iterations to run
    /// after a build — confirm against the builder.
    pub verify_determinism: Option<usize>,
}
152
153impl Default for DeterminismConfig {
154 fn default() -> Self {
155 Self {
156 canon_mode: CanonMode::DbC14n,
157 sort_strategy: SortStrategy::Canonical,
158 custom_sort_order: None,
159 namespace_strategy: NamespaceStrategy::Locked,
160 locked_prefixes: Self::default_namespace_prefixes(),
161 output_mode: OutputMode::DbC14n,
162 line_ending: LineEnding::LF,
163 indent_char: IndentChar::Space,
164 indent_width: 2,
165 unicode_normalization: UnicodeNormalization::NFC,
166 xml_character_policy: XmlCharacterPolicy::Escape,
167 quote_style: QuoteStyle::Double,
168 time_zone_policy: TimeZonePolicy::UTC,
169 date_time_format: DateTimeFormat::ISO8601Z,
170 emit_reproducibility_banner: false,
171 verify_determinism: None,
172 }
173 }
174}
175
176impl DeterminismConfig {
177 fn default_namespace_prefixes() -> IndexMap<String, String> {
178 let mut prefixes = IndexMap::new();
179 prefixes.insert("http://ddex.net/xml/ern/43".to_string(), "ern".to_string());
180 prefixes.insert("http://ddex.net/xml/ern/42".to_string(), "ern".to_string());
181 prefixes.insert("http://ddex.net/xml/ern/382".to_string(), "ern".to_string());
182 prefixes.insert("http://ddex.net/xml/avs".to_string(), "avs".to_string());
183 prefixes.insert("http://www.w3.org/2001/XMLSchema-instance".to_string(), "xsi".to_string());
184 prefixes
185 }
186}
187
/// Canonicalization mode stored in `DeterminismConfig::canon_mode`.
///
/// The variant names mirror `OutputMode`. How each mode shapes the generated
/// XML is decided by the builder, which is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CanonMode {
    /// Mode selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably the DB-C14N canonical form — confirm.
    DbC14n,
    /// NOTE(review): presumably human-readable, indented output — confirm.
    Pretty,
    /// NOTE(review): presumably whitespace-minimized output — confirm.
    Compact,
}
198
/// Element ordering strategy stored in `DeterminismConfig::sort_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SortStrategy {
    /// Strategy selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably a fixed, schema-defined order — confirm.
    Canonical,
    /// NOTE(review): presumably keeps the order found in the input — confirm.
    InputOrder,
    /// NOTE(review): presumably driven by
    /// `DeterminismConfig::custom_sort_order` — confirm.
    Custom,
}
209
/// Namespace prefix strategy stored in `DeterminismConfig::namespace_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum NamespaceStrategy {
    /// Strategy selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably forces the prefixes listed in
    /// `DeterminismConfig::locked_prefixes` — confirm.
    Locked,
    /// NOTE(review): presumably reuses the prefixes found in the input — confirm.
    Inherit,
}
218
/// Output formatting mode stored in `DeterminismConfig::output_mode`.
///
/// The variant names mirror `CanonMode`. The relationship between the two
/// settings is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OutputMode {
    /// Mode selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably the DB-C14N canonical form — confirm.
    DbC14n,
    /// NOTE(review): presumably human-readable, indented output — confirm.
    Pretty,
    /// NOTE(review): presumably whitespace-minimized output — confirm.
    Compact,
}
229
/// Line terminator choice stored in `DeterminismConfig::line_ending`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LineEnding {
    /// Choice selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably a single line feed (`\n`) — confirm.
    LF,
    /// NOTE(review): presumably carriage return + line feed (`\r\n`) — confirm.
    CRLF,
}
238
/// Indentation character stored in `DeterminismConfig::indent_char`; the
/// repetition count per level is configured separately by `indent_width`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndentChar {
    /// Character selected by `DeterminismConfig::default()`.
    Space,
    /// Alternative to `Space`.
    Tab,
}
247
/// Unicode normalization choice stored in
/// `DeterminismConfig::unicode_normalization`.
///
/// The variants are named after the four Unicode normalization forms. Where
/// the normalization is applied is decided outside this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnicodeNormalization {
    /// Form selected by `DeterminismConfig::default()`.
    NFC,
    /// Named after normalization form D.
    NFD,
    /// Named after normalization form KC.
    NFKC,
    /// Named after normalization form KD.
    NFKD,
}
260
/// Policy for XML special characters, stored in
/// `DeterminismConfig::xml_character_policy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum XmlCharacterPolicy {
    /// Policy selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably emits entity escapes — confirm.
    Escape,
    /// NOTE(review): presumably wraps affected text in CDATA sections — confirm.
    CData,
    /// NOTE(review): presumably fails the build on such characters — confirm.
    Reject,
}
271
/// Quoting style stored in `DeterminismConfig::quote_style`.
/// NOTE(review): presumably applies to XML attribute values — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuoteStyle {
    /// Style selected by `DeterminismConfig::default()`.
    Double,
    /// Alternative to `Double`.
    Single,
}
280
/// Time zone handling stored in `DeterminismConfig::time_zone_policy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeZonePolicy {
    /// Policy selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably converts timestamps to UTC — confirm.
    UTC,
    /// NOTE(review): presumably keeps the zone found in the input — confirm.
    Preserve,
    /// NOTE(review): presumably uses the machine's local zone, which would
    /// make output depend on the build environment — confirm.
    Local,
}
291
/// Date/time rendering stored in `DeterminismConfig::date_time_format`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DateTimeFormat {
    /// Format selected by `DeterminismConfig::default()`.
    /// NOTE(review): presumably ISO 8601 with a trailing `Z` designator — confirm.
    ISO8601Z,
    /// NOTE(review): presumably ISO 8601 with an explicit offset — confirm.
    ISO8601,
    /// NOTE(review): no format string is carried by this variant or by
    /// `DeterminismConfig`; confirm where the custom format comes from.
    Custom,
}
302
/// Report produced by `DeterminismVerifier::verify` and its stress variants.
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismResult {
    /// `true` when no differences were recorded.
    pub is_deterministic: bool,
    /// Number of builds requested; reported as `1` when fewer than two were
    /// requested and nothing was built.
    pub iterations: usize,
    /// Generated XML per build; empty unless the verifier was configured with
    /// `with_outputs_retained`.
    pub outputs: Vec<String>,
    /// SHA-256 hex digest of each build's output, in build order.
    pub hashes: Vec<String>,
    /// One entry per build whose output or hashes differ from the first build.
    pub differences: Vec<DeterminismDifference>,
    /// Timing figures collected while building.
    pub runtime_stats: DeterminismStats,
}
313
/// Description of how one build's output differs from a reference build.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeterminismDifference {
    /// Index of the reference build (`verify` always passes `0`).
    pub iteration1: usize,
    /// Index of the build that differs from the reference.
    pub iteration2: usize,
    /// Byte offset of the first mismatch; when one output is a strict prefix
    /// of the other this is the shorter length. `None` if the texts are equal.
    pub first_difference_byte: Option<usize>,
    /// Digests of both outputs.
    pub hash_difference: HashDifference,
    /// Byte lengths of both outputs.
    pub length_difference: LengthDifference,
    /// Text surrounding the mismatch; present whenever
    /// `first_difference_byte` is `Some`.
    pub context: Option<DifferenceContext>,
}
324
/// Hex digests of the two outputs being compared; the `_1` fields belong to
/// the reference output and the `_2` fields to the differing one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashDifference {
    /// SHA-256 digest of the reference output.
    pub sha256_1: String,
    /// SHA-256 digest of the differing output.
    pub sha256_2: String,
    /// BLAKE3 digest of the reference output.
    pub blake3_1: String,
    /// BLAKE3 digest of the differing output.
    pub blake3_2: String,
}
333
/// Byte lengths of the two outputs being compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LengthDifference {
    /// Length in bytes of the reference output.
    pub length_1: usize,
    /// Length in bytes of the differing output.
    pub length_2: usize,
    /// `length_2 - length_1`; negative when the differing output is shorter.
    pub diff: i64,
}
341
/// Text surrounding the first mismatch between two outputs.
///
/// The window extends half of the verifier's `context_chars` setting (counted
/// in bytes) to each side of `position`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DifferenceContext {
    /// Byte offset of the first mismatch.
    pub position: usize,
    /// Text of the reference output immediately preceding `position`.
    pub before: String,
    /// Text of the reference output starting at `position`.
    pub after_1: String,
    /// Text of the differing output starting at `position`.
    pub after_2: String,
    /// Line of `position` within the reference output; `None` when `position`
    /// is at or past the end of that output.
    pub line_number: Option<usize>,
    /// Column of `position` within its line, 1-based and counted in bytes;
    /// `None` under the same condition as `line_number`.
    pub column_number: Option<usize>,
}
352
/// Timing figures gathered during a verification run; all durations are whole
/// milliseconds.
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismStats {
    /// Wall-clock time of the whole build loop, including hashing.
    pub total_time_ms: u64,
    /// Integer mean of the individual build times.
    pub avg_build_time_ms: u64,
    /// Fastest individual build.
    pub min_build_time_ms: u64,
    /// Slowest individual build.
    pub max_build_time_ms: u64,
    /// `(total_time_ms - min_build_time_ms) / min_build_time_ms * 100`, or
    /// `0.0` when the fastest build took less than one millisecond.
    pub overhead_percentage: f64,
}
362
/// Builds the same request repeatedly and reports whether every build yields
/// identical output.
pub struct DeterminismVerifier {
    /// Configuration handed to every builder created during verification.
    config: DeterminismConfig,
    /// When `true`, the generated outputs are kept in the result.
    include_outputs: bool,
    /// Total size, in bytes, of the context window captured around a
    /// difference (half before, half after).
    context_chars: usize,
}
369
370impl DeterminismVerifier {
371 pub fn new(config: DeterminismConfig) -> Self {
373 Self {
374 config,
375 include_outputs: false,
376 context_chars: 100,
377 }
378 }
379
380 pub fn with_outputs_retained(mut self) -> Self {
382 self.include_outputs = true;
383 self
384 }
385
386 pub fn with_context_chars(mut self, chars: usize) -> Self {
388 self.context_chars = chars;
389 self
390 }
391
392 pub fn verify(
394 &self,
395 request: &super::builder::BuildRequest,
396 iterations: usize,
397 ) -> Result<DeterminismResult, super::error::BuildError> {
398 if iterations < 2 {
399 return Ok(DeterminismResult {
400 is_deterministic: true,
401 iterations: 1,
402 outputs: vec![],
403 hashes: vec![],
404 differences: vec![],
405 runtime_stats: DeterminismStats {
406 total_time_ms: 0,
407 avg_build_time_ms: 0,
408 min_build_time_ms: 0,
409 max_build_time_ms: 0,
410 overhead_percentage: 0.0,
411 },
412 });
413 }
414
415 let start_time = std::time::Instant::now();
416 let mut results = Vec::with_capacity(iterations);
417 let mut hashes = Vec::with_capacity(iterations);
418 let mut build_times = Vec::with_capacity(iterations);
419
420 for _ in 0..iterations {
422 let build_start = std::time::Instant::now();
423 let builder = super::Builder::with_config(self.config.clone());
424 let result = builder.build_internal(request)?;
425 let build_time = build_start.elapsed();
426 build_times.push(build_time.as_millis() as u64);
427
428 let sha256_hash = self.calculate_sha256(&result.xml);
430 let blake3_hash = self.calculate_blake3(&result.xml);
431
432 results.push(result.xml);
433 hashes.push((sha256_hash, blake3_hash));
434 }
435
436 let total_time = start_time.elapsed().as_millis() as u64;
437
438 let mut differences = Vec::new();
440 let first_output = &results[0];
441 let first_hashes = &hashes[0];
442
443 for (i, (output, hash_pair)) in results[1..].iter().zip(hashes[1..].iter()).enumerate() {
444 if output != first_output || hash_pair != first_hashes {
445 let diff = self.analyze_difference(
446 first_output,
447 output,
448 &first_hashes,
449 hash_pair,
450 0,
451 i + 1,
452 );
453 differences.push(diff);
454 }
455 }
456
457 let min_time = *build_times.iter().min().unwrap_or(&0);
459 let max_time = *build_times.iter().max().unwrap_or(&0);
460 let avg_time = if !build_times.is_empty() {
461 build_times.iter().sum::<u64>() / build_times.len() as u64
462 } else {
463 0
464 };
465
466 let overhead = if iterations > 1 && min_time > 0 {
467 ((total_time - min_time) as f64 / min_time as f64) * 100.0
468 } else {
469 0.0
470 };
471
472 let outputs = if self.include_outputs { results } else { vec![] };
473 let final_hashes = hashes.into_iter().map(|(sha256, _)| sha256).collect();
474
475 Ok(DeterminismResult {
476 is_deterministic: differences.is_empty(),
477 iterations,
478 outputs,
479 hashes: final_hashes,
480 differences,
481 runtime_stats: DeterminismStats {
482 total_time_ms: total_time,
483 avg_build_time_ms: avg_time,
484 min_build_time_ms: min_time,
485 max_build_time_ms: max_time,
486 overhead_percentage: overhead,
487 },
488 })
489 }
490
491 pub fn verify_legacy(
493 request: &super::builder::BuildRequest,
494 config: &DeterminismConfig,
495 iterations: usize,
496 ) -> Result<bool, super::error::BuildError> {
497 let verifier = Self::new(config.clone());
498 let result = verifier.verify(request, iterations)?;
499 Ok(result.is_deterministic)
500 }
501
502 pub fn verify_with_hashmap_stress(
504 &self,
505 request: &super::builder::BuildRequest,
506 iterations: usize,
507 ) -> Result<DeterminismResult, super::error::BuildError> {
508 use std::collections::HashMap;
509
510 for i in 0..iterations {
513 let mut dummy_map = HashMap::new();
514 for j in 0..(i % 10 + 1) {
515 dummy_map.insert(format!("key_{}", j), format!("value_{}", j));
516 }
517 let _: Vec<_> = dummy_map.iter().collect();
519 }
520
521 self.verify(request, iterations)
522 }
523
524 pub fn verify_with_threading_stress(
526 &self,
527 request: &super::builder::BuildRequest,
528 iterations: usize,
529 ) -> Result<DeterminismResult, super::error::BuildError> {
530 use std::sync::Arc;
531 use std::thread;
532 use std::sync::Mutex;
533
534 let results = Arc::new(Mutex::new(Vec::new()));
535 let mut handles = vec![];
536
537 for _ in 0..iterations {
538 let results_clone = Arc::clone(&results);
539 let request_clone = request.clone();
540 let config = self.config.clone();
541
542 let handle = thread::spawn(move || {
543 let builder = super::Builder::with_config(config);
544 let result = builder.build_internal(&request_clone);
545 results_clone.lock().unwrap().push(result);
546 });
547 handles.push(handle);
548 }
549
550 for handle in handles {
552 handle.join().unwrap();
553 }
554
555 let _thread_results = results.lock().unwrap();
556 self.verify(request, iterations)
559 }
560
561 fn calculate_sha256(&self, data: &str) -> String {
562 use sha2::{Sha256, Digest};
563 let mut hasher = Sha256::new();
564 hasher.update(data.as_bytes());
565 format!("{:x}", hasher.finalize())
566 }
567
568 fn calculate_blake3(&self, data: &str) -> String {
569 let hash = blake3::hash(data.as_bytes());
570 hash.to_hex().to_string()
571 }
572
573 fn analyze_difference(
574 &self,
575 output1: &str,
576 output2: &str,
577 hashes1: &(String, String),
578 hashes2: &(String, String),
579 iter1: usize,
580 iter2: usize,
581 ) -> DeterminismDifference {
582 let first_diff_byte = self.find_first_difference(output1, output2);
583
584 let context = first_diff_byte.map(|pos| {
585 self.create_difference_context(output1, output2, pos)
586 });
587
588 DeterminismDifference {
589 iteration1: iter1,
590 iteration2: iter2,
591 first_difference_byte: first_diff_byte,
592 hash_difference: HashDifference {
593 sha256_1: hashes1.0.clone(),
594 sha256_2: hashes2.0.clone(),
595 blake3_1: hashes1.1.clone(),
596 blake3_2: hashes2.1.clone(),
597 },
598 length_difference: LengthDifference {
599 length_1: output1.len(),
600 length_2: output2.len(),
601 diff: output2.len() as i64 - output1.len() as i64,
602 },
603 context,
604 }
605 }
606
607 fn find_first_difference(&self, a: &str, b: &str) -> Option<usize> {
608 a.bytes().zip(b.bytes()).position(|(x, y)| x != y)
609 .or_else(|| {
610 if a.len() != b.len() {
611 Some(std::cmp::min(a.len(), b.len()))
612 } else {
613 None
614 }
615 })
616 }
617
618 fn create_difference_context(&self, output1: &str, output2: &str, pos: usize) -> DifferenceContext {
619 let start = pos.saturating_sub(self.context_chars / 2);
620 let end1 = std::cmp::min(pos + self.context_chars / 2, output1.len());
621 let end2 = std::cmp::min(pos + self.context_chars / 2, output2.len());
622
623 let (line, col) = self.calculate_line_col(output1, pos);
625
626 DifferenceContext {
627 position: pos,
628 before: output1[start..pos].to_string(),
629 after_1: output1[pos..end1].to_string(),
630 after_2: output2[pos..end2].to_string(),
631 line_number: line,
632 column_number: col,
633 }
634 }
635
636 fn calculate_line_col(&self, text: &str, pos: usize) -> (Option<usize>, Option<usize>) {
637 if pos >= text.len() {
638 return (None, None);
639 }
640
641 let before_pos = &text[..pos];
642 let line_num = before_pos.lines().count();
643 let last_line_start = before_pos.rfind('\n').map(|i| i + 1).unwrap_or(0);
644 let col_num = pos - last_line_start + 1;
645
646 (Some(line_num), Some(col_num))
647 }
648}
649
650impl DeterminismVerifier {
652 pub fn quick_check(
654 request: &super::builder::BuildRequest,
655 ) -> Result<bool, super::error::BuildError> {
656 let config = DeterminismConfig::default();
657 let verifier = Self::new(config);
658 let result = verifier.verify(request, 3)?;
659 Ok(result.is_deterministic)
660 }
661
662 pub fn thorough_check(
664 request: &super::builder::BuildRequest,
665 iterations: usize,
666 ) -> Result<DeterminismResult, super::error::BuildError> {
667 let config = DeterminismConfig::default();
668 let verifier = Self::new(config).with_outputs_retained();
669
670 let standard_result = verifier.verify(request, iterations)?;
672 if !standard_result.is_deterministic {
673 return Ok(standard_result);
674 }
675
676 let hashmap_result = verifier.verify_with_hashmap_stress(request, iterations)?;
678 if !hashmap_result.is_deterministic {
679 return Ok(hashmap_result);
680 }
681
682 Ok(standard_result)
684 }
685}