1use indexmap::IndexMap;
113use serde::{Deserialize, Serialize};
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct DeterminismConfig {
118 pub canon_mode: CanonMode,
120
121 pub sort_strategy: SortStrategy,
123
124 pub custom_sort_order: Option<IndexMap<String, Vec<String>>>,
126
127 pub namespace_strategy: NamespaceStrategy,
129
130 pub locked_prefixes: IndexMap<String, String>,
132
133 pub output_mode: OutputMode,
135 pub line_ending: LineEnding,
137 pub indent_char: IndentChar,
139 pub indent_width: usize,
141
142 pub unicode_normalization: UnicodeNormalization,
144 pub xml_character_policy: XmlCharacterPolicy,
146 pub quote_style: QuoteStyle,
148
149 pub time_zone_policy: TimeZonePolicy,
151 pub date_time_format: DateTimeFormat,
153
154 pub emit_reproducibility_banner: bool,
156 pub verify_determinism: Option<usize>,
158}
159
160impl Default for DeterminismConfig {
161 fn default() -> Self {
162 Self {
163 canon_mode: CanonMode::DbC14n,
164 sort_strategy: SortStrategy::Canonical,
165 custom_sort_order: None,
166 namespace_strategy: NamespaceStrategy::Locked,
167 locked_prefixes: Self::default_namespace_prefixes(),
168 output_mode: OutputMode::DbC14n,
169 line_ending: LineEnding::LF,
170 indent_char: IndentChar::Space,
171 indent_width: 2,
172 unicode_normalization: UnicodeNormalization::NFC,
173 xml_character_policy: XmlCharacterPolicy::Escape,
174 quote_style: QuoteStyle::Double,
175 time_zone_policy: TimeZonePolicy::UTC,
176 date_time_format: DateTimeFormat::ISO8601Z,
177 emit_reproducibility_banner: false,
178 verify_determinism: None,
179 }
180 }
181}
182
183impl DeterminismConfig {
184 fn default_namespace_prefixes() -> IndexMap<String, String> {
185 let mut prefixes = IndexMap::new();
186 prefixes.insert("http://ddex.net/xml/ern/43".to_string(), "ern".to_string());
187 prefixes.insert("http://ddex.net/xml/ern/42".to_string(), "ern".to_string());
188 prefixes.insert("http://ddex.net/xml/ern/382".to_string(), "ern".to_string());
189 prefixes.insert("http://ddex.net/xml/avs".to_string(), "avs".to_string());
190 prefixes.insert(
191 "http://www.w3.org/2001/XMLSchema-instance".to_string(),
192 "xsi".to_string(),
193 );
194 prefixes
195 }
196}
197
198#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
200pub enum CanonMode {
201 DbC14n,
203 Pretty,
205 Compact,
207}
208
209#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
211pub enum SortStrategy {
212 Canonical,
214 InputOrder,
216 Custom,
218}
219
220#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
222pub enum NamespaceStrategy {
223 Locked,
225 Inherit,
227}
228
229#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
231pub enum OutputMode {
232 DbC14n,
234 Pretty,
236 Compact,
238}
239
240#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
242pub enum LineEnding {
243 LF,
245 CRLF,
247}
248
249#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
251pub enum IndentChar {
252 Space,
254 Tab,
256}
257
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
260pub enum UnicodeNormalization {
261 NFC,
263 NFD,
265 NFKC,
267 NFKD,
269}
270
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
273pub enum XmlCharacterPolicy {
274 Escape,
276 CData,
278 Reject,
280}
281
282#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
284pub enum QuoteStyle {
285 Double,
287 Single,
289}
290
291#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
293pub enum TimeZonePolicy {
294 UTC,
296 Preserve,
298 Local,
300}
301
302#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
304pub enum DateTimeFormat {
305 ISO8601Z,
307 ISO8601,
309 Custom,
311}
312
313#[derive(Debug, Clone, PartialEq)]
315pub struct DeterminismResult {
316 pub is_deterministic: bool,
318 pub iterations: usize,
320 pub outputs: Vec<String>,
322 pub hashes: Vec<String>,
324 pub differences: Vec<DeterminismDifference>,
326 pub runtime_stats: DeterminismStats,
328}
329
330#[derive(Debug, Clone, PartialEq, Eq)]
332pub struct DeterminismDifference {
333 pub iteration1: usize,
335 pub iteration2: usize,
337 pub first_difference_byte: Option<usize>,
339 pub hash_difference: HashDifference,
341 pub length_difference: LengthDifference,
343 pub context: Option<DifferenceContext>,
345}
346
/// Hex digests of the two outputs being compared.
///
/// The `_1` fields belong to the baseline output and the `_2` fields to the
/// output compared against it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashDifference {
    /// SHA-256 digest of the baseline output.
    pub sha256_1: String,
    /// SHA-256 digest of the compared output.
    pub sha256_2: String,
    /// BLAKE3 digest of the baseline output.
    pub blake3_1: String,
    /// BLAKE3 digest of the compared output.
    pub blake3_2: String,
}
359
/// Byte lengths of the two outputs being compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LengthDifference {
    /// Length in bytes of the baseline output.
    pub length_1: usize,
    /// Length in bytes of the compared output.
    pub length_2: usize,
    /// Signed difference `length_2 - length_1`.
    pub diff: i64,
}
370
/// Text surrounding the first point at which two outputs diverge.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DifferenceContext {
    /// Byte offset of the first difference.
    pub position: usize,
    /// Text from the baseline output immediately preceding `position`.
    pub before: String,
    /// Text from the baseline output starting at `position`.
    pub after_1: String,
    /// Text from the compared output starting at `position`.
    pub after_2: String,
    /// Line of `position` within the baseline output, when it was computed.
    pub line_number: Option<usize>,
    /// Column of `position` within its line in the baseline output, counted
    /// in bytes and starting at 1, when it was computed.
    pub column_number: Option<usize>,
}
387
/// Timing information for a determinism verification run.
///
/// All durations are whole milliseconds.
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismStats {
    /// Wall-clock time of the whole run.
    pub total_time_ms: u64,
    /// Mean duration of a single build (integer division).
    pub avg_build_time_ms: u64,
    /// Shortest single build.
    pub min_build_time_ms: u64,
    /// Longest single build.
    pub max_build_time_ms: u64,
    /// `(total - min) / min * 100`, or `0.0` when the shortest build took
    /// less than a millisecond.
    pub overhead_percentage: f64,
}
402
403pub struct DeterminismVerifier {
405 config: DeterminismConfig,
406 include_outputs: bool,
407 context_chars: usize,
408}
409
410impl DeterminismVerifier {
411 pub fn new(config: DeterminismConfig) -> Self {
413 Self {
414 config,
415 include_outputs: false,
416 context_chars: 100,
417 }
418 }
419
420 pub fn with_outputs_retained(mut self) -> Self {
422 self.include_outputs = true;
423 self
424 }
425
426 pub fn with_context_chars(mut self, chars: usize) -> Self {
428 self.context_chars = chars;
429 self
430 }
431
432 pub fn verify(
434 &self,
435 request: &super::builder::BuildRequest,
436 iterations: usize,
437 ) -> Result<DeterminismResult, super::error::BuildError> {
438 if iterations < 2 {
439 return Ok(DeterminismResult {
440 is_deterministic: true,
441 iterations: 1,
442 outputs: vec![],
443 hashes: vec![],
444 differences: vec![],
445 runtime_stats: DeterminismStats {
446 total_time_ms: 0,
447 avg_build_time_ms: 0,
448 min_build_time_ms: 0,
449 max_build_time_ms: 0,
450 overhead_percentage: 0.0,
451 },
452 });
453 }
454
455 let start_time = std::time::Instant::now();
456 let mut results = Vec::with_capacity(iterations);
457 let mut hashes = Vec::with_capacity(iterations);
458 let mut build_times = Vec::with_capacity(iterations);
459
460 for _ in 0..iterations {
462 let build_start = std::time::Instant::now();
463 let builder = super::Builder::with_config(self.config.clone());
464 let result = builder.build_internal(request)?;
465 let build_time = build_start.elapsed();
466 build_times.push(build_time.as_millis() as u64);
467
468 let sha256_hash = self.calculate_sha256(&result.xml);
470 let blake3_hash = self.calculate_blake3(&result.xml);
471
472 results.push(result.xml);
473 hashes.push((sha256_hash, blake3_hash));
474 }
475
476 let total_time = start_time.elapsed().as_millis() as u64;
477
478 let mut differences = Vec::new();
480 let first_output = &results[0];
481 let first_hashes = &hashes[0];
482
483 for (i, (output, hash_pair)) in results[1..].iter().zip(hashes[1..].iter()).enumerate() {
484 if output != first_output || hash_pair != first_hashes {
485 let diff = self.analyze_difference(
486 first_output,
487 output,
488 &first_hashes,
489 hash_pair,
490 0,
491 i + 1,
492 );
493 differences.push(diff);
494 }
495 }
496
497 let min_time = *build_times.iter().min().unwrap_or(&0);
499 let max_time = *build_times.iter().max().unwrap_or(&0);
500 let avg_time = if !build_times.is_empty() {
501 build_times.iter().sum::<u64>() / build_times.len() as u64
502 } else {
503 0
504 };
505
506 let overhead = if iterations > 1 && min_time > 0 {
507 ((total_time - min_time) as f64 / min_time as f64) * 100.0
508 } else {
509 0.0
510 };
511
512 let outputs = if self.include_outputs {
513 results
514 } else {
515 vec![]
516 };
517 let final_hashes = hashes.into_iter().map(|(sha256, _)| sha256).collect();
518
519 Ok(DeterminismResult {
520 is_deterministic: differences.is_empty(),
521 iterations,
522 outputs,
523 hashes: final_hashes,
524 differences,
525 runtime_stats: DeterminismStats {
526 total_time_ms: total_time,
527 avg_build_time_ms: avg_time,
528 min_build_time_ms: min_time,
529 max_build_time_ms: max_time,
530 overhead_percentage: overhead,
531 },
532 })
533 }
534
535 pub fn verify_legacy(
537 request: &super::builder::BuildRequest,
538 config: &DeterminismConfig,
539 iterations: usize,
540 ) -> Result<bool, super::error::BuildError> {
541 let verifier = Self::new(config.clone());
542 let result = verifier.verify(request, iterations)?;
543 Ok(result.is_deterministic)
544 }
545
546 pub fn verify_with_hashmap_stress(
548 &self,
549 request: &super::builder::BuildRequest,
550 iterations: usize,
551 ) -> Result<DeterminismResult, super::error::BuildError> {
552 use std::collections::HashMap;
553
554 for i in 0..iterations {
557 let mut dummy_map = HashMap::new();
558 for j in 0..(i % 10 + 1) {
559 dummy_map.insert(format!("key_{}", j), format!("value_{}", j));
560 }
561 let _: Vec<_> = dummy_map.iter().collect();
563 }
564
565 self.verify(request, iterations)
566 }
567
568 pub fn verify_with_threading_stress(
570 &self,
571 request: &super::builder::BuildRequest,
572 iterations: usize,
573 ) -> Result<DeterminismResult, super::error::BuildError> {
574 use std::sync::Arc;
575 use std::sync::Mutex;
576 use std::thread;
577
578 let results = Arc::new(Mutex::new(Vec::new()));
579 let mut handles = vec![];
580
581 for _ in 0..iterations {
582 let results_clone = Arc::clone(&results);
583 let request_clone = request.clone();
584 let config = self.config.clone();
585
586 let handle = thread::spawn(move || {
587 let builder = super::Builder::with_config(config);
588 let result = builder.build_internal(&request_clone);
589 results_clone.lock().unwrap().push(result);
590 });
591 handles.push(handle);
592 }
593
594 for handle in handles {
596 handle.join().unwrap();
597 }
598
599 let _thread_results = results.lock().unwrap();
600 self.verify(request, iterations)
603 }
604
605 fn calculate_sha256(&self, data: &str) -> String {
606 use sha2::{Digest, Sha256};
607 let mut hasher = Sha256::new();
608 hasher.update(data.as_bytes());
609 format!("{:x}", hasher.finalize())
610 }
611
612 fn calculate_blake3(&self, data: &str) -> String {
613 let hash = blake3::hash(data.as_bytes());
614 hash.to_hex().to_string()
615 }
616
617 fn analyze_difference(
618 &self,
619 output1: &str,
620 output2: &str,
621 hashes1: &(String, String),
622 hashes2: &(String, String),
623 iter1: usize,
624 iter2: usize,
625 ) -> DeterminismDifference {
626 let first_diff_byte = self.find_first_difference(output1, output2);
627
628 let context =
629 first_diff_byte.map(|pos| self.create_difference_context(output1, output2, pos));
630
631 DeterminismDifference {
632 iteration1: iter1,
633 iteration2: iter2,
634 first_difference_byte: first_diff_byte,
635 hash_difference: HashDifference {
636 sha256_1: hashes1.0.clone(),
637 sha256_2: hashes2.0.clone(),
638 blake3_1: hashes1.1.clone(),
639 blake3_2: hashes2.1.clone(),
640 },
641 length_difference: LengthDifference {
642 length_1: output1.len(),
643 length_2: output2.len(),
644 diff: output2.len() as i64 - output1.len() as i64,
645 },
646 context,
647 }
648 }
649
650 fn find_first_difference(&self, a: &str, b: &str) -> Option<usize> {
651 a.bytes()
652 .zip(b.bytes())
653 .position(|(x, y)| x != y)
654 .or_else(|| {
655 if a.len() != b.len() {
656 Some(std::cmp::min(a.len(), b.len()))
657 } else {
658 None
659 }
660 })
661 }
662
663 fn create_difference_context(
664 &self,
665 output1: &str,
666 output2: &str,
667 pos: usize,
668 ) -> DifferenceContext {
669 let start = pos.saturating_sub(self.context_chars / 2);
670 let end1 = std::cmp::min(pos + self.context_chars / 2, output1.len());
671 let end2 = std::cmp::min(pos + self.context_chars / 2, output2.len());
672
673 let (line, col) = self.calculate_line_col(output1, pos);
675
676 DifferenceContext {
677 position: pos,
678 before: output1[start..pos].to_string(),
679 after_1: output1[pos..end1].to_string(),
680 after_2: output2[pos..end2].to_string(),
681 line_number: line,
682 column_number: col,
683 }
684 }
685
686 fn calculate_line_col(&self, text: &str, pos: usize) -> (Option<usize>, Option<usize>) {
687 if pos >= text.len() {
688 return (None, None);
689 }
690
691 let before_pos = &text[..pos];
692 let line_num = before_pos.lines().count();
693 let last_line_start = before_pos.rfind('\n').map(|i| i + 1).unwrap_or(0);
694 let col_num = pos - last_line_start + 1;
695
696 (Some(line_num), Some(col_num))
697 }
698}
699
700impl DeterminismVerifier {
702 pub fn quick_check(
704 request: &super::builder::BuildRequest,
705 ) -> Result<bool, super::error::BuildError> {
706 let config = DeterminismConfig::default();
707 let verifier = Self::new(config);
708 let result = verifier.verify(request, 3)?;
709 Ok(result.is_deterministic)
710 }
711
712 pub fn thorough_check(
714 request: &super::builder::BuildRequest,
715 iterations: usize,
716 ) -> Result<DeterminismResult, super::error::BuildError> {
717 let config = DeterminismConfig::default();
718 let verifier = Self::new(config).with_outputs_retained();
719
720 let standard_result = verifier.verify(request, iterations)?;
722 if !standard_result.is_deterministic {
723 return Ok(standard_result);
724 }
725
726 let hashmap_result = verifier.verify_with_hashmap_stress(request, iterations)?;
728 if !hashmap_result.is_deterministic {
729 return Ok(hashmap_result);
730 }
731
732 Ok(standard_result)
734 }
735}