1use indexmap::IndexMap;
113use serde::{Deserialize, Serialize};
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct DeterminismConfig {
118    pub canon_mode: CanonMode,
120
121    pub sort_strategy: SortStrategy,
123
124    pub custom_sort_order: Option<IndexMap<String, Vec<String>>>,
126
127    pub namespace_strategy: NamespaceStrategy,
129
130    pub locked_prefixes: IndexMap<String, String>,
132
133    pub output_mode: OutputMode,
135    pub line_ending: LineEnding,
137    pub indent_char: IndentChar,
139    pub indent_width: usize,
141
142    pub unicode_normalization: UnicodeNormalization,
144    pub xml_character_policy: XmlCharacterPolicy,
146    pub quote_style: QuoteStyle,
148
149    pub time_zone_policy: TimeZonePolicy,
151    pub date_time_format: DateTimeFormat,
153
154    pub emit_reproducibility_banner: bool,
156    pub verify_determinism: Option<usize>,
158}
159
160impl Default for DeterminismConfig {
161    fn default() -> Self {
162        Self {
163            canon_mode: CanonMode::DbC14n,
164            sort_strategy: SortStrategy::Canonical,
165            custom_sort_order: None,
166            namespace_strategy: NamespaceStrategy::Locked,
167            locked_prefixes: Self::default_namespace_prefixes(),
168            output_mode: OutputMode::DbC14n,
169            line_ending: LineEnding::LF,
170            indent_char: IndentChar::Space,
171            indent_width: 2,
172            unicode_normalization: UnicodeNormalization::NFC,
173            xml_character_policy: XmlCharacterPolicy::Escape,
174            quote_style: QuoteStyle::Double,
175            time_zone_policy: TimeZonePolicy::UTC,
176            date_time_format: DateTimeFormat::ISO8601Z,
177            emit_reproducibility_banner: false,
178            verify_determinism: None,
179        }
180    }
181}
182
183impl DeterminismConfig {
184    fn default_namespace_prefixes() -> IndexMap<String, String> {
185        let mut prefixes = IndexMap::new();
186        prefixes.insert("http://ddex.net/xml/ern/43".to_string(), "ern".to_string());
187        prefixes.insert("http://ddex.net/xml/ern/42".to_string(), "ern".to_string());
188        prefixes.insert("http://ddex.net/xml/ern/382".to_string(), "ern".to_string());
189        prefixes.insert("http://ddex.net/xml/avs".to_string(), "avs".to_string());
190        prefixes.insert(
191            "http://www.w3.org/2001/XMLSchema-instance".to_string(),
192            "xsi".to_string(),
193        );
194        prefixes
195    }
196}
197
198#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
200pub enum CanonMode {
201    DbC14n,
203    Pretty,
205    Compact,
207}
208
209#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
211pub enum SortStrategy {
212    Canonical,
214    InputOrder,
216    Custom,
218}
219
220#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
222pub enum NamespaceStrategy {
223    Locked,
225    Inherit,
227}
228
229#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
231pub enum OutputMode {
232    DbC14n,
234    Pretty,
236    Compact,
238}
239
240#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
242pub enum LineEnding {
243    LF,
245    CRLF,
247}
248
249#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
251pub enum IndentChar {
252    Space,
254    Tab,
256}
257
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
260pub enum UnicodeNormalization {
261    NFC,
263    NFD,
265    NFKC,
267    NFKD,
269}
270
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
273pub enum XmlCharacterPolicy {
274    Escape,
276    CData,
278    Reject,
280}
281
282#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
284pub enum QuoteStyle {
285    Double,
287    Single,
289}
290
291#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
293pub enum TimeZonePolicy {
294    UTC,
296    Preserve,
298    Local,
300}
301
302#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
304pub enum DateTimeFormat {
305    ISO8601Z,
307    ISO8601,
309    Custom,
311}
312
313#[derive(Debug, Clone, PartialEq)]
315pub struct DeterminismResult {
316    pub is_deterministic: bool,
318    pub iterations: usize,
320    pub outputs: Vec<String>,
322    pub hashes: Vec<String>,
324    pub differences: Vec<DeterminismDifference>,
326    pub runtime_stats: DeterminismStats,
328}
329
330#[derive(Debug, Clone, PartialEq, Eq)]
332pub struct DeterminismDifference {
333    pub iteration1: usize,
335    pub iteration2: usize,
337    pub first_difference_byte: Option<usize>,
339    pub hash_difference: HashDifference,
341    pub length_difference: LengthDifference,
343    pub context: Option<DifferenceContext>,
345}
346
/// Digests of the two outputs being compared, as hex strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashDifference {
    /// SHA-256 digest of the first output.
    pub sha256_1: String,
    /// SHA-256 digest of the second output.
    pub sha256_2: String,
    /// BLAKE3 digest of the first output.
    pub blake3_1: String,
    /// BLAKE3 digest of the second output.
    pub blake3_2: String,
}
359
/// Byte lengths of the two outputs being compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LengthDifference {
    /// Length in bytes of the first output.
    pub length_1: usize,
    /// Length in bytes of the second output.
    pub length_2: usize,
    /// Signed difference, `length_2 - length_1`.
    pub diff: i64,
}
370
/// Text surrounding the first mismatch between two outputs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DifferenceContext {
    /// Byte offset of the first mismatch.
    pub position: usize,
    /// Text of the first output immediately preceding the mismatch.
    pub before: String,
    /// Text of the first output starting at the mismatch.
    pub after_1: String,
    /// Text of the second output starting at the mismatch.
    pub after_2: String,
    /// Line of the mismatch within the first output, when it could be computed.
    pub line_number: Option<usize>,
    /// Column (counted in bytes, starting at 1) of the mismatch within its
    /// line, when it could be computed.
    pub column_number: Option<usize>,
}
387
/// Timing figures collected during a verification run.
#[derive(Debug, Clone, PartialEq)]
pub struct DeterminismStats {
    /// Wall-clock time of the whole build loop, in milliseconds.
    pub total_time_ms: u64,
    /// Mean time of a single build, in milliseconds (integer division).
    pub avg_build_time_ms: u64,
    /// Fastest single build, in milliseconds.
    pub min_build_time_ms: u64,
    /// Slowest single build, in milliseconds.
    pub max_build_time_ms: u64,
    /// `(total - min) / min * 100`, or `0.0` when the fastest build rounded
    /// down to zero milliseconds.
    pub overhead_percentage: f64,
}
402
403pub struct DeterminismVerifier {
405    config: DeterminismConfig,
406    include_outputs: bool,
407    context_chars: usize,
408}
409
410impl DeterminismVerifier {
411    pub fn new(config: DeterminismConfig) -> Self {
413        Self {
414            config,
415            include_outputs: false,
416            context_chars: 100,
417        }
418    }
419
420    pub fn with_outputs_retained(mut self) -> Self {
422        self.include_outputs = true;
423        self
424    }
425
426    pub fn with_context_chars(mut self, chars: usize) -> Self {
428        self.context_chars = chars;
429        self
430    }
431
432    pub fn verify(
434        &self,
435        request: &super::builder::BuildRequest,
436        iterations: usize,
437    ) -> Result<DeterminismResult, super::error::BuildError> {
438        if iterations < 2 {
439            return Ok(DeterminismResult {
440                is_deterministic: true,
441                iterations: 1,
442                outputs: vec![],
443                hashes: vec![],
444                differences: vec![],
445                runtime_stats: DeterminismStats {
446                    total_time_ms: 0,
447                    avg_build_time_ms: 0,
448                    min_build_time_ms: 0,
449                    max_build_time_ms: 0,
450                    overhead_percentage: 0.0,
451                },
452            });
453        }
454
455        let start_time = std::time::Instant::now();
456        let mut results = Vec::with_capacity(iterations);
457        let mut hashes = Vec::with_capacity(iterations);
458        let mut build_times = Vec::with_capacity(iterations);
459
460        for _ in 0..iterations {
462            let build_start = std::time::Instant::now();
463            let builder = super::Builder::with_config(self.config.clone());
464            let result = builder.build_internal(request)?;
465            let build_time = build_start.elapsed();
466            build_times.push(build_time.as_millis() as u64);
467
468            let sha256_hash = self.calculate_sha256(&result.xml);
470            let blake3_hash = self.calculate_blake3(&result.xml);
471
472            results.push(result.xml);
473            hashes.push((sha256_hash, blake3_hash));
474        }
475
476        let total_time = start_time.elapsed().as_millis() as u64;
477
478        let mut differences = Vec::new();
480        let first_output = &results[0];
481        let first_hashes = &hashes[0];
482
483        for (i, (output, hash_pair)) in results[1..].iter().zip(hashes[1..].iter()).enumerate() {
484            if output != first_output || hash_pair != first_hashes {
485                let diff = self.analyze_difference(
486                    first_output,
487                    output,
488                    &first_hashes,
489                    hash_pair,
490                    0,
491                    i + 1,
492                );
493                differences.push(diff);
494            }
495        }
496
497        let min_time = *build_times.iter().min().unwrap_or(&0);
499        let max_time = *build_times.iter().max().unwrap_or(&0);
500        let avg_time = if !build_times.is_empty() {
501            build_times.iter().sum::<u64>() / build_times.len() as u64
502        } else {
503            0
504        };
505
506        let overhead = if iterations > 1 && min_time > 0 {
507            ((total_time - min_time) as f64 / min_time as f64) * 100.0
508        } else {
509            0.0
510        };
511
512        let outputs = if self.include_outputs {
513            results
514        } else {
515            vec![]
516        };
517        let final_hashes = hashes.into_iter().map(|(sha256, _)| sha256).collect();
518
519        Ok(DeterminismResult {
520            is_deterministic: differences.is_empty(),
521            iterations,
522            outputs,
523            hashes: final_hashes,
524            differences,
525            runtime_stats: DeterminismStats {
526                total_time_ms: total_time,
527                avg_build_time_ms: avg_time,
528                min_build_time_ms: min_time,
529                max_build_time_ms: max_time,
530                overhead_percentage: overhead,
531            },
532        })
533    }
534
535    pub fn verify_legacy(
537        request: &super::builder::BuildRequest,
538        config: &DeterminismConfig,
539        iterations: usize,
540    ) -> Result<bool, super::error::BuildError> {
541        let verifier = Self::new(config.clone());
542        let result = verifier.verify(request, iterations)?;
543        Ok(result.is_deterministic)
544    }
545
546    pub fn verify_with_hashmap_stress(
548        &self,
549        request: &super::builder::BuildRequest,
550        iterations: usize,
551    ) -> Result<DeterminismResult, super::error::BuildError> {
552        use std::collections::HashMap;
553
554        for i in 0..iterations {
557            let mut dummy_map = HashMap::new();
558            for j in 0..(i % 10 + 1) {
559                dummy_map.insert(format!("key_{}", j), format!("value_{}", j));
560            }
561            let _: Vec<_> = dummy_map.iter().collect();
563        }
564
565        self.verify(request, iterations)
566    }
567
568    pub fn verify_with_threading_stress(
570        &self,
571        request: &super::builder::BuildRequest,
572        iterations: usize,
573    ) -> Result<DeterminismResult, super::error::BuildError> {
574        use std::sync::Arc;
575        use std::sync::Mutex;
576        use std::thread;
577
578        let results = Arc::new(Mutex::new(Vec::new()));
579        let mut handles = vec![];
580
581        for _ in 0..iterations {
582            let results_clone = Arc::clone(&results);
583            let request_clone = request.clone();
584            let config = self.config.clone();
585
586            let handle = thread::spawn(move || {
587                let builder = super::Builder::with_config(config);
588                let result = builder.build_internal(&request_clone);
589                results_clone.lock().unwrap().push(result);
590            });
591            handles.push(handle);
592        }
593
594        for handle in handles {
596            handle.join().unwrap();
597        }
598
599        let _thread_results = results.lock().unwrap();
600        self.verify(request, iterations)
603    }
604
605    fn calculate_sha256(&self, data: &str) -> String {
606        use sha2::{Digest, Sha256};
607        let mut hasher = Sha256::new();
608        hasher.update(data.as_bytes());
609        format!("{:x}", hasher.finalize())
610    }
611
612    fn calculate_blake3(&self, data: &str) -> String {
613        let hash = blake3::hash(data.as_bytes());
614        hash.to_hex().to_string()
615    }
616
617    fn analyze_difference(
618        &self,
619        output1: &str,
620        output2: &str,
621        hashes1: &(String, String),
622        hashes2: &(String, String),
623        iter1: usize,
624        iter2: usize,
625    ) -> DeterminismDifference {
626        let first_diff_byte = self.find_first_difference(output1, output2);
627
628        let context =
629            first_diff_byte.map(|pos| self.create_difference_context(output1, output2, pos));
630
631        DeterminismDifference {
632            iteration1: iter1,
633            iteration2: iter2,
634            first_difference_byte: first_diff_byte,
635            hash_difference: HashDifference {
636                sha256_1: hashes1.0.clone(),
637                sha256_2: hashes2.0.clone(),
638                blake3_1: hashes1.1.clone(),
639                blake3_2: hashes2.1.clone(),
640            },
641            length_difference: LengthDifference {
642                length_1: output1.len(),
643                length_2: output2.len(),
644                diff: output2.len() as i64 - output1.len() as i64,
645            },
646            context,
647        }
648    }
649
650    fn find_first_difference(&self, a: &str, b: &str) -> Option<usize> {
651        a.bytes()
652            .zip(b.bytes())
653            .position(|(x, y)| x != y)
654            .or_else(|| {
655                if a.len() != b.len() {
656                    Some(std::cmp::min(a.len(), b.len()))
657                } else {
658                    None
659                }
660            })
661    }
662
663    fn create_difference_context(
664        &self,
665        output1: &str,
666        output2: &str,
667        pos: usize,
668    ) -> DifferenceContext {
669        let start = pos.saturating_sub(self.context_chars / 2);
670        let end1 = std::cmp::min(pos + self.context_chars / 2, output1.len());
671        let end2 = std::cmp::min(pos + self.context_chars / 2, output2.len());
672
673        let (line, col) = self.calculate_line_col(output1, pos);
675
676        DifferenceContext {
677            position: pos,
678            before: output1[start..pos].to_string(),
679            after_1: output1[pos..end1].to_string(),
680            after_2: output2[pos..end2].to_string(),
681            line_number: line,
682            column_number: col,
683        }
684    }
685
686    fn calculate_line_col(&self, text: &str, pos: usize) -> (Option<usize>, Option<usize>) {
687        if pos >= text.len() {
688            return (None, None);
689        }
690
691        let before_pos = &text[..pos];
692        let line_num = before_pos.lines().count();
693        let last_line_start = before_pos.rfind('\n').map(|i| i + 1).unwrap_or(0);
694        let col_num = pos - last_line_start + 1;
695
696        (Some(line_num), Some(col_num))
697    }
698}
699
700impl DeterminismVerifier {
702    pub fn quick_check(
704        request: &super::builder::BuildRequest,
705    ) -> Result<bool, super::error::BuildError> {
706        let config = DeterminismConfig::default();
707        let verifier = Self::new(config);
708        let result = verifier.verify(request, 3)?;
709        Ok(result.is_deterministic)
710    }
711
712    pub fn thorough_check(
714        request: &super::builder::BuildRequest,
715        iterations: usize,
716    ) -> Result<DeterminismResult, super::error::BuildError> {
717        let config = DeterminismConfig::default();
718        let verifier = Self::new(config).with_outputs_retained();
719
720        let standard_result = verifier.verify(request, iterations)?;
722        if !standard_result.is_deterministic {
723            return Ok(standard_result);
724        }
725
726        let hashmap_result = verifier.verify_with_hashmap_stress(request, iterations)?;
728        if !hashmap_result.is_deterministic {
729            return Ok(hashmap_result);
730        }
731
732        Ok(standard_result)
734    }
735}