// Source file: datasynth_generators/data_quality/format_variations.rs

//! Format variations for data quality simulation.
//!
//! Simulates realistic format inconsistencies including:
//! - Date formats (ISO, US, EU, various separators)
//! - Amount formats (decimal separators, thousand separators, currency symbols)
//! - Identifier formats (padding, prefixes, case variations)
//! - Text formats (case, whitespace, encoding)
8
9use chrono::NaiveDate;
10use datasynth_core::CountryPack;
11use rand::Rng;
12use rust_decimal::Decimal;
13use serde::{Deserialize, Serialize};
14
/// The textual (or numeric) shapes a calendar date can be rendered in.
///
/// Examples below all describe 15 January 2024.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DateFormat {
    /// ISO 8601, year first, dash separated: `2024-01-15`.
    ISO,
    /// Month first, slash separated (US style): `01/15/2024`.
    US,
    /// Month first, dash separated: `01-15-2024`.
    USDash,
    /// Day first, slash separated (European style): `15/01/2024`.
    EU,
    /// Day first, dash separated: `15-01-2024`.
    EUDash,
    /// Day first, dot separated: `15.01.2024`.
    EUDot,
    /// Spelled-out month name: `January 15, 2024`.
    Long,
    /// Month first with a two-digit year: `01/15/24`.
    ShortYear,
    /// Digits only, year first: `20240115`.
    Compact,
    /// Seconds since 1970-01-01 (midnight of the given day).
    Unix,
    /// Day count relative to 1899-12-30, the spreadsheet serial-date epoch.
    ExcelSerial,
}
41
42impl DateFormat {
43    /// Returns all date formats.
44    pub fn all() -> Vec<Self> {
45        vec![
46            DateFormat::ISO,
47            DateFormat::US,
48            DateFormat::USDash,
49            DateFormat::EU,
50            DateFormat::EUDash,
51            DateFormat::EUDot,
52            DateFormat::Long,
53            DateFormat::ShortYear,
54            DateFormat::Compact,
55        ]
56    }
57
58    /// Infer the baseline `DateFormat` from a country pack's short date format
59    /// string (e.g. "MM/DD/YYYY", "DD.MM.YYYY", "DD/MM/YYYY").
60    /// Falls back to `DateFormat::ISO` for unrecognised patterns.
61    pub fn from_locale_short(short: &str) -> Self {
62        let s = short.to_uppercase();
63        if s.starts_with("YYYY") {
64            // ISO family (year-first)
65            DateFormat::ISO
66        } else if s.starts_with("MM") {
67            // Month-first (US family)
68            if s.contains('-') {
69                DateFormat::USDash
70            } else {
71                DateFormat::US
72            }
73        } else if s.starts_with("DD") {
74            // Day-first (European family)
75            if s.contains('.') {
76                DateFormat::EUDot
77            } else if s.contains('-') {
78                DateFormat::EUDash
79            } else {
80                DateFormat::EU
81            }
82        } else {
83            DateFormat::ISO
84        }
85    }
86
87    /// Formats a date using this format.
88    pub fn format(&self, date: NaiveDate) -> String {
89        match self {
90            DateFormat::ISO => date.format("%Y-%m-%d").to_string(),
91            DateFormat::US => date.format("%m/%d/%Y").to_string(),
92            DateFormat::USDash => date.format("%m-%d-%Y").to_string(),
93            DateFormat::EU => date.format("%d/%m/%Y").to_string(),
94            DateFormat::EUDash => date.format("%d-%m-%Y").to_string(),
95            DateFormat::EUDot => date.format("%d.%m.%Y").to_string(),
96            DateFormat::Long => date.format("%B %d, %Y").to_string(),
97            DateFormat::ShortYear => date.format("%m/%d/%y").to_string(),
98            DateFormat::Compact => date.format("%Y%m%d").to_string(),
99            DateFormat::Unix => {
100                let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).expect("valid unix epoch date");
101                let days = (date - epoch).num_days();
102                (days * 86400).to_string()
103            }
104            DateFormat::ExcelSerial => {
105                // Excel epoch is December 30, 1899
106                let epoch = NaiveDate::from_ymd_opt(1899, 12, 30).expect("valid excel epoch date");
107                let days = (date - epoch).num_days();
108                days.to_string()
109            }
110        }
111    }
112}
113
/// The shapes a monetary amount can be rendered in.
///
/// Unless stated otherwise the examples describe the amount `1234.56`.
#[derive(Debug, Clone, PartialEq)]
pub enum AmountFormat {
    /// Two decimals, no grouping: `1234.56`.
    Plain,
    /// Comma as thousands separator, dot as decimal separator: `1,234.56`.
    USComma,
    /// Dot as thousands separator, comma as decimal separator: `1.234,56`.
    EUFormat,
    /// Space as thousands separator, dot as decimal separator: `1 234.56`.
    SpaceSeparator,
    /// US-style grouping preceded by the given symbol: `$1,234.56`.
    CurrencyPrefix(String),
    /// US-style grouping followed by a space and the given code: `1,234.56 USD`.
    CurrencySuffix(String),
    /// US-style grouping; negative values are wrapped in parentheses
    /// instead of carrying a minus sign: `(1,234.56)`.
    Accounting,
    /// Scientific notation with five fractional digits as produced by Rust's
    /// `{:.5E}` formatter, e.g. `1.23456E3`.
    Scientific,
    /// Rounded to a whole number: `1235`.
    NoDecimals,
    /// Four decimals, no grouping: `1234.5600`.
    FourDecimals,
}
138
139impl AmountFormat {
140    /// Returns common amount formats.
141    pub fn common() -> Vec<Self> {
142        vec![
143            AmountFormat::Plain,
144            AmountFormat::USComma,
145            AmountFormat::EUFormat,
146            AmountFormat::SpaceSeparator,
147            AmountFormat::CurrencyPrefix("$".to_string()),
148            AmountFormat::CurrencySuffix("USD".to_string()),
149            AmountFormat::Accounting,
150            AmountFormat::NoDecimals,
151        ]
152    }
153
154    /// Infer the baseline `AmountFormat` from a country pack's locale settings.
155    ///
156    /// Uses the `decimal_separator`, `thousands_separator`, `currency_symbol`,
157    /// and `default_currency` to pick the most appropriate "correct" format.
158    pub fn from_locale(
159        decimal_sep: &str,
160        thousands_sep: &str,
161        currency_symbol: &str,
162        _default_currency: &str,
163    ) -> Self {
164        match (decimal_sep, thousands_sep) {
165            (",", ".") => AmountFormat::EUFormat,
166            (".", " ") => AmountFormat::SpaceSeparator,
167            (".", ",") => {
168                if !currency_symbol.is_empty() {
169                    AmountFormat::CurrencyPrefix(currency_symbol.to_string())
170                } else {
171                    AmountFormat::USComma
172                }
173            }
174            _ => AmountFormat::Plain,
175        }
176    }
177
178    /// Formats a decimal using this format.
179    pub fn format(&self, amount: Decimal) -> String {
180        let is_negative = amount < Decimal::ZERO;
181        let abs_amount = amount.abs();
182        let amount_f64: f64 = abs_amount.try_into().unwrap_or(0.0);
183
184        match self {
185            AmountFormat::Plain => {
186                if is_negative {
187                    format!("-{:.2}", amount_f64)
188                } else {
189                    format!("{:.2}", amount_f64)
190                }
191            }
192            AmountFormat::USComma => {
193                let formatted = format_with_thousands(amount_f64, ',', '.');
194                if is_negative {
195                    format!("-{}", formatted)
196                } else {
197                    formatted
198                }
199            }
200            AmountFormat::EUFormat => {
201                let formatted = format_with_thousands(amount_f64, '.', ',');
202                if is_negative {
203                    format!("-{}", formatted)
204                } else {
205                    formatted
206                }
207            }
208            AmountFormat::SpaceSeparator => {
209                let formatted = format_with_thousands(amount_f64, ' ', '.');
210                if is_negative {
211                    format!("-{}", formatted)
212                } else {
213                    formatted
214                }
215            }
216            AmountFormat::CurrencyPrefix(symbol) => {
217                let formatted = format_with_thousands(amount_f64, ',', '.');
218                if is_negative {
219                    format!("-{}{}", symbol, formatted)
220                } else {
221                    format!("{}{}", symbol, formatted)
222                }
223            }
224            AmountFormat::CurrencySuffix(code) => {
225                let formatted = format_with_thousands(amount_f64, ',', '.');
226                if is_negative {
227                    format!("-{} {}", formatted, code)
228                } else {
229                    format!("{} {}", formatted, code)
230                }
231            }
232            AmountFormat::Accounting => {
233                let formatted = format_with_thousands(amount_f64, ',', '.');
234                if is_negative {
235                    format!("({})", formatted)
236                } else {
237                    formatted
238                }
239            }
240            AmountFormat::Scientific => {
241                if is_negative {
242                    format!("-{:.5E}", amount_f64)
243                } else {
244                    format!("{:.5E}", amount_f64)
245                }
246            }
247            AmountFormat::NoDecimals => {
248                let rounded = amount_f64.round() as i64;
249                if is_negative {
250                    format!("-{}", rounded.abs())
251                } else {
252                    rounded.to_string()
253                }
254            }
255            AmountFormat::FourDecimals => {
256                if is_negative {
257                    format!("-{:.4}", amount_f64)
258                } else {
259                    format!("{:.4}", amount_f64)
260                }
261            }
262        }
263    }
264}
265
/// Formats the magnitude of `value` with two decimal places and the integer
/// digits grouped in threes.
///
/// * `thousand_sep` is inserted between digit groups of the integer part.
/// * `decimal_sep` separates the integer part from the two fraction digits.
///
/// The sign of `value` is discarded; callers are expected to add their own
/// sign notation. When the fraction rounds up to a full unit (for example
/// `1.999`), the carry is propagated into the integer part so the result is
/// `2.00` rather than a three-digit fraction.
fn format_with_thousands(value: f64, thousand_sep: char, decimal_sep: char) -> String {
    // `unsigned_abs` cannot overflow, unlike `i64::abs` on `i64::MIN`.
    let mut whole = (value.trunc() as i64).unsigned_abs();
    let mut cents = (value.fract().abs() * 100.0).round() as u64;
    if cents >= 100 {
        // `whole` is at most 2^63 here, so the increment cannot overflow.
        whole += 1;
        cents -= 100;
    }

    let digits = whole.to_string();
    let digit_count = digits.len();
    let mut grouped = String::with_capacity(digit_count + digit_count / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes wherever a multiple of three digits remains.
        if idx > 0 && (digit_count - idx) % 3 == 0 {
            grouped.push(thousand_sep);
        }
        grouped.push(digit);
    }

    format!("{}{}{:02}", grouped, decimal_sep, cents)
}
284
/// The ways an identifier string can be re-rendered.
#[derive(Debug, Clone)]
pub enum IdentifierFormat {
    /// Leave the identifier untouched.
    Original,
    /// Convert to uppercase.
    Upper,
    /// Convert to lowercase.
    Lower,
    /// Prepend the given string.
    WithPrefix(String),
    /// Append the given string.
    WithSuffix(String),
    /// Left-pad with `0` up to the given number of characters.
    ZeroPadded(usize),
    /// Left-pad with spaces up to the given number of characters.
    SpacePadded(usize),
    /// Insert `separator` after every `interval` characters.
    WithSeparator { separator: char, interval: usize },
}

impl IdentifierFormat {
    /// Renders `id` in this format.
    ///
    /// * Padding variants count characters, not bytes, and never truncate:
    ///   an identifier that is already at least as long as the requested
    ///   width is returned unchanged.
    /// * `WithSeparator` with an `interval` of `0` returns the identifier
    ///   unchanged, since there is no position at which to insert.
    pub fn format(&self, id: &str) -> String {
        match self {
            IdentifierFormat::Original => id.to_string(),
            IdentifierFormat::Upper => id.to_uppercase(),
            IdentifierFormat::Lower => id.to_lowercase(),
            IdentifierFormat::WithPrefix(prefix) => format!("{}{}", prefix, id),
            IdentifierFormat::WithSuffix(suffix) => format!("{}{}", id, suffix),
            // The formatter's width is character based and leaves longer
            // input as is, so no separate length check is needed.
            IdentifierFormat::ZeroPadded(len) => format!("{:0>width$}", id, width = *len),
            IdentifierFormat::SpacePadded(len) => format!("{:>width$}", id, width = *len),
            IdentifierFormat::WithSeparator {
                separator,
                interval,
            } => {
                if *interval == 0 {
                    // Guard against a modulo-by-zero panic below.
                    return id.to_string();
                }
                let mut separated = String::with_capacity(id.len() + id.len() / *interval);
                for (position, ch) in id.chars().enumerate() {
                    if position > 0 && position % *interval == 0 {
                        separated.push(*separator);
                    }
                    separated.push(ch);
                }
                separated
            }
        }
    }
}
345
/// The ways a free-text value can be re-rendered.
#[derive(Debug, Clone)]
pub enum TextFormat {
    /// Leave the text untouched.
    Original,
    /// Convert to uppercase.
    Upper,
    /// Convert to lowercase.
    Lower,
    /// Capitalise the first character of every whitespace-separated word and
    /// lowercase the rest; words are re-joined with single spaces.
    Title,
    /// Prepend the given number of spaces.
    LeadingWhitespace(usize),
    /// Append the given number of spaces.
    TrailingWhitespace(usize),
    /// Re-join the whitespace-separated words with two spaces.
    ExtraSpaces,
    /// Strip leading and trailing whitespace.
    Trimmed,
    /// Replace every ASCII space with a non-breaking space (U+00A0).
    NonBreakingSpaces,
}

impl TextFormat {
    /// Renders `text` in this format.
    pub fn format(&self, text: &str) -> String {
        match self {
            TextFormat::Original => text.to_owned(),
            TextFormat::Upper => text.to_uppercase(),
            TextFormat::Lower => text.to_lowercase(),
            TextFormat::Title => {
                let mut words: Vec<String> = Vec::new();
                for word in text.split_whitespace() {
                    let mut rest = word.chars();
                    let cased = match rest.next() {
                        Some(initial) => {
                            let mut capitalised: String = initial.to_uppercase().collect();
                            capitalised.push_str(&rest.as_str().to_lowercase());
                            capitalised
                        }
                        None => String::new(),
                    };
                    words.push(cased);
                }
                words.join(" ")
            }
            TextFormat::LeadingWhitespace(n) => {
                let mut padded = " ".repeat(*n);
                padded.push_str(text);
                padded
            }
            TextFormat::TrailingWhitespace(n) => {
                let mut padded = text.to_owned();
                padded.push_str(&" ".repeat(*n));
                padded
            }
            TextFormat::ExtraSpaces => {
                let words: Vec<&str> = text.split_whitespace().collect();
                words.join("  ")
            }
            TextFormat::Trimmed => text.trim().to_owned(),
            TextFormat::NonBreakingSpaces => text.replace(' ', "\u{00A0}"),
        }
    }
}
402
403/// Configuration for format variations.
404#[derive(Debug, Clone)]
405pub struct FormatVariationConfig {
406    /// Probability of applying date format variation.
407    pub date_variation_rate: f64,
408    /// Probability of applying amount format variation.
409    pub amount_variation_rate: f64,
410    /// Probability of applying identifier format variation.
411    pub identifier_variation_rate: f64,
412    /// Probability of applying text format variation.
413    pub text_variation_rate: f64,
414    /// Allowed date formats.
415    pub allowed_date_formats: Vec<DateFormat>,
416    /// Allowed amount formats.
417    pub allowed_amount_formats: Vec<AmountFormat>,
418}
419
420impl Default for FormatVariationConfig {
421    fn default() -> Self {
422        Self {
423            date_variation_rate: 0.05,
424            amount_variation_rate: 0.03,
425            identifier_variation_rate: 0.02,
426            text_variation_rate: 0.05,
427            allowed_date_formats: DateFormat::all(),
428            allowed_amount_formats: AmountFormat::common(),
429        }
430    }
431}
432
433/// Format variation injector.
434pub struct FormatVariationInjector {
435    config: FormatVariationConfig,
436    stats: FormatVariationStats,
437    /// Optional country pack for locale-aware baseline formats.
438    country_pack: Option<CountryPack>,
439}
440
441/// Statistics for format variations.
442#[derive(Debug, Clone, Default, Serialize, Deserialize)]
443pub struct FormatVariationStats {
444    pub date_variations: usize,
445    pub amount_variations: usize,
446    pub identifier_variations: usize,
447    pub text_variations: usize,
448    pub total_processed: usize,
449}
450
451impl FormatVariationInjector {
452    /// Creates a new format variation injector.
453    pub fn new(config: FormatVariationConfig) -> Self {
454        Self {
455            config,
456            stats: FormatVariationStats::default(),
457            country_pack: None,
458        }
459    }
460
461    /// Set the country pack for locale-aware format baselines.
462    ///
463    /// When a country pack is set, the "correct" (non-varied) format for dates
464    /// and amounts is derived from the pack's locale settings instead of
465    /// defaulting to ISO/Plain.
466    pub fn set_country_pack(&mut self, pack: CountryPack) {
467        self.country_pack = Some(pack);
468    }
469
470    /// Returns the baseline date format derived from the country pack (if set)
471    /// or `DateFormat::ISO` as the default.
472    fn baseline_date_format(&self) -> DateFormat {
473        match &self.country_pack {
474            Some(pack) => {
475                let short = &pack.locale.date_format.short;
476                if short.is_empty() {
477                    DateFormat::ISO
478                } else {
479                    DateFormat::from_locale_short(short)
480                }
481            }
482            None => DateFormat::ISO,
483        }
484    }
485
486    /// Returns the baseline amount format derived from the country pack (if set)
487    /// or `AmountFormat::Plain` as the default.
488    fn baseline_amount_format(&self) -> AmountFormat {
489        match &self.country_pack {
490            Some(pack) => {
491                let locale = &pack.locale;
492                let dec_sep = &locale.number_format.decimal_separator;
493                let thou_sep = &locale.number_format.thousands_separator;
494                let symbol = &locale.currency_symbol;
495                let currency = &locale.default_currency;
496                if dec_sep.is_empty() && thou_sep.is_empty() {
497                    AmountFormat::Plain
498                } else {
499                    AmountFormat::from_locale(dec_sep, thou_sep, symbol, currency)
500                }
501            }
502            None => AmountFormat::Plain,
503        }
504    }
505
506    /// Potentially applies a date format variation.
507    ///
508    /// When a country pack is set, the baseline (non-varied) format is derived
509    /// from the pack's locale `date_format.short` field. Otherwise ISO 8601 is
510    /// used as the default.
511    pub fn vary_date<R: Rng>(&mut self, date: NaiveDate, rng: &mut R) -> String {
512        self.stats.total_processed += 1;
513
514        if rng.gen::<f64>() < self.config.date_variation_rate {
515            self.stats.date_variations += 1;
516            let format = &self.config.allowed_date_formats
517                [rng.gen_range(0..self.config.allowed_date_formats.len())];
518            format.format(date)
519        } else {
520            self.baseline_date_format().format(date)
521        }
522    }
523
524    /// Potentially applies an amount format variation.
525    ///
526    /// When a country pack is set, the baseline (non-varied) format is derived
527    /// from the pack's locale number/currency settings. Otherwise plain format
528    /// is used as the default.
529    pub fn vary_amount<R: Rng>(&mut self, amount: Decimal, rng: &mut R) -> String {
530        self.stats.total_processed += 1;
531
532        if rng.gen::<f64>() < self.config.amount_variation_rate {
533            self.stats.amount_variations += 1;
534            let format = &self.config.allowed_amount_formats
535                [rng.gen_range(0..self.config.allowed_amount_formats.len())];
536            format.format(amount)
537        } else {
538            self.baseline_amount_format().format(amount)
539        }
540    }
541
542    /// Potentially applies an identifier format variation.
543    pub fn vary_identifier<R: Rng>(&mut self, id: &str, rng: &mut R) -> String {
544        self.stats.total_processed += 1;
545
546        if rng.gen::<f64>() < self.config.identifier_variation_rate {
547            self.stats.identifier_variations += 1;
548
549            let variations = [
550                IdentifierFormat::Upper,
551                IdentifierFormat::Lower,
552                IdentifierFormat::ZeroPadded(10),
553                IdentifierFormat::WithPrefix(" ".to_string()),
554                IdentifierFormat::WithSuffix(" ".to_string()),
555            ];
556
557            let format = &variations[rng.gen_range(0..variations.len())];
558            format.format(id)
559        } else {
560            id.to_string()
561        }
562    }
563
564    /// Potentially applies a text format variation.
565    pub fn vary_text<R: Rng>(&mut self, text: &str, rng: &mut R) -> String {
566        self.stats.total_processed += 1;
567
568        if rng.gen::<f64>() < self.config.text_variation_rate {
569            self.stats.text_variations += 1;
570
571            let variations = [
572                TextFormat::Upper,
573                TextFormat::Lower,
574                TextFormat::Title,
575                TextFormat::LeadingWhitespace(1),
576                TextFormat::TrailingWhitespace(1),
577                TextFormat::ExtraSpaces,
578            ];
579
580            let format = &variations[rng.gen_range(0..variations.len())];
581            format.format(text)
582        } else {
583            text.to_string()
584        }
585    }
586
587    /// Returns statistics.
588    pub fn stats(&self) -> &FormatVariationStats {
589        &self.stats
590    }
591
592    /// Resets statistics.
593    pub fn reset_stats(&mut self) {
594        self.stats = FormatVariationStats::default();
595    }
596}
597
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rust_decimal_macros::dec;

    /// 15 January 2024, the reference date used by the date tests.
    fn sample_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()
    }

    /// Deterministic random source shared by the injector tests.
    fn seeded_rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    #[test]
    fn test_date_formats() {
        let date = sample_date();
        let expectations = [
            (DateFormat::ISO, "2024-01-15"),
            (DateFormat::US, "01/15/2024"),
            (DateFormat::EU, "15/01/2024"),
            (DateFormat::Compact, "20240115"),
        ];

        for (format, expected) in expectations {
            assert_eq!(format.format(date), expected);
        }
    }

    #[test]
    fn test_amount_formats() {
        let amount = dec!(1234567.89);
        let expectations = [
            (AmountFormat::Plain, "1234567.89"),
            (AmountFormat::USComma, "1,234,567.89"),
            (AmountFormat::EUFormat, "1.234.567,89"),
            (AmountFormat::NoDecimals, "1234568"),
        ];

        for (format, expected) in expectations {
            assert_eq!(format.format(amount), expected);
        }
    }

    #[test]
    fn test_negative_amounts() {
        let amount = dec!(-1234.56);

        assert_eq!(AmountFormat::Plain.format(amount), "-1234.56");
        assert_eq!(AmountFormat::Accounting.format(amount), "(1,234.56)");
    }

    #[test]
    fn test_identifier_formats() {
        let id = "abc123";

        assert_eq!(IdentifierFormat::Upper.format(id), "ABC123");
        assert_eq!(IdentifierFormat::ZeroPadded(10).format(id), "0000abc123");
    }

    #[test]
    fn test_text_formats() {
        let text = "hello world";

        assert_eq!(TextFormat::Upper.format(text), "HELLO WORLD");
        assert_eq!(TextFormat::Title.format(text), "Hello World");
        assert_eq!(TextFormat::ExtraSpaces.format(text), "hello  world");
    }

    #[test]
    fn test_format_injector() {
        // A rate of 1.0 forces a variation on every call.
        let mut injector = FormatVariationInjector::new(FormatVariationConfig {
            date_variation_rate: 1.0,
            ..Default::default()
        });
        let mut rng = seeded_rng();

        let formatted = injector.vary_date(sample_date(), &mut rng);

        // Formatted date should not be empty and stats should be updated.
        assert!(!formatted.is_empty());
        assert_eq!(injector.stats().date_variations, 1);
    }

    #[test]
    fn test_date_format_from_locale_short() {
        let expectations = [
            ("MM/DD/YYYY", DateFormat::US),
            ("MM-DD-YYYY", DateFormat::USDash),
            ("DD/MM/YYYY", DateFormat::EU),
            ("DD-MM-YYYY", DateFormat::EUDash),
            ("DD.MM.YYYY", DateFormat::EUDot),
            ("YYYY-MM-DD", DateFormat::ISO),
            ("", DateFormat::ISO),
        ];

        for (pattern, expected) in expectations {
            assert_eq!(DateFormat::from_locale_short(pattern), expected);
        }
    }

    #[test]
    fn test_amount_format_from_locale() {
        // German: comma decimal, dot thousands.
        let german = AmountFormat::from_locale(",", ".", "\u{20ac}", "EUR");
        assert_eq!(german, AmountFormat::EUFormat);

        // US: dot decimal, comma thousands, dollar symbol.
        let us = AmountFormat::from_locale(".", ",", "$", "USD");
        assert_eq!(us, AmountFormat::CurrencyPrefix("$".to_string()));

        // Space-separated thousands.
        let spaced = AmountFormat::from_locale(".", " ", "", "CHF");
        assert_eq!(spaced, AmountFormat::SpaceSeparator);

        // Plain fallback.
        let unknown = AmountFormat::from_locale("X", "Y", "", "XYZ");
        assert_eq!(unknown, AmountFormat::Plain);
    }

    #[test]
    fn test_injector_with_country_pack_date_baseline() {
        use datasynth_core::country::schema::{DateFormatConfig, LocaleConfig};

        // German pack: DD.MM.YYYY
        let mut pack = CountryPack::default();
        pack.locale = LocaleConfig {
            date_format: DateFormatConfig {
                short: "DD.MM.YYYY".to_string(),
                ..Default::default()
            },
            ..Default::default()
        };

        // A rate of 0.0 never varies, so the baseline is always used.
        let mut injector = FormatVariationInjector::new(FormatVariationConfig {
            date_variation_rate: 0.0,
            ..Default::default()
        });
        injector.set_country_pack(pack);

        let mut rng = seeded_rng();
        let formatted = injector.vary_date(sample_date(), &mut rng);

        // Baseline should be EU dot: 15.01.2024
        assert_eq!(formatted, "15.01.2024");
    }

    #[test]
    fn test_injector_with_country_pack_amount_baseline() {
        use datasynth_core::country::schema::{LocaleConfig, NumberFormatConfig};

        // German pack: comma decimal, dot thousands
        let mut pack = CountryPack::default();
        pack.locale = LocaleConfig {
            number_format: NumberFormatConfig {
                decimal_separator: ",".to_string(),
                thousands_separator: ".".to_string(),
                ..Default::default()
            },
            currency_symbol: "\u{20ac}".to_string(),
            default_currency: "EUR".to_string(),
            ..Default::default()
        };

        // A rate of 0.0 never varies, so the baseline is always used.
        let mut injector = FormatVariationInjector::new(FormatVariationConfig {
            amount_variation_rate: 0.0,
            ..Default::default()
        });
        injector.set_country_pack(pack);

        let mut rng = seeded_rng();
        let formatted = injector.vary_amount(dec!(1234.56), &mut rng);

        // Baseline should be EU format: 1.234,56
        assert_eq!(formatted, "1.234,56");
    }

    #[test]
    fn test_injector_without_country_pack_uses_defaults() {
        // No country pack is set on this injector.
        let mut injector = FormatVariationInjector::new(FormatVariationConfig {
            date_variation_rate: 0.0,
            amount_variation_rate: 0.0,
            ..Default::default()
        });
        let mut rng = seeded_rng();

        let formatted_date = injector.vary_date(sample_date(), &mut rng);
        assert_eq!(formatted_date, "2024-01-15"); // ISO default

        let formatted_amount = injector.vary_amount(dec!(1234.56), &mut rng);
        assert_eq!(formatted_amount, "1234.56"); // Plain default
    }
}