1use chrono::NaiveDate;
10use datasynth_core::CountryPack;
11use rand::Rng;
12use rust_decimal::Decimal;
13use serde::{Deserialize, Serialize};
14
15#[derive(Debug, Clone, Copy, PartialEq)]
17pub enum DateFormat {
18 ISO,
20 US,
22 USDash,
24 EU,
26 EUDash,
28 EUDot,
30 Long,
32 ShortYear,
34 Compact,
36 Unix,
38 ExcelSerial,
40}
41
42impl DateFormat {
43 pub fn all() -> Vec<Self> {
45 vec![
46 DateFormat::ISO,
47 DateFormat::US,
48 DateFormat::USDash,
49 DateFormat::EU,
50 DateFormat::EUDash,
51 DateFormat::EUDot,
52 DateFormat::Long,
53 DateFormat::ShortYear,
54 DateFormat::Compact,
55 ]
56 }
57
58 pub fn from_locale_short(short: &str) -> Self {
62 let s = short.to_uppercase();
63 if s.starts_with("YYYY") {
64 DateFormat::ISO
66 } else if s.starts_with("MM") {
67 if s.contains('-') {
69 DateFormat::USDash
70 } else {
71 DateFormat::US
72 }
73 } else if s.starts_with("DD") {
74 if s.contains('.') {
76 DateFormat::EUDot
77 } else if s.contains('-') {
78 DateFormat::EUDash
79 } else {
80 DateFormat::EU
81 }
82 } else {
83 DateFormat::ISO
84 }
85 }
86
87 pub fn format(&self, date: NaiveDate) -> String {
89 match self {
90 DateFormat::ISO => date.format("%Y-%m-%d").to_string(),
91 DateFormat::US => date.format("%m/%d/%Y").to_string(),
92 DateFormat::USDash => date.format("%m-%d-%Y").to_string(),
93 DateFormat::EU => date.format("%d/%m/%Y").to_string(),
94 DateFormat::EUDash => date.format("%d-%m-%Y").to_string(),
95 DateFormat::EUDot => date.format("%d.%m.%Y").to_string(),
96 DateFormat::Long => date.format("%B %d, %Y").to_string(),
97 DateFormat::ShortYear => date.format("%m/%d/%y").to_string(),
98 DateFormat::Compact => date.format("%Y%m%d").to_string(),
99 DateFormat::Unix => {
100 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).expect("valid unix epoch date");
101 let days = (date - epoch).num_days();
102 (days * 86400).to_string()
103 }
104 DateFormat::ExcelSerial => {
105 let epoch = NaiveDate::from_ymd_opt(1899, 12, 30).expect("valid excel epoch date");
107 let days = (date - epoch).num_days();
108 days.to_string()
109 }
110 }
111 }
112}
113
114#[derive(Debug, Clone, PartialEq)]
116pub enum AmountFormat {
117 Plain,
119 USComma,
121 EUFormat,
123 SpaceSeparator,
125 CurrencyPrefix(String),
127 CurrencySuffix(String),
129 Accounting,
131 Scientific,
133 NoDecimals,
135 FourDecimals,
137}
138
139impl AmountFormat {
140 pub fn common() -> Vec<Self> {
142 vec![
143 AmountFormat::Plain,
144 AmountFormat::USComma,
145 AmountFormat::EUFormat,
146 AmountFormat::SpaceSeparator,
147 AmountFormat::CurrencyPrefix("$".to_string()),
148 AmountFormat::CurrencySuffix("USD".to_string()),
149 AmountFormat::Accounting,
150 AmountFormat::NoDecimals,
151 ]
152 }
153
154 pub fn from_locale(
159 decimal_sep: &str,
160 thousands_sep: &str,
161 currency_symbol: &str,
162 _default_currency: &str,
163 ) -> Self {
164 match (decimal_sep, thousands_sep) {
165 (",", ".") => AmountFormat::EUFormat,
166 (".", " ") => AmountFormat::SpaceSeparator,
167 (".", ",") => {
168 if !currency_symbol.is_empty() {
169 AmountFormat::CurrencyPrefix(currency_symbol.to_string())
170 } else {
171 AmountFormat::USComma
172 }
173 }
174 _ => AmountFormat::Plain,
175 }
176 }
177
178 pub fn format(&self, amount: Decimal) -> String {
180 let is_negative = amount < Decimal::ZERO;
181 let abs_amount = amount.abs();
182 let amount_f64: f64 = abs_amount.try_into().unwrap_or(0.0);
183
184 match self {
185 AmountFormat::Plain => {
186 if is_negative {
187 format!("-{:.2}", amount_f64)
188 } else {
189 format!("{:.2}", amount_f64)
190 }
191 }
192 AmountFormat::USComma => {
193 let formatted = format_with_thousands(amount_f64, ',', '.');
194 if is_negative {
195 format!("-{}", formatted)
196 } else {
197 formatted
198 }
199 }
200 AmountFormat::EUFormat => {
201 let formatted = format_with_thousands(amount_f64, '.', ',');
202 if is_negative {
203 format!("-{}", formatted)
204 } else {
205 formatted
206 }
207 }
208 AmountFormat::SpaceSeparator => {
209 let formatted = format_with_thousands(amount_f64, ' ', '.');
210 if is_negative {
211 format!("-{}", formatted)
212 } else {
213 formatted
214 }
215 }
216 AmountFormat::CurrencyPrefix(symbol) => {
217 let formatted = format_with_thousands(amount_f64, ',', '.');
218 if is_negative {
219 format!("-{}{}", symbol, formatted)
220 } else {
221 format!("{}{}", symbol, formatted)
222 }
223 }
224 AmountFormat::CurrencySuffix(code) => {
225 let formatted = format_with_thousands(amount_f64, ',', '.');
226 if is_negative {
227 format!("-{} {}", formatted, code)
228 } else {
229 format!("{} {}", formatted, code)
230 }
231 }
232 AmountFormat::Accounting => {
233 let formatted = format_with_thousands(amount_f64, ',', '.');
234 if is_negative {
235 format!("({})", formatted)
236 } else {
237 formatted
238 }
239 }
240 AmountFormat::Scientific => {
241 if is_negative {
242 format!("-{:.5E}", amount_f64)
243 } else {
244 format!("{:.5E}", amount_f64)
245 }
246 }
247 AmountFormat::NoDecimals => {
248 let rounded = amount_f64.round() as i64;
249 if is_negative {
250 format!("-{}", rounded.abs())
251 } else {
252 rounded.to_string()
253 }
254 }
255 AmountFormat::FourDecimals => {
256 if is_negative {
257 format!("-{:.4}", amount_f64)
258 } else {
259 format!("{:.4}", amount_f64)
260 }
261 }
262 }
263 }
264}
265
/// Renders the magnitude of `value` with exactly two decimal places and the
/// integer digits grouped in threes.
///
/// * `thousand_sep` is inserted between each group of three integer digits.
/// * `decimal_sep` separates the integer digits from the two fractional digits.
///
/// The sign of `value` is ignored: callers add their own sign decoration.
/// When the fractional part rounds up to a full unit (e.g. `1.999`), the
/// carry is propagated into the integer part so the result is `2.00` rather
/// than a three-digit fraction.
fn format_with_thousands(value: f64, thousand_sep: char, decimal_sep: char) -> String {
    let magnitude = value.abs();
    // `as i64` saturates for out-of-range values and maps NaN to 0.
    let mut whole = magnitude.trunc() as i64;
    let mut cents = (magnitude.fract() * 100.0).round() as i64;

    // A fraction of .995 or more rounds to 100 hundredths: carry into the integer part.
    if cents >= 100 {
        whole = whole.saturating_add(1);
        cents -= 100;
    }

    let digits = whole.to_string();
    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // Insert a separator whenever the number of digits still to come is a multiple of three.
        if idx > 0 && (digits.len() - idx) % 3 == 0 {
            grouped.push(thousand_sep);
        }
        grouped.push(digit);
    }

    format!("{}{}{:02}", grouped, decimal_sep, cents)
}
284
/// Transformations that can be applied to an identifier string.
#[derive(Debug, Clone)]
pub enum IdentifierFormat {
    /// Leave the identifier unchanged.
    Original,
    /// Convert to upper case.
    Upper,
    /// Convert to lower case.
    Lower,
    /// Prepend the given string.
    WithPrefix(String),
    /// Append the given string.
    WithSuffix(String),
    /// Left-pad with `0` up to the given number of characters.
    ZeroPadded(usize),
    /// Left-pad with spaces up to the given number of characters.
    SpacePadded(usize),
    /// Insert `separator` after every `interval` characters.
    WithSeparator { separator: char, interval: usize },
}

impl IdentifierFormat {
    /// Applies this transformation to `id` and returns the result.
    ///
    /// Padding widths and separator intervals are measured in characters, not
    /// bytes. Identifiers that already meet the padding width are returned
    /// unchanged, and a separator `interval` of `0` leaves the identifier
    /// untouched instead of panicking.
    pub fn format(&self, id: &str) -> String {
        match self {
            Self::Original => id.to_string(),
            Self::Upper => id.to_uppercase(),
            Self::Lower => id.to_lowercase(),
            Self::WithPrefix(prefix) => format!("{}{}", prefix, id),
            Self::WithSuffix(suffix) => format!("{}{}", id, suffix),
            // Width-based padding counts characters and never truncates, so no
            // separate (byte-length) pre-check is needed.
            Self::ZeroPadded(len) => format!("{:0>width$}", id, width = *len),
            Self::SpacePadded(len) => format!("{:>width$}", id, width = *len),
            Self::WithSeparator {
                separator,
                interval,
            } => {
                // `i % 0` would panic; a zero interval means "no grouping".
                if *interval == 0 {
                    return id.to_string();
                }

                let mut result = String::with_capacity(id.len() + id.len() / *interval);
                for (i, c) in id.chars().enumerate() {
                    if i > 0 && i % *interval == 0 {
                        result.push(*separator);
                    }
                    result.push(c);
                }
                result
            }
        }
    }
}
345
/// Transformations that can be applied to free-form text.
#[derive(Debug, Clone)]
pub enum TextFormat {
    /// Leave the text unchanged.
    Original,
    /// Convert to upper case.
    Upper,
    /// Convert to lower case.
    Lower,
    /// Capitalise the first character of every whitespace-separated word.
    Title,
    /// Prepend the given number of whitespace units.
    LeadingWhitespace(usize),
    /// Append the given number of whitespace units.
    TrailingWhitespace(usize),
    /// Re-join the whitespace-separated words of the text.
    ExtraSpaces,
    /// Strip leading and trailing whitespace.
    Trimmed,
    /// Replace every ASCII space with U+00A0 (no-break space).
    NonBreakingSpaces,
}

impl TextFormat {
    /// Applies this transformation to `text` and returns the result.
    pub fn format(&self, text: &str) -> String {
        // Upper-cases the first character of `word` and lower-cases the rest.
        fn capitalize(word: &str) -> String {
            let mut rest = word.chars();
            match rest.next() {
                Some(initial) => {
                    let mut out: String = initial.to_uppercase().collect();
                    out.push_str(&rest.as_str().to_lowercase());
                    out
                }
                None => String::new(),
            }
        }

        let words = || text.split_whitespace();

        match self {
            Self::Original => text.to_owned(),
            Self::Upper => text.to_uppercase(),
            Self::Lower => text.to_lowercase(),
            // Words are re-joined with the literal below, so runs of
            // whitespace in the input are not preserved.
            Self::Title => words().map(capitalize).collect::<Vec<String>>().join(" "),
            Self::LeadingWhitespace(n) => {
                let mut padded = " ".repeat(*n);
                padded.push_str(text);
                padded
            }
            Self::TrailingWhitespace(n) => {
                let mut padded = text.to_owned();
                padded.push_str(&" ".repeat(*n));
                padded
            }
            // NOTE(review): despite the variant name, as written this joins
            // the words with the same literal as `Title` — confirm whether a
            // wider separator was intended.
            Self::ExtraSpaces => words().collect::<Vec<&str>>().join(" "),
            Self::Trimmed => text.trim().to_owned(),
            Self::NonBreakingSpaces => text
                .chars()
                .map(|c| if c == ' ' { '\u{00A0}' } else { c })
                .collect(),
        }
    }
}
402
403#[derive(Debug, Clone)]
405pub struct FormatVariationConfig {
406 pub date_variation_rate: f64,
408 pub amount_variation_rate: f64,
410 pub identifier_variation_rate: f64,
412 pub text_variation_rate: f64,
414 pub allowed_date_formats: Vec<DateFormat>,
416 pub allowed_amount_formats: Vec<AmountFormat>,
418}
419
420impl Default for FormatVariationConfig {
421 fn default() -> Self {
422 Self {
423 date_variation_rate: 0.05,
424 amount_variation_rate: 0.03,
425 identifier_variation_rate: 0.02,
426 text_variation_rate: 0.05,
427 allowed_date_formats: DateFormat::all(),
428 allowed_amount_formats: AmountFormat::common(),
429 }
430 }
431}
432
433pub struct FormatVariationInjector {
435 config: FormatVariationConfig,
436 stats: FormatVariationStats,
437 country_pack: Option<CountryPack>,
439}
440
441#[derive(Debug, Clone, Default, Serialize, Deserialize)]
443pub struct FormatVariationStats {
444 pub date_variations: usize,
445 pub amount_variations: usize,
446 pub identifier_variations: usize,
447 pub text_variations: usize,
448 pub total_processed: usize,
449}
450
451impl FormatVariationInjector {
452 pub fn new(config: FormatVariationConfig) -> Self {
454 Self {
455 config,
456 stats: FormatVariationStats::default(),
457 country_pack: None,
458 }
459 }
460
461 pub fn set_country_pack(&mut self, pack: CountryPack) {
467 self.country_pack = Some(pack);
468 }
469
470 fn baseline_date_format(&self) -> DateFormat {
473 match &self.country_pack {
474 Some(pack) => {
475 let short = &pack.locale.date_format.short;
476 if short.is_empty() {
477 DateFormat::ISO
478 } else {
479 DateFormat::from_locale_short(short)
480 }
481 }
482 None => DateFormat::ISO,
483 }
484 }
485
486 fn baseline_amount_format(&self) -> AmountFormat {
489 match &self.country_pack {
490 Some(pack) => {
491 let locale = &pack.locale;
492 let dec_sep = &locale.number_format.decimal_separator;
493 let thou_sep = &locale.number_format.thousands_separator;
494 let symbol = &locale.currency_symbol;
495 let currency = &locale.default_currency;
496 if dec_sep.is_empty() && thou_sep.is_empty() {
497 AmountFormat::Plain
498 } else {
499 AmountFormat::from_locale(dec_sep, thou_sep, symbol, currency)
500 }
501 }
502 None => AmountFormat::Plain,
503 }
504 }
505
506 pub fn vary_date<R: Rng>(&mut self, date: NaiveDate, rng: &mut R) -> String {
512 self.stats.total_processed += 1;
513
514 if rng.random::<f64>() < self.config.date_variation_rate {
515 self.stats.date_variations += 1;
516 let format = &self.config.allowed_date_formats
517 [rng.random_range(0..self.config.allowed_date_formats.len())];
518 format.format(date)
519 } else {
520 self.baseline_date_format().format(date)
521 }
522 }
523
524 pub fn vary_amount<R: Rng>(&mut self, amount: Decimal, rng: &mut R) -> String {
530 self.stats.total_processed += 1;
531
532 if rng.random::<f64>() < self.config.amount_variation_rate {
533 self.stats.amount_variations += 1;
534 let format = &self.config.allowed_amount_formats
535 [rng.random_range(0..self.config.allowed_amount_formats.len())];
536 format.format(amount)
537 } else {
538 self.baseline_amount_format().format(amount)
539 }
540 }
541
542 pub fn vary_identifier<R: Rng>(&mut self, id: &str, rng: &mut R) -> String {
544 self.stats.total_processed += 1;
545
546 if rng.random::<f64>() < self.config.identifier_variation_rate {
547 self.stats.identifier_variations += 1;
548
549 let variations = [
550 IdentifierFormat::Upper,
551 IdentifierFormat::Lower,
552 IdentifierFormat::ZeroPadded(10),
553 IdentifierFormat::WithPrefix(" ".to_string()),
554 IdentifierFormat::WithSuffix(" ".to_string()),
555 ];
556
557 let format = &variations[rng.random_range(0..variations.len())];
558 format.format(id)
559 } else {
560 id.to_string()
561 }
562 }
563
564 pub fn vary_text<R: Rng>(&mut self, text: &str, rng: &mut R) -> String {
566 self.stats.total_processed += 1;
567
568 if rng.random::<f64>() < self.config.text_variation_rate {
569 self.stats.text_variations += 1;
570
571 let variations = [
572 TextFormat::Upper,
573 TextFormat::Lower,
574 TextFormat::Title,
575 TextFormat::LeadingWhitespace(1),
576 TextFormat::TrailingWhitespace(1),
577 TextFormat::ExtraSpaces,
578 ];
579
580 let format = &variations[rng.random_range(0..variations.len())];
581 format.format(text)
582 } else {
583 text.to_string()
584 }
585 }
586
587 pub fn stats(&self) -> &FormatVariationStats {
589 &self.stats
590 }
591
592 pub fn reset_stats(&mut self) {
594 self.stats = FormatVariationStats::default();
595 }
596}
597
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rust_decimal_macros::dec;

    /// Fixed-seed RNG so the injector tests are reproducible.
    fn seeded_rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    /// The date used throughout the tests: 15 January 2024.
    fn sample_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()
    }

    #[test]
    fn test_date_formats() {
        let date = sample_date();

        let cases = [
            (DateFormat::ISO, "2024-01-15"),
            (DateFormat::US, "01/15/2024"),
            (DateFormat::EU, "15/01/2024"),
            (DateFormat::Compact, "20240115"),
        ];
        for (format, expected) in cases.iter() {
            assert_eq!(format.format(date), *expected, "format {:?}", format);
        }
    }

    #[test]
    fn test_amount_formats() {
        let amount = dec!(1234567.89);

        let cases = [
            (AmountFormat::Plain, "1234567.89"),
            (AmountFormat::USComma, "1,234,567.89"),
            (AmountFormat::EUFormat, "1.234.567,89"),
            (AmountFormat::NoDecimals, "1234568"),
        ];
        for (format, expected) in cases.iter() {
            assert_eq!(format.format(amount), *expected, "format {:?}", format);
        }
    }

    #[test]
    fn test_negative_amounts() {
        let amount = dec!(-1234.56);

        assert_eq!(AmountFormat::Plain.format(amount), "-1234.56");
        assert_eq!(AmountFormat::Accounting.format(amount), "(1,234.56)");
    }

    #[test]
    fn test_identifier_formats() {
        let id = "abc123";

        assert_eq!(IdentifierFormat::Upper.format(id), "ABC123");
        assert_eq!(IdentifierFormat::ZeroPadded(10).format(id), "0000abc123");
    }

    #[test]
    fn test_text_formats() {
        let text = "hello world";

        assert_eq!(TextFormat::Upper.format(text), "HELLO WORLD");
        assert_eq!(TextFormat::Title.format(text), "Hello World");
        assert_eq!(TextFormat::ExtraSpaces.format(text), "hello world");
    }

    #[test]
    fn test_format_injector() {
        // A rate of 1.0 forces a variation on every call.
        let config = FormatVariationConfig {
            date_variation_rate: 1.0,
            ..Default::default()
        };

        let mut injector = FormatVariationInjector::new(config);
        let mut rng = seeded_rng();

        let formatted = injector.vary_date(sample_date(), &mut rng);

        assert!(!formatted.is_empty());
        assert_eq!(injector.stats().date_variations, 1);
    }

    #[test]
    fn test_date_format_from_locale_short() {
        let cases = [
            ("MM/DD/YYYY", DateFormat::US),
            ("MM-DD-YYYY", DateFormat::USDash),
            ("DD/MM/YYYY", DateFormat::EU),
            ("DD-MM-YYYY", DateFormat::EUDash),
            ("DD.MM.YYYY", DateFormat::EUDot),
            ("YYYY-MM-DD", DateFormat::ISO),
            ("", DateFormat::ISO),
        ];
        for (pattern, expected) in cases.iter() {
            assert_eq!(
                DateFormat::from_locale_short(pattern),
                *expected,
                "pattern {:?}",
                pattern
            );
        }
    }

    #[test]
    fn test_amount_format_from_locale() {
        // European separators win regardless of the currency symbol.
        assert_eq!(
            AmountFormat::from_locale(",", ".", "\u{20ac}", "EUR"),
            AmountFormat::EUFormat
        );
        // US separators with a symbol produce a currency prefix.
        assert_eq!(
            AmountFormat::from_locale(".", ",", "$", "USD"),
            AmountFormat::CurrencyPrefix("$".to_string())
        );
        // Space as thousands separator.
        assert_eq!(
            AmountFormat::from_locale(".", " ", "", "CHF"),
            AmountFormat::SpaceSeparator
        );
        // Unknown separators fall back to the plain format.
        assert_eq!(
            AmountFormat::from_locale("X", "Y", "", "XYZ"),
            AmountFormat::Plain
        );
    }

    #[test]
    fn test_injector_with_country_pack_date_baseline() {
        use datasynth_core::country::schema::{DateFormatConfig, LocaleConfig};

        let mut pack = CountryPack::default();
        pack.locale = LocaleConfig {
            date_format: DateFormatConfig {
                short: "DD.MM.YYYY".to_string(),
                ..Default::default()
            },
            ..Default::default()
        };

        // A rate of 0.0 means the baseline format is always used.
        let config = FormatVariationConfig {
            date_variation_rate: 0.0,
            ..Default::default()
        };
        let mut injector = FormatVariationInjector::new(config);
        injector.set_country_pack(pack);

        let mut rng = seeded_rng();
        let formatted = injector.vary_date(sample_date(), &mut rng);

        assert_eq!(formatted, "15.01.2024");
    }

    #[test]
    fn test_injector_with_country_pack_amount_baseline() {
        use datasynth_core::country::schema::{LocaleConfig, NumberFormatConfig};

        let mut pack = CountryPack::default();
        pack.locale = LocaleConfig {
            number_format: NumberFormatConfig {
                decimal_separator: ",".to_string(),
                thousands_separator: ".".to_string(),
                ..Default::default()
            },
            currency_symbol: "\u{20ac}".to_string(),
            default_currency: "EUR".to_string(),
            ..Default::default()
        };

        // A rate of 0.0 means the baseline format is always used.
        let config = FormatVariationConfig {
            amount_variation_rate: 0.0,
            ..Default::default()
        };
        let mut injector = FormatVariationInjector::new(config);
        injector.set_country_pack(pack);

        let mut rng = seeded_rng();
        let formatted = injector.vary_amount(dec!(1234.56), &mut rng);

        assert_eq!(formatted, "1.234,56");
    }

    #[test]
    fn test_injector_without_country_pack_uses_defaults() {
        let config = FormatVariationConfig {
            date_variation_rate: 0.0,
            amount_variation_rate: 0.0,
            ..Default::default()
        };
        let mut injector = FormatVariationInjector::new(config);
        let mut rng = seeded_rng();

        // Without a country pack the date baseline is ISO.
        let formatted_date = injector.vary_date(sample_date(), &mut rng);
        assert_eq!(formatted_date, "2024-01-15");

        // Without a country pack the amount baseline is Plain.
        let formatted_amount = injector.vary_amount(dec!(1234.56), &mut rng);
        assert_eq!(formatted_amount, "1234.56");
    }
}