1use chrono::NaiveDate;
10use rand::Rng;
11use rust_decimal::Decimal;
12
13#[derive(Debug, Clone, Copy, PartialEq)]
15pub enum DateFormat {
16 ISO,
18 US,
20 USDash,
22 EU,
24 EUDash,
26 EUDot,
28 Long,
30 ShortYear,
32 Compact,
34 Unix,
36 ExcelSerial,
38}
39
40impl DateFormat {
41 pub fn all() -> Vec<Self> {
43 vec![
44 DateFormat::ISO,
45 DateFormat::US,
46 DateFormat::USDash,
47 DateFormat::EU,
48 DateFormat::EUDash,
49 DateFormat::EUDot,
50 DateFormat::Long,
51 DateFormat::ShortYear,
52 DateFormat::Compact,
53 ]
54 }
55
56 pub fn format(&self, date: NaiveDate) -> String {
58 match self {
59 DateFormat::ISO => date.format("%Y-%m-%d").to_string(),
60 DateFormat::US => date.format("%m/%d/%Y").to_string(),
61 DateFormat::USDash => date.format("%m-%d-%Y").to_string(),
62 DateFormat::EU => date.format("%d/%m/%Y").to_string(),
63 DateFormat::EUDash => date.format("%d-%m-%Y").to_string(),
64 DateFormat::EUDot => date.format("%d.%m.%Y").to_string(),
65 DateFormat::Long => date.format("%B %d, %Y").to_string(),
66 DateFormat::ShortYear => date.format("%m/%d/%y").to_string(),
67 DateFormat::Compact => date.format("%Y%m%d").to_string(),
68 DateFormat::Unix => {
69 let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
70 let days = (date - epoch).num_days();
71 (days * 86400).to_string()
72 }
73 DateFormat::ExcelSerial => {
74 let epoch = NaiveDate::from_ymd_opt(1899, 12, 30).unwrap();
76 let days = (date - epoch).num_days();
77 days.to_string()
78 }
79 }
80 }
81}
82
83#[derive(Debug, Clone, PartialEq)]
85pub enum AmountFormat {
86 Plain,
88 USComma,
90 EUFormat,
92 SpaceSeparator,
94 CurrencyPrefix(String),
96 CurrencySuffix(String),
98 Accounting,
100 Scientific,
102 NoDecimals,
104 FourDecimals,
106}
107
108impl AmountFormat {
109 pub fn common() -> Vec<Self> {
111 vec![
112 AmountFormat::Plain,
113 AmountFormat::USComma,
114 AmountFormat::EUFormat,
115 AmountFormat::SpaceSeparator,
116 AmountFormat::CurrencyPrefix("$".to_string()),
117 AmountFormat::CurrencySuffix("USD".to_string()),
118 AmountFormat::Accounting,
119 AmountFormat::NoDecimals,
120 ]
121 }
122
123 pub fn format(&self, amount: Decimal) -> String {
125 let is_negative = amount < Decimal::ZERO;
126 let abs_amount = amount.abs();
127 let amount_f64: f64 = abs_amount.try_into().unwrap_or(0.0);
128
129 match self {
130 AmountFormat::Plain => {
131 if is_negative {
132 format!("-{:.2}", amount_f64)
133 } else {
134 format!("{:.2}", amount_f64)
135 }
136 }
137 AmountFormat::USComma => {
138 let formatted = format_with_thousands(amount_f64, ',', '.');
139 if is_negative {
140 format!("-{}", formatted)
141 } else {
142 formatted
143 }
144 }
145 AmountFormat::EUFormat => {
146 let formatted = format_with_thousands(amount_f64, '.', ',');
147 if is_negative {
148 format!("-{}", formatted)
149 } else {
150 formatted
151 }
152 }
153 AmountFormat::SpaceSeparator => {
154 let formatted = format_with_thousands(amount_f64, ' ', '.');
155 if is_negative {
156 format!("-{}", formatted)
157 } else {
158 formatted
159 }
160 }
161 AmountFormat::CurrencyPrefix(symbol) => {
162 let formatted = format_with_thousands(amount_f64, ',', '.');
163 if is_negative {
164 format!("-{}{}", symbol, formatted)
165 } else {
166 format!("{}{}", symbol, formatted)
167 }
168 }
169 AmountFormat::CurrencySuffix(code) => {
170 let formatted = format_with_thousands(amount_f64, ',', '.');
171 if is_negative {
172 format!("-{} {}", formatted, code)
173 } else {
174 format!("{} {}", formatted, code)
175 }
176 }
177 AmountFormat::Accounting => {
178 let formatted = format_with_thousands(amount_f64, ',', '.');
179 if is_negative {
180 format!("({})", formatted)
181 } else {
182 formatted
183 }
184 }
185 AmountFormat::Scientific => {
186 if is_negative {
187 format!("-{:.5E}", amount_f64)
188 } else {
189 format!("{:.5E}", amount_f64)
190 }
191 }
192 AmountFormat::NoDecimals => {
193 let rounded = amount_f64.round() as i64;
194 if is_negative {
195 format!("-{}", rounded.abs())
196 } else {
197 rounded.to_string()
198 }
199 }
200 AmountFormat::FourDecimals => {
201 if is_negative {
202 format!("-{:.4}", amount_f64)
203 } else {
204 format!("{:.4}", amount_f64)
205 }
206 }
207 }
208 }
209}
210
/// Formats the magnitude of `value` with two decimals and digit grouping.
///
/// The integer digits are grouped in threes with `thousand_sep`, and the
/// two-digit fraction follows `decimal_sep`. The sign of `value` is dropped;
/// callers add their own sign decoration.
///
/// When the fraction rounds up to a full unit (e.g. `1.999`), the carry is
/// propagated into the integer part so the result is `2.00` rather than the
/// malformed `1.100`.
fn format_with_thousands(value: f64, thousand_sep: char, decimal_sep: char) -> String {
    // Work on the magnitude so the float-to-int casts never yield a negative
    // number (and `i64::MIN.abs()` can never be reached).
    let magnitude = value.abs();
    let mut whole = magnitude.trunc() as i64;
    let mut cents = (magnitude.fract() * 100.0).round() as i64;

    // A fraction of 0.995 or more rounds to 100 hundredths: carry it over.
    if cents >= 100 {
        whole = whole.saturating_add(1);
        cents -= 100;
    }

    let digits = whole.to_string();
    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    for (idx, digit) in digits.chars().enumerate() {
        // Insert a separator whenever a multiple of three digits remains.
        if idx > 0 && (digits.len() - idx) % 3 == 0 {
            grouped.push(thousand_sep);
        }
        grouped.push(digit);
    }

    format!("{}{}{:02}", grouped, decimal_sep, cents)
}
229
/// Transformations that can be applied to an identifier string.
#[derive(Debug, Clone)]
pub enum IdentifierFormat {
    /// The identifier unchanged.
    Original,
    /// Upper-cased.
    Upper,
    /// Lower-cased.
    Lower,
    /// The given string prepended to the identifier.
    WithPrefix(String),
    /// The given string appended to the identifier.
    WithSuffix(String),
    /// Left-padded with `0` up to the given number of characters.
    ZeroPadded(usize),
    /// Left-padded with spaces up to the given number of characters.
    SpacePadded(usize),
    /// `separator` inserted after every `interval` characters.
    WithSeparator { separator: char, interval: usize },
}

impl IdentifierFormat {
    /// Applies this transformation to `id` and returns the result.
    ///
    /// * Padding variants count characters (not bytes) and never truncate:
    ///   identifiers already at least as long as the target width are
    ///   returned unchanged.
    /// * `WithSeparator` with an `interval` of `0` inserts no separators
    ///   instead of panicking on a division by zero.
    pub fn format(&self, id: &str) -> String {
        match self {
            IdentifierFormat::Original => id.to_string(),
            IdentifierFormat::Upper => id.to_uppercase(),
            IdentifierFormat::Lower => id.to_lowercase(),
            IdentifierFormat::WithPrefix(prefix) => format!("{}{}", prefix, id),
            IdentifierFormat::WithSuffix(suffix) => format!("{}{}", id, suffix),
            // The formatter's width is measured in characters and only ever
            // pads, so no separate (byte-based) length check is needed.
            IdentifierFormat::ZeroPadded(len) => format!("{:0>width$}", id, width = *len),
            IdentifierFormat::SpacePadded(len) => format!("{:>width$}", id, width = *len),
            IdentifierFormat::WithSeparator {
                separator,
                interval,
            } => {
                // A zero interval cannot define groups; leave the id as is.
                if *interval == 0 {
                    return id.to_string();
                }

                let mut result = String::with_capacity(id.len() + id.len() / interval);
                for (i, c) in id.chars().enumerate() {
                    if i > 0 && i % interval == 0 {
                        result.push(*separator);
                    }
                    result.push(c);
                }
                result
            }
        }
    }
}
290
/// Transformations that can be applied to free text.
#[derive(Debug, Clone)]
pub enum TextFormat {
    /// The text unchanged.
    Original,
    /// Upper-cased.
    Upper,
    /// Lower-cased.
    Lower,
    /// Each whitespace-separated word capitalised, the rest lower-cased.
    Title,
    /// The given number of copies of the padding literal prepended.
    LeadingWhitespace(usize),
    /// The given number of copies of the padding literal appended.
    TrailingWhitespace(usize),
    /// Words re-joined with the separator literal used in `format`.
    ExtraSpaces,
    /// Leading and trailing whitespace removed.
    Trimmed,
    /// Every ASCII space replaced by U+00A0 (no-break space).
    NonBreakingSpaces,
}

impl TextFormat {
    /// Applies this transformation to `text` and returns the result.
    pub fn format(&self, text: &str) -> String {
        match self {
            Self::Original => text.to_owned(),
            Self::Upper => text.to_uppercase(),
            Self::Lower => text.to_lowercase(),
            Self::Title => {
                // Upper-case the first character, lower-case the remainder.
                let capitalise = |word: &str| -> String {
                    let mut rest = word.chars();
                    match rest.next() {
                        Some(first) => {
                            let mut out: String = first.to_uppercase().collect();
                            out.push_str(&rest.as_str().to_lowercase());
                            out
                        }
                        None => String::new(),
                    }
                };
                let words: Vec<String> = text.split_whitespace().map(capitalise).collect();
                words.join(" ")
            }
            Self::LeadingWhitespace(n) => {
                let mut out = " ".repeat(*n);
                out.push_str(text);
                out
            }
            Self::TrailingWhitespace(n) => {
                let mut out = String::from(text);
                out.push_str(&" ".repeat(*n));
                out
            }
            Self::ExtraSpaces => {
                // NOTE(review): the name suggests widening the gaps between
                // words, but as written the words are joined with this exact
                // separator literal — confirm the intended separator.
                let words: Vec<&str> = text.split_whitespace().collect();
                words.join(" ")
            }
            Self::Trimmed => text.trim().to_owned(),
            Self::NonBreakingSpaces => text.replace(' ', "\u{00A0}"),
        }
    }
}
347
348#[derive(Debug, Clone)]
350pub struct FormatVariationConfig {
351 pub date_variation_rate: f64,
353 pub amount_variation_rate: f64,
355 pub identifier_variation_rate: f64,
357 pub text_variation_rate: f64,
359 pub allowed_date_formats: Vec<DateFormat>,
361 pub allowed_amount_formats: Vec<AmountFormat>,
363}
364
365impl Default for FormatVariationConfig {
366 fn default() -> Self {
367 Self {
368 date_variation_rate: 0.05,
369 amount_variation_rate: 0.03,
370 identifier_variation_rate: 0.02,
371 text_variation_rate: 0.05,
372 allowed_date_formats: DateFormat::all(),
373 allowed_amount_formats: AmountFormat::common(),
374 }
375 }
376}
377
378pub struct FormatVariationInjector {
380 config: FormatVariationConfig,
381 stats: FormatVariationStats,
382}
383
/// Counters describing how many values were processed and varied.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default)]
pub struct FormatVariationStats {
    /// Number of dates that received a varied format.
    pub date_variations: usize,
    /// Number of amounts that received a varied format.
    pub amount_variations: usize,
    /// Number of identifiers that received a varied format.
    pub identifier_variations: usize,
    /// Number of texts that received a varied format.
    pub text_variations: usize,
    /// Number of values seen, varied or not.
    pub total_processed: usize,
}
393
394impl FormatVariationInjector {
395 pub fn new(config: FormatVariationConfig) -> Self {
397 Self {
398 config,
399 stats: FormatVariationStats::default(),
400 }
401 }
402
403 pub fn vary_date<R: Rng>(&mut self, date: NaiveDate, rng: &mut R) -> String {
405 self.stats.total_processed += 1;
406
407 if rng.gen::<f64>() < self.config.date_variation_rate {
408 self.stats.date_variations += 1;
409 let format = &self.config.allowed_date_formats
410 [rng.gen_range(0..self.config.allowed_date_formats.len())];
411 format.format(date)
412 } else {
413 DateFormat::ISO.format(date)
414 }
415 }
416
417 pub fn vary_amount<R: Rng>(&mut self, amount: Decimal, rng: &mut R) -> String {
419 self.stats.total_processed += 1;
420
421 if rng.gen::<f64>() < self.config.amount_variation_rate {
422 self.stats.amount_variations += 1;
423 let format = &self.config.allowed_amount_formats
424 [rng.gen_range(0..self.config.allowed_amount_formats.len())];
425 format.format(amount)
426 } else {
427 AmountFormat::Plain.format(amount)
428 }
429 }
430
431 pub fn vary_identifier<R: Rng>(&mut self, id: &str, rng: &mut R) -> String {
433 self.stats.total_processed += 1;
434
435 if rng.gen::<f64>() < self.config.identifier_variation_rate {
436 self.stats.identifier_variations += 1;
437
438 let variations = [
439 IdentifierFormat::Upper,
440 IdentifierFormat::Lower,
441 IdentifierFormat::ZeroPadded(10),
442 IdentifierFormat::WithPrefix(" ".to_string()),
443 IdentifierFormat::WithSuffix(" ".to_string()),
444 ];
445
446 let format = &variations[rng.gen_range(0..variations.len())];
447 format.format(id)
448 } else {
449 id.to_string()
450 }
451 }
452
453 pub fn vary_text<R: Rng>(&mut self, text: &str, rng: &mut R) -> String {
455 self.stats.total_processed += 1;
456
457 if rng.gen::<f64>() < self.config.text_variation_rate {
458 self.stats.text_variations += 1;
459
460 let variations = [
461 TextFormat::Upper,
462 TextFormat::Lower,
463 TextFormat::Title,
464 TextFormat::LeadingWhitespace(1),
465 TextFormat::TrailingWhitespace(1),
466 TextFormat::ExtraSpaces,
467 ];
468
469 let format = &variations[rng.gen_range(0..variations.len())];
470 format.format(text)
471 } else {
472 text.to_string()
473 }
474 }
475
476 pub fn stats(&self) -> &FormatVariationStats {
478 &self.stats
479 }
480
481 pub fn reset_stats(&mut self) {
483 self.stats = FormatVariationStats::default();
484 }
485}
486
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;

    /// Pattern-based date formats render the expected text.
    #[test]
    fn test_date_formats() {
        let date = NaiveDate::from_ymd_opt(2024, 1, 15).unwrap();

        let cases = [
            (DateFormat::ISO, "2024-01-15"),
            (DateFormat::US, "01/15/2024"),
            (DateFormat::EU, "15/01/2024"),
            (DateFormat::Compact, "20240115"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.format(date), expected);
        }
    }

    /// Grouping, decimal separators and rounding for a positive amount.
    #[test]
    fn test_amount_formats() {
        let amount = dec!(1234567.89);

        let cases = [
            (AmountFormat::Plain, "1234567.89"),
            (AmountFormat::USComma, "1,234,567.89"),
            (AmountFormat::EUFormat, "1.234.567,89"),
            (AmountFormat::NoDecimals, "1234568"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.format(amount), expected);
        }
    }

    /// Negative amounts get a minus sign, or parentheses in accounting style.
    #[test]
    fn test_negative_amounts() {
        let amount = dec!(-1234.56);

        assert_eq!(AmountFormat::Plain.format(amount), "-1234.56");
        assert_eq!(AmountFormat::Accounting.format(amount), "(1,234.56)");
    }

    /// Case conversion and zero padding of identifiers.
    #[test]
    fn test_identifier_formats() {
        let id = "abc123";

        assert_eq!(IdentifierFormat::Upper.format(id), "ABC123");
        assert_eq!(IdentifierFormat::ZeroPadded(10).format(id), "0000abc123");
    }

    /// Case conversion and word re-joining of free text.
    #[test]
    fn test_text_formats() {
        let text = "hello world";

        assert_eq!(TextFormat::Upper.format(text), "HELLO WORLD");
        assert_eq!(TextFormat::Title.format(text), "Hello World");
        assert_eq!(TextFormat::ExtraSpaces.format(text), "hello world");
    }

    /// A variation rate of 1.0 always varies and is counted once per call.
    #[test]
    fn test_format_injector() {
        use rand::SeedableRng;
        use rand_chacha::ChaCha8Rng;

        let mut injector = FormatVariationInjector::new(FormatVariationConfig {
            date_variation_rate: 1.0,
            ..Default::default()
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        let date = NaiveDate::from_ymd_opt(2024, 1, 15).unwrap();
        let formatted = injector.vary_date(date, &mut rng);

        assert!(!formatted.is_empty());
        assert_eq!(injector.stats().date_variations, 1);
    }
}