1use super::{ChartOfAccountsTemplate, CompanyArchetype, ExpectedFlow};
7use crate::models::{
8 Decimal128, HybridTimestamp, JournalEntry, JournalEntryFlags, JournalLineItem, SolvingMethod,
9 TransactionFlow,
10};
11use rand::prelude::*;
12use rand_distr::LogNormal;
13use std::collections::HashMap;
14use uuid::Uuid;
15
/// Tunable parameters for synthetic journal-entry generation.
#[derive(Debug, Clone)]
pub struct GeneratorConfig {
    /// Target generation rate; the generator's simulated clock is advanced
    /// based on `1000 / transactions_per_second` milliseconds per entry.
    pub transactions_per_second: f64,
    /// Preferred batch size. Not read by the generator in this file;
    /// presumably consumed by callers — TODO confirm.
    pub batch_size: usize,

    /// Probability that an entry is produced with `SolvingMethod::MethodA`.
    pub method_a_ratio: f64,
    /// Probability that an entry is produced with `SolvingMethod::MethodB`.
    pub method_b_ratio: f64,
    /// Probability that an entry is produced with `SolvingMethod::MethodC`.
    pub method_c_ratio: f64,
    /// Probability that an entry is produced with `SolvingMethod::MethodD`.
    pub method_d_ratio: f64,
    /// Nominal share of `SolvingMethod::MethodE`. Method selection gives
    /// MethodE whatever probability mass A–D leave over, so this value only
    /// matters to [`GeneratorConfig::validate`].
    pub method_e_ratio: f64,

    /// When `true`, amounts are sampled from a clamped log-normal
    /// distribution; when `false`, uniformly between the bounds.
    pub benford_compliant: bool,
    /// Multiplier applied to every sampled amount before rounding to cents.
    pub amount_scale: f64,

    /// Not read in this file; presumably the first business hour of the day
    /// — TODO confirm against the consumer.
    pub business_hours_start: u8,
    /// Not read in this file; presumably the last business hour of the day
    /// — TODO confirm against the consumer.
    pub business_hours_end: u8,
    /// Not read in this file; presumably the relative weekend activity level
    /// — TODO confirm.
    pub weekend_ratio: f64,
    /// Not read in this file; presumably a month-end volume multiplier
    /// — TODO confirm.
    pub month_end_multiplier: f64,

    /// RNG seed. `None` makes the generator draw a random seed.
    pub seed: Option<u64>,
}

impl Default for GeneratorConfig {
    /// Baseline configuration: 100 tps, batches of 100, log-normal amounts
    /// and method ratios that sum to 1.0.
    fn default() -> Self {
        Self {
            transactions_per_second: 100.0,
            batch_size: 100,
            method_a_ratio: 0.6068,
            method_b_ratio: 0.1663,
            method_c_ratio: 0.11,
            method_d_ratio: 0.11,
            method_e_ratio: 0.0069,
            benford_compliant: true,
            amount_scale: 1.0,
            business_hours_start: 8,
            business_hours_end: 18,
            weekend_ratio: 0.1,
            month_end_multiplier: 2.5,
            seed: None,
        }
    }
}

impl GeneratorConfig {
    /// Preset with 1000 transactions per second and batches of 500;
    /// everything else is taken from [`Default`].
    pub fn high_volume() -> Self {
        Self {
            transactions_per_second: 1000.0,
            batch_size: 500,
            ..Default::default()
        }
    }

    /// Preset with 5 transactions per second and batches of 10;
    /// everything else is taken from [`Default`].
    pub fn educational() -> Self {
        Self {
            transactions_per_second: 5.0,
            batch_size: 10,
            ..Default::default()
        }
    }

    /// Checks that the method ratios form a usable probability distribution.
    ///
    /// # Errors
    ///
    /// Returns a message when any ratio is NaN, infinite or negative, or when
    /// the five ratios do not sum to 1.0 within a tolerance of 0.01.
    pub fn validate(&self) -> Result<(), String> {
        let ratios = [
            ("method_a_ratio", self.method_a_ratio),
            ("method_b_ratio", self.method_b_ratio),
            ("method_c_ratio", self.method_c_ratio),
            ("method_d_ratio", self.method_d_ratio),
            ("method_e_ratio", self.method_e_ratio),
        ];

        // A NaN ratio would make the sum NaN, and `NaN > 0.01` is false, so
        // the sum check alone would let it through. Negative ratios can also
        // cancel out in the sum. Reject both per ratio, up front.
        for (name, ratio) in ratios {
            if !ratio.is_finite() || ratio < 0.0 {
                return Err(format!(
                    "{} must be a finite, non-negative number, got {}",
                    name, ratio
                ));
            }
        }

        let total: f64 = ratios.iter().map(|(_, ratio)| ratio).sum();
        if (total - 1.0).abs() > 0.01 {
            return Err(format!("Method ratios must sum to 1.0, got {}", total));
        }
        Ok(())
    }
}
111
/// Stateful generator of synthetic journal entries for one company archetype.
pub struct TransactionGenerator {
    /// Archetype the generator was built for; stored but not read in this file.
    #[allow(dead_code)]
    archetype: CompanyArchetype,
    /// Chart of accounts (accounts and expected flows) derived from the archetype.
    coa: ChartOfAccountsTemplate,
    /// Generation parameters supplied at construction.
    config: GeneratorConfig,
    /// Random source, seeded from `config.seed` or from a randomly drawn seed.
    rng: StdRng,
    /// Maps an account code to its position in `coa.accounts`.
    account_indices: HashMap<String, u16>,
    /// Expected flows paired with their running share of the total frequency;
    /// scanned in order to pick a flow for a uniform random draw.
    flow_cdf: Vec<(f64, ExpectedFlow)>,
    /// Simulated clock stamped on each entry as its posting date.
    current_time: HybridTimestamp,
    /// Number of entry headers created so far.
    entry_counter: u64,
    /// Initialised to 1 by `new`; not read in this file.
    #[allow(dead_code)]
    node_id: u32,
    /// Account indices not yet handed out by the coverage selection;
    /// refilled with every index once it runs empty.
    unused_accounts: Vec<u16>,
    /// Incremented by the method A/B/C generators to decide when an entry
    /// should use a coverage flow instead of an expected flow.
    coverage_counter: u32,
}
139
140impl TransactionGenerator {
141 pub fn new(archetype: CompanyArchetype, config: GeneratorConfig) -> Self {
143 let seed = config.seed.unwrap_or_else(|| rand::thread_rng().gen());
144 let rng = StdRng::seed_from_u64(seed);
145
146 let coa = ChartOfAccountsTemplate::for_archetype(&archetype);
147
148 let mut account_indices = HashMap::new();
150 for (idx, account) in coa.accounts.iter().enumerate() {
151 account_indices.insert(account.code.clone(), idx as u16);
152 }
153
154 let total_freq: f64 = coa.expected_flows.iter().map(|f| f.frequency).sum();
156 let mut cumulative = 0.0;
157 let flow_cdf: Vec<(f64, ExpectedFlow)> = coa
158 .expected_flows
159 .iter()
160 .map(|f| {
161 cumulative += f.frequency / total_freq;
162 (cumulative, f.clone())
163 })
164 .collect();
165
166 let unused_accounts: Vec<u16> = (0..coa.accounts.len() as u16).collect();
168
169 Self {
170 archetype,
171 coa,
172 config,
173 rng,
174 account_indices,
175 flow_cdf,
176 current_time: HybridTimestamp::now(),
177 entry_counter: 0,
178 node_id: 1,
179 unused_accounts,
180 coverage_counter: 0,
181 }
182 }
183
184 pub fn generate_batch(&mut self, count: usize) -> Vec<GeneratedEntry> {
186 let mut entries = Vec::with_capacity(count);
187
188 for _ in 0..count {
189 let method = self.select_method();
191
192 let entry = match method {
194 SolvingMethod::MethodA => self.generate_method_a(),
195 SolvingMethod::MethodB => self.generate_method_b(),
196 SolvingMethod::MethodC => self.generate_method_c(),
197 SolvingMethod::MethodD => self.generate_method_d(),
198 SolvingMethod::MethodE => self.generate_method_e(),
199 SolvingMethod::Pending => self.generate_method_a(), };
201
202 entries.push(entry);
203
204 self.advance_time();
206 }
207
208 entries
209 }
210
211 fn select_method(&mut self) -> SolvingMethod {
213 let r: f64 = self.rng.gen();
214 let mut cumulative = 0.0;
215
216 cumulative += self.config.method_a_ratio;
217 if r < cumulative {
218 return SolvingMethod::MethodA;
219 }
220
221 cumulative += self.config.method_b_ratio;
222 if r < cumulative {
223 return SolvingMethod::MethodB;
224 }
225
226 cumulative += self.config.method_c_ratio;
227 if r < cumulative {
228 return SolvingMethod::MethodC;
229 }
230
231 cumulative += self.config.method_d_ratio;
232 if r < cumulative {
233 return SolvingMethod::MethodD;
234 }
235
236 SolvingMethod::MethodE
237 }
238
239 fn generate_method_a(&mut self) -> GeneratedEntry {
241 self.coverage_counter += 1;
243 let (from_idx, to_idx, amount) = if self.coverage_counter.is_multiple_of(5) {
244 self.select_coverage_flow()
245 } else {
246 let flow = self.select_flow();
247 let amount = self.generate_amount(flow.amount_range.0, flow.amount_range.1);
248 let from_idx = self
249 .account_indices
250 .get(&flow.from_code)
251 .copied()
252 .unwrap_or(0);
253 let to_idx = self
254 .account_indices
255 .get(&flow.to_code)
256 .copied()
257 .unwrap_or(1);
258 (from_idx, to_idx, amount)
259 };
260
261 let mut entry = self.create_entry_header();
262 entry.debit_line_count = 1;
263 entry.credit_line_count = 1;
264 entry.line_count = 2;
265 entry.total_debits = amount;
266 entry.total_credits = amount;
267 entry.solving_method = SolvingMethod::MethodA;
268 entry.average_confidence = 1.0;
269
270 let debit_line = JournalLineItem::debit(from_idx, amount, 1);
271 let credit_line = JournalLineItem::credit(to_idx, amount, 2);
272
273 GeneratedEntry {
274 entry,
275 debit_lines: vec![debit_line],
276 credit_lines: vec![credit_line],
277 expected_flows: vec![(from_idx, to_idx, amount)],
278 }
279 }
280
281 fn select_coverage_flow(&mut self) -> (u16, u16, Decimal128) {
283 let account_count = self.coa.accounts.len() as u16;
284
285 let from_idx = if !self.unused_accounts.is_empty() {
287 let idx = self.rng.gen_range(0..self.unused_accounts.len());
288 self.unused_accounts.remove(idx)
289 } else {
290 self.rng.gen_range(0..account_count)
291 };
292
293 let to_idx = if !self.unused_accounts.is_empty() {
294 let idx = self.rng.gen_range(0..self.unused_accounts.len());
295 self.unused_accounts.remove(idx)
296 } else {
297 loop {
299 let idx = self.rng.gen_range(0..account_count);
300 if idx != from_idx {
301 break idx;
302 }
303 }
304 };
305
306 if self.unused_accounts.is_empty() {
308 self.unused_accounts = (0..account_count).collect();
309 }
310
311 let amount = self.generate_amount(100.0, 5000.0);
312 (from_idx, to_idx, amount)
313 }
314
315 fn generate_method_b(&mut self) -> GeneratedEntry {
317 let n = self.rng.gen_range(2..=4); let mut entry = self.create_entry_header();
319
320 let mut debit_lines = Vec::with_capacity(n);
321 let mut credit_lines = Vec::with_capacity(n);
322 let mut expected_flows = Vec::new();
323
324 let mut total = Decimal128::ZERO;
325
326 self.coverage_counter += 1;
328 let use_coverage = self.coverage_counter.is_multiple_of(3);
329
330 for i in 0..n {
331 let (from_idx, to_idx, amount) = if use_coverage && i == 0 {
332 self.select_coverage_flow()
333 } else {
334 let flow = self.select_flow();
335 let amount = self.generate_amount(
336 flow.amount_range.0 / n as f64,
337 flow.amount_range.1 / n as f64,
338 );
339 let from_idx = self
340 .account_indices
341 .get(&flow.from_code)
342 .copied()
343 .unwrap_or(0);
344 let to_idx = self
345 .account_indices
346 .get(&flow.to_code)
347 .copied()
348 .unwrap_or(1);
349 (from_idx, to_idx, amount)
350 };
351 total = total + amount;
352
353 debit_lines.push(JournalLineItem::debit(from_idx, amount, (i + 1) as u16));
354 credit_lines.push(JournalLineItem::credit(to_idx, amount, (n + i + 1) as u16));
355
356 expected_flows.push((from_idx, to_idx, amount));
357 }
358
359 entry.debit_line_count = n as u16;
360 entry.credit_line_count = n as u16;
361 entry.line_count = (n * 2) as u16;
362 entry.total_debits = total;
363 entry.total_credits = total;
364 entry.solving_method = SolvingMethod::MethodB;
365 entry.average_confidence = 1.0;
366
367 GeneratedEntry {
368 entry,
369 debit_lines,
370 credit_lines,
371 expected_flows,
372 }
373 }
374
375 fn generate_method_c(&mut self) -> GeneratedEntry {
377 let n_debits = self.rng.gen_range(1..=3);
378 let n_credits = self.rng.gen_range(2..=5);
379 let n_credits = if n_credits == n_debits {
381 n_credits + 1
382 } else {
383 n_credits
384 };
385
386 let mut entry = self.create_entry_header();
387 let mut debit_lines = Vec::with_capacity(n_debits);
388 let mut credit_lines = Vec::with_capacity(n_credits);
389 let mut expected_flows = Vec::new();
390
391 let total_amount = self.generate_amount(500.0, 10000.0);
393 let total_f64 = total_amount.to_f64();
394
395 self.coverage_counter += 1;
397 let use_coverage = self.coverage_counter.is_multiple_of(4);
398
399 let debit_amounts = self.split_amount(total_f64, n_debits);
401 for (i, &amt) in debit_amounts.iter().enumerate() {
402 let from_idx = if use_coverage && i == 0 {
403 let (idx, _, _) = self.select_coverage_flow();
404 idx
405 } else {
406 let flow = self.select_flow();
407 self.account_indices
408 .get(&flow.from_code)
409 .copied()
410 .unwrap_or(0)
411 };
412 debit_lines.push(JournalLineItem::debit(
413 from_idx,
414 Decimal128::from_f64(amt),
415 (i + 1) as u16,
416 ));
417 }
418
419 let credit_amounts = self.split_amount(total_f64, n_credits);
421 for (i, &amt) in credit_amounts.iter().enumerate() {
422 let to_idx = if use_coverage && i == 0 {
423 let (_, idx, _) = self.select_coverage_flow();
424 idx
425 } else {
426 let flow = self.select_flow();
427 self.account_indices
428 .get(&flow.to_code)
429 .copied()
430 .unwrap_or(1)
431 };
432 credit_lines.push(JournalLineItem::credit(
433 to_idx,
434 Decimal128::from_f64(amt),
435 (n_debits + i + 1) as u16,
436 ));
437 }
438
439 let from_idx = debit_lines.first().map(|l| l.account_index).unwrap_or(0);
442 let to_idx = credit_lines.first().map(|l| l.account_index).unwrap_or(1);
443 expected_flows.push((from_idx, to_idx, total_amount));
444
445 entry.debit_line_count = n_debits as u16;
446 entry.credit_line_count = n_credits as u16;
447 entry.line_count = (n_debits + n_credits) as u16;
448 entry.total_debits = total_amount;
449 entry.total_credits = total_amount;
450 entry.solving_method = SolvingMethod::MethodC;
451 entry.average_confidence = 0.85; GeneratedEntry {
454 entry,
455 debit_lines,
456 credit_lines,
457 expected_flows,
458 }
459 }
460
461 fn generate_method_d(&mut self) -> GeneratedEntry {
463 let mut generated = self.generate_method_b();
466 generated.entry.solving_method = SolvingMethod::MethodD;
467 generated.entry.flags.0 |= JournalEntryFlags::USES_HIGHER_AGGREGATE;
468 generated
469 }
470
471 fn generate_method_e(&mut self) -> GeneratedEntry {
473 let mut generated = self.generate_method_c();
475 generated.entry.solving_method = SolvingMethod::MethodE;
476 generated.entry.flags.0 |= JournalEntryFlags::HAS_DECOMPOSED_VALUES;
477 generated.entry.average_confidence = 0.5; generated
479 }
480
481 fn select_flow(&mut self) -> ExpectedFlow {
483 let r: f64 = self.rng.gen();
484 for (cumulative, flow) in &self.flow_cdf {
485 if r < *cumulative {
486 return flow.clone();
487 }
488 }
489 self.flow_cdf
491 .first()
492 .map(|(_, f)| f.clone())
493 .unwrap_or_else(|| ExpectedFlow::new("1100", "2100", 1.0, (100.0, 1000.0)))
494 }
495
496 fn generate_amount(&mut self, min: f64, max: f64) -> Decimal128 {
498 let amount = if self.config.benford_compliant {
499 let mean = ((min.ln() + max.ln()) / 2.0).exp();
501 let std_dev = (max / min).ln() / 4.0;
502 let dist = LogNormal::new(mean.ln(), std_dev)
503 .unwrap_or_else(|_| LogNormal::new(0.0, 1.0).unwrap());
504 let raw: f64 = self.rng.sample(dist);
505 raw.clamp(min, max)
506 } else {
507 self.rng.gen_range(min..max)
508 };
509
510 let scaled = amount * self.config.amount_scale;
512 Decimal128::from_f64((scaled * 100.0).round() / 100.0)
513 }
514
515 fn split_amount(&mut self, total: f64, parts: usize) -> Vec<f64> {
517 if parts == 0 {
518 return vec![];
519 }
520 if parts == 1 {
521 return vec![total];
522 }
523
524 let mut points: Vec<f64> = (0..parts - 1).map(|_| self.rng.gen::<f64>()).collect();
526 points.sort_by(|a, b| a.partial_cmp(b).unwrap());
527
528 let mut amounts = Vec::with_capacity(parts);
530 let mut prev = 0.0;
531 for point in points {
532 amounts.push((point - prev) * total);
533 prev = point;
534 }
535 amounts.push((1.0 - prev) * total);
536
537 amounts
539 .iter()
540 .map(|a| (a * 100.0).round() / 100.0)
541 .collect()
542 }
543
544 fn create_entry_header(&mut self) -> JournalEntry {
546 self.entry_counter += 1;
547
548 JournalEntry {
549 id: Uuid::new_v4(),
550 entity_id: Uuid::nil(), document_number_hash: self.entry_counter,
552 source_system_id: 1,
553 batch_id: (self.entry_counter / 1000) as u32,
554 posting_date: self.current_time,
555 line_count: 0,
556 debit_line_count: 0,
557 credit_line_count: 0,
558 first_line_index: 0,
559 total_debits: Decimal128::ZERO,
560 total_credits: Decimal128::ZERO,
561 solving_method: SolvingMethod::Pending,
562 average_confidence: 0.0,
563 flow_count: 0,
564 _pad: 0,
565 flags: JournalEntryFlags::new(),
566 _reserved: [0; 12],
567 }
568 }
569
570 fn advance_time(&mut self) {
572 let interval_ms = (1000.0 / self.config.transactions_per_second) as u64;
574 self.current_time.physical += interval_ms;
575 self.current_time.logical = 0;
576 }
577
578 pub fn stats(&self) -> GeneratorStats {
580 GeneratorStats {
581 entries_generated: self.entry_counter,
582 current_time: self.current_time,
583 }
584 }
585}
586
587#[derive(Debug, Clone)]
589pub struct GeneratedEntry {
590 pub entry: JournalEntry,
592 pub debit_lines: Vec<JournalLineItem>,
594 pub credit_lines: Vec<JournalLineItem>,
596 pub expected_flows: Vec<(u16, u16, Decimal128)>,
598}
599
600impl GeneratedEntry {
601 pub fn to_flows(&self) -> Vec<TransactionFlow> {
603 self.expected_flows
604 .iter()
605 .map(|&(from, to, amount)| {
606 TransactionFlow::with_provenance(
607 from,
608 to,
609 amount,
610 self.entry.id,
611 0,
612 0,
613 self.entry.posting_date,
614 self.entry.solving_method,
615 self.entry.average_confidence,
616 )
617 })
618 .collect()
619 }
620
621 pub fn total_amount(&self) -> Decimal128 {
623 self.entry.total_debits
624 }
625}
626
/// Progress snapshot returned by `TransactionGenerator::stats`.
#[derive(Debug, Clone)]
pub struct GeneratorStats {
    /// Number of entry headers the generator has created so far.
    pub entries_generated: u64,
    /// The generator's simulated clock when the snapshot was taken.
    pub current_time: HybridTimestamp,
}
635
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Retail-standard generator with a fixed seed and default settings.
    fn seeded_generator(seed: u64) -> TransactionGenerator {
        let config = GeneratorConfig {
            seed: Some(seed),
            ..Default::default()
        };
        TransactionGenerator::new(CompanyArchetype::retail_standard(), config)
    }

    /// Construction with the default (unseeded) configuration must not panic.
    #[test]
    fn test_generator_creation() {
        let _generator = TransactionGenerator::new(
            CompanyArchetype::retail_standard(),
            GeneratorConfig::default(),
        );
    }

    /// A batch has the requested size and every entry is positive and balanced.
    #[test]
    fn test_generate_batch() {
        let mut generator = seeded_generator(42);

        let batch = generator.generate_batch(100);
        assert_eq!(batch.len(), 100);

        for generated in batch.iter() {
            let debit_total: f64 = generated.total_amount().to_f64();
            assert!(debit_total > 0.0);
            assert!(generated.entry.is_balanced());
        }
    }

    /// Over 1000 entries the MethodA share stays near its configured ratio.
    #[test]
    fn test_method_distribution() {
        let mut generator = seeded_generator(42);
        let batch = generator.generate_batch(1000);

        let method_a_hits = batch
            .iter()
            .filter(|generated| matches!(generated.entry.solving_method, SolvingMethod::MethodA))
            .count();

        let method_a_ratio = method_a_hits as f64 / 1000.0;
        assert!(method_a_ratio > 0.50 && method_a_ratio < 0.70);
    }

    /// Default ratios validate; lowering one so the sum is off does not.
    #[test]
    fn test_config_validation() {
        let valid = GeneratorConfig::default();
        assert!(valid.validate().is_ok());

        let skewed = GeneratorConfig {
            method_a_ratio: 0.5,
            ..valid
        };
        assert!(skewed.validate().is_err());
    }

    /// 200 entries must touch at least 90% of the chart's accounts.
    #[test]
    fn test_full_account_coverage() {
        let archetype = CompanyArchetype::retail_standard();
        let total_accounts = ChartOfAccountsTemplate::for_archetype(&archetype)
            .accounts
            .len();

        let config = GeneratorConfig {
            seed: Some(123),
            ..Default::default()
        };
        let mut generator = TransactionGenerator::new(archetype, config);
        let batch = generator.generate_batch(200);

        let used_accounts: HashSet<u16> = batch
            .iter()
            .flat_map(|generated| generated.expected_flows.iter())
            .flat_map(|&(from, to, _)| [from, to])
            .collect();

        let coverage = used_accounts.len() as f64 / total_accounts as f64;
        assert!(
            coverage >= 0.9,
            "Expected at least 90% account coverage, got {:.1}% ({}/{} accounts)",
            coverage * 100.0,
            used_accounts.len(),
            total_accounts
        );
    }
}