1use super::{ChartOfAccountsTemplate, CompanyArchetype, ExpectedFlow};
7use crate::models::{
8 Decimal128, HybridTimestamp, JournalEntry, JournalEntryFlags, JournalLineItem, SolvingMethod,
9 TransactionFlow,
10};
11use rand::prelude::*;
12use rand_distr::LogNormal;
13use std::collections::HashMap;
14use uuid::Uuid;
15
/// Tunable parameters for synthetic journal-entry generation.
#[derive(Debug, Clone)]
pub struct GeneratorConfig {
    // ---- Throughput -------------------------------------------------------
    /// Simulated entry rate; the generator derives its per-entry clock step
    /// from `1000 / transactions_per_second` milliseconds.
    pub transactions_per_second: f64,
    /// Preferred batch size.
    /// NOTE(review): not read by the generator code next to this struct —
    /// presumably consumed by callers; confirm.
    pub batch_size: usize,

    // ---- Solving-method mix (checked by `validate` to sum to ~1.0) --------
    /// Share of entries generated as `SolvingMethod::MethodA`.
    pub method_a_ratio: f64,
    /// Share of entries generated as `SolvingMethod::MethodB`.
    pub method_b_ratio: f64,
    /// Share of entries generated as `SolvingMethod::MethodC`.
    pub method_c_ratio: f64,
    /// Share of entries generated as `SolvingMethod::MethodD`.
    pub method_d_ratio: f64,
    /// Share of entries generated as `SolvingMethod::MethodE`.
    pub method_e_ratio: f64,

    // ---- Amounts ----------------------------------------------------------
    /// When `true`, amounts are sampled from a log-normal distribution;
    /// when `false`, uniformly within the requested range.
    pub benford_compliant: bool,
    /// Multiplier applied to every sampled amount before rounding to cents.
    pub amount_scale: f64,

    // ---- Calendar shaping -------------------------------------------------
    // NOTE(review): the four fields below are not referenced by the generator
    // code next to this struct; their exact semantics should be confirmed
    // against whichever component consumes them.
    /// Start of business hours (presumably hour of day — confirm).
    pub business_hours_start: u8,
    /// End of business hours (presumably hour of day — confirm).
    pub business_hours_end: u8,
    /// Weekend activity ratio.
    pub weekend_ratio: f64,
    /// Month-end activity multiplier.
    pub month_end_multiplier: f64,

    // ---- Reproducibility --------------------------------------------------
    /// RNG seed; with `None` the generator draws a seed from the thread RNG.
    pub seed: Option<u64>,
}

impl Default for GeneratorConfig {
    /// Baseline configuration: 100 entries/s in batches of 100, log-normal
    /// amounts at scale 1.0, unseeded RNG.
    fn default() -> Self {
        GeneratorConfig {
            // Throughput
            transactions_per_second: 100.0,
            batch_size: 100,

            // Method mix — the five shares add up to 1.0.
            method_a_ratio: 0.6068,
            method_b_ratio: 0.1663,
            method_c_ratio: 0.11,
            method_d_ratio: 0.11,
            method_e_ratio: 0.0069,

            // Amounts
            benford_compliant: true,
            amount_scale: 1.0,

            // Calendar shaping
            business_hours_start: 8,
            business_hours_end: 18,
            weekend_ratio: 0.1,
            month_end_multiplier: 2.5,

            // Reproducibility
            seed: None,
        }
    }
}
77
78impl GeneratorConfig {
79 pub fn high_volume() -> Self {
81 Self {
82 transactions_per_second: 1000.0,
83 batch_size: 500,
84 ..Default::default()
85 }
86 }
87
88 pub fn educational() -> Self {
90 Self {
91 transactions_per_second: 5.0,
92 batch_size: 10,
93 ..Default::default()
94 }
95 }
96
97 pub fn validate(&self) -> crate::Result<()> {
99 let total = self.method_a_ratio
100 + self.method_b_ratio
101 + self.method_c_ratio
102 + self.method_d_ratio
103 + self.method_e_ratio;
104
105 if (total - 1.0).abs() > 0.01 {
106 return Err(crate::AccNetError::Validation(format!(
107 "method ratios must sum to 1.0, got {}",
108 total
109 )));
110 }
111 Ok(())
112 }
113}
114
115pub struct TransactionGenerator {
117 #[allow(dead_code)]
119 archetype: CompanyArchetype,
120 coa: ChartOfAccountsTemplate,
122 config: GeneratorConfig,
124 rng: StdRng,
126 account_indices: HashMap<String, u16>,
128 flow_cdf: Vec<(f64, ExpectedFlow)>,
130 current_time: HybridTimestamp,
132 entry_counter: u64,
134 #[allow(dead_code)]
136 node_id: u32,
137 unused_accounts: Vec<u16>,
139 coverage_counter: u32,
141}
142
143impl TransactionGenerator {
144 pub fn new(archetype: CompanyArchetype, config: GeneratorConfig) -> Self {
146 let seed = config.seed.unwrap_or_else(|| rand::thread_rng().gen());
147 let rng = StdRng::seed_from_u64(seed);
148
149 let coa = ChartOfAccountsTemplate::for_archetype(&archetype);
150
151 let mut account_indices = HashMap::new();
153 for (idx, account) in coa.accounts.iter().enumerate() {
154 account_indices.insert(account.code.clone(), idx as u16);
155 }
156
157 let total_freq: f64 = coa.expected_flows.iter().map(|f| f.frequency).sum();
159 let mut cumulative = 0.0;
160 let flow_cdf: Vec<(f64, ExpectedFlow)> = coa
161 .expected_flows
162 .iter()
163 .map(|f| {
164 cumulative += f.frequency / total_freq;
165 (cumulative, f.clone())
166 })
167 .collect();
168
169 let unused_accounts: Vec<u16> = (0..coa.accounts.len() as u16).collect();
171
172 Self {
173 archetype,
174 coa,
175 config,
176 rng,
177 account_indices,
178 flow_cdf,
179 current_time: HybridTimestamp::now(),
180 entry_counter: 0,
181 node_id: 1,
182 unused_accounts,
183 coverage_counter: 0,
184 }
185 }
186
187 pub fn generate_batch(&mut self, count: usize) -> Vec<GeneratedEntry> {
189 let mut entries = Vec::with_capacity(count);
190
191 for _ in 0..count {
192 let method = self.select_method();
194
195 let entry = match method {
197 SolvingMethod::MethodA => self.generate_method_a(),
198 SolvingMethod::MethodB => self.generate_method_b(),
199 SolvingMethod::MethodC => self.generate_method_c(),
200 SolvingMethod::MethodD => self.generate_method_d(),
201 SolvingMethod::MethodE => self.generate_method_e(),
202 SolvingMethod::Pending => self.generate_method_a(), };
204
205 entries.push(entry);
206
207 self.advance_time();
209 }
210
211 entries
212 }
213
214 fn select_method(&mut self) -> SolvingMethod {
216 let r: f64 = self.rng.gen();
217 let mut cumulative = 0.0;
218
219 cumulative += self.config.method_a_ratio;
220 if r < cumulative {
221 return SolvingMethod::MethodA;
222 }
223
224 cumulative += self.config.method_b_ratio;
225 if r < cumulative {
226 return SolvingMethod::MethodB;
227 }
228
229 cumulative += self.config.method_c_ratio;
230 if r < cumulative {
231 return SolvingMethod::MethodC;
232 }
233
234 cumulative += self.config.method_d_ratio;
235 if r < cumulative {
236 return SolvingMethod::MethodD;
237 }
238
239 SolvingMethod::MethodE
240 }
241
242 fn generate_method_a(&mut self) -> GeneratedEntry {
244 self.coverage_counter += 1;
246 let (from_idx, to_idx, amount) = if self.coverage_counter.is_multiple_of(5) {
247 self.select_coverage_flow()
248 } else {
249 let flow = self.select_flow();
250 let amount = self.generate_amount(flow.amount_range.0, flow.amount_range.1);
251 let from_idx = self
252 .account_indices
253 .get(&flow.from_code)
254 .copied()
255 .unwrap_or(0);
256 let to_idx = self
257 .account_indices
258 .get(&flow.to_code)
259 .copied()
260 .unwrap_or(1);
261 (from_idx, to_idx, amount)
262 };
263
264 let mut entry = self.create_entry_header();
265 entry.debit_line_count = 1;
266 entry.credit_line_count = 1;
267 entry.line_count = 2;
268 entry.total_debits = amount;
269 entry.total_credits = amount;
270 entry.solving_method = SolvingMethod::MethodA;
271 entry.average_confidence = 1.0;
272
273 let debit_line = JournalLineItem::debit(from_idx, amount, 1);
274 let credit_line = JournalLineItem::credit(to_idx, amount, 2);
275
276 GeneratedEntry {
277 entry,
278 debit_lines: vec![debit_line],
279 credit_lines: vec![credit_line],
280 expected_flows: vec![(from_idx, to_idx, amount)],
281 }
282 }
283
284 fn select_coverage_flow(&mut self) -> (u16, u16, Decimal128) {
286 let account_count = self.coa.accounts.len() as u16;
287
288 let from_idx = if !self.unused_accounts.is_empty() {
290 let idx = self.rng.gen_range(0..self.unused_accounts.len());
291 self.unused_accounts.remove(idx)
292 } else {
293 self.rng.gen_range(0..account_count)
294 };
295
296 let to_idx = if !self.unused_accounts.is_empty() {
297 let idx = self.rng.gen_range(0..self.unused_accounts.len());
298 self.unused_accounts.remove(idx)
299 } else {
300 loop {
302 let idx = self.rng.gen_range(0..account_count);
303 if idx != from_idx {
304 break idx;
305 }
306 }
307 };
308
309 if self.unused_accounts.is_empty() {
311 self.unused_accounts = (0..account_count).collect();
312 }
313
314 let amount = self.generate_amount(100.0, 5000.0);
315 (from_idx, to_idx, amount)
316 }
317
318 fn generate_method_b(&mut self) -> GeneratedEntry {
320 let n = self.rng.gen_range(2..=4); let mut entry = self.create_entry_header();
322
323 let mut debit_lines = Vec::with_capacity(n);
324 let mut credit_lines = Vec::with_capacity(n);
325 let mut expected_flows = Vec::new();
326
327 let mut total = Decimal128::ZERO;
328
329 self.coverage_counter += 1;
331 let use_coverage = self.coverage_counter.is_multiple_of(3);
332
333 for i in 0..n {
334 let (from_idx, to_idx, amount) = if use_coverage && i == 0 {
335 self.select_coverage_flow()
336 } else {
337 let flow = self.select_flow();
338 let amount = self.generate_amount(
339 flow.amount_range.0 / n as f64,
340 flow.amount_range.1 / n as f64,
341 );
342 let from_idx = self
343 .account_indices
344 .get(&flow.from_code)
345 .copied()
346 .unwrap_or(0);
347 let to_idx = self
348 .account_indices
349 .get(&flow.to_code)
350 .copied()
351 .unwrap_or(1);
352 (from_idx, to_idx, amount)
353 };
354 total = total + amount;
355
356 debit_lines.push(JournalLineItem::debit(from_idx, amount, (i + 1) as u16));
357 credit_lines.push(JournalLineItem::credit(to_idx, amount, (n + i + 1) as u16));
358
359 expected_flows.push((from_idx, to_idx, amount));
360 }
361
362 entry.debit_line_count = n as u16;
363 entry.credit_line_count = n as u16;
364 entry.line_count = (n * 2) as u16;
365 entry.total_debits = total;
366 entry.total_credits = total;
367 entry.solving_method = SolvingMethod::MethodB;
368 entry.average_confidence = 1.0;
369
370 GeneratedEntry {
371 entry,
372 debit_lines,
373 credit_lines,
374 expected_flows,
375 }
376 }
377
378 fn generate_method_c(&mut self) -> GeneratedEntry {
380 let n_debits = self.rng.gen_range(1..=3);
381 let n_credits = self.rng.gen_range(2..=5);
382 let n_credits = if n_credits == n_debits {
384 n_credits + 1
385 } else {
386 n_credits
387 };
388
389 let mut entry = self.create_entry_header();
390 let mut debit_lines = Vec::with_capacity(n_debits);
391 let mut credit_lines = Vec::with_capacity(n_credits);
392 let mut expected_flows = Vec::new();
393
394 let total_amount = self.generate_amount(500.0, 10000.0);
396 let total_f64 = total_amount.to_f64();
397
398 self.coverage_counter += 1;
400 let use_coverage = self.coverage_counter.is_multiple_of(4);
401
402 let debit_amounts = self.split_amount(total_f64, n_debits);
404 for (i, &amt) in debit_amounts.iter().enumerate() {
405 let from_idx = if use_coverage && i == 0 {
406 let (idx, _, _) = self.select_coverage_flow();
407 idx
408 } else {
409 let flow = self.select_flow();
410 self.account_indices
411 .get(&flow.from_code)
412 .copied()
413 .unwrap_or(0)
414 };
415 debit_lines.push(JournalLineItem::debit(
416 from_idx,
417 Decimal128::from_f64(amt),
418 (i + 1) as u16,
419 ));
420 }
421
422 let credit_amounts = self.split_amount(total_f64, n_credits);
424 for (i, &amt) in credit_amounts.iter().enumerate() {
425 let to_idx = if use_coverage && i == 0 {
426 let (_, idx, _) = self.select_coverage_flow();
427 idx
428 } else {
429 let flow = self.select_flow();
430 self.account_indices
431 .get(&flow.to_code)
432 .copied()
433 .unwrap_or(1)
434 };
435 credit_lines.push(JournalLineItem::credit(
436 to_idx,
437 Decimal128::from_f64(amt),
438 (n_debits + i + 1) as u16,
439 ));
440 }
441
442 let from_idx = debit_lines.first().map(|l| l.account_index).unwrap_or(0);
445 let to_idx = credit_lines.first().map(|l| l.account_index).unwrap_or(1);
446 expected_flows.push((from_idx, to_idx, total_amount));
447
448 entry.debit_line_count = n_debits as u16;
449 entry.credit_line_count = n_credits as u16;
450 entry.line_count = (n_debits + n_credits) as u16;
451 entry.total_debits = total_amount;
452 entry.total_credits = total_amount;
453 entry.solving_method = SolvingMethod::MethodC;
454 entry.average_confidence = 0.85; GeneratedEntry {
457 entry,
458 debit_lines,
459 credit_lines,
460 expected_flows,
461 }
462 }
463
464 fn generate_method_d(&mut self) -> GeneratedEntry {
466 let mut generated = self.generate_method_b();
469 generated.entry.solving_method = SolvingMethod::MethodD;
470 generated.entry.flags.0 |= JournalEntryFlags::USES_HIGHER_AGGREGATE;
471 generated
472 }
473
474 fn generate_method_e(&mut self) -> GeneratedEntry {
476 let mut generated = self.generate_method_c();
478 generated.entry.solving_method = SolvingMethod::MethodE;
479 generated.entry.flags.0 |= JournalEntryFlags::HAS_DECOMPOSED_VALUES;
480 generated.entry.average_confidence = 0.5; generated
482 }
483
484 fn select_flow(&mut self) -> ExpectedFlow {
486 let r: f64 = self.rng.gen();
487 for (cumulative, flow) in &self.flow_cdf {
488 if r < *cumulative {
489 return flow.clone();
490 }
491 }
492 self.flow_cdf
494 .first()
495 .map(|(_, f)| f.clone())
496 .unwrap_or_else(|| ExpectedFlow::new("1100", "2100", 1.0, (100.0, 1000.0)))
497 }
498
499 fn generate_amount(&mut self, min: f64, max: f64) -> Decimal128 {
501 let amount = if self.config.benford_compliant {
502 let mean = ((min.ln() + max.ln()) / 2.0).exp();
504 let std_dev = (max / min).ln() / 4.0;
505 let dist = LogNormal::new(mean.ln(), std_dev).unwrap_or_else(|_| {
506 LogNormal::new(0.0, 1.0).expect("fallback LogNormal(0,1) params are valid")
507 });
508 let raw: f64 = self.rng.sample(dist);
509 raw.clamp(min, max)
510 } else {
511 self.rng.gen_range(min..max)
512 };
513
514 let scaled = amount * self.config.amount_scale;
516 Decimal128::from_f64((scaled * 100.0).round() / 100.0)
517 }
518
519 fn split_amount(&mut self, total: f64, parts: usize) -> Vec<f64> {
521 if parts == 0 {
522 return vec![];
523 }
524 if parts == 1 {
525 return vec![total];
526 }
527
528 let mut points: Vec<f64> = (0..parts - 1).map(|_| self.rng.gen::<f64>()).collect();
530 points.sort_by(|a, b| a.partial_cmp(b).expect("split points should not be NaN"));
531
532 let mut amounts = Vec::with_capacity(parts);
534 let mut prev = 0.0;
535 for point in points {
536 amounts.push((point - prev) * total);
537 prev = point;
538 }
539 amounts.push((1.0 - prev) * total);
540
541 amounts
543 .iter()
544 .map(|a| (a * 100.0).round() / 100.0)
545 .collect()
546 }
547
548 fn create_entry_header(&mut self) -> JournalEntry {
550 self.entry_counter += 1;
551
552 JournalEntry {
553 id: Uuid::new_v4(),
554 entity_id: Uuid::nil(), document_number_hash: self.entry_counter,
556 source_system_id: 1,
557 batch_id: (self.entry_counter / 1000) as u32,
558 posting_date: self.current_time,
559 line_count: 0,
560 debit_line_count: 0,
561 credit_line_count: 0,
562 first_line_index: 0,
563 total_debits: Decimal128::ZERO,
564 total_credits: Decimal128::ZERO,
565 solving_method: SolvingMethod::Pending,
566 average_confidence: 0.0,
567 flow_count: 0,
568 _pad: 0,
569 flags: JournalEntryFlags::new(),
570 _reserved: [0; 12],
571 }
572 }
573
574 fn advance_time(&mut self) {
576 let interval_ms = (1000.0 / self.config.transactions_per_second) as u64;
578 self.current_time.physical += interval_ms;
579 self.current_time.logical = 0;
580 }
581
582 pub fn stats(&self) -> GeneratorStats {
584 GeneratorStats {
585 entries_generated: self.entry_counter,
586 current_time: self.current_time,
587 }
588 }
589}
590
591#[derive(Debug, Clone)]
593pub struct GeneratedEntry {
594 pub entry: JournalEntry,
596 pub debit_lines: Vec<JournalLineItem>,
598 pub credit_lines: Vec<JournalLineItem>,
600 pub expected_flows: Vec<(u16, u16, Decimal128)>,
602}
603
604impl GeneratedEntry {
605 pub fn to_flows(&self) -> Vec<TransactionFlow> {
607 self.expected_flows
608 .iter()
609 .map(|&(from, to, amount)| {
610 TransactionFlow::with_provenance(
611 from,
612 to,
613 amount,
614 self.entry.id,
615 0,
616 0,
617 self.entry.posting_date,
618 self.entry.solving_method,
619 self.entry.average_confidence,
620 )
621 })
622 .collect()
623 }
624
625 pub fn total_amount(&self) -> Decimal128 {
627 self.entry.total_debits
628 }
629}
630
631#[derive(Debug, Clone)]
633pub struct GeneratorStats {
634 pub entries_generated: u64,
636 pub current_time: HybridTimestamp,
638}
639
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a retail-standard generator with default settings and a fixed seed.
    fn seeded_generator(seed: u64) -> TransactionGenerator {
        let config = GeneratorConfig {
            seed: Some(seed),
            ..Default::default()
        };
        TransactionGenerator::new(CompanyArchetype::retail_standard(), config)
    }

    /// Construction with the default configuration must not panic.
    #[test]
    fn test_generator_creation() {
        let _generator = TransactionGenerator::new(
            CompanyArchetype::retail_standard(),
            GeneratorConfig::default(),
        );
    }

    /// A batch has the requested size and every entry is positive and balanced.
    #[test]
    fn test_generate_batch() {
        let mut generator = seeded_generator(42);

        let batch = generator.generate_batch(100);
        assert_eq!(batch.len(), 100);

        for generated in &batch {
            let debit_total: f64 = generated.total_amount().to_f64();
            assert!(debit_total > 0.0);
            assert!(generated.entry.is_balanced());
        }
    }

    /// Method A should make up roughly its configured share of a large batch.
    #[test]
    fn test_method_distribution() {
        let mut generator = seeded_generator(42);

        let batch = generator.generate_batch(1000);

        let method_a_count = batch
            .iter()
            .filter(|generated| matches!(generated.entry.solving_method, SolvingMethod::MethodA))
            .count();

        let method_a_ratio = method_a_count as f64 / 1000.0;
        assert!(method_a_ratio > 0.50 && method_a_ratio < 0.70);
    }

    /// Defaults validate; ratios that no longer sum to 1.0 do not.
    #[test]
    fn test_config_validation() {
        let mut config = GeneratorConfig::default();
        assert!(config.validate().is_ok());

        config.method_a_ratio = 0.5;
        assert!(config.validate().is_err());
    }

    /// After 200 entries at least 90% of the accounts appear in some expected flow.
    #[test]
    fn test_full_account_coverage() {
        use std::collections::HashSet;

        let archetype = CompanyArchetype::retail_standard();
        let total_accounts = ChartOfAccountsTemplate::for_archetype(&archetype)
            .accounts
            .len();

        let config = GeneratorConfig {
            seed: Some(123),
            ..Default::default()
        };
        let mut generator = TransactionGenerator::new(archetype, config);

        let batch = generator.generate_batch(200);

        let used_accounts: HashSet<u16> = batch
            .iter()
            .flat_map(|generated| generated.expected_flows.iter())
            .flat_map(|&(from, to, _)| [from, to])
            .collect();

        let coverage = used_accounts.len() as f64 / total_accounts as f64;
        assert!(
            coverage >= 0.9,
            "Expected at least 90% account coverage, got {:.1}% ({}/{} accounts)",
            coverage * 100.0,
            used_accounts.len(),
            total_accounts
        );
    }
}