datasynth-config 2.2.0

Configuration schema, validation, and presets for synthetic data generation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# Default Causal DAG for Financial Process Simulation
# Covers the primary causal relationships between macro conditions,
# operational parameters, internal controls, audit-risk measures, and
# financial outcomes.

# Node catalogue. Every entry below carries the same seven keys:
#   id                - identifier that `from` / `to` under `edges` refer to
#   label             - human-readable name
#   category          - macro, operational, control, outcome, financial or
#                       audit (the six values used in this file)
#   baseline_value    - number; lies within `bounds` for every node here
#   bounds            - two-element [low, high] list
#   interventionable  - boolean; presumably whether a scenario may set the
#                       node directly - TODO confirm against the schema
#   config_bindings   - list of dotted config paths associated with the node
#                       (`[]` when there are none)
nodes:
  # ── Macro / External ───────────────────────
  # Root drivers: no entry under `edges` names any of these four as its `to`.
  # All four are interventionable; only inflation_rate has a config binding.
  - id: gdp_growth
    label: "GDP Growth Rate"
    category: macro
    baseline_value: 0.025
    bounds: [-0.10, 0.15]
    interventionable: true
    config_bindings: []

  - id: interest_rate
    label: "Interest Rate"
    category: macro
    baseline_value: 0.05
    bounds: [0.0, 0.20]
    interventionable: true
    config_bindings: []

  # The only macro node whose lower bound is negative besides gdp_growth,
  # and the only one with a config binding.
  - id: inflation_rate
    label: "Inflation Rate"
    category: macro
    baseline_value: 0.02
    bounds: [-0.02, 0.25]
    interventionable: true
    config_bindings:
      - distributions.drift.economic_cycle.amplitude

  # Lower bound is 0.02, not 0.0, unlike the other rate-style macro nodes.
  - id: unemployment_rate
    label: "Unemployment Rate"
    category: macro
    baseline_value: 0.04
    bounds: [0.02, 0.15]
    interventionable: true
    config_bindings: []

  # ── Operational ────────────────────────────
  # Five operational nodes. Each one is the `to` of at least one edge, and
  # all except staffing_pressure are also interventionable.
  # NOTE(review): how an intervention and an incoming edge on the same node
  # are reconciled is not visible in this file - confirm with the consumer.
  - id: transaction_volume
    label: "Transaction Volume Multiplier"
    category: operational
    baseline_value: 1.0
    bounds: [0.2, 3.0]
    interventionable: true
    config_bindings:
      - transactions.volume_multiplier

  # Not interventionable: fed by the unemployment_rate and
  # transaction_volume edges; feeds control_effectiveness and processing_lag.
  - id: staffing_pressure
    label: "Staffing Pressure"
    category: operational
    baseline_value: 1.0
    bounds: [0.5, 3.0]
    interventionable: false
    config_bindings: []

  # Measured in days per the label. No edge uses this node as `from`.
  - id: processing_lag
    label: "Processing Lag Days"
    category: operational
    baseline_value: 2.0
    bounds: [0.5, 30.0]
    interventionable: true
    config_bindings:
      - temporal_patterns.processing_lags.invoice_receipt_lag.mu

  # No edge uses this node as `from`.
  # NOTE(review): a default *rate* is bound to a vendor *concentration*
  # setting - confirm this is the intended config path.
  - id: vendor_default_rate
    label: "Vendor Default Rate"
    category: operational
    baseline_value: 0.02
    bounds: [0.0, 0.30]
    interventionable: true
    config_bindings:
      - vendor_network.dependencies.max_single_vendor_concentration

  - id: customer_churn_rate
    label: "Customer Churn Rate"
    category: operational
    baseline_value: 0.08
    bounds: [0.0, 0.40]
    interventionable: true
    config_bindings:
      - customer_segmentation.lifecycle.churned_rate

  # ── Controls ───────────────────────────────
  # Both nodes are "higher is better" quantities with baselines near 1.0,
  # yet both are bound to config paths named as "higher is worse" rates
  # (exception_rate, sod_violation_rate).
  # NOTE(review): presumably the binding layer inverts the value
  # (e.g. 0.99 compliance <-> 0.01 violation rate) - confirm.

  # Interventionable, and also the `to` of the staffing_pressure edge.
  - id: control_effectiveness
    label: "Control Effectiveness"
    category: control
    baseline_value: 0.95
    bounds: [0.0, 1.0]
    interventionable: true
    config_bindings:
      - internal_controls.exception_rate

  # No incoming edges; its only outgoing edge targets fraud_detection_rate.
  - id: sod_compliance
    label: "SOD Compliance Rate"
    category: control
    baseline_value: 0.99
    bounds: [0.5, 1.0]
    interventionable: true
    config_bindings:
      - internal_controls.sod_violation_rate

  # ── Financial Outcomes ─────────────────────
  # Mixed section: error_rate, fraud_detection_rate, bad_debt_rate and
  # misstatement_risk use `category: outcome`; purchase_price_index and
  # revenue_growth use `category: financial`. Only purchase_price_index is
  # interventionable.

  # Not interventionable, but still carries a config binding.
  - id: error_rate
    label: "Transaction Error Rate"
    category: outcome
    baseline_value: 0.02
    bounds: [0.0, 0.30]
    interventionable: false
    config_bindings:
      - anomaly_injection.base_rate

  # Fed by two edges: control_effectiveness and sod_compliance.
  - id: fraud_detection_rate
    label: "Fraud Detection Rate"
    category: outcome
    baseline_value: 0.85
    bounds: [0.0, 1.0]
    interventionable: false
    config_bindings: []

  # Fed by interest_rate; feeds revenue_growth and ecl_provision_rate.
  - id: bad_debt_rate
    label: "Bad Debt Rate"
    category: outcome
    baseline_value: 0.01
    bounds: [0.0, 0.20]
    interventionable: false
    config_bindings: []

  # Fed by inflation_rate; no edge uses this node as `from`.
  - id: purchase_price_index
    label: "Purchase Price Index"
    category: financial
    baseline_value: 1.0
    bounds: [0.5, 3.0]
    interventionable: true
    config_bindings:
      - distributions.amounts.components[0].mu

  # Fed by customer_churn_rate and bad_debt_rate; feeds gross_margin.
  - id: revenue_growth
    label: "Revenue Growth"
    category: financial
    baseline_value: 0.05
    bounds: [-0.40, 0.50]
    interventionable: false
    config_bindings:
      - distributions.drift.amount_mean_drift

  # Fed by three edges (error_rate, fraud_detection_rate,
  # sample_size_factor); feeds opinion_severity.
  - id: misstatement_risk
    label: "Material Misstatement Risk"
    category: outcome
    baseline_value: 0.02
    bounds: [0.0, 1.0]
    interventionable: false
    config_bindings: []

  # ── Audit methodology chain ───────────────────────────────────
  # Four `category: audit` nodes plus opinion_severity, which is tagged
  # `outcome`. Only materiality_threshold has a config binding.

  # NOTE(review): no entry under `edges` references this node as `from` or
  # `to`, so as far as this file shows it is disconnected from the graph and
  # acts only through its config binding - confirm that is intended.
  - id: materiality_threshold
    label: "Materiality Threshold"
    category: audit
    baseline_value: 1.0
    bounds: [0.1, 5.0]
    interventionable: true
    config_bindings:
      - audit_standards.sox.materiality_threshold

  # Root node: no incoming edges; its only outgoing edge is to combined_risk.
  - id: inherent_risk
    label: "Inherent Risk Level"
    category: audit
    baseline_value: 0.5
    bounds: [0.0, 1.0]
    interventionable: true
    config_bindings: []

  # Fed by inherent_risk and control_effectiveness; feeds sample_size_factor.
  - id: combined_risk
    label: "Combined Audit Risk"
    category: audit
    baseline_value: 0.3
    bounds: [0.0, 1.0]
    interventionable: false
    config_bindings: []

  # Fed by combined_risk; feeds misstatement_risk.
  - id: sample_size_factor
    label: "Audit Sample Size Factor"
    category: audit
    baseline_value: 1.0
    bounds: [0.5, 3.0]
    interventionable: false
    config_bindings: []

  # Baseline sits at the lower bound. Fed by misstatement_risk and
  # going_concern_risk; no edge uses this node as `from`.
  # NOTE(review): the meaning of the 0.0-3.0 scale is not defined in this
  # file - confirm against the consumer.
  - id: opinion_severity
    label: "Audit Opinion Severity"
    category: outcome
    baseline_value: 0.0
    bounds: [0.0, 3.0]
    interventionable: false
    config_bindings: []

  # ── Financial chain ───────────────────────────────────────────
  # None of the five nodes in this section has a config binding, and only
  # tax_rate is interventionable. going_concern_risk is tagged `outcome`;
  # the rest are `financial`.

  # Fed by gdp_growth and revenue_growth; no edge uses this node as `from`.
  # The lower bound is negative, so a negative margin is representable.
  - id: gross_margin
    label: "Gross Margin"
    category: financial
    baseline_value: 0.35
    bounds: [-0.5, 0.9]
    interventionable: false
    config_bindings: []

  # Fed by interest_rate; feeds going_concern_risk.
  - id: debt_ratio
    label: "Debt-to-Equity Ratio"
    category: financial
    baseline_value: 0.5
    bounds: [0.0, 10.0]
    interventionable: false
    config_bindings: []

  # Fed by bad_debt_rate; no edge uses this node as `from`.
  - id: ecl_provision_rate
    label: "ECL Provision Rate"
    category: financial
    baseline_value: 0.02
    bounds: [0.0, 0.5]
    interventionable: false
    config_bindings: []

  # Fed by debt_ratio; feeds opinion_severity.
  - id: going_concern_risk
    label: "Going Concern Risk"
    category: outcome
    baseline_value: 0.05
    bounds: [0.0, 1.0]
    interventionable: false
    config_bindings: []

  # NOTE(review): no entry under `edges` references this node and its
  # config_bindings list is empty, so as far as this file shows an
  # intervention on tax_rate changes nothing else - confirm whether an edge
  # or binding is missing.
  - id: tax_rate
    label: "Effective Tax Rate"
    category: financial
    baseline_value: 0.21
    bounds: [0.0, 0.5]
    interventionable: true
    config_bindings: []

# Directed edges between entries of the `nodes` list. Every edge carries:
#   from / to   - node ids
#   transfer    - flow mapping holding `type` plus that type's parameters.
#                 Types and parameters used in this file:
#                   linear                       coefficient, intercept
#                   logistic, inverse_logistic   capacity, midpoint, steepness
#                   threshold                    threshold, magnitude,
#                                                saturation
#   lag_months  - integer, 0 to 3 in this file
#   strength    - number, 0.2 to 0.9 in this file
#   mechanism   - free-text rationale for the edge
# NOTE(review): the exact transfer formulas, and how several edges into the
# same node are combined, are defined by the consumer and are not visible in
# this file.
edges:
  # ── Macro → Operational ────────────────────
  # Every `from` here is a macro node. The first four targets are
  # operational; purchase_price_index is `financial` and bad_debt_rate is
  # `outcome`, so the heading understates the section's reach.
  - from: gdp_growth
    to: transaction_volume
    transfer: { type: linear, coefficient: 0.8, intercept: 1.0 }
    lag_months: 1
    strength: 0.7
    mechanism: "Economic growth drives transaction volume"

  # Negative coefficient: churn moves opposite to growth. The intercept
  # equals customer_churn_rate's baseline_value (0.08).
  - from: gdp_growth
    to: customer_churn_rate
    transfer: { type: linear, coefficient: -0.5, intercept: 0.08 }
    lag_months: 2
    strength: 0.6
    mechanism: "Growth reduces churn; contraction increases it"

  # Midpoint is negative (-0.02), i.e. placed at a contraction in growth.
  - from: gdp_growth
    to: vendor_default_rate
    transfer: { type: inverse_logistic, capacity: 0.20, midpoint: -0.02, steepness: 15.0 }
    lag_months: 3
    strength: 0.7
    mechanism: "Recession drives vendor defaults via logistic curve"

  # staffing_pressure is also targeted by the transaction_volume edge.
  - from: unemployment_rate
    to: staffing_pressure
    transfer: { type: inverse_logistic, capacity: 2.5, midpoint: 0.06, steepness: 20.0 }
    lag_months: 1
    strength: 0.5
    mechanism: "Low unemployment = hard to hire = staffing pressure"

  - from: inflation_rate
    to: purchase_price_index
    transfer: { type: linear, coefficient: 1.0, intercept: 1.0 }
    lag_months: 1
    strength: 0.8
    mechanism: "Inflation directly impacts purchase prices"

  # Midpoint 0.08 lies above interest_rate's baseline_value of 0.05.
  - from: interest_rate
    to: bad_debt_rate
    transfer: { type: logistic, capacity: 0.15, midpoint: 0.08, steepness: 30.0 }
    lag_months: 3
    strength: 0.6
    mechanism: "Higher rates increase default probability"

  # ── Operational → Controls ─────────────────
  # Only the first edge ends at a control node; the other two stay within
  # the operational category. All three have lag_months: 0.
  - from: staffing_pressure
    to: control_effectiveness
    transfer: { type: inverse_logistic, capacity: 0.95, midpoint: 1.8, steepness: 5.0 }
    lag_months: 0
    strength: 0.8
    mechanism: "Understaffing degrades control execution"

  # The only `threshold` transfer in the file. threshold: 1.3 is the "130%"
  # named in the mechanism text.
  - from: transaction_volume
    to: staffing_pressure
    transfer: { type: threshold, threshold: 1.3, magnitude: 0.5, saturation: 2.5 }
    lag_months: 0
    strength: 0.6
    mechanism: "Volume above 130% of capacity creates pressure"

  # NOTE(review): read as y = 3.0 * x + 0.0, staffing_pressure's baseline of
  # 1.0 maps to 3.0, whereas processing_lag's baseline_value is 2.0 - confirm
  # whether transfers act on absolute values or on deviations from baseline.
  - from: staffing_pressure
    to: processing_lag
    transfer: { type: linear, coefficient: 3.0, intercept: 0.0 }
    lag_months: 0
    strength: 0.5
    mechanism: "Pressure increases processing delays"

  # ── Controls → Outcomes ────────────────────
  # Every `from` is a control node and every `to` an outcome node. All three
  # edges have lag_months: 0. fraud_detection_rate receives two of them.
  - from: control_effectiveness
    to: error_rate
    transfer: { type: inverse_logistic, capacity: 0.20, midpoint: 0.70, steepness: 8.0 }
    lag_months: 0
    strength: 0.9
    mechanism: "Weaker controls → more errors slip through"

  - from: control_effectiveness
    to: fraud_detection_rate
    transfer: { type: logistic, capacity: 0.95, midpoint: 0.60, steepness: 6.0 }
    lag_months: 0
    strength: 0.8
    mechanism: "Effective controls detect more fraud"

  # Read as y = 0.3 * x + 0.55, sod_compliance's baseline of 0.99 gives
  # 0.847, close to fraud_detection_rate's baseline_value of 0.85.
  - from: sod_compliance
    to: fraud_detection_rate
    transfer: { type: linear, coefficient: 0.3, intercept: 0.55 }
    lag_months: 0
    strength: 0.4
    mechanism: "SOD compliance is a secondary fraud deterrent"

  # ── Outcomes → Financial ───────────────────
  # The heading fits only bad_debt_rate → revenue_growth exactly:
  # customer_churn_rate is an operational node, and both misstatement_risk
  # edges run outcome → outcome. The first two edges share the target
  # revenue_growth; the last two share the target misstatement_risk.

  # The intercept equals revenue_growth's baseline_value (0.05).
  - from: customer_churn_rate
    to: revenue_growth
    transfer: { type: linear, coefficient: -2.0, intercept: 0.05 }
    lag_months: 1
    strength: 0.7
    mechanism: "Churn directly reduces revenue"

  - from: bad_debt_rate
    to: revenue_growth
    transfer: { type: linear, coefficient: -0.5, intercept: 0.0 }
    lag_months: 1
    strength: 0.3
    mechanism: "Bad debt reduces effective revenue"

  # Midpoint 0.08 is four times error_rate's baseline_value of 0.02.
  - from: error_rate
    to: misstatement_risk
    transfer: { type: logistic, capacity: 0.80, midpoint: 0.08, steepness: 25.0 }
    lag_months: 0
    strength: 0.9
    mechanism: "Errors accumulate into misstatement risk"

  # Negative coefficient: misstatement risk falls as detection rises.
  # misstatement_risk also has a third incoming edge, from
  # sample_size_factor.
  - from: fraud_detection_rate
    to: misstatement_risk
    transfer: { type: linear, coefficient: -0.3, intercept: 0.30 }
    lag_months: 0
    strength: 0.5
    mechanism: "Better detection reduces undetected fraud → lower misstatement"

  # ── Audit methodology chain ─────────────────
  # All six edges are linear with intercept: 0.0 and lag_months: 0, and in
  # each one `strength` equals the absolute value of `coefficient`.
  # NOTE(review): that uniform pattern differs from the hand-tuned edges
  # earlier in the list and may indicate placeholder values - confirm.
  # materiality_threshold, although defined as an audit node, is not used by
  # any edge.
  - from: inherent_risk
    to: combined_risk
    transfer: { type: linear, coefficient: 0.5, intercept: 0.0 }
    lag_months: 0
    strength: 0.5
    mechanism: "Higher inherent risk increases combined audit risk"

  # Same magnitude as the inherent_risk edge, opposite sign.
  - from: control_effectiveness
    to: combined_risk
    transfer: { type: linear, coefficient: -0.5, intercept: 0.0 }
    lag_months: 0
    strength: 0.5
    mechanism: "Stronger controls reduce combined audit risk"

  - from: combined_risk
    to: sample_size_factor
    transfer: { type: linear, coefficient: 0.8, intercept: 0.0 }
    lag_months: 0
    strength: 0.8
    mechanism: "Higher combined risk requires larger audit samples"

  # Third edge into misstatement_risk, alongside error_rate and
  # fraud_detection_rate.
  - from: sample_size_factor
    to: misstatement_risk
    transfer: { type: linear, coefficient: -0.3, intercept: 0.0 }
    lag_months: 0
    strength: 0.3
    mechanism: "Larger samples reduce undetected misstatements"

  # opinion_severity has two incoming edges: this one and going_concern_risk.
  - from: misstatement_risk
    to: opinion_severity
    transfer: { type: linear, coefficient: 0.7, intercept: 0.0 }
    lag_months: 0
    strength: 0.7
    mechanism: "Material misstatement risk drives opinion qualification"

  - from: going_concern_risk
    to: opinion_severity
    transfer: { type: linear, coefficient: 0.5, intercept: 0.0 }
    lag_months: 0
    strength: 0.5
    mechanism: "Going concern doubt escalates audit opinion severity"

  # ── Financial chain ──────────────────────────
  # All five edges are linear with intercept: 0.0, and in each one
  # `strength` equals `coefficient`.
  # NOTE(review): as with the audit chain, that uniformity may indicate
  # placeholder values - confirm.
  # tax_rate, although defined as a financial node, is not used by any edge.
  - from: bad_debt_rate
    to: ecl_provision_rate
    transfer: { type: linear, coefficient: 0.6, intercept: 0.0 }
    lag_months: 0
    strength: 0.6
    mechanism: "Bad debt experience drives ECL provisioning"

  # Same 3-month lag as the interest_rate → bad_debt_rate edge.
  - from: interest_rate
    to: debt_ratio
    transfer: { type: linear, coefficient: 0.3, intercept: 0.0 }
    lag_months: 3
    strength: 0.3
    mechanism: "Higher rates increase effective debt burden"

  - from: debt_ratio
    to: going_concern_risk
    transfer: { type: linear, coefficient: 0.4, intercept: 0.0 }
    lag_months: 0
    strength: 0.4
    mechanism: "High leverage increases going concern risk"

  # gross_margin has two incoming edges: this one (1-month lag) and
  # revenue_growth (no lag). This edge has the lowest strength in the file.
  - from: gdp_growth
    to: gross_margin
    transfer: { type: linear, coefficient: 0.2, intercept: 0.0 }
    lag_months: 1
    strength: 0.2
    mechanism: "Economic growth supports margin expansion"

  - from: revenue_growth
    to: gross_margin
    transfer: { type: linear, coefficient: 0.3, intercept: 0.0 }
    lag_months: 0
    strength: 0.3
    mechanism: "Revenue growth improves operating leverage and margins"