Skip to main content

datasynth_generators/
legal_document_generator.rs

//! Legal document generator for audit engagements.
//!
//! Generates realistic legal documents (engagement letters, management
//! representation letters, legal opinions, regulatory filings, and board
//! resolutions) that support GAM audit procedures.
6
7use chrono::NaiveDate;
8use datasynth_core::models::LegalDocument;
9use datasynth_core::utils::seeded_rng;
10use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13
// ---------------------------------------------------------------------------
// Template pools
// ---------------------------------------------------------------------------

/// Key-term templates sampled for `engagement_letter` documents.
const ENGAGEMENT_LETTER_TERMS: &[&str] = &[
    "Scope of audit procedures",
    "Management responsibilities for financial statements",
    "Auditor responsibilities under ISA",
    "Expected form and content of audit report",
    "Fee arrangements and billing schedule",
    "Access to records and personnel",
    "Confidentiality and data protection obligations",
    "Communication of internal control deficiencies",
    "Anti-money laundering compliance requirements",
    "Independence requirements and safeguards",
];

/// Key-term templates sampled for `management_rep` (management
/// representation letter) documents.
const MANAGEMENT_REP_TERMS: &[&str] = &[
    "Financial statements prepared in accordance with applicable framework",
    "All transactions recorded and reflected in financial statements",
    "Internal controls designed and maintained for reliable reporting",
    "All known fraud or suspected fraud communicated to auditor",
    "All related party relationships and transactions disclosed",
    "No material subsequent events requiring adjustment or disclosure",
    "Going concern assessment provided to auditor",
    "All known litigation and claims disclosed",
    "Compliance with laws and regulations confirmed",
    "Uncorrected misstatements assessed as immaterial",
];

/// Key-term templates sampled for `legal_opinion` documents.
const LEGAL_OPINION_TERMS: &[&str] = &[
    "Entity duly incorporated and in good standing",
    "Authorization of transactions under applicable law",
    "No pending litigation materially affecting financial position",
    "Compliance with contractual obligations",
    "Regulatory approval obtained for disclosed transactions",
    "Tax position supportable under applicable legislation",
];

/// Key-term templates sampled for `regulatory_filing` documents.
const REGULATORY_FILING_TERMS: &[&str] = &[
    "Annual financial statements filed with regulator",
    "Tax return submitted to competent authority",
    "Securities disclosure requirements satisfied",
    "Capital adequacy ratio reported to banking authority",
    "Environmental compliance report submitted",
    "Anti-money laundering annual report filed",
    "Data protection annual assessment filed",
    "Corporate governance statement submitted",
];

/// Key-term templates sampled for `board_resolution` documents.
const BOARD_RESOLUTION_TERMS: &[&str] = &[
    "Appointment of external auditor approved",
    "Audit committee composition confirmed",
    "Financial statements approved for issuance",
    "Dividend distribution authorized",
    "Related party transactions ratified",
    "Internal audit charter approved",
    "Risk appetite statement adopted",
    "Compliance program reviewed and endorsed",
];

/// Generic role titles used as signatories when the caller supplies no
/// employee names.
const SENIORITY_TITLES: &[&str] = &[
    "Chief Executive Officer",
    "Chief Financial Officer",
    "General Counsel",
    "Board Chairperson",
    "Audit Committee Chair",
    "Chief Compliance Officer",
    "Company Secretary",
    "Head of Internal Audit",
    "Controller",
    "VP of Finance",
];
87
/// Configuration for the legal document generator.
///
/// Every `*_min` / `*_max` pair is an inclusive bound on how many documents
/// of that type are produced per engagement. Each `*_min` is expected to be
/// less than or equal to its matching `*_max`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LegalDocumentGeneratorConfig {
    /// Minimum legal opinions per engagement (default: 0).
    pub legal_opinion_min: u32,
    /// Maximum legal opinions per engagement (default: 2).
    pub legal_opinion_max: u32,
    /// Minimum regulatory filings per engagement (default: 1).
    pub regulatory_filing_min: u32,
    /// Maximum regulatory filings per engagement (default: 3).
    pub regulatory_filing_max: u32,
    /// Minimum board resolutions per engagement (default: 1).
    pub board_resolution_min: u32,
    /// Maximum board resolutions per engagement (default: 2).
    pub board_resolution_max: u32,
}

impl Default for LegalDocumentGeneratorConfig {
    /// Returns the default bounds: 0-2 legal opinions, 1-3 regulatory
    /// filings and 1-2 board resolutions per engagement.
    fn default() -> Self {
        Self {
            legal_opinion_min: 0,
            legal_opinion_max: 2,
            regulatory_filing_min: 1,
            regulatory_filing_max: 3,
            board_resolution_min: 1,
            board_resolution_max: 2,
        }
    }
}
116
117/// Generates [`LegalDocument`] records for audit engagements.
118pub struct LegalDocumentGenerator {
119    rng: ChaCha8Rng,
120    uuid_factory: DeterministicUuidFactory,
121    config: LegalDocumentGeneratorConfig,
122}
123
124impl LegalDocumentGenerator {
125    /// Create a new generator with the given seed and default configuration.
126    pub fn new(seed: u64) -> Self {
127        Self {
128            rng: seeded_rng(seed, 0),
129            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::LegalDocument),
130            config: LegalDocumentGeneratorConfig::default(),
131        }
132    }
133
134    /// Create a new generator with explicit configuration.
135    pub fn with_config(seed: u64, config: LegalDocumentGeneratorConfig) -> Self {
136        Self {
137            rng: seeded_rng(seed, 0),
138            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::LegalDocument),
139            config,
140        }
141    }
142
143    /// Generate legal documents for a single engagement.
144    ///
145    /// Produces:
146    /// - 1 engagement letter
147    /// - 1 management representation letter
148    /// - 0-2 legal opinions
149    /// - 1-3 regulatory filings
150    /// - 1-2 board resolutions
151    ///
152    /// Signatories are drawn from `employee_names`; if empty, generic
153    /// titles are used as fallback.
154    pub fn generate(
155        &mut self,
156        entity_code: &str,
157        fiscal_year: i32,
158        employee_names: &[String],
159    ) -> Vec<LegalDocument> {
160        let mut docs = Vec::new();
161
162        // Engagement letter (always 1, near start of fiscal year)
163        if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, 1, 15) {
164            docs.push(self.make_document(
165                "engagement_letter",
166                entity_code,
167                date,
168                &format!("Engagement Letter — {} FY{}", entity_code, fiscal_year),
169                ENGAGEMENT_LETTER_TERMS,
170                employee_names,
171                "signed",
172                2,
173                3,
174            ));
175        }
176
177        // Management representation letter (always 1, near year-end close)
178        if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, 12, 20) {
179            docs.push(self.make_document(
180                "management_rep",
181                entity_code,
182                date,
183                &format!(
184                    "Management Representation Letter — {} FY{}",
185                    entity_code, fiscal_year
186                ),
187                MANAGEMENT_REP_TERMS,
188                employee_names,
189                "signed",
190                2,
191                4,
192            ));
193        }
194
195        // Legal opinions (0-2)
196        let opinion_count = self
197            .rng
198            .random_range(self.config.legal_opinion_min..=self.config.legal_opinion_max);
199        for i in 0..opinion_count {
200            let month = self.rng.random_range(3u32..=11);
201            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, 10) {
202                docs.push(self.make_document(
203                    "legal_opinion",
204                    entity_code,
205                    date,
206                    &format!(
207                        "Legal Opinion #{} — {} FY{}",
208                        i + 1,
209                        entity_code,
210                        fiscal_year
211                    ),
212                    LEGAL_OPINION_TERMS,
213                    employee_names,
214                    "final",
215                    1,
216                    2,
217                ));
218            }
219        }
220
221        // Regulatory filings (1-3)
222        let filing_count = self
223            .rng
224            .random_range(self.config.regulatory_filing_min..=self.config.regulatory_filing_max);
225        for i in 0..filing_count {
226            let month = self.rng.random_range(1u32..=12);
227            let day = self.rng.random_range(1u32..=28);
228            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, day) {
229                docs.push(self.make_document(
230                    "regulatory_filing",
231                    entity_code,
232                    date,
233                    &format!(
234                        "Regulatory Filing #{} — {} FY{}",
235                        i + 1,
236                        entity_code,
237                        fiscal_year
238                    ),
239                    REGULATORY_FILING_TERMS,
240                    employee_names,
241                    "signed",
242                    1,
243                    2,
244                ));
245            }
246        }
247
248        // Board resolutions (1-2)
249        let resolution_count = self
250            .rng
251            .random_range(self.config.board_resolution_min..=self.config.board_resolution_max);
252        for i in 0..resolution_count {
253            let month = self.rng.random_range(1u32..=12);
254            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, 25) {
255                docs.push(self.make_document(
256                    "board_resolution",
257                    entity_code,
258                    date,
259                    &format!(
260                        "Board Resolution #{} — {} FY{}",
261                        i + 1,
262                        entity_code,
263                        fiscal_year
264                    ),
265                    BOARD_RESOLUTION_TERMS,
266                    employee_names,
267                    "signed",
268                    3,
269                    5,
270                ));
271            }
272        }
273
274        // Sort chronologically
275        docs.sort_by_key(|d| d.date);
276        docs
277    }
278
279    /// Build a single legal document.
280    #[allow(clippy::too_many_arguments)]
281    fn make_document(
282        &mut self,
283        doc_type: &str,
284        entity_code: &str,
285        date: NaiveDate,
286        title: &str,
287        terms_pool: &[&str],
288        employee_names: &[String],
289        status: &str,
290        signatories_min: usize,
291        signatories_max: usize,
292    ) -> LegalDocument {
293        let signatories = self.pick_signatories(employee_names, signatories_min, signatories_max);
294        let key_terms = self.pick_items(terms_pool, 3, terms_pool.len().min(6));
295
296        LegalDocument {
297            document_id: self.uuid_factory.next(),
298            document_type: doc_type.to_string(),
299            entity_code: entity_code.to_string(),
300            date,
301            title: title.to_string(),
302            signatories,
303            key_terms,
304            status: status.to_string(),
305        }
306    }
307
308    /// Pick signatories from the employee pool (or use generic titles as fallback).
309    fn pick_signatories(&mut self, pool: &[String], min: usize, max: usize) -> Vec<String> {
310        let source: Vec<String> = if pool.is_empty() {
311            SENIORITY_TITLES.iter().map(|s| (*s).to_string()).collect()
312        } else {
313            pool.to_vec()
314        };
315        let count = self.rng.random_range(min..=max).min(source.len());
316        let mut indices: Vec<usize> = (0..source.len()).collect();
317        indices.shuffle(&mut self.rng);
318        indices.truncate(count);
319        indices.sort_unstable();
320        indices.iter().map(|&i| source[i].clone()).collect()
321    }
322
323    /// Randomly pick `min..=max` items from a template pool.
324    fn pick_items(&mut self, pool: &[&str], min: usize, max: usize) -> Vec<String> {
325        let count = self.rng.random_range(min..=max).min(pool.len());
326        let mut indices: Vec<usize> = (0..pool.len()).collect();
327        indices.shuffle(&mut self.rng);
328        indices.truncate(count);
329        indices.sort_unstable();
330        indices.iter().map(|&i| pool[i].to_string()).collect()
331    }
332}
333
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use chrono::Datelike;

    // NOTE: the generator binding is named `generator`, not `gen`, because
    // `gen` is a reserved keyword in the Rust 2024 edition.

    /// Fifteen synthetic employee names, `Employee_001` ..= `Employee_015`.
    fn sample_employees() -> Vec<String> {
        (1..=15).map(|i| format!("Employee_{:03}", i)).collect()
    }

    #[test]
    fn test_generates_non_empty_output() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        assert!(!docs.is_empty(), "should produce legal documents");
    }

    #[test]
    fn test_document_count_range() {
        let employees = sample_employees();
        // Min: 1 engagement + 1 mgmt rep + 0 legal + 1 filing + 1 resolution = 4
        // Max: 1 engagement + 1 mgmt rep + 2 legal + 3 filing + 2 resolution = 9
        // Several seeds are checked so the bounds are not validated against a
        // single lucky draw.
        for seed in 0..50u64 {
            let mut generator = LegalDocumentGenerator::new(seed);
            let docs = generator.generate("C001", 2025, &employees);
            assert!(
                docs.len() >= 4 && docs.len() <= 9,
                "expected 4-9 documents, got {} (seed {})",
                docs.len(),
                seed
            );
        }
    }

    #[test]
    fn test_has_engagement_letter() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        let engagement = docs
            .iter()
            .filter(|d| d.document_type == "engagement_letter")
            .count();
        assert_eq!(engagement, 1, "should have exactly 1 engagement letter");
    }

    #[test]
    fn test_has_management_rep() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        let mgmt_rep = docs
            .iter()
            .filter(|d| d.document_type == "management_rep")
            .count();
        assert_eq!(mgmt_rep, 1, "should have exactly 1 management rep letter");
    }

    #[test]
    fn test_document_types_correct() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        let valid_types = [
            "engagement_letter",
            "management_rep",
            "legal_opinion",
            "regulatory_filing",
            "board_resolution",
        ];
        for doc in &docs {
            assert!(
                valid_types.contains(&doc.document_type.as_str()),
                "unexpected document type: {}",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_entity_code_propagated() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("TEST_ENTITY", 2025, &sample_employees());
        for doc in &docs {
            assert_eq!(
                doc.entity_code, "TEST_ENTITY",
                "entity_code should match input"
            );
        }
    }

    #[test]
    fn test_dates_within_fiscal_year() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        for doc in &docs {
            assert_eq!(doc.date.year(), 2025, "document date should be in FY2025");
        }
    }

    #[test]
    fn test_dates_sorted() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        for w in docs.windows(2) {
            assert!(
                w[0].date <= w[1].date,
                "documents should be sorted chronologically"
            );
        }
    }

    #[test]
    fn test_unique_ids() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        let ids: std::collections::HashSet<_> = docs.iter().map(|d| d.document_id).collect();
        assert_eq!(ids.len(), docs.len(), "all document IDs should be unique");
    }

    #[test]
    fn test_signatories_present() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        for doc in &docs {
            assert!(
                !doc.signatories.is_empty(),
                "document {} should have signatories",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_key_terms_present() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        for doc in &docs {
            assert!(
                !doc.key_terms.is_empty(),
                "document {} should have key terms",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_empty_employee_pool_fallback() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &[]);
        assert!(!docs.is_empty(), "should produce docs with empty pool");
        for doc in &docs {
            assert!(
                !doc.signatories.is_empty(),
                "should have fallback signatories"
            );
        }
    }

    #[test]
    fn test_deterministic_with_same_seed() {
        let employees = sample_employees();

        let mut first = LegalDocumentGenerator::new(999);
        let d1 = first.generate("C001", 2025, &employees);

        let mut second = LegalDocumentGenerator::new(999);
        let d2 = second.generate("C001", 2025, &employees);

        assert_eq!(d1.len(), d2.len());
        for (a, b) in d1.iter().zip(d2.iter()) {
            assert_eq!(a.document_id, b.document_id);
            assert_eq!(a.document_type, b.document_type);
            assert_eq!(a.entity_code, b.entity_code);
            assert_eq!(a.date, b.date);
            assert_eq!(a.title, b.title);
            assert_eq!(a.signatories, b.signatories);
            assert_eq!(a.key_terms, b.key_terms);
            assert_eq!(a.status, b.status);
        }
    }

    #[test]
    fn test_serialization_roundtrip() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &sample_employees());
        let json = serde_json::to_string(&docs).expect("serialize");
        let parsed: Vec<LegalDocument> = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(docs.len(), parsed.len());
        for (orig, rt) in docs.iter().zip(parsed.iter()) {
            assert_eq!(orig.document_id, rt.document_id);
            assert_eq!(orig.document_type, rt.document_type);
            assert_eq!(orig.date, rt.date);
        }
    }
}
523}