Skip to main content

datasynth_generators/
legal_document_generator.rs

//! Legal document generator for audit engagements.
//!
//! Generates realistic legal documents (engagement letters, management
//! representation letters, legal opinions, regulatory filings, and board
//! resolutions) that support GAM audit procedures.
6
7use chrono::NaiveDate;
8use datasynth_core::models::LegalDocument;
9use datasynth_core::utils::seeded_rng;
10use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13
// ---------------------------------------------------------------------------
// Template pools
// ---------------------------------------------------------------------------

/// Key-term templates sampled for engagement letters.
const ENGAGEMENT_LETTER_TERMS: &[&str] = &[
    "Scope of audit procedures",
    "Management responsibilities for financial statements",
    "Auditor responsibilities under ISA",
    "Expected form and content of audit report",
    "Fee arrangements and billing schedule",
    "Access to records and personnel",
    "Confidentiality and data protection obligations",
    "Communication of internal control deficiencies",
    "Anti-money laundering compliance requirements",
    "Independence requirements and safeguards",
];

/// Key-term templates sampled for management representation letters.
const MANAGEMENT_REP_TERMS: &[&str] = &[
    "Financial statements prepared in accordance with applicable framework",
    "All transactions recorded and reflected in financial statements",
    "Internal controls designed and maintained for reliable reporting",
    "All known fraud or suspected fraud communicated to auditor",
    "All related party relationships and transactions disclosed",
    "No material subsequent events requiring adjustment or disclosure",
    "Going concern assessment provided to auditor",
    "All known litigation and claims disclosed",
    "Compliance with laws and regulations confirmed",
    "Uncorrected misstatements assessed as immaterial",
];

/// Key-term templates sampled for legal opinions.
const LEGAL_OPINION_TERMS: &[&str] = &[
    "Entity duly incorporated and in good standing",
    "Authorization of transactions under applicable law",
    "No pending litigation materially affecting financial position",
    "Compliance with contractual obligations",
    "Regulatory approval obtained for disclosed transactions",
    "Tax position supportable under applicable legislation",
];

/// Key-term templates sampled for regulatory filings.
const REGULATORY_FILING_TERMS: &[&str] = &[
    "Annual financial statements filed with regulator",
    "Tax return submitted to competent authority",
    "Securities disclosure requirements satisfied",
    "Capital adequacy ratio reported to banking authority",
    "Environmental compliance report submitted",
    "Anti-money laundering annual report filed",
    "Data protection annual assessment filed",
    "Corporate governance statement submitted",
];

/// Key-term templates sampled for board resolutions.
const BOARD_RESOLUTION_TERMS: &[&str] = &[
    "Appointment of external auditor approved",
    "Audit committee composition confirmed",
    "Financial statements approved for issuance",
    "Dividend distribution authorized",
    "Related party transactions ratified",
    "Internal audit charter approved",
    "Risk appetite statement adopted",
    "Compliance program reviewed and endorsed",
];

/// Generic job titles used as signatories when no employee names are supplied.
const SENIORITY_TITLES: &[&str] = &[
    "Chief Executive Officer",
    "Chief Financial Officer",
    "General Counsel",
    "Board Chairperson",
    "Audit Committee Chair",
    "Chief Compliance Officer",
    "Company Secretary",
    "Head of Internal Audit",
    "Controller",
    "VP of Finance",
];
87
/// Configuration for the legal document generator.
///
/// Each pair of fields is an inclusive range from which the number of
/// documents of that category is drawn once per engagement. Every `*_min`
/// is expected to be less than or equal to its matching `*_max`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LegalDocumentGeneratorConfig {
    /// Minimum legal opinions per engagement (default: 0).
    pub legal_opinion_min: u32,
    /// Maximum legal opinions per engagement (default: 2).
    pub legal_opinion_max: u32,
    /// Minimum regulatory filings per engagement (default: 1).
    pub regulatory_filing_min: u32,
    /// Maximum regulatory filings per engagement (default: 3).
    pub regulatory_filing_max: u32,
    /// Minimum board resolutions per engagement (default: 1).
    pub board_resolution_min: u32,
    /// Maximum board resolutions per engagement (default: 2).
    pub board_resolution_max: u32,
}
103
104impl Default for LegalDocumentGeneratorConfig {
105    fn default() -> Self {
106        Self {
107            legal_opinion_min: 0,
108            legal_opinion_max: 2,
109            regulatory_filing_min: 1,
110            regulatory_filing_max: 3,
111            board_resolution_min: 1,
112            board_resolution_max: 2,
113        }
114    }
115}
116
117/// Generates [`LegalDocument`] records for audit engagements.
118pub struct LegalDocumentGenerator {
119    rng: ChaCha8Rng,
120    uuid_factory: DeterministicUuidFactory,
121    config: LegalDocumentGeneratorConfig,
122}
123
124impl LegalDocumentGenerator {
125    /// Create a new generator with the given seed and default configuration.
126    pub fn new(seed: u64) -> Self {
127        Self {
128            rng: seeded_rng(seed, 0),
129            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::LegalDocument),
130            config: LegalDocumentGeneratorConfig::default(),
131        }
132    }
133
134    /// Create a new generator with explicit configuration.
135    pub fn with_config(seed: u64, config: LegalDocumentGeneratorConfig) -> Self {
136        Self {
137            rng: seeded_rng(seed, 0),
138            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::LegalDocument),
139            config,
140        }
141    }
142
143    /// Generate legal documents for a single engagement.
144    ///
145    /// Produces:
146    /// - 1 engagement letter
147    /// - 1 management representation letter
148    /// - 0-2 legal opinions
149    /// - 1-3 regulatory filings
150    /// - 1-2 board resolutions
151    ///
152    /// Signatories are drawn from `employee_names`; if empty, generic
153    /// titles are used as fallback.
154    pub fn generate(
155        &mut self,
156        entity_code: &str,
157        fiscal_year: i32,
158        employee_names: &[String],
159    ) -> Vec<LegalDocument> {
160        let mut docs = Vec::new();
161
162        // Engagement letter (always 1, near start of fiscal year)
163        if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, 1, 15) {
164            docs.push(self.make_document(
165                "engagement_letter",
166                entity_code,
167                date,
168                &format!("Engagement Letter — {} FY{}", entity_code, fiscal_year),
169                ENGAGEMENT_LETTER_TERMS,
170                employee_names,
171                "signed",
172                2,
173                3,
174            ));
175        }
176
177        // Management representation letter (always 1, near year-end close)
178        if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, 12, 20) {
179            docs.push(self.make_document(
180                "management_rep",
181                entity_code,
182                date,
183                &format!(
184                    "Management Representation Letter — {} FY{}",
185                    entity_code, fiscal_year
186                ),
187                MANAGEMENT_REP_TERMS,
188                employee_names,
189                "signed",
190                2,
191                4,
192            ));
193        }
194
195        // Legal opinions (0-2)
196        let opinion_count = self
197            .rng
198            .random_range(self.config.legal_opinion_min..=self.config.legal_opinion_max);
199        for i in 0..opinion_count {
200            let month = self.rng.random_range(3u32..=11);
201            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, 10) {
202                docs.push(self.make_document(
203                    "legal_opinion",
204                    entity_code,
205                    date,
206                    &format!(
207                        "Legal Opinion #{} — {} FY{}",
208                        i + 1,
209                        entity_code,
210                        fiscal_year
211                    ),
212                    LEGAL_OPINION_TERMS,
213                    employee_names,
214                    "final",
215                    1,
216                    2,
217                ));
218            }
219        }
220
221        // Regulatory filings (1-3)
222        let filing_count = self
223            .rng
224            .random_range(self.config.regulatory_filing_min..=self.config.regulatory_filing_max);
225        for i in 0..filing_count {
226            let month = self.rng.random_range(1u32..=12);
227            let day = self.rng.random_range(1u32..=28);
228            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, day) {
229                docs.push(self.make_document(
230                    "regulatory_filing",
231                    entity_code,
232                    date,
233                    &format!(
234                        "Regulatory Filing #{} — {} FY{}",
235                        i + 1,
236                        entity_code,
237                        fiscal_year
238                    ),
239                    REGULATORY_FILING_TERMS,
240                    employee_names,
241                    "signed",
242                    1,
243                    2,
244                ));
245            }
246        }
247
248        // Board resolutions (1-2)
249        let resolution_count = self
250            .rng
251            .random_range(self.config.board_resolution_min..=self.config.board_resolution_max);
252        for i in 0..resolution_count {
253            let month = self.rng.random_range(1u32..=12);
254            if let Some(date) = NaiveDate::from_ymd_opt(fiscal_year, month, 25) {
255                docs.push(self.make_document(
256                    "board_resolution",
257                    entity_code,
258                    date,
259                    &format!(
260                        "Board Resolution #{} — {} FY{}",
261                        i + 1,
262                        entity_code,
263                        fiscal_year
264                    ),
265                    BOARD_RESOLUTION_TERMS,
266                    employee_names,
267                    "signed",
268                    3,
269                    5,
270                ));
271            }
272        }
273
274        // Sort chronologically
275        docs.sort_by_key(|d| d.date);
276        docs
277    }
278
279    /// Build a single legal document.
280    #[allow(clippy::too_many_arguments)]
281    fn make_document(
282        &mut self,
283        doc_type: &str,
284        entity_code: &str,
285        date: NaiveDate,
286        title: &str,
287        terms_pool: &[&str],
288        employee_names: &[String],
289        status: &str,
290        signatories_min: usize,
291        signatories_max: usize,
292    ) -> LegalDocument {
293        let signatories = self.pick_signatories(employee_names, signatories_min, signatories_max);
294        let key_terms = self.pick_items(terms_pool, 3, terms_pool.len().min(6));
295
296        LegalDocument {
297            document_id: self.uuid_factory.next(),
298            document_type: doc_type.to_string(),
299            entity_code: entity_code.to_string(),
300            date,
301            title: title.to_string(),
302            signatories,
303            key_terms,
304            status: status.to_string(),
305        }
306    }
307
308    /// Pick signatories from the employee pool (or use generic titles as fallback).
309    fn pick_signatories(&mut self, pool: &[String], min: usize, max: usize) -> Vec<String> {
310        let source: Vec<String> = if pool.is_empty() {
311            SENIORITY_TITLES.iter().map(|s| (*s).to_string()).collect()
312        } else {
313            pool.to_vec()
314        };
315        let count = self.rng.random_range(min..=max).min(source.len());
316        let mut indices: Vec<usize> = (0..source.len()).collect();
317        indices.shuffle(&mut self.rng);
318        indices.truncate(count);
319        indices.sort_unstable();
320        indices.iter().map(|&i| source[i].clone()).collect()
321    }
322
323    /// Randomly pick `min..=max` items from a template pool.
324    fn pick_items(&mut self, pool: &[&str], min: usize, max: usize) -> Vec<String> {
325        let count = self.rng.random_range(min..=max).min(pool.len());
326        let mut indices: Vec<usize> = (0..pool.len()).collect();
327        indices.shuffle(&mut self.rng);
328        indices.truncate(count);
329        indices.sort_unstable();
330        indices.iter().map(|&i| pool[i].to_string()).collect()
331    }
332}
333
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Datelike;

    /// Fifteen placeholder employee names, `Employee_001` to `Employee_015`.
    fn sample_employees() -> Vec<String> {
        (1..=15).map(|i| format!("Employee_{:03}", i)).collect()
    }

    /// Documents for entity `C001`, FY2025, from a fresh generator seeded with 42.
    // The local is named `generator` throughout because `gen` is a reserved
    // keyword in the Rust 2024 edition.
    fn default_docs() -> Vec<LegalDocument> {
        let mut generator = LegalDocumentGenerator::new(42);
        generator.generate("C001", 2025, &sample_employees())
    }

    /// Number of documents in `docs` whose type equals `doc_type`.
    fn count_of_type(docs: &[LegalDocument], doc_type: &str) -> usize {
        docs.iter().filter(|d| d.document_type == doc_type).count()
    }

    #[test]
    fn test_generates_non_empty_output() {
        let docs = default_docs();
        assert!(!docs.is_empty(), "should produce legal documents");
    }

    #[test]
    fn test_document_count_range() {
        let docs = default_docs();
        // Min: 1 engagement + 1 mgmt rep + 0 legal + 1 filing + 1 resolution = 4
        // Max: 1 engagement + 1 mgmt rep + 2 legal + 3 filing + 2 resolution = 9
        assert!(
            (4..=9).contains(&docs.len()),
            "expected 4-9 documents, got {}",
            docs.len()
        );
    }

    #[test]
    fn test_has_engagement_letter() {
        let docs = default_docs();
        assert_eq!(
            count_of_type(&docs, "engagement_letter"),
            1,
            "should have exactly 1 engagement letter"
        );
    }

    #[test]
    fn test_has_management_rep() {
        let docs = default_docs();
        assert_eq!(
            count_of_type(&docs, "management_rep"),
            1,
            "should have exactly 1 management rep letter"
        );
    }

    #[test]
    fn test_document_types_correct() {
        let valid_types = [
            "engagement_letter",
            "management_rep",
            "legal_opinion",
            "regulatory_filing",
            "board_resolution",
        ];
        for doc in &default_docs() {
            assert!(
                valid_types.contains(&doc.document_type.as_str()),
                "unexpected document type: {}",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_entity_code_propagated() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("TEST_ENTITY", 2025, &sample_employees());
        for doc in &docs {
            assert_eq!(
                doc.entity_code, "TEST_ENTITY",
                "entity_code should match input"
            );
        }
    }

    #[test]
    fn test_dates_within_fiscal_year() {
        for doc in &default_docs() {
            assert_eq!(doc.date.year(), 2025, "document date should be in FY2025");
        }
    }

    #[test]
    fn test_dates_sorted() {
        let docs = default_docs();
        for pair in docs.windows(2) {
            assert!(
                pair[0].date <= pair[1].date,
                "documents should be sorted chronologically"
            );
        }
    }

    #[test]
    fn test_unique_ids() {
        let docs = default_docs();
        let ids: std::collections::HashSet<_> = docs.iter().map(|d| d.document_id).collect();
        assert_eq!(ids.len(), docs.len(), "all document IDs should be unique");
    }

    #[test]
    fn test_signatories_present() {
        for doc in &default_docs() {
            assert!(
                !doc.signatories.is_empty(),
                "document {} should have signatories",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_key_terms_present() {
        for doc in &default_docs() {
            assert!(
                !doc.key_terms.is_empty(),
                "document {} should have key terms",
                doc.document_type
            );
        }
    }

    #[test]
    fn test_empty_employee_pool_fallback() {
        let mut generator = LegalDocumentGenerator::new(42);
        let docs = generator.generate("C001", 2025, &[]);
        assert!(!docs.is_empty(), "should produce docs with empty pool");
        for doc in &docs {
            assert!(
                !doc.signatories.is_empty(),
                "should have fallback signatories"
            );
            // With no employees, every signatory must be one of the generic titles.
            for signatory in &doc.signatories {
                assert!(
                    SENIORITY_TITLES.contains(&signatory.as_str()),
                    "fallback signatory should be a generic title, got {}",
                    signatory
                );
            }
        }
    }

    #[test]
    fn test_deterministic_with_same_seed() {
        let employees = sample_employees();

        let mut first = LegalDocumentGenerator::new(999);
        let d1 = first.generate("C001", 2025, &employees);

        let mut second = LegalDocumentGenerator::new(999);
        let d2 = second.generate("C001", 2025, &employees);

        assert_eq!(d1.len(), d2.len());
        for (a, b) in d1.iter().zip(d2.iter()) {
            assert_eq!(a.document_id, b.document_id);
            assert_eq!(a.document_type, b.document_type);
            assert_eq!(a.entity_code, b.entity_code);
            assert_eq!(a.date, b.date);
            assert_eq!(a.title, b.title);
            // Signatories are chosen randomly, so they are as relevant to
            // reproducibility as the key terms.
            assert_eq!(a.signatories, b.signatories);
            assert_eq!(a.key_terms, b.key_terms);
            assert_eq!(a.status, b.status);
        }
    }

    #[test]
    fn test_serialization_roundtrip() {
        let docs = default_docs();
        let json = serde_json::to_string(&docs).expect("serialize");
        let parsed: Vec<LegalDocument> = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(docs.len(), parsed.len());
        for (orig, rt) in docs.iter().zip(parsed.iter()) {
            assert_eq!(orig.document_id, rt.document_id);
            assert_eq!(orig.document_type, rt.document_type);
            assert_eq!(orig.entity_code, rt.entity_code);
            assert_eq!(orig.date, rt.date);
            assert_eq!(orig.title, rt.title);
            assert_eq!(orig.signatories, rt.signatories);
            assert_eq!(orig.key_terms, rt.key_terms);
            assert_eq!(orig.status, rt.status);
        }
    }
}