Skip to main content

datasynth_core/templates/
provider.rs

1//! Template provider trait and implementations.
2//!
3//! This module defines the `TemplateProvider` trait for accessing template data,
4//! along with implementations that combine embedded and file-based templates.
5
6use rand::seq::IndexedRandom;
7use rand::Rng;
8use std::sync::Arc;
9
10use super::loader::{MergeStrategy, TemplateData, TemplateLoader};
11use super::names::NameCulture;
12use crate::models::BusinessProcess;
13
14/// Trait for providing template data to generators.
15///
16/// This trait abstracts the source of template data, allowing generators
17/// to work with either embedded templates, file-based templates, or a
18/// combination of both.
19///
20/// Methods use `&mut dyn Rng` to allow the trait to be dyn-compatible.
21pub trait TemplateProvider: Send + Sync {
22    /// Get a random person first name for the given culture and gender.
23    fn get_person_first_name(
24        &self,
25        culture: NameCulture,
26        is_male: bool,
27        rng: &mut dyn Rng,
28    ) -> String;
29
30    /// Get a random person last name for the given culture.
31    fn get_person_last_name(&self, culture: NameCulture, rng: &mut dyn Rng) -> String;
32
33    /// Get a random vendor name for the given category.
34    fn get_vendor_name(&self, category: &str, rng: &mut dyn Rng) -> String;
35
36    /// Get a random customer name for the given industry.
37    fn get_customer_name(&self, industry: &str, rng: &mut dyn Rng) -> String;
38
39    /// Get a random material description for the given type.
40    fn get_material_description(&self, material_type: &str, rng: &mut dyn Rng) -> String;
41
42    /// Get a random asset description for the given category.
43    fn get_asset_description(&self, category: &str, rng: &mut dyn Rng) -> String;
44
45    /// Get a random line text for the given process and account type.
46    fn get_line_text(
47        &self,
48        process: BusinessProcess,
49        account_type: &str,
50        rng: &mut dyn Rng,
51    ) -> String;
52
53    /// Get a random header text template for the given process.
54    fn get_header_template(&self, process: BusinessProcess, rng: &mut dyn Rng) -> String;
55
56    /// Get a random bank name from the flat pool. (v3.2.0+)
57    ///
58    /// Default impl returns `None` — implementors with a bank-name pool
59    /// (like `DefaultTemplateProvider`) override this. `None` means
60    /// "caller should use its own fallback" so existing
61    /// `BANK_NAMES`-based callers keep working until the rewire lands.
62    fn get_bank_name(&self, _rng: &mut dyn Rng) -> Option<String> {
63        None
64    }
65
66    /// Get a (title, account) pair for an audit finding of the given type.
67    /// (v3.2.0+)
68    ///
69    /// `finding_type_key` is a lowercase-snake-case canonical name
70    /// (e.g. "material_weakness", "control_deficiency"). Default impl
71    /// returns `None` so the caller falls back to its inline tables.
72    fn get_finding_title(
73        &self,
74        _finding_type_key: &str,
75        _rng: &mut dyn Rng,
76    ) -> Option<(String, String)> {
77        None
78    }
79
80    /// Get a narrative template string for an audit finding section.
81    /// (v3.2.0+)
82    ///
83    /// `section` must be one of: "condition", "criteria", "cause",
84    /// "effect", "recommendation". Returns `None` to trigger caller
85    /// fallback. Templates may contain `{placeholder}` tokens
86    /// (e.g. `{account}`, `{amount}`) that the caller substitutes.
87    fn get_finding_narrative(
88        &self,
89        _finding_type_key: &str,
90        _section: &str,
91        _rng: &mut dyn Rng,
92    ) -> Option<String> {
93        None
94    }
95
96    /// Get a display name for a department by code. (v3.2.0+)
97    ///
98    /// `department_code` is one of: "finance", "procurement", "sales",
99    /// "warehouse", "it". Returns `None` to trigger caller fallback.
100    fn get_department_name(&self, _department_code: &str, _rng: &mut dyn Rng) -> Option<String> {
101        None
102    }
103}
104
105/// Default template provider using embedded templates with optional file overrides.
106pub struct DefaultTemplateProvider {
107    /// Loaded template data (file-based)
108    template_data: Option<TemplateData>,
109    /// Merge strategy for combining embedded and file templates
110    merge_strategy: MergeStrategy,
111}
112
113/// Bundled default YAML — v4.1.4+ proof-of-concept for the
114/// YAML-as-source-of-truth migration path. The file at
115/// `crates/datasynth-core/templates/defaults.yaml` is included at
116/// compile time and made available via
117/// [`DefaultTemplateProvider::bundled`].
118pub const BUNDLED_DEFAULTS_YAML: &str = include_str!("../../templates/defaults.yaml");
119
120impl DefaultTemplateProvider {
121    /// Create a new provider backed by the bundled `defaults.yaml`.
122    ///
123    /// As of v4.2.4 this is a thin alias for [`Self::bundled`]: the YAML
124    /// bundled via `include_str!` is the sole source of truth for the
125    /// default name pools. Parse failure would mean `defaults.yaml` is
126    /// malformed, which `build.rs` validates at compile time — so the
127    /// `.expect` below is effectively infallible for any shipped build.
128    pub fn new() -> Self {
129        Self::bundled().expect(
130            "bundled defaults.yaml must parse — validated at build time by \
131             datasynth-core/build.rs; a panic here means the YAML was edited \
132             without re-running the build validator",
133        )
134    }
135
136    /// Create a provider backed directly by the bundled `defaults.yaml`
137    /// (included at compile time via `include_str!`).
138    ///
139    /// Equivalent to [`Self::new`] but surfaces the parse `Result` for
140    /// callers that want to handle malformed YAML explicitly (e.g. tests
141    /// that validate the loader against hand-crafted malformed inputs).
142    pub fn bundled() -> Result<Self, super::loader::TemplateError> {
143        let data = TemplateLoader::load_from_yaml_str(BUNDLED_DEFAULTS_YAML)?;
144        Ok(Self::with_templates(data, MergeStrategy::Extend))
145    }
146
147    /// Create a provider with user-supplied template data.
148    ///
149    /// The data replaces the bundled defaults wholesale. Callers who want
150    /// to *merge* custom content on top of the bundled defaults should
151    /// start from [`Self::new`] and use [`TemplateLoader::merge`] / the
152    /// `MergeStrategy::Extend` path explicitly.
153    pub fn with_templates(template_data: TemplateData, strategy: MergeStrategy) -> Self {
154        Self {
155            template_data: Some(template_data),
156            merge_strategy: strategy,
157        }
158    }
159
160    /// Load templates from a file path.
161    pub fn from_file(path: &std::path::Path) -> Result<Self, super::loader::TemplateError> {
162        let data = TemplateLoader::load_from_file(path)?;
163        Ok(Self::with_templates(data, MergeStrategy::Extend))
164    }
165
166    /// Load templates from a directory.
167    pub fn from_directory(path: &std::path::Path) -> Result<Self, super::loader::TemplateError> {
168        let data = TemplateLoader::load_from_directory(path)?;
169        Ok(Self::with_templates(data, MergeStrategy::Extend))
170    }
171
172    /// Set the merge strategy.
173    pub fn with_merge_strategy(mut self, strategy: MergeStrategy) -> Self {
174        self.merge_strategy = strategy;
175        self
176    }
177
178    fn culture_to_key(culture: NameCulture) -> &'static str {
179        match culture {
180            NameCulture::WesternUs => "us",
181            NameCulture::German => "german",
182            NameCulture::Hispanic => "hispanic",
183            NameCulture::French => "french",
184            NameCulture::Chinese => "chinese",
185            NameCulture::Japanese => "japanese",
186            NameCulture::Indian => "indian",
187        }
188    }
189
190    fn process_to_key(process: BusinessProcess) -> &'static str {
191        match process {
192            BusinessProcess::P2P => "p2p",
193            BusinessProcess::O2C => "o2c",
194            BusinessProcess::H2R => "h2r",
195            BusinessProcess::R2R => "r2r",
196            _ => "other",
197        }
198    }
199}
200
201impl Default for DefaultTemplateProvider {
202    fn default() -> Self {
203        Self::new()
204    }
205}
206
207impl TemplateProvider for DefaultTemplateProvider {
208    fn get_person_first_name(
209        &self,
210        culture: NameCulture,
211        is_male: bool,
212        rng: &mut dyn Rng,
213    ) -> String {
214        let key = Self::culture_to_key(culture);
215
216        if let Some(ref data) = self.template_data {
217            // Primary lookup: the requested culture.
218            if let Some(culture_names) = data.person_names.cultures.get(key) {
219                let names = if is_male {
220                    &culture_names.male_first_names
221                } else {
222                    &culture_names.female_first_names
223                };
224                if let Some(name) = names.choose(rng) {
225                    return name.clone();
226                }
227            }
228            // Secondary lookup: the "us" pool as a universal default
229            // for cultures not yet mirrored in defaults.yaml (matches
230            // pre-v4.2.4 behaviour where non-German cultures fell back
231            // to the embedded US arrays).
232            if key != "us" {
233                if let Some(us_names) = data.person_names.cultures.get("us") {
234                    let names = if is_male {
235                        &us_names.male_first_names
236                    } else {
237                        &us_names.female_first_names
238                    };
239                    if let Some(name) = names.choose(rng) {
240                        return name.clone();
241                    }
242                }
243            }
244        }
245
246        "Unknown".to_string()
247    }
248
249    fn get_person_last_name(&self, culture: NameCulture, rng: &mut dyn Rng) -> String {
250        let key = Self::culture_to_key(culture);
251
252        if let Some(ref data) = self.template_data {
253            if let Some(culture_names) = data.person_names.cultures.get(key) {
254                if let Some(name) = culture_names.last_names.choose(rng) {
255                    return name.clone();
256                }
257            }
258            if key != "us" {
259                if let Some(us_names) = data.person_names.cultures.get("us") {
260                    if let Some(name) = us_names.last_names.choose(rng) {
261                        return name.clone();
262                    }
263                }
264            }
265        }
266
267        "Unknown".to_string()
268    }
269
270    fn get_vendor_name(&self, category: &str, rng: &mut dyn Rng) -> String {
271        if let Some(ref data) = self.template_data {
272            if let Some(names) = data.vendor_names.categories.get(category) {
273                if let Some(name) = names.choose(rng) {
274                    return name.clone();
275                }
276            }
277            if category != "manufacturing" {
278                tracing::debug!(
279                    "Unknown vendor name category '{}', falling back to manufacturing",
280                    category
281                );
282                if let Some(names) = data.vendor_names.categories.get("manufacturing") {
283                    if let Some(name) = names.choose(rng) {
284                        return name.clone();
285                    }
286                }
287            }
288        }
289
290        "Unknown Vendor".to_string()
291    }
292
293    fn get_customer_name(&self, industry: &str, rng: &mut dyn Rng) -> String {
294        if let Some(ref data) = self.template_data {
295            if let Some(names) = data.customer_names.industries.get(industry) {
296                if let Some(name) = names.choose(rng) {
297                    return name.clone();
298                }
299            }
300            if industry != "retail" {
301                tracing::debug!(
302                    "Unknown customer name industry '{}', falling back to retail",
303                    industry
304                );
305                if let Some(names) = data.customer_names.industries.get("retail") {
306                    if let Some(name) = names.choose(rng) {
307                        return name.clone();
308                    }
309                }
310            }
311        }
312
313        "Unknown Customer".to_string()
314    }
315
316    fn get_material_description(&self, material_type: &str, rng: &mut dyn Rng) -> String {
317        // Try file templates first
318        if let Some(ref data) = self.template_data {
319            if let Some(descs) = data.material_descriptions.by_type.get(material_type) {
320                if !descs.is_empty() {
321                    if let Some(desc) = descs.choose(rng) {
322                        return desc.clone();
323                    }
324                }
325            }
326        }
327
328        // Fall back to generic
329        format!("{material_type} material")
330    }
331
332    fn get_asset_description(&self, category: &str, rng: &mut dyn Rng) -> String {
333        // Try file templates first
334        if let Some(ref data) = self.template_data {
335            if let Some(descs) = data.asset_descriptions.by_category.get(category) {
336                if !descs.is_empty() {
337                    if let Some(desc) = descs.choose(rng) {
338                        return desc.clone();
339                    }
340                }
341            }
342        }
343
344        // Fall back to generic
345        format!("{category} asset")
346    }
347
348    fn get_line_text(
349        &self,
350        process: BusinessProcess,
351        account_type: &str,
352        rng: &mut dyn Rng,
353    ) -> String {
354        let key = Self::process_to_key(process);
355
356        // Try file templates first
357        if let Some(ref data) = self.template_data {
358            let descs_map = match process {
359                BusinessProcess::P2P => &data.line_item_descriptions.p2p,
360                BusinessProcess::O2C => &data.line_item_descriptions.o2c,
361                BusinessProcess::H2R => &data.line_item_descriptions.h2r,
362                BusinessProcess::R2R => &data.line_item_descriptions.r2r,
363                _ => &data.line_item_descriptions.p2p,
364            };
365
366            if let Some(descs) = descs_map.get(account_type) {
367                if !descs.is_empty() {
368                    if let Some(desc) = descs.choose(rng) {
369                        return desc.clone();
370                    }
371                }
372            }
373        }
374
375        // Fall back to generic
376        format!("{} posting", key.to_uppercase())
377    }
378
379    fn get_header_template(&self, process: BusinessProcess, rng: &mut dyn Rng) -> String {
380        let key = Self::process_to_key(process);
381
382        // Try file templates first
383        if let Some(ref data) = self.template_data {
384            if let Some(templates) = data.header_text_templates.by_process.get(key) {
385                if !templates.is_empty() {
386                    if let Some(template) = templates.choose(rng) {
387                        return template.clone();
388                    }
389                }
390            }
391        }
392
393        // Fall back to generic
394        format!("{} Transaction", key.to_uppercase())
395    }
396
397    fn get_bank_name(&self, rng: &mut dyn Rng) -> Option<String> {
398        if let Some(ref data) = self.template_data {
399            if !data.bank_names.names.is_empty() {
400                if let Some(name) = data.bank_names.names.choose(rng) {
401                    return Some(name.clone());
402                }
403            }
404        }
405        None
406    }
407
408    fn get_finding_title(
409        &self,
410        finding_type_key: &str,
411        rng: &mut dyn Rng,
412    ) -> Option<(String, String)> {
413        if let Some(ref data) = self.template_data {
414            if let Some(entries) = data.finding_titles.by_type.get(finding_type_key) {
415                if !entries.is_empty() {
416                    if let Some(entry) = entries.choose(rng) {
417                        return Some((entry.title.clone(), entry.account.clone()));
418                    }
419                }
420            }
421        }
422        None
423    }
424
425    fn get_finding_narrative(
426        &self,
427        finding_type_key: &str,
428        section: &str,
429        rng: &mut dyn Rng,
430    ) -> Option<String> {
431        if let Some(ref data) = self.template_data {
432            if let Some(sections) = data.finding_narratives.by_type.get(finding_type_key) {
433                if let Some(templates) = sections.get(section) {
434                    if !templates.is_empty() {
435                        if let Some(tpl) = templates.choose(rng) {
436                            return Some(tpl.clone());
437                        }
438                    }
439                }
440            }
441        }
442        None
443    }
444
445    fn get_department_name(&self, department_code: &str, _rng: &mut dyn Rng) -> Option<String> {
446        if let Some(ref data) = self.template_data {
447            if let Some(name) = data.department_names.by_code.get(department_code) {
448                if !name.is_empty() {
449                    return Some(name.clone());
450                }
451            }
452        }
453        None
454    }
455}
456
457/// A thread-safe wrapper around a template provider.
458pub type SharedTemplateProvider = Arc<dyn TemplateProvider>;
459
460/// Create a default shared template provider.
461pub fn default_provider() -> SharedTemplateProvider {
462    Arc::new(DefaultTemplateProvider::new())
463}
464
465/// Create a shared template provider from a file.
466pub fn provider_from_file(
467    path: &std::path::Path,
468) -> Result<SharedTemplateProvider, super::loader::TemplateError> {
469    Ok(Arc::new(DefaultTemplateProvider::from_file(path)?))
470}
471
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Deterministic RNG for reproducible draws.
    fn seeded(seed: u64) -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(seed)
    }

    /// `new()` yields non-empty German first and last names.
    #[test]
    fn test_default_provider() {
        let mut rng = seeded(12345);
        let provider = DefaultTemplateProvider::new();

        assert!(!provider
            .get_person_first_name(NameCulture::German, true, &mut rng)
            .is_empty());
        assert!(!provider
            .get_person_last_name(NameCulture::German, &mut rng)
            .is_empty());
    }

    /// v4.1.4+ — the bundled YAML must parse, and the resulting provider
    /// must hand out vendor and customer names without panicking.
    #[test]
    fn bundled_defaults_loads() {
        let provider = DefaultTemplateProvider::bundled().expect("bundled YAML parses");
        let mut rng = seeded(42);

        assert!(!provider
            .get_vendor_name("office_supplies", &mut rng)
            .is_empty());
        assert!(!provider.get_customer_name("retail", &mut rng).is_empty());
    }

    /// v4.1.7 byte-identity regression: for equal seeds, `new()` and
    /// `bundled()` must produce the same sequence of person names.
    #[test]
    fn bundled_matches_embedded_for_person_names() {
        // Lookups only need `&self`, so one provider of each kind serves
        // every comparison below.
        let p_embedded = DefaultTemplateProvider::new();
        let p_bundled = DefaultTemplateProvider::bundled().unwrap();

        for culture in [NameCulture::German, NameCulture::WesternUs] {
            for is_male in [true, false] {
                let (mut rng_e, mut rng_b) = (seeded(12345), seeded(12345));
                for _ in 0..500 {
                    let ne = p_embedded.get_person_first_name(culture, is_male, &mut rng_e);
                    let nb = p_bundled.get_person_first_name(culture, is_male, &mut rng_b);
                    assert_eq!(
                        ne, nb,
                        "first name mismatch for culture={culture:?} male={is_male}"
                    );
                }
            }

            let (mut rng_e, mut rng_b) = (seeded(54321), seeded(54321));
            for _ in 0..500 {
                let ne = p_embedded.get_person_last_name(culture, &mut rng_e);
                let nb = p_bundled.get_person_last_name(culture, &mut rng_b);
                assert_eq!(ne, nb, "last name mismatch for culture={culture:?}");
            }
        }
    }

    /// Same identity check for the categories mirrored in YAML:
    /// manufacturing and services (vendors); automotive and retail
    /// (customers).
    #[test]
    fn bundled_matches_embedded_for_vendor_customer_names() {
        let p_embedded = DefaultTemplateProvider::new();
        let p_bundled = DefaultTemplateProvider::bundled().unwrap();

        for category in ["manufacturing", "services"] {
            let (mut rng_e, mut rng_b) = (seeded(99), seeded(99));
            for _ in 0..200 {
                let ne = p_embedded.get_vendor_name(category, &mut rng_e);
                let nb = p_bundled.get_vendor_name(category, &mut rng_b);
                assert_eq!(ne, nb, "vendor mismatch for category={category}");
            }
        }

        for industry in ["automotive", "retail"] {
            let (mut rng_e, mut rng_b) = (seeded(77), seeded(77));
            for _ in 0..200 {
                let ne = p_embedded.get_customer_name(industry, &mut rng_e);
                let nb = p_bundled.get_customer_name(industry, &mut rng_b);
                assert_eq!(ne, nb, "customer mismatch for industry={industry}");
            }
        }
    }

    /// v4.1.7: spot-check that the loader is not silently swallowing the
    /// file — at least one of the known retail customer names must show up
    /// within 500 draws.
    #[test]
    fn bundled_defaults_include_embedded_mirrored_entries() {
        let expected = [
            "Retail Solutions Corp.",
            "Consumer Goods Direct",
            "Shop Smart Inc.",
            "Merchandise Holdings LLC",
            "Retail Distribution Co.",
            "Store Systems Ltd.",
        ];
        let provider = DefaultTemplateProvider::bundled().expect("bundled YAML parses");
        let mut rng = seeded(1);

        // `any` stops drawing at the first hit.
        let saw_expected = (0..500).any(|_| {
            let name = provider.get_customer_name("retail", &mut rng);
            expected.contains(&name.as_str())
        });

        assert!(
            saw_expected,
            "bundled retail customer names (mirrored from embedded) should appear in the draw stream"
        );
    }

    /// The manufacturing pool is populated, so no "Unknown" fallback shows.
    #[test]
    fn test_vendor_names() {
        let mut rng = seeded(12345);
        let name = DefaultTemplateProvider::new().get_vendor_name("manufacturing", &mut rng);

        assert!(!name.is_empty());
        assert!(!name.contains("Unknown"));
    }

    /// The shared (trait-object) provider serves names as well.
    #[test]
    fn test_shared_provider() {
        let shared = default_provider();
        let mut rng = seeded(12345);

        assert!(!shared.get_customer_name("retail", &mut rng).is_empty());
    }
}