Skip to main content

datasynth_core/llm/
nl_config.rs

1//! Natural language to YAML configuration generator.
2//!
3//! Takes a free-text description of desired synthetic data (e.g., "Generate 1 year of
4//! retail data for a medium US company with fraud detection") and produces a valid
5//! `GeneratorConfig` YAML string.
6
7use super::provider::{LlmProvider, LlmRequest};
8use crate::error::SynthError;
9
/// Structured representation of user intent extracted from natural language.
///
/// Every scalar field is optional: `None` means the description did not
/// mention it, in which case [`NlConfigGenerator::intent_to_yaml`] substitutes
/// a default. Equality is derived so intents can be compared directly (for
/// example in tests or when de-duplicating requests).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ConfigIntent {
    /// Target industry (e.g., "retail", "manufacturing", "financial_services").
    pub industry: Option<String>,
    /// Country code (e.g., "US", "DE", "GB").
    pub country: Option<String>,
    /// Company size: "small", "medium", or "large".
    pub company_size: Option<String>,
    /// Duration in months.
    pub period_months: Option<u32>,
    /// Requested feature flags (e.g., "fraud", "audit", "banking", "controls").
    pub features: Vec<String>,
}
24
/// Generates YAML configuration from natural language descriptions.
///
/// The generator uses a two-phase approach:
/// 1. Parse the natural language description into a structured [`ConfigIntent`].
/// 2. Map the intent to a YAML configuration string using preset templates.
///
/// The type carries no state; all functionality is exposed through associated
/// functions, so the value itself is a zero-sized marker.
#[derive(Debug, Clone, Copy, Default)]
pub struct NlConfigGenerator;
31
32impl NlConfigGenerator {
33    /// Generate a YAML configuration from a natural language description.
34    ///
35    /// Uses the provided LLM provider to help parse the description, with
36    /// keyword-based fallback parsing for reliability.
37    ///
38    /// # Errors
39    ///
40    /// Returns `SynthError::GenerationError` if the description cannot be parsed
41    /// or the resulting configuration is invalid.
42    pub fn generate(description: &str, provider: &dyn LlmProvider) -> Result<String, SynthError> {
43        if description.trim().is_empty() {
44            return Err(SynthError::generation(
45                "Natural language description cannot be empty",
46            ));
47        }
48
49        let intent = Self::parse_intent(description, provider)?;
50        Self::intent_to_yaml(&intent)
51    }
52
53    /// Parse a natural language description into a structured [`ConfigIntent`].
54    ///
55    /// Attempts to use the LLM provider first, then falls back to keyword-based
56    /// extraction for reliability.
57    pub fn parse_intent(
58        description: &str,
59        provider: &dyn LlmProvider,
60    ) -> Result<ConfigIntent, SynthError> {
61        // Try LLM-based parsing first
62        let llm_intent = Self::parse_with_llm(description, provider);
63
64        // Always run keyword-based parsing as fallback/supplement
65        let keyword_intent = Self::parse_with_keywords(description);
66
67        // Merge: prefer LLM results where available, fall back to keywords
68        match llm_intent {
69            Ok(llm) => Ok(Self::merge_intents(llm, keyword_intent)),
70            Err(_) => Ok(keyword_intent),
71        }
72    }
73
74    /// Map a [`ConfigIntent`] to a YAML configuration string.
75    pub fn intent_to_yaml(intent: &ConfigIntent) -> Result<String, SynthError> {
76        let industry = intent.industry.as_deref().unwrap_or("manufacturing");
77        let country = intent.country.as_deref().unwrap_or("US");
78        let complexity = intent.company_size.as_deref().unwrap_or("medium");
79        let period_months = intent.period_months.unwrap_or(12);
80
81        // Validate inputs
82        if !(1..=120).contains(&period_months) {
83            return Err(SynthError::generation(format!(
84                "Period months must be between 1 and 120, got {}",
85                period_months
86            )));
87        }
88
89        let valid_complexities = ["small", "medium", "large"];
90        if !valid_complexities.contains(&complexity) {
91            return Err(SynthError::generation(format!(
92                "Invalid company size '{}', must be one of: small, medium, large",
93                complexity
94            )));
95        }
96
97        let currency = Self::country_to_currency(country);
98        let company_name = Self::industry_company_name(industry);
99
100        let mut yaml = String::with_capacity(2048);
101
102        // Global settings
103        yaml.push_str(&format!(
104            "global:\n  industry: {}\n  start_date: \"2024-01-01\"\n  period_months: {}\n  seed: 42\n\n",
105            industry, period_months
106        ));
107
108        // Companies
109        yaml.push_str(&format!(
110            "companies:\n  - code: \"C001\"\n    name: \"{}\"\n    currency: \"{}\"\n    country: \"{}\"\n\n",
111            company_name, currency, country
112        ));
113
114        // Chart of accounts
115        yaml.push_str(&format!(
116            "chart_of_accounts:\n  complexity: {}\n\n",
117            complexity
118        ));
119
120        // Transactions
121        let tx_count = Self::complexity_to_tx_count(complexity);
122        yaml.push_str(&format!(
123            "transactions:\n  count: {}\n  anomaly_rate: 0.02\n\n",
124            tx_count
125        ));
126
127        // Output
128        yaml.push_str("output:\n  format: csv\n  compression: false\n\n");
129
130        // Feature-specific sections
131        for feature in &intent.features {
132            match feature.as_str() {
133                "fraud" => {
134                    yaml.push_str(
135                        "fraud:\n  enabled: true\n  types:\n    - fictitious_transaction\n    - duplicate_payment\n    - split_transaction\n  injection_rate: 0.03\n\n",
136                    );
137                }
138                "audit" => {
139                    yaml.push_str(
140                        "audit_standards:\n  enabled: true\n  isa_compliance:\n    enabled: true\n    compliance_level: standard\n    framework: isa\n  analytical_procedures:\n    enabled: true\n    procedures_per_account: 3\n  confirmations:\n    enabled: true\n    positive_response_rate: 0.85\n  sox:\n    enabled: true\n    materiality_threshold: 10000.0\n\n",
141                    );
142                }
143                "banking" => {
144                    yaml.push_str(
145                        "banking:\n  enabled: true\n  customer_count: 100\n  account_types:\n    - checking\n    - savings\n    - loan\n  kyc_enabled: true\n  aml_enabled: true\n\n",
146                    );
147                }
148                "controls" => {
149                    yaml.push_str(
150                        "internal_controls:\n  enabled: true\n  coso_enabled: true\n  include_entity_level_controls: true\n  target_maturity_level: \"managed\"\n  exception_rate: 0.02\n  sod_violation_rate: 0.01\n\n",
151                    );
152                }
153                "process_mining" => {
154                    yaml.push_str(
155                        "business_processes:\n  enabled: true\n  ocel_export: true\n  p2p:\n    enabled: true\n  o2c:\n    enabled: true\n\n",
156                    );
157                }
158                "intercompany" => {
159                    yaml.push_str(
160                        "intercompany:\n  enabled: true\n  matching_tolerance: 0.01\n  elimination_enabled: true\n\n",
161                    );
162                }
163                "distributions" => {
164                    yaml.push_str(&format!(
165                        "distributions:\n  enabled: true\n  industry_profile: {}\n  amounts:\n    enabled: true\n    distribution_type: lognormal\n    benford_compliance: true\n\n",
166                        industry
167                    ));
168                }
169                other => {
170                    tracing::warn!(
171                        "Unknown NL config feature '{}' ignored. Valid features: fraud, audit, banking, controls, process_mining, intercompany, distributions",
172                        other
173                    );
174                }
175            }
176        }
177
178        Ok(yaml)
179    }
180
181    /// Attempt LLM-based parsing of the description.
182    fn parse_with_llm(
183        description: &str,
184        provider: &dyn LlmProvider,
185    ) -> Result<ConfigIntent, SynthError> {
186        let system_prompt = "You are a configuration parser. Extract structured fields from a natural language description of desired synthetic data generation. Return ONLY a JSON object with these fields: industry (string or null), country (string or null), company_size (string or null), period_months (number or null), features (array of strings). Valid industries: retail, manufacturing, financial_services, healthcare, technology. Valid sizes: small, medium, large. Valid features: fraud, audit, banking, controls, process_mining, intercompany, distributions.";
187
188        let request = LlmRequest::new(description)
189            .with_system(system_prompt.to_string())
190            .with_temperature(0.1)
191            .with_max_tokens(512);
192
193        let response = provider.complete(&request)?;
194        Self::parse_llm_response(&response.content)
195    }
196
197    /// Parse the LLM response JSON into a ConfigIntent.
198    fn parse_llm_response(content: &str) -> Result<ConfigIntent, SynthError> {
199        // Try to find JSON in the response
200        let json_str = Self::extract_json(content)
201            .ok_or_else(|| SynthError::generation("No JSON found in LLM response"))?;
202
203        let value: serde_json::Value = serde_json::from_str(json_str)
204            .map_err(|e| SynthError::generation(format!("Failed to parse LLM JSON: {}", e)))?;
205
206        let industry = value
207            .get("industry")
208            .and_then(|v| v.as_str())
209            .map(String::from);
210        let country = value
211            .get("country")
212            .and_then(|v| v.as_str())
213            .map(String::from);
214        let company_size = value
215            .get("company_size")
216            .and_then(|v| v.as_str())
217            .map(String::from);
218        let period_months = value
219            .get("period_months")
220            .and_then(|v| v.as_u64())
221            .map(|v| v as u32);
222        let features = value
223            .get("features")
224            .and_then(|v| v.as_array())
225            .map(|arr| {
226                arr.iter()
227                    .filter_map(|v| v.as_str().map(String::from))
228                    .collect()
229            })
230            .unwrap_or_default();
231
232        Ok(ConfigIntent {
233            industry,
234            country,
235            company_size,
236            period_months,
237            features,
238        })
239    }
240
241    /// Extract a JSON object substring from potentially noisy LLM output.
242    fn extract_json(content: &str) -> Option<&str> {
243        // Find the first '{' and matching '}'
244        let start = content.find('{')?;
245        let mut depth = 0i32;
246        for (i, ch) in content[start..].char_indices() {
247            match ch {
248                '{' => depth += 1,
249                '}' => {
250                    depth -= 1;
251                    if depth == 0 {
252                        return Some(&content[start..start + i + 1]);
253                    }
254                }
255                _ => {}
256            }
257        }
258        None
259    }
260
261    /// Keyword-based parsing as a reliable fallback.
262    fn parse_with_keywords(description: &str) -> ConfigIntent {
263        let lower = description.to_lowercase();
264
265        let industry = Self::extract_industry(&lower);
266        let country = Self::extract_country(&lower);
267        let company_size = Self::extract_size(&lower);
268        let period_months = Self::extract_period(&lower);
269        let features = Self::extract_features(&lower);
270
271        ConfigIntent {
272            industry,
273            country,
274            company_size,
275            period_months,
276            features,
277        }
278    }
279
280    /// Extract industry from lowercased text.
281    ///
282    /// Uses a scoring approach: each industry gets points for keyword matches,
283    /// and the highest-scoring industry wins. This avoids order-dependent
284    /// issues where "banking" in a feature context incorrectly triggers
285    /// "financial_services" over "technology".
286    fn extract_industry(text: &str) -> Option<String> {
287        let patterns: &[(&[&str], &str)] = &[
288            (
289                &["retail", "store", "shop", "e-commerce", "ecommerce"],
290                "retail",
291            ),
292            (
293                &["manufactur", "factory", "production", "assembly"],
294                "manufacturing",
295            ),
296            (
297                &[
298                    "financial",
299                    "finance",
300                    "insurance",
301                    "fintech",
302                    "investment firm",
303                ],
304                "financial_services",
305            ),
306            (
307                &["health", "hospital", "medical", "pharma", "clinic"],
308                "healthcare",
309            ),
310            (
311                &["tech", "software", "saas", "startup", "digital"],
312                "technology",
313            ),
314        ];
315
316        let mut best: Option<(&str, usize)> = None;
317        for (keywords, industry) in patterns {
318            let count = keywords.iter().filter(|kw| text.contains(*kw)).count();
319            if count > 0 && (best.is_none() || count > best.expect("checked is_some").1) {
320                best = Some((industry, count));
321            }
322        }
323        best.map(|(industry, _)| industry.to_string())
324    }
325
326    /// Extract country from lowercased text.
327    fn extract_country(text: &str) -> Option<String> {
328        // Check full country names first (most reliable), then short codes.
329        // Short codes like "in", "de", "us" can clash with English words,
330        // so we only use unambiguous short codes.
331        let name_patterns = [
332            (&["united states", "u.s.", "america"][..], "US"),
333            (&["germany", "german"][..], "DE"),
334            (&["united kingdom", "british", "england"][..], "GB"),
335            (&["china", "chinese"][..], "CN"),
336            (&["japan", "japanese"][..], "JP"),
337            (&["india", "indian"][..], "IN"),
338            (&["brazil", "brazilian"][..], "BR"),
339            (&["mexico", "mexican"][..], "MX"),
340            (&["australia", "australian"][..], "AU"),
341            (&["singapore", "singaporean"][..], "SG"),
342            (&["korea", "korean"][..], "KR"),
343            (&["france", "french"][..], "FR"),
344            (&["canada", "canadian"][..], "CA"),
345        ];
346
347        for (keywords, code) in &name_patterns {
348            if keywords.iter().any(|kw| text.contains(kw)) {
349                return Some(code.to_string());
350            }
351        }
352
353        // Fall back to short codes (padded with spaces).
354        // Excluded: "in" (India - clashes with preposition "in"),
355        //           "de" (Germany - clashes with various uses).
356        let padded = format!(" {} ", text);
357        let safe_codes = [
358            (" us ", "US"),
359            (" uk ", "GB"),
360            (" gb ", "GB"),
361            (" cn ", "CN"),
362            (" jp ", "JP"),
363            (" br ", "BR"),
364            (" mx ", "MX"),
365            (" au ", "AU"),
366            (" sg ", "SG"),
367            (" kr ", "KR"),
368            (" fr ", "FR"),
369            (" ca ", "CA"),
370        ];
371
372        for (code_pattern, code) in &safe_codes {
373            if padded.contains(code_pattern) {
374                return Some(code.to_string());
375            }
376        }
377
378        None
379    }
380
381    /// Extract company size from lowercased text.
382    fn extract_size(text: &str) -> Option<String> {
383        if text.contains("small") || text.contains("startup") || text.contains("tiny") {
384            Some("small".to_string())
385        } else if text.contains("large")
386            || text.contains("enterprise")
387            || text.contains("big")
388            || text.contains("multinational")
389            || text.contains("fortune 500")
390        {
391            Some("large".to_string())
392        } else if text.contains("medium")
393            || text.contains("mid-size")
394            || text.contains("midsize")
395            || text.contains("mid size")
396        {
397            Some("medium".to_string())
398        } else {
399            None
400        }
401    }
402
403    /// Extract period in months from lowercased text.
404    fn extract_period(text: &str) -> Option<u32> {
405        // Match patterns like "1 year", "2 years", "6 months", "18 months"
406        // Also handle "one year", "two years", etc.
407        let word_numbers = [
408            ("one", 1u32),
409            ("two", 2),
410            ("three", 3),
411            ("four", 4),
412            ("five", 5),
413            ("six", 6),
414            ("twelve", 12),
415            ("eighteen", 18),
416            ("twenty-four", 24),
417        ];
418
419        // Try "N year(s)" pattern
420        for (word, num) in &word_numbers {
421            if text.contains(&format!("{} year", word)) {
422                return Some(num * 12);
423            }
424            if text.contains(&format!("{} month", word)) {
425                return Some(*num);
426            }
427        }
428
429        // Try numeric patterns: "N year(s)", "N month(s)"
430        let tokens: Vec<&str> = text.split_whitespace().collect();
431        for window in tokens.windows(2) {
432            if let Ok(num) = window[0].parse::<u32>() {
433                if window[1].starts_with("year") {
434                    return Some(num * 12);
435                }
436                if window[1].starts_with("month") {
437                    return Some(num);
438                }
439            }
440        }
441
442        None
443    }
444
445    /// Extract feature flags from lowercased text.
446    fn extract_features(text: &str) -> Vec<String> {
447        let mut features = Vec::new();
448
449        let feature_patterns = [
450            (&["fraud", "fraudulent", "suspicious"][..], "fraud"),
451            (&["audit", "auditing", "assurance"][..], "audit"),
452            (&["banking", "bank account", "kyc", "aml"][..], "banking"),
453            (
454                &["control", "sox", "sod", "segregation of duties", "coso"][..],
455                "controls",
456            ),
457            (
458                &["process mining", "ocel", "event log"][..],
459                "process_mining",
460            ),
461            (
462                &["intercompany", "inter-company", "consolidation"][..],
463                "intercompany",
464            ),
465            (
466                &["distribution", "benford", "statistical"][..],
467                "distributions",
468            ),
469        ];
470
471        for (keywords, feature) in &feature_patterns {
472            if keywords.iter().any(|kw| text.contains(kw)) {
473                features.push(feature.to_string());
474            }
475        }
476
477        features
478    }
479
480    /// Merge two ConfigIntents, preferring the primary where available.
481    fn merge_intents(primary: ConfigIntent, fallback: ConfigIntent) -> ConfigIntent {
482        ConfigIntent {
483            industry: primary.industry.or(fallback.industry),
484            country: primary.country.or(fallback.country),
485            company_size: primary.company_size.or(fallback.company_size),
486            period_months: primary.period_months.or(fallback.period_months),
487            features: if primary.features.is_empty() {
488                fallback.features
489            } else {
490                primary.features
491            },
492        }
493    }
494
495    /// Map country code to default currency.
496    fn country_to_currency(country: &str) -> &'static str {
497        match country {
498            "US" | "CA" => "USD",
499            "DE" | "FR" => "EUR",
500            "GB" => "GBP",
501            "CN" => "CNY",
502            "JP" => "JPY",
503            "IN" => "INR",
504            "BR" => "BRL",
505            "MX" => "MXN",
506            "AU" => "AUD",
507            "SG" => "SGD",
508            "KR" => "KRW",
509            _ => "USD",
510        }
511    }
512
513    /// Generate a company name based on industry.
514    fn industry_company_name(industry: &str) -> &'static str {
515        match industry {
516            "retail" => "Retail Corp",
517            "manufacturing" => "Manufacturing Industries Inc",
518            "financial_services" => "Financial Services Group",
519            "healthcare" => "HealthCare Solutions",
520            "technology" => "TechCorp Solutions",
521            _ => "DataSynth Corp",
522        }
523    }
524
525    /// Map complexity to an appropriate transaction count.
526    fn complexity_to_tx_count(complexity: &str) -> u32 {
527        match complexity {
528            "small" => 1000,
529            "medium" => 5000,
530            "large" => 25000,
531            _ => 5000,
532        }
533    }
534}
535
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::llm::mock_provider::MockLlmProvider;

    /// Parse `description` using a mock provider seeded with 42.
    fn parse(description: &str) -> ConfigIntent {
        let provider = MockLlmProvider::new(42);
        NlConfigGenerator::parse_intent(description, &provider).expect("should parse successfully")
    }

    /// Render `intent` to YAML, failing the test if rendering is rejected.
    fn render(intent: &ConfigIntent) -> String {
        NlConfigGenerator::intent_to_yaml(intent).expect("should generate YAML")
    }

    /// Shorthand for `Some(value.to_string())`.
    fn some(value: &str) -> Option<String> {
        Some(value.to_string())
    }

    /// Assert that every name in `expected` is among the intent's features.
    fn assert_has_features(intent: &ConfigIntent, expected: &[&str]) {
        for name in expected {
            assert!(intent.features.contains(&name.to_string()));
        }
    }

    /// Assert that every fragment occurs somewhere in `yaml`.
    fn assert_contains_all(yaml: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(yaml.contains(*fragment));
        }
    }

    // ---- parse_intent -------------------------------------------------

    #[test]
    fn test_parse_retail_description() {
        let intent = parse("Generate 1 year of retail data for a medium US company");

        assert_eq!(intent.industry, some("retail"));
        assert_eq!(intent.country, some("US"));
        assert_eq!(intent.company_size, some("medium"));
        assert_eq!(intent.period_months, Some(12));
    }

    #[test]
    fn test_parse_manufacturing_with_fraud() {
        let intent = parse(
            "Create 6 months of manufacturing data for a large German company with fraud detection",
        );

        assert_eq!(intent.industry, some("manufacturing"));
        assert_eq!(intent.country, some("DE"));
        assert_eq!(intent.company_size, some("large"));
        assert_eq!(intent.period_months, Some(6));
        assert_has_features(&intent, &["fraud"]);
    }

    #[test]
    fn test_parse_financial_services_with_audit() {
        let intent =
            parse("I need 2 years of financial services data for audit testing with SOX controls");

        assert_eq!(intent.industry, some("financial_services"));
        assert_eq!(intent.period_months, Some(24));
        assert_has_features(&intent, &["audit", "controls"]);
    }

    #[test]
    fn test_parse_healthcare_small() {
        let intent = parse("Small healthcare company in Japan, 3 months of data");

        assert_eq!(intent.industry, some("healthcare"));
        assert_eq!(intent.country, some("JP"));
        assert_eq!(intent.company_size, some("small"));
        assert_eq!(intent.period_months, Some(3));
    }

    #[test]
    fn test_parse_technology_with_banking() {
        let intent =
            parse("Generate data for a technology startup in Singapore with banking and KYC");

        assert_eq!(intent.industry, some("technology"));
        assert_eq!(intent.country, some("SG"));
        assert_eq!(intent.company_size, some("small"));
        assert_has_features(&intent, &["banking"]);
    }

    #[test]
    fn test_parse_word_numbers() {
        let intent = parse("Generate two years of retail data");

        assert_eq!(intent.period_months, Some(24));
    }

    #[test]
    fn test_parse_multiple_features() {
        let intent = parse(
            "Manufacturing data with fraud detection, audit trail, process mining, and intercompany consolidation",
        );

        assert_eq!(intent.industry, some("manufacturing"));
        assert_has_features(
            &intent,
            &["fraud", "audit", "process_mining", "intercompany"],
        );
    }

    #[test]
    fn test_parse_uk_country() {
        let intent = parse("Generate data for a UK manufacturing company");

        assert_eq!(intent.country, some("GB"));
    }

    // ---- intent_to_yaml -----------------------------------------------

    #[test]
    fn test_intent_to_yaml_basic() {
        let yaml = render(&ConfigIntent {
            industry: some("retail"),
            country: some("US"),
            company_size: some("medium"),
            period_months: Some(12),
            features: vec![],
        });

        assert_contains_all(
            &yaml,
            &[
                "industry: retail",
                "period_months: 12",
                "currency: \"USD\"",
                "country: \"US\"",
                "complexity: medium",
                "count: 5000",
            ],
        );
    }

    #[test]
    fn test_intent_to_yaml_with_features() {
        let yaml = render(&ConfigIntent {
            industry: some("manufacturing"),
            country: some("DE"),
            company_size: some("large"),
            period_months: Some(24),
            features: vec![
                "fraud".to_string(),
                "audit".to_string(),
                "controls".to_string(),
            ],
        });

        assert_contains_all(
            &yaml,
            &[
                "industry: manufacturing",
                "currency: \"EUR\"",
                "complexity: large",
                "count: 25000",
                "fraud:",
                "audit_standards:",
                "internal_controls:",
            ],
        );
    }

    #[test]
    fn test_intent_to_yaml_defaults() {
        let yaml = render(&ConfigIntent::default());

        // Should use defaults
        assert_contains_all(
            &yaml,
            &[
                "industry: manufacturing",
                "period_months: 12",
                "complexity: medium",
            ],
        );
    }

    #[test]
    fn test_intent_to_yaml_invalid_period() {
        // One value below and one above the accepted 1..=120 range.
        for months in [0u32, 121] {
            let intent = ConfigIntent {
                period_months: Some(months),
                ..ConfigIntent::default()
            };

            let result = NlConfigGenerator::intent_to_yaml(&intent);
            assert!(result.is_err());
        }
    }

    #[test]
    fn test_intent_to_yaml_banking_feature() {
        let yaml = render(&ConfigIntent {
            industry: some("financial_services"),
            country: some("US"),
            company_size: some("large"),
            period_months: Some(12),
            features: vec!["banking".to_string()],
        });

        assert_contains_all(&yaml, &["banking:", "kyc_enabled: true", "aml_enabled: true"]);
    }

    #[test]
    fn test_intent_to_yaml_process_mining_feature() {
        let yaml = render(&ConfigIntent {
            features: vec!["process_mining".to_string()],
            ..ConfigIntent::default()
        });

        assert_contains_all(&yaml, &["business_processes:", "ocel_export: true"]);
    }

    #[test]
    fn test_intent_to_yaml_distributions_feature() {
        let yaml = render(&ConfigIntent {
            industry: some("retail"),
            features: vec!["distributions".to_string()],
            ..ConfigIntent::default()
        });

        assert_contains_all(
            &yaml,
            &[
                "distributions:",
                "industry_profile: retail",
                "benford_compliance: true",
            ],
        );
    }

    // ---- generate -----------------------------------------------------

    #[test]
    fn test_generate_end_to_end() {
        let provider = MockLlmProvider::new(42);
        let yaml = NlConfigGenerator::generate(
            "Generate 1 year of retail data for a medium US company with fraud detection",
            &provider,
        )
        .expect("should generate YAML");

        assert_contains_all(
            &yaml,
            &[
                "industry: retail",
                "period_months: 12",
                "currency: \"USD\"",
                "fraud:",
                "complexity: medium",
            ],
        );
    }

    #[test]
    fn test_generate_empty_description() {
        let provider = MockLlmProvider::new(42);

        for blank in ["", "   "] {
            let result = NlConfigGenerator::generate(blank, &provider);
            assert!(result.is_err());
        }
    }

    // ---- JSON extraction and decoding ---------------------------------

    #[test]
    fn test_extract_json_from_response() {
        let content = r#"Here is the parsed output: {"industry": "retail", "country": "US"} done"#;
        let json = NlConfigGenerator::extract_json(content);

        assert!(json.is_some());
        assert_eq!(
            json.expect("json should be present"),
            r#"{"industry": "retail", "country": "US"}"#
        );
    }

    #[test]
    fn test_extract_json_nested() {
        let content = r#"{"industry": "retail", "features": ["fraud", "audit"]}"#;

        assert!(NlConfigGenerator::extract_json(content).is_some());
    }

    #[test]
    fn test_extract_json_missing() {
        let content = "No JSON here at all";

        assert!(NlConfigGenerator::extract_json(content).is_none());
    }

    #[test]
    fn test_parse_llm_response_valid() {
        let content = r#"{"industry": "retail", "country": "US", "company_size": "medium", "period_months": 12, "features": ["fraud"]}"#;
        let intent =
            NlConfigGenerator::parse_llm_response(content).expect("should parse valid JSON");

        assert_eq!(intent.industry, some("retail"));
        assert_eq!(intent.country, some("US"));
        assert_eq!(intent.company_size, some("medium"));
        assert_eq!(intent.period_months, Some(12));
        assert_eq!(intent.features, vec!["fraud".to_string()]);
    }

    #[test]
    fn test_parse_llm_response_partial() {
        let content = r#"{"industry": "retail"}"#;
        let intent =
            NlConfigGenerator::parse_llm_response(content).expect("should parse partial JSON");

        assert_eq!(intent.industry, some("retail"));
        assert_eq!(intent.country, None);
        assert!(intent.features.is_empty());
    }

    // ---- lookup tables and merging ------------------------------------

    #[test]
    fn test_country_to_currency_mapping() {
        let expectations = [
            ("US", "USD"),
            ("DE", "EUR"),
            ("GB", "GBP"),
            ("JP", "JPY"),
            ("CN", "CNY"),
            ("BR", "BRL"),
            ("XX", "USD"), // Unknown defaults to USD
        ];

        for (country, currency) in expectations {
            assert_eq!(NlConfigGenerator::country_to_currency(country), currency);
        }
    }

    #[test]
    fn test_merge_intents() {
        let primary = ConfigIntent {
            industry: some("retail"),
            country: None,
            company_size: None,
            period_months: Some(12),
            features: vec![],
        };
        let fallback = ConfigIntent {
            industry: some("manufacturing"),
            country: some("DE"),
            company_size: some("large"),
            period_months: Some(6),
            features: vec!["fraud".to_string()],
        };

        let merged = NlConfigGenerator::merge_intents(primary, fallback);

        assert_eq!(merged.industry, some("retail")); // primary wins
        assert_eq!(merged.country, some("DE")); // fallback fills gap
        assert_eq!(merged.company_size, some("large")); // fallback fills gap
        assert_eq!(merged.period_months, Some(12)); // primary wins
        assert_eq!(merged.features, vec!["fraud".to_string()]); // fallback since primary empty
    }
}