mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5// Re-export error types from mockforge-core
6pub use mockforge_core::{Error, Result};
7
8/// Consistency engine for entity ID → persona mappings
9pub mod consistency;
10pub mod dataset;
11pub mod domains;
12pub mod drift;
13pub mod faker;
14pub mod generator;
15pub mod intelligent_mock;
16pub mod mock_generator;
17pub mod mock_server;
18/// Persona profile system for consistent data generation
19pub mod persona;
20/// Backstory generation for personas
21pub mod persona_backstory;
22/// Persona graph and relationship management
23pub mod persona_graph;
24/// Persona lifecycle and time-aware state management
25pub mod persona_lifecycle;
26/// Lifecycle state response modifiers
27pub mod persona_lifecycle_response;
28/// Domain-specific persona templates
29pub mod persona_templates;
30/// Provider utilities for faker and data generation
31pub mod provider;
32/// RAG (Retrieval-Augmented Generation) utilities for intelligent mock data generation
33pub mod rag;
34pub mod replay_augmentation;
35pub mod schema;
36pub mod token_resolver;
37
38#[cfg(test)]
39mod mock_data_tests;
40
41#[cfg(test)]
42mod integration_tests;
43
44pub use consistency::{ConsistencyStore, EntityIdExtractor, EntityType};
45pub use dataset::{Dataset, DatasetValidationResult};
46pub use domains::{Domain, DomainGenerator, ParseDomainError};
47pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
48pub use fake::Faker;
49pub use generator::DataGenerator;
50pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
51pub use mock_generator::{MockDataGenerator, MockDataResult, MockGeneratorConfig, MockResponse};
52pub use mock_server::{
53    start_mock_server, start_mock_server_with_config, MockServer, MockServerBuilder,
54    MockServerConfig,
55};
56pub use persona::{PersonaGenerator, PersonaProfile, PersonaRegistry};
57pub use persona_backstory::{BackstoryGenerator, BackstoryTemplate};
58pub use persona_graph::{
59    Edge, GraphStats, GraphVisualization, PersonaGraph, PersonaNode, VisualizationEdge,
60    VisualizationNode,
61};
62pub use persona_lifecycle::{
63    LifecyclePreset, LifecycleScenarios, LifecycleState, PersonaLifecycle, TransitionRule,
64};
65pub use persona_lifecycle_response::{
66    apply_billing_lifecycle_effects, apply_lifecycle_effects, apply_loan_lifecycle_effects,
67    apply_order_fulfillment_lifecycle_effects, apply_support_lifecycle_effects,
68};
69pub use persona_templates::{
70    EcommercePersonaTemplate, FinancePersonaTemplate, HealthcarePersonaTemplate, PersonaTemplate,
71    PersonaTemplateRegistry,
72};
73pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
74pub use replay_augmentation::{
75    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
76};
77pub use schema::{FieldDefinition, SchemaDefinition};
78pub use token_resolver::{resolve_tokens, resolve_tokens_with_rag, TokenResolver, TokenType};
79
80/// Data generation configuration
81#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
82pub struct DataConfig {
83    /// Number of rows to generate
84    #[serde(default = "default_rows")]
85    pub rows: usize,
86    /// Random seed for reproducible generation
87    pub seed: Option<u64>,
88    /// Enable RAG mode
89    pub rag_enabled: bool,
90    /// Maximum RAG context length
91    #[serde(default = "default_rag_context_length")]
92    pub rag_context_length: usize,
93    /// Output format
94    pub format: OutputFormat,
95}
96
/// Serde fallback for the `rows` field of `DataConfig` when it is absent from the input.
fn default_rows() -> usize {
    const FALLBACK_ROWS: usize = 100;
    FALLBACK_ROWS
}
/// Serde fallback for the `rag_context_length` field of `DataConfig` when it is absent
/// from the input.
fn default_rag_context_length() -> usize {
    const FALLBACK_CONTEXT_LENGTH: usize = 1000;
    FALLBACK_CONTEXT_LENGTH
}
103
104/// Output format for generated data
105#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
106#[serde(rename_all = "lowercase")]
107pub enum OutputFormat {
108    /// JSON format
109    #[default]
110    Json,
111    /// JSON Lines format
112    JsonLines,
113    /// YAML format
114    Yaml,
115    /// CSV format
116    Csv,
117}
118
119/// Generation result
120#[derive(Debug)]
121pub struct GenerationResult {
122    /// Generated data
123    pub data: Vec<serde_json::Value>,
124    /// Number of rows generated
125    pub count: usize,
126    /// Generation time in milliseconds
127    pub generation_time_ms: u128,
128    /// Any warnings during generation
129    pub warnings: Vec<String>,
130}
131
132impl GenerationResult {
133    /// Create a new generation result
134    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
135        Self {
136            count: data.len(),
137            data,
138            generation_time_ms,
139            warnings: Vec::new(),
140        }
141    }
142
143    /// Add a warning
144    pub fn with_warning(mut self, warning: String) -> Self {
145        self.warnings.push(warning);
146        self
147    }
148
149    /// Get data as JSON string
150    pub fn to_json_string(&self) -> mockforge_core::Result<String> {
151        Ok(serde_json::to_string_pretty(&self.data)?)
152    }
153
154    /// Get data as JSON Lines string
155    pub fn to_jsonl_string(&self) -> mockforge_core::Result<String> {
156        let lines: Vec<String> = self
157            .data
158            .iter()
159            .map(serde_json::to_string)
160            .collect::<std::result::Result<_, _>>()?;
161        Ok(lines.join("\n"))
162    }
163}
164
165/// Quick data generation function
166pub async fn generate_data(
167    schema: SchemaDefinition,
168    config: DataConfig,
169) -> mockforge_core::Result<GenerationResult> {
170    let mut generator = DataGenerator::new(schema, config)?;
171    generator.generate().await
172}
173
174/// Generate sample data from a JSON schema
175pub async fn generate_from_json_schema(
176    json_schema: &serde_json::Value,
177    rows: usize,
178) -> mockforge_core::Result<GenerationResult> {
179    let schema = SchemaDefinition::from_json_schema(json_schema)?;
180    let config = DataConfig {
181        rows,
182        ..Default::default()
183    };
184    generate_data(schema, config).await
185}
186
187/// Generate sample data from an OpenAPI schema
188pub async fn generate_from_openapi(
189    openapi_spec: &serde_json::Value,
190    rows: usize,
191) -> mockforge_core::Result<GenerationResult> {
192    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
193    let config = DataConfig {
194        rows,
195        ..Default::default()
196    };
197    generate_data(schema, config).await
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The derived `Default` zeroes the numeric fields; it does not use the serde fallbacks.
    #[test]
    fn test_data_config_default() {
        let DataConfig {
            rows,
            seed,
            rag_enabled,
            rag_context_length,
            format,
        } = DataConfig::default();

        assert_eq!(rows, 0);
        assert!(seed.is_none());
        assert!(!rag_enabled);
        assert_eq!(rag_context_length, 0);
        assert!(matches!(format, OutputFormat::Json));
    }

    /// Explicitly supplied values are stored unchanged.
    #[test]
    fn test_data_config_custom() {
        let config = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(config.rows, 50);
        assert!(matches!(config.seed, Some(42)));
        assert!(config.rag_enabled);
        assert_eq!(config.rag_context_length, 2000);
        assert!(matches!(config.format, OutputFormat::Csv));
    }

    /// Every variant can be constructed and told apart from the others.
    #[test]
    fn test_output_format_variants() {
        let label = |format: &OutputFormat| match format {
            OutputFormat::Json => "json",
            OutputFormat::JsonLines => "jsonlines",
            OutputFormat::Yaml => "yaml",
            OutputFormat::Csv => "csv",
        };

        assert_eq!(label(&OutputFormat::Json), "json");
        assert_eq!(label(&OutputFormat::JsonLines), "jsonlines");
        assert_eq!(label(&OutputFormat::Yaml), "yaml");
        assert_eq!(label(&OutputFormat::Csv), "csv");
    }

    /// `new` derives `count` from the rows and starts without warnings.
    #[test]
    fn test_generation_result_new() {
        let result = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    /// A single `with_warning` call records exactly that warning.
    #[test]
    fn test_generation_result_with_warning() {
        let result =
            GenerationResult::new(vec![json!({"id": 1})], 50).with_warning("Test warning".to_string());

        assert_eq!(result.warnings, ["Test warning"]);
    }

    /// Pretty JSON output contains the keys of the generated row.
    #[test]
    fn test_generation_result_to_json_string() {
        let result = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 10);

        let rendered = result
            .to_json_string()
            .expect("serializing plain JSON values should succeed");
        for key in ["\"id\"", "\"name\""] {
            assert!(rendered.contains(key));
        }
    }

    /// JSON Lines output holds each row in compact form, separated by a newline.
    #[test]
    fn test_generation_result_to_jsonl_string() {
        let rows = vec![json!({"id": 1}), json!({"id": 2})];
        let result = GenerationResult::new(rows, 10);

        let rendered = result
            .to_jsonl_string()
            .expect("serializing plain JSON values should succeed");
        assert!(rendered.contains(r#"{"id":1}"#));
        assert!(rendered.contains(r#"{"id":2}"#));
        assert!(rendered.contains('\n'));
    }

    /// Chained warnings are kept in insertion order.
    #[test]
    fn test_generation_result_multiple_warnings() {
        let result = GenerationResult::new(vec![json!({"id": 1})], 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings, ["Warning 1", "Warning 2"]);
    }

    /// The serde fallback for `rows` is 100.
    #[test]
    fn test_default_rows() {
        let fallback = default_rows();
        assert_eq!(fallback, 100);
    }

    /// The serde fallback for `rag_context_length` is 1000.
    #[test]
    fn test_default_rag_context_length() {
        let fallback = default_rag_context_length();
        assert_eq!(fallback, 1000);
    }
}