mockforge_data/
lib.rs

1//! Pillars: [Reality]
2//!
3//! # MockForge Data
4//!
5//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
6
7mod error;
8
9// Re-export error types
10pub use error::{Error, Result};
11
12/// Consistency engine for entity ID → persona mappings
13pub mod consistency;
14pub mod dataset;
15pub mod domains;
16pub mod drift;
17pub mod drift_learning;
18pub mod faker;
19pub mod generator;
20pub mod intelligent_mock;
21pub mod mock_generator;
22pub mod mock_server;
23/// Persona profile system for consistent data generation
24pub mod persona;
25/// Backstory generation for personas
26pub mod persona_backstory;
27/// Persona graph and relationship management
28pub mod persona_graph;
29/// Persona lifecycle and time-aware state management
30pub mod persona_lifecycle;
31/// Lifecycle state response modifiers
32pub mod persona_lifecycle_response;
33/// Domain-specific persona templates
34pub mod persona_templates;
35/// Provider utilities for faker and data generation
36pub mod provider;
37/// RAG (Retrieval-Augmented Generation) utilities for intelligent mock data generation
38pub mod rag;
39pub mod replay_augmentation;
40pub mod schema;
41pub mod token_resolver;
42
43#[cfg(test)]
44mod mock_data_tests;
45
46#[cfg(test)]
47mod integration_tests;
48
49pub use consistency::{ConsistencyStore, EntityIdExtractor, EntityType};
50pub use dataset::{Dataset, DatasetValidationResult};
51pub use domains::{Domain, DomainGenerator, ParseDomainError};
52pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
53pub use drift_learning::{
54    BehaviorEvent, BehaviorEventType, DriftLearningEngine, LearnedPattern, LearningConfig,
55    LearningMode, PatternType, PersonaBehaviorLearner, TrafficPatternLearner,
56};
57pub use fake::Faker;
58pub use generator::DataGenerator;
59pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
60pub use mock_generator::{MockDataGenerator, MockDataResult, MockGeneratorConfig, MockResponse};
61pub use mock_server::{
62    start_mock_server, start_mock_server_with_config, MockServer, MockServerBuilder,
63    MockServerConfig,
64};
65pub use persona::{PersonaGenerator, PersonaProfile, PersonaRegistry};
66pub use persona_backstory::{BackstoryGenerator, BackstoryTemplate};
67pub use persona_graph::{
68    Edge, GraphStats, GraphVisualization, PersonaGraph, PersonaNode, VisualizationEdge,
69    VisualizationNode,
70};
71pub use persona_lifecycle::{
72    LifecyclePreset, LifecycleScenarios, LifecycleState, PersonaLifecycle, TransitionRule,
73};
74pub use persona_lifecycle_response::{
75    apply_billing_lifecycle_effects, apply_lifecycle_effects, apply_loan_lifecycle_effects,
76    apply_order_fulfillment_lifecycle_effects, apply_support_lifecycle_effects,
77    apply_user_engagement_lifecycle_effects,
78};
79pub use persona_templates::{
80    EcommercePersonaTemplate, FinancePersonaTemplate, HealthcarePersonaTemplate, PersonaTemplate,
81    PersonaTemplateRegistry,
82};
83pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
84pub use replay_augmentation::{
85    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
86};
87pub use schema::{FieldDefinition, SchemaDefinition};
88pub use token_resolver::{resolve_tokens, resolve_tokens_with_rag, TokenResolver, TokenType};
89
90/// Data generation configuration
91#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
92pub struct DataConfig {
93    /// Number of rows to generate
94    #[serde(default = "default_rows")]
95    pub rows: usize,
96    /// Random seed for reproducible generation
97    pub seed: Option<u64>,
98    /// Enable RAG mode
99    pub rag_enabled: bool,
100    /// Maximum RAG context length
101    #[serde(default = "default_rag_context_length")]
102    pub rag_context_length: usize,
103    /// Output format
104    pub format: OutputFormat,
105}
106
/// Row count used when `rows` is absent from a deserialized [`DataConfig`].
fn default_rows() -> usize {
    const DEFAULT_ROWS: usize = 100;
    DEFAULT_ROWS
}
/// RAG context length used when `rag_context_length` is absent from a
/// deserialized [`DataConfig`].
fn default_rag_context_length() -> usize {
    const DEFAULT_RAG_CONTEXT_LENGTH: usize = 1000;
    DEFAULT_RAG_CONTEXT_LENGTH
}
113
114/// Output format for generated data
115#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
116#[serde(rename_all = "lowercase")]
117pub enum OutputFormat {
118    /// JSON format
119    #[default]
120    Json,
121    /// JSON Lines format
122    JsonLines,
123    /// YAML format
124    Yaml,
125    /// CSV format
126    Csv,
127}
128
129/// Generation result
130#[derive(Debug)]
131pub struct GenerationResult {
132    /// Generated data
133    pub data: Vec<serde_json::Value>,
134    /// Number of rows generated
135    pub count: usize,
136    /// Generation time in milliseconds
137    pub generation_time_ms: u128,
138    /// Any warnings during generation
139    pub warnings: Vec<String>,
140}
141
142impl GenerationResult {
143    /// Create a new generation result
144    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
145        Self {
146            count: data.len(),
147            data,
148            generation_time_ms,
149            warnings: Vec::new(),
150        }
151    }
152
153    /// Add a warning
154    pub fn with_warning(mut self, warning: String) -> Self {
155        self.warnings.push(warning);
156        self
157    }
158
159    /// Get data as JSON string
160    pub fn to_json_string(&self) -> crate::Result<String> {
161        Ok(serde_json::to_string_pretty(&self.data)?)
162    }
163
164    /// Get data as JSON Lines string
165    pub fn to_jsonl_string(&self) -> crate::Result<String> {
166        let lines: Vec<String> = self
167            .data
168            .iter()
169            .map(serde_json::to_string)
170            .collect::<std::result::Result<_, _>>()?;
171        Ok(lines.join("\n"))
172    }
173}
174
175/// Quick data generation function
176pub async fn generate_data(
177    schema: SchemaDefinition,
178    config: DataConfig,
179) -> crate::Result<GenerationResult> {
180    let mut generator = DataGenerator::new(schema, config)?;
181    generator.generate().await
182}
183
184/// Generate sample data from a JSON schema
185pub async fn generate_from_json_schema(
186    json_schema: &serde_json::Value,
187    rows: usize,
188) -> crate::Result<GenerationResult> {
189    let schema = SchemaDefinition::from_json_schema(json_schema)?;
190    let config = DataConfig {
191        rows,
192        ..Default::default()
193    };
194    generate_data(schema, config).await
195}
196
197/// Generate sample data from an OpenAPI schema
198pub async fn generate_from_openapi(
199    openapi_spec: &serde_json::Value,
200    rows: usize,
201) -> crate::Result<GenerationResult> {
202    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
203    let config = DataConfig {
204        rows,
205        ..Default::default()
206    };
207    generate_data(schema, config).await
208}
209
/// Unit tests for the crate-level configuration and result types.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_data_config_default() {
        // `Default` is derived, so the numeric fields are zero here. The
        // 100 / 1000 values only apply to fields omitted during
        // deserialization (see `test_data_config_serde_defaults`).
        let config = DataConfig::default();
        assert_eq!(config.rows, 0);
        assert_eq!(config.seed, None);
        assert!(!config.rag_enabled);
        assert_eq!(config.rag_context_length, 0);
        assert!(matches!(config.format, OutputFormat::Json));
    }

    #[test]
    fn test_data_config_serde_defaults() {
        // `rows` and `rag_context_length` are omitted on purpose so the
        // serde default functions are exercised.
        let config: DataConfig =
            serde_json::from_value(json!({"rag_enabled": false, "format": "json"})).unwrap();

        assert_eq!(config.rows, default_rows());
        assert_eq!(config.seed, None);
        assert!(!config.rag_enabled);
        assert_eq!(config.rag_context_length, default_rag_context_length());
        assert!(matches!(config.format, OutputFormat::Json));
    }

    #[test]
    fn test_data_config_custom() {
        let config = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(config.rows, 50);
        assert_eq!(config.seed, Some(42));
        assert!(config.rag_enabled);
        assert_eq!(config.rag_context_length, 2000);
        assert!(matches!(config.format, OutputFormat::Csv));
    }

    #[test]
    fn test_output_format_variants() {
        let json = OutputFormat::Json;
        let jsonlines = OutputFormat::JsonLines;
        let yaml = OutputFormat::Yaml;
        let csv = OutputFormat::Csv;

        assert!(matches!(json, OutputFormat::Json));
        assert!(matches!(jsonlines, OutputFormat::JsonLines));
        assert!(matches!(yaml, OutputFormat::Yaml));
        assert!(matches!(csv, OutputFormat::Csv));
    }

    #[test]
    fn test_output_format_serialized_names() {
        // `rename_all = "lowercase"` determines the wire names.
        assert_eq!(serde_json::to_value(OutputFormat::Json).unwrap(), json!("json"));
        assert_eq!(
            serde_json::to_value(OutputFormat::JsonLines).unwrap(),
            json!("jsonlines")
        );
        assert_eq!(serde_json::to_value(OutputFormat::Yaml).unwrap(), json!("yaml"));
        assert_eq!(serde_json::to_value(OutputFormat::Csv).unwrap(), json!("csv"));
    }

    #[test]
    fn test_generation_result_new() {
        let data = vec![json!({"id": 1, "name": "test"})];
        let result = GenerationResult::new(data, 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_generation_result_empty() {
        let result = GenerationResult::new(Vec::new(), 0);

        assert_eq!(result.count, 0);
        assert!(result.data.is_empty());
        assert_eq!(result.to_json_string().unwrap(), "[]");
        assert_eq!(result.to_jsonl_string().unwrap(), "");
    }

    #[test]
    fn test_generation_result_with_warning() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 50).with_warning("Test warning".to_string());

        assert_eq!(result.warnings.len(), 1);
        assert_eq!(result.warnings[0], "Test warning");
    }

    #[test]
    fn test_generation_result_to_json_string() {
        let data = vec![json!({"id": 1, "name": "test"})];
        let result = GenerationResult::new(data, 10);

        let json_string = result.to_json_string();
        assert!(json_string.is_ok());
        let json_str = json_string.unwrap();
        assert!(json_str.contains("\"id\""));
        assert!(json_str.contains("\"name\""));
    }

    #[test]
    fn test_generation_result_to_jsonl_string() {
        let data = vec![json!({"id": 1}), json!({"id": 2})];
        let result = GenerationResult::new(data, 10);

        let jsonl_string = result.to_jsonl_string();
        assert!(jsonl_string.is_ok());
        // One compact object per line, newline-separated, no trailing newline.
        assert_eq!(jsonl_string.unwrap(), "{\"id\":1}\n{\"id\":2}");
    }

    #[test]
    fn test_generation_result_multiple_warnings() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings.len(), 2);
        assert_eq!(result.warnings[0], "Warning 1");
        assert_eq!(result.warnings[1], "Warning 2");
    }

    #[test]
    fn test_default_rows() {
        assert_eq!(default_rows(), 100);
    }

    #[test]
    fn test_default_rag_context_length() {
        assert_eq!(default_rag_context_length(), 1000);
    }
}
321}