mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5// Re-export error types from mockforge-core
6pub use mockforge_core::{Error, Result};
7
8pub mod dataset;
9pub mod domains;
10pub mod drift;
11pub mod faker;
12pub mod generator;
13pub mod intelligent_mock;
14pub mod mock_generator;
15pub mod mock_server;
16pub mod provider;
17pub mod rag;
18pub mod replay_augmentation;
19pub mod schema;
20pub mod token_resolver;
21
22#[cfg(test)]
23mod mock_data_tests;
24
25pub use dataset::{Dataset, DatasetValidationResult};
26pub use domains::{Domain, DomainGenerator, ParseDomainError};
27pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
28pub use fake::Faker;
29pub use generator::DataGenerator;
30pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
31pub use mock_generator::{MockDataGenerator, MockDataResult, MockGeneratorConfig, MockResponse};
32pub use mock_server::{
33    start_mock_server, start_mock_server_with_config, MockServer, MockServerBuilder,
34    MockServerConfig,
35};
36pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
37pub use replay_augmentation::{
38    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
39};
40pub use schema::{FieldDefinition, SchemaDefinition};
41pub use token_resolver::{resolve_tokens, resolve_tokens_with_rag, TokenResolver, TokenType};
42
43/// Data generation configuration
44#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
45pub struct DataConfig {
46    /// Number of rows to generate
47    #[serde(default = "default_rows")]
48    pub rows: usize,
49    /// Random seed for reproducible generation
50    pub seed: Option<u64>,
51    /// Enable RAG mode
52    pub rag_enabled: bool,
53    /// Maximum RAG context length
54    #[serde(default = "default_rag_context_length")]
55    pub rag_context_length: usize,
56    /// Output format
57    pub format: OutputFormat,
58}
59
/// Serde fallback for `DataConfig::rows`: the row count used when the field
/// is missing from deserialized input.
fn default_rows() -> usize {
    const FALLBACK_ROWS: usize = 100;
    FALLBACK_ROWS
}
/// Serde fallback for `DataConfig::rag_context_length`: the context length
/// used when the field is missing from deserialized input.
fn default_rag_context_length() -> usize {
    const FALLBACK_CONTEXT_LENGTH: usize = 1000;
    FALLBACK_CONTEXT_LENGTH
}
66
67/// Output format for generated data
68#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
69#[serde(rename_all = "lowercase")]
70pub enum OutputFormat {
71    /// JSON format
72    #[default]
73    Json,
74    /// JSON Lines format
75    JsonLines,
76    /// YAML format
77    Yaml,
78    /// CSV format
79    Csv,
80}
81
82/// Generation result
83#[derive(Debug)]
84pub struct GenerationResult {
85    /// Generated data
86    pub data: Vec<serde_json::Value>,
87    /// Number of rows generated
88    pub count: usize,
89    /// Generation time in milliseconds
90    pub generation_time_ms: u128,
91    /// Any warnings during generation
92    pub warnings: Vec<String>,
93}
94
95impl GenerationResult {
96    /// Create a new generation result
97    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
98        Self {
99            count: data.len(),
100            data,
101            generation_time_ms,
102            warnings: Vec::new(),
103        }
104    }
105
106    /// Add a warning
107    pub fn with_warning(mut self, warning: String) -> Self {
108        self.warnings.push(warning);
109        self
110    }
111
112    /// Get data as JSON string
113    pub fn to_json_string(&self) -> mockforge_core::Result<String> {
114        Ok(serde_json::to_string_pretty(&self.data)?)
115    }
116
117    /// Get data as JSON Lines string
118    pub fn to_jsonl_string(&self) -> mockforge_core::Result<String> {
119        let lines: Vec<String> = self
120            .data
121            .iter()
122            .map(serde_json::to_string)
123            .collect::<std::result::Result<_, _>>()?;
124        Ok(lines.join("\n"))
125    }
126}
127
128/// Quick data generation function
129pub async fn generate_data(
130    schema: SchemaDefinition,
131    config: DataConfig,
132) -> mockforge_core::Result<GenerationResult> {
133    let mut generator = DataGenerator::new(schema, config)?;
134    generator.generate().await
135}
136
137/// Generate sample data from a JSON schema
138pub async fn generate_from_json_schema(
139    json_schema: &serde_json::Value,
140    rows: usize,
141) -> mockforge_core::Result<GenerationResult> {
142    let schema = SchemaDefinition::from_json_schema(json_schema)?;
143    let config = DataConfig {
144        rows,
145        ..Default::default()
146    };
147    generate_data(schema, config).await
148}
149
150/// Generate sample data from an OpenAPI schema
151pub async fn generate_from_openapi(
152    openapi_spec: &serde_json::Value,
153    rows: usize,
154) -> mockforge_core::Result<GenerationResult> {
155    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
156    let config = DataConfig {
157        rows,
158        ..Default::default()
159    };
160    generate_data(schema, config).await
161}
162
/// Unit tests for the crate-root configuration and result types.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The derived `Default` uses each field type's own default value; it does
    /// not consult `default_rows()` / `default_rag_context_length()`.
    #[test]
    fn test_data_config_default() {
        let config = DataConfig::default();
        assert_eq!(config.rows, 0); // usize::default(), not default_rows()
        assert_eq!(config.seed, None);
        assert!(!config.rag_enabled);
        assert_eq!(config.rag_context_length, 0); // usize::default(), not the serde fallback
        assert!(matches!(config.format, OutputFormat::Json));
    }

    /// Deserialization, unlike `Default`, applies the serde fallback functions
    /// to the fields that are missing from the input.
    #[test]
    fn test_data_config_serde_defaults() {
        let config: DataConfig =
            serde_json::from_value(json!({"seed": 7, "rag_enabled": true, "format": "csv"}))
                .expect("config omitting only defaulted fields should deserialize");

        assert_eq!(config.rows, default_rows());
        assert_eq!(config.rag_context_length, default_rag_context_length());
        assert_eq!(config.seed, Some(7));
        assert!(config.rag_enabled);
        assert!(matches!(config.format, OutputFormat::Csv));
    }

    #[test]
    fn test_data_config_custom() {
        let config = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(config.rows, 50);
        assert_eq!(config.seed, Some(42));
        assert!(config.rag_enabled);
        assert_eq!(config.rag_context_length, 2000);
        assert!(matches!(config.format, OutputFormat::Csv));
    }

    #[test]
    fn test_output_format_variants() {
        let json = OutputFormat::Json;
        let jsonlines = OutputFormat::JsonLines;
        let yaml = OutputFormat::Yaml;
        let csv = OutputFormat::Csv;

        assert!(matches!(json, OutputFormat::Json));
        assert!(matches!(jsonlines, OutputFormat::JsonLines));
        assert!(matches!(yaml, OutputFormat::Yaml));
        assert!(matches!(csv, OutputFormat::Csv));
    }

    #[test]
    fn test_generation_result_new() {
        let data = vec![json!({"id": 1, "name": "test"})];
        // `data` is not used afterwards, so it is moved rather than cloned.
        let result = GenerationResult::new(data, 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn test_generation_result_with_warning() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 50).with_warning("Test warning".to_string());

        assert_eq!(result.warnings.len(), 1);
        assert_eq!(result.warnings[0], "Test warning");
    }

    #[test]
    fn test_generation_result_to_json_string() {
        let data = vec![json!({"id": 1, "name": "test"})];
        let result = GenerationResult::new(data, 10);

        let json_str = result
            .to_json_string()
            .expect("serializing plain JSON values should succeed");
        assert!(json_str.contains("\"id\""));
        assert!(json_str.contains("\"name\""));
    }

    #[test]
    fn test_generation_result_to_jsonl_string() {
        let data = vec![json!({"id": 1}), json!({"id": 2})];
        let result = GenerationResult::new(data, 10);

        let jsonl_str = result
            .to_jsonl_string()
            .expect("serializing plain JSON values should succeed");
        // Exact match pins record order, the separator, and the absence of a
        // trailing newline.
        assert_eq!(jsonl_str, "{\"id\":1}\n{\"id\":2}");
    }

    #[test]
    fn test_generation_result_multiple_warnings() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings.len(), 2);
        assert_eq!(result.warnings[0], "Warning 1");
        assert_eq!(result.warnings[1], "Warning 2");
    }

    #[test]
    fn test_default_rows() {
        assert_eq!(default_rows(), 100);
    }

    #[test]
    fn test_default_rag_context_length() {
        assert_eq!(default_rag_context_length(), 1000);
    }
}