mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5// Re-export error types from mockforge-core
6pub use mockforge_core::{Error, Result};
7
8pub mod dataset;
9pub mod domains;
10pub mod drift;
11pub mod faker;
12pub mod generator;
13pub mod intelligent_mock;
14pub mod mock_generator;
15pub mod mock_server;
16/// Provider utilities for faker and data generation
17pub mod provider;
18/// RAG (Retrieval-Augmented Generation) utilities for intelligent mock data generation
19pub mod rag;
20pub mod replay_augmentation;
21pub mod schema;
22pub mod token_resolver;
23
24#[cfg(test)]
25mod mock_data_tests;
26
27pub use dataset::{Dataset, DatasetValidationResult};
28pub use domains::{Domain, DomainGenerator, ParseDomainError};
29pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
30pub use fake::Faker;
31pub use generator::DataGenerator;
32pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
33pub use mock_generator::{MockDataGenerator, MockDataResult, MockGeneratorConfig, MockResponse};
34pub use mock_server::{
35    start_mock_server, start_mock_server_with_config, MockServer, MockServerBuilder,
36    MockServerConfig,
37};
38pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
39pub use replay_augmentation::{
40    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
41};
42pub use schema::{FieldDefinition, SchemaDefinition};
43pub use token_resolver::{resolve_tokens, resolve_tokens_with_rag, TokenResolver, TokenType};
44
45/// Data generation configuration
46#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
47pub struct DataConfig {
48    /// Number of rows to generate
49    #[serde(default = "default_rows")]
50    pub rows: usize,
51    /// Random seed for reproducible generation
52    pub seed: Option<u64>,
53    /// Enable RAG mode
54    pub rag_enabled: bool,
55    /// Maximum RAG context length
56    #[serde(default = "default_rag_context_length")]
57    pub rag_context_length: usize,
58    /// Output format
59    pub format: OutputFormat,
60}
61
/// Fallback row count used by serde when `rows` is missing from a serialized `DataConfig`.
fn default_rows() -> usize {
    const DEFAULT_ROWS: usize = 100;
    DEFAULT_ROWS
}
/// Fallback RAG context length used by serde when `rag_context_length` is missing
/// from a serialized `DataConfig`.
fn default_rag_context_length() -> usize {
    const DEFAULT_RAG_CONTEXT_LENGTH: usize = 1000;
    DEFAULT_RAG_CONTEXT_LENGTH
}
68
69/// Output format for generated data
70#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
71#[serde(rename_all = "lowercase")]
72pub enum OutputFormat {
73    /// JSON format
74    #[default]
75    Json,
76    /// JSON Lines format
77    JsonLines,
78    /// YAML format
79    Yaml,
80    /// CSV format
81    Csv,
82}
83
84/// Generation result
85#[derive(Debug)]
86pub struct GenerationResult {
87    /// Generated data
88    pub data: Vec<serde_json::Value>,
89    /// Number of rows generated
90    pub count: usize,
91    /// Generation time in milliseconds
92    pub generation_time_ms: u128,
93    /// Any warnings during generation
94    pub warnings: Vec<String>,
95}
96
97impl GenerationResult {
98    /// Create a new generation result
99    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
100        Self {
101            count: data.len(),
102            data,
103            generation_time_ms,
104            warnings: Vec::new(),
105        }
106    }
107
108    /// Add a warning
109    pub fn with_warning(mut self, warning: String) -> Self {
110        self.warnings.push(warning);
111        self
112    }
113
114    /// Get data as JSON string
115    pub fn to_json_string(&self) -> mockforge_core::Result<String> {
116        Ok(serde_json::to_string_pretty(&self.data)?)
117    }
118
119    /// Get data as JSON Lines string
120    pub fn to_jsonl_string(&self) -> mockforge_core::Result<String> {
121        let lines: Vec<String> = self
122            .data
123            .iter()
124            .map(serde_json::to_string)
125            .collect::<std::result::Result<_, _>>()?;
126        Ok(lines.join("\n"))
127    }
128}
129
130/// Quick data generation function
131pub async fn generate_data(
132    schema: SchemaDefinition,
133    config: DataConfig,
134) -> mockforge_core::Result<GenerationResult> {
135    let mut generator = DataGenerator::new(schema, config)?;
136    generator.generate().await
137}
138
139/// Generate sample data from a JSON schema
140pub async fn generate_from_json_schema(
141    json_schema: &serde_json::Value,
142    rows: usize,
143) -> mockforge_core::Result<GenerationResult> {
144    let schema = SchemaDefinition::from_json_schema(json_schema)?;
145    let config = DataConfig {
146        rows,
147        ..Default::default()
148    };
149    generate_data(schema, config).await
150}
151
152/// Generate sample data from an OpenAPI schema
153pub async fn generate_from_openapi(
154    openapi_spec: &serde_json::Value,
155    rows: usize,
156) -> mockforge_core::Result<GenerationResult> {
157    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
158    let config = DataConfig {
159        rows,
160        ..Default::default()
161    };
162    generate_data(schema, config).await
163}
164
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The derived `Default` zero-initialises the numeric fields; the serde
    /// fallback functions are not consulted.
    #[test]
    fn test_data_config_default() {
        let DataConfig {
            rows,
            seed,
            rag_enabled,
            rag_context_length,
            format,
        } = DataConfig::default();

        assert_eq!(rows, 0);
        assert!(seed.is_none());
        assert!(!rag_enabled);
        assert_eq!(rag_context_length, 0);
        assert!(matches!(format, OutputFormat::Json));
    }

    /// Explicitly supplied values are stored unchanged.
    #[test]
    fn test_data_config_custom() {
        let DataConfig {
            rows,
            seed,
            rag_enabled,
            rag_context_length,
            format,
        } = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(rows, 50);
        assert_eq!(seed, Some(42));
        assert!(rag_enabled);
        assert_eq!(rag_context_length, 2000);
        assert!(matches!(format, OutputFormat::Csv));
    }

    /// Every variant can be constructed and matches only itself.
    #[test]
    fn test_output_format_variants() {
        assert!(matches!(OutputFormat::Json, OutputFormat::Json));
        assert!(matches!(OutputFormat::JsonLines, OutputFormat::JsonLines));
        assert!(matches!(OutputFormat::Yaml, OutputFormat::Yaml));
        assert!(matches!(OutputFormat::Csv, OutputFormat::Csv));
    }

    /// `new` derives `count` from the data and starts with no warnings.
    #[test]
    fn test_generation_result_new() {
        let result = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    /// A single `with_warning` call records exactly that warning.
    #[test]
    fn test_generation_result_with_warning() {
        let result = GenerationResult::new(vec![json!({"id": 1})], 50)
            .with_warning("Test warning".to_string());

        assert_eq!(result.warnings, ["Test warning"]);
    }

    /// The pretty JSON rendering contains the keys of the source rows.
    #[test]
    fn test_generation_result_to_json_string() {
        let result = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 10);

        let json_str = result
            .to_json_string()
            .expect("serializing plain JSON values should succeed");
        for key in ["\"id\"", "\"name\""] {
            assert!(json_str.contains(key));
        }
    }

    /// The JSON Lines rendering contains each row in compact form, newline-separated.
    #[test]
    fn test_generation_result_to_jsonl_string() {
        let rows = vec![json!({"id": 1}), json!({"id": 2})];
        let result = GenerationResult::new(rows, 10);

        let jsonl_str = result
            .to_jsonl_string()
            .expect("serializing plain JSON values should succeed");
        for fragment in ["{\"id\":1}", "{\"id\":2}", "\n"] {
            assert!(jsonl_str.contains(fragment));
        }
    }

    /// Chained `with_warning` calls keep the warnings in insertion order.
    #[test]
    fn test_generation_result_multiple_warnings() {
        let result = GenerationResult::new(vec![json!({"id": 1})], 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings, ["Warning 1", "Warning 2"]);
    }

    /// The serde fallback for `rows` is 100.
    #[test]
    fn test_default_rows() {
        assert_eq!(default_rows(), 100);
    }

    /// The serde fallback for `rag_context_length` is 1000.
    #[test]
    fn test_default_rag_context_length() {
        assert_eq!(default_rag_context_length(), 1000);
    }
}