mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5pub mod dataset;
6pub mod drift;
7pub mod faker;
8pub mod generator;
9pub mod intelligent_mock;
10pub mod provider;
11pub mod rag;
12pub mod replay_augmentation;
13pub mod schema;
14
15pub use dataset::{Dataset, DatasetValidationResult};
16pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
17pub use fake::Faker;
18pub use generator::DataGenerator;
19pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
20pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
21pub use replay_augmentation::{
22    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
23};
24pub use schema::{FieldDefinition, SchemaDefinition};
25
26/// Data generation configuration
27#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
28pub struct DataConfig {
29    /// Number of rows to generate
30    #[serde(default = "default_rows")]
31    pub rows: usize,
32    /// Random seed for reproducible generation
33    pub seed: Option<u64>,
34    /// Enable RAG mode
35    pub rag_enabled: bool,
36    /// Maximum RAG context length
37    #[serde(default = "default_rag_context_length")]
38    pub rag_context_length: usize,
39    /// Output format
40    pub format: OutputFormat,
41}
42
/// Serde fallback for `DataConfig::rows` when the field is absent from the input.
fn default_rows() -> usize {
    const DEFAULT_ROWS: usize = 100;
    DEFAULT_ROWS
}
/// Serde fallback for `DataConfig::rag_context_length` when the field is absent from the input.
fn default_rag_context_length() -> usize {
    const DEFAULT_RAG_CONTEXT_LENGTH: usize = 1000;
    DEFAULT_RAG_CONTEXT_LENGTH
}
49
50/// Output format for generated data
51#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
52#[serde(rename_all = "lowercase")]
53pub enum OutputFormat {
54    /// JSON format
55    #[default]
56    Json,
57    /// JSON Lines format
58    JsonLines,
59    /// YAML format
60    Yaml,
61    /// CSV format
62    Csv,
63}
64
65/// Generation result
66#[derive(Debug)]
67pub struct GenerationResult {
68    /// Generated data
69    pub data: Vec<serde_json::Value>,
70    /// Number of rows generated
71    pub count: usize,
72    /// Generation time in milliseconds
73    pub generation_time_ms: u128,
74    /// Any warnings during generation
75    pub warnings: Vec<String>,
76}
77
78impl GenerationResult {
79    /// Create a new generation result
80    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
81        Self {
82            count: data.len(),
83            data,
84            generation_time_ms,
85            warnings: Vec::new(),
86        }
87    }
88
89    /// Add a warning
90    pub fn with_warning(mut self, warning: String) -> Self {
91        self.warnings.push(warning);
92        self
93    }
94
95    /// Get data as JSON string
96    pub fn to_json_string(&self) -> Result<String, serde_json::Error> {
97        serde_json::to_string_pretty(&self.data)
98    }
99
100    /// Get data as JSON Lines string
101    pub fn to_jsonl_string(&self) -> Result<String, serde_json::Error> {
102        let lines: Result<Vec<String>, _> = self.data.iter().map(serde_json::to_string).collect();
103        lines.map(|lines| lines.join("\n"))
104    }
105}
106
107/// Quick data generation function
108pub async fn generate_data(
109    schema: SchemaDefinition,
110    config: DataConfig,
111) -> mockforge_core::Result<GenerationResult> {
112    let mut generator = DataGenerator::new(schema, config)?;
113    generator.generate().await
114}
115
116/// Generate sample data from a JSON schema
117pub async fn generate_from_json_schema(
118    json_schema: &serde_json::Value,
119    rows: usize,
120) -> mockforge_core::Result<GenerationResult> {
121    let schema = SchemaDefinition::from_json_schema(json_schema)?;
122    let config = DataConfig {
123        rows,
124        ..Default::default()
125    };
126    generate_data(schema, config).await
127}
128
129/// Generate sample data from an OpenAPI schema
130pub async fn generate_from_openapi(
131    openapi_spec: &serde_json::Value,
132    rows: usize,
133) -> mockforge_core::Result<GenerationResult> {
134    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
135    let config = DataConfig {
136        rows,
137        ..Default::default()
138    };
139    generate_data(schema, config).await
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The derived `Default` uses type-level defaults, not the serde helper functions.
    #[test]
    fn test_data_config_default() {
        let defaults = DataConfig::default();

        assert_eq!(defaults.rows, 0); // usize::default()
        assert!(defaults.seed.is_none());
        assert!(!defaults.rag_enabled);
        assert_eq!(defaults.rag_context_length, 0); // usize::default()
        assert!(matches!(defaults.format, OutputFormat::Json));
    }

    /// Explicitly supplied values are stored unchanged.
    #[test]
    fn test_data_config_custom() {
        let custom = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(custom.rows, 50);
        assert_eq!(custom.seed, Some(42));
        assert!(custom.rag_enabled);
        assert_eq!(custom.rag_context_length, 2000);
        assert!(matches!(custom.format, OutputFormat::Csv));
    }

    /// Every variant can be constructed and is matched by its own pattern.
    #[test]
    fn test_output_format_variants() {
        let [json, jsonlines, yaml, csv] = [
            OutputFormat::Json,
            OutputFormat::JsonLines,
            OutputFormat::Yaml,
            OutputFormat::Csv,
        ];

        assert!(matches!(json, OutputFormat::Json));
        assert!(matches!(jsonlines, OutputFormat::JsonLines));
        assert!(matches!(yaml, OutputFormat::Yaml));
        assert!(matches!(csv, OutputFormat::Csv));
    }

    /// `new` derives `count` from the data and starts without warnings.
    #[test]
    fn test_generation_result_new() {
        let result = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    /// A single `with_warning` call records exactly that warning.
    #[test]
    fn test_generation_result_with_warning() {
        let result =
            GenerationResult::new(vec![json!({"id": 1})], 50).with_warning("Test warning".to_string());

        assert_eq!(result.warnings, ["Test warning"]);
    }

    /// Pretty JSON output contains the quoted field names.
    #[test]
    fn test_generation_result_to_json_string() {
        let rendered = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 10)
            .to_json_string()
            .expect("plain JSON values should serialize");

        assert!(rendered.contains("\"id\""));
        assert!(rendered.contains("\"name\""));
    }

    /// JSON Lines output holds each row in compact form, separated by a newline.
    #[test]
    fn test_generation_result_to_jsonl_string() {
        let rendered = GenerationResult::new(vec![json!({"id": 1}), json!({"id": 2})], 10)
            .to_jsonl_string()
            .expect("plain JSON values should serialize");

        assert!(rendered.contains("{\"id\":1}"));
        assert!(rendered.contains("{\"id\":2}"));
        assert!(rendered.contains('\n'));
    }

    /// Chained `with_warning` calls keep the warnings in insertion order.
    #[test]
    fn test_generation_result_multiple_warnings() {
        let result = GenerationResult::new(vec![json!({"id": 1})], 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings, ["Warning 1", "Warning 2"]);
    }

    /// The serde fallback for `rows` is 100.
    #[test]
    fn test_default_rows() {
        assert_eq!(default_rows(), 100);
    }

    /// The serde fallback for `rag_context_length` is 1000.
    #[test]
    fn test_default_rag_context_length() {
        assert_eq!(default_rag_context_length(), 1000);
    }
}
253}