mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5// Re-export error types from mockforge-core
6pub use mockforge_core::{Error, Result};
7
8pub mod dataset;
9pub mod drift;
10pub mod faker;
11pub mod generator;
12pub mod intelligent_mock;
13pub mod provider;
14pub mod rag;
15pub mod replay_augmentation;
16pub mod schema;
17
18pub use dataset::{Dataset, DatasetValidationResult};
19pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
20pub use fake::Faker;
21pub use generator::DataGenerator;
22pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
23pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
24pub use replay_augmentation::{
25    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
26};
27pub use schema::{FieldDefinition, SchemaDefinition};
28
29/// Data generation configuration
30#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
31pub struct DataConfig {
32    /// Number of rows to generate
33    #[serde(default = "default_rows")]
34    pub rows: usize,
35    /// Random seed for reproducible generation
36    pub seed: Option<u64>,
37    /// Enable RAG mode
38    pub rag_enabled: bool,
39    /// Maximum RAG context length
40    #[serde(default = "default_rag_context_length")]
41    pub rag_context_length: usize,
42    /// Output format
43    pub format: OutputFormat,
44}
45
/// Serde fallback for the `rows` field of `DataConfig` when it is absent.
fn default_rows() -> usize {
    const FALLBACK_ROWS: usize = 100;
    FALLBACK_ROWS
}
/// Serde fallback for the `rag_context_length` field of `DataConfig` when it is absent.
fn default_rag_context_length() -> usize {
    const FALLBACK_CONTEXT_LENGTH: usize = 1000;
    FALLBACK_CONTEXT_LENGTH
}
52
/// Output format for generated data.
///
/// Because of `rename_all = "lowercase"`, serde reads and writes the variants
/// as `"json"`, `"jsonlines"`, `"yaml"` and `"csv"`. `Json` is the value
/// produced by [`Default`].
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum OutputFormat {
    /// JSON format (the default variant)
    #[default]
    Json,
    /// JSON Lines format
    JsonLines,
    /// YAML format
    Yaml,
    /// CSV format
    Csv,
}
67
/// Outcome of a generation run: the produced values plus bookkeeping.
#[derive(Debug)]
pub struct GenerationResult {
    /// Generated data, stored as JSON values
    pub data: Vec<serde_json::Value>,
    /// Number of rows generated; `GenerationResult::new` sets this to
    /// `data.len()`. It is a plain public field, so it is not updated
    /// automatically if `data` is modified afterwards.
    pub count: usize,
    /// Generation time in milliseconds
    pub generation_time_ms: u128,
    /// Any warnings during generation, in the order they were added
    pub warnings: Vec<String>,
}
80
81impl GenerationResult {
82    /// Create a new generation result
83    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
84        Self {
85            count: data.len(),
86            data,
87            generation_time_ms,
88            warnings: Vec::new(),
89        }
90    }
91
92    /// Add a warning
93    pub fn with_warning(mut self, warning: String) -> Self {
94        self.warnings.push(warning);
95        self
96    }
97
98    /// Get data as JSON string
99    pub fn to_json_string(&self) -> mockforge_core::Result<String> {
100        Ok(serde_json::to_string_pretty(&self.data)?)
101    }
102
103    /// Get data as JSON Lines string
104    pub fn to_jsonl_string(&self) -> mockforge_core::Result<String> {
105        let lines: Vec<String> = self
106            .data
107            .iter()
108            .map(|v| serde_json::to_string(v))
109            .collect::<std::result::Result<_, _>>()?;
110        Ok(lines.join("\n"))
111    }
112}
113
114/// Quick data generation function
115pub async fn generate_data(
116    schema: SchemaDefinition,
117    config: DataConfig,
118) -> mockforge_core::Result<GenerationResult> {
119    let mut generator = DataGenerator::new(schema, config)?;
120    generator.generate().await
121}
122
123/// Generate sample data from a JSON schema
124pub async fn generate_from_json_schema(
125    json_schema: &serde_json::Value,
126    rows: usize,
127) -> mockforge_core::Result<GenerationResult> {
128    let schema = SchemaDefinition::from_json_schema(json_schema)?;
129    let config = DataConfig {
130        rows,
131        ..Default::default()
132    };
133    generate_data(schema, config).await
134}
135
136/// Generate sample data from an OpenAPI schema
137pub async fn generate_from_openapi(
138    openapi_spec: &serde_json::Value,
139    rows: usize,
140) -> mockforge_core::Result<GenerationResult> {
141    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
142    let config = DataConfig {
143        rows,
144        ..Default::default()
145    };
146    generate_data(schema, config).await
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The derived `Default` zeroes the numeric fields; the serde fallbacks
    /// are not involved here.
    #[test]
    fn test_data_config_default() {
        let DataConfig {
            rows,
            seed,
            rag_enabled,
            rag_context_length,
            format,
        } = DataConfig::default();

        assert_eq!(rows, 0);
        assert!(seed.is_none());
        assert!(!rag_enabled);
        assert_eq!(rag_context_length, 0);
        assert!(matches!(format, OutputFormat::Json));
    }

    /// Explicitly supplied values are stored unchanged.
    #[test]
    fn test_data_config_custom() {
        let DataConfig {
            rows,
            seed,
            rag_enabled,
            rag_context_length,
            format,
        } = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(rows, 50);
        assert_eq!(seed, Some(42));
        assert!(rag_enabled);
        assert_eq!(rag_context_length, 2000);
        assert!(matches!(format, OutputFormat::Csv));
    }

    /// Every variant can be constructed and matched.
    #[test]
    fn test_output_format_variants() {
        assert!(matches!(OutputFormat::Json, OutputFormat::Json));
        assert!(matches!(OutputFormat::JsonLines, OutputFormat::JsonLines));
        assert!(matches!(OutputFormat::Yaml, OutputFormat::Yaml));
        assert!(matches!(OutputFormat::Csv, OutputFormat::Csv));
    }

    /// `new` derives `count` from the data and starts without warnings.
    #[test]
    fn test_generation_result_new() {
        let rows = vec![json!({"id": 1, "name": "test"})];
        let outcome = GenerationResult::new(rows, 100);

        assert_eq!(outcome.count, 1);
        assert_eq!(outcome.data.len(), 1);
        assert_eq!(outcome.generation_time_ms, 100);
        assert!(outcome.warnings.is_empty());
    }

    /// A single warning is recorded verbatim.
    #[test]
    fn test_generation_result_with_warning() {
        let outcome = GenerationResult::new(vec![json!({"id": 1})], 50)
            .with_warning(String::from("Test warning"));

        assert_eq!(outcome.warnings, ["Test warning"]);
    }

    /// The pretty JSON output contains the keys of the generated object.
    #[test]
    fn test_generation_result_to_json_string() {
        let outcome = GenerationResult::new(vec![json!({"id": 1, "name": "test"})], 10);

        let rendered = outcome
            .to_json_string()
            .expect("serializing plain JSON values should succeed");
        assert!(rendered.contains(r#""id""#));
        assert!(rendered.contains(r#""name""#));
    }

    /// The JSON Lines output holds one compact document per value, newline separated.
    #[test]
    fn test_generation_result_to_jsonl_string() {
        let outcome = GenerationResult::new(vec![json!({"id": 1}), json!({"id": 2})], 10);

        let rendered = outcome
            .to_jsonl_string()
            .expect("serializing plain JSON values should succeed");
        assert!(rendered.contains(r#"{"id":1}"#));
        assert!(rendered.contains(r#"{"id":2}"#));
        assert!(rendered.contains('\n'));
    }

    /// Chained warnings are kept in insertion order.
    #[test]
    fn test_generation_result_multiple_warnings() {
        let outcome = GenerationResult::new(vec![json!({"id": 1})], 10)
            .with_warning(String::from("Warning 1"))
            .with_warning(String::from("Warning 2"));

        assert_eq!(outcome.warnings, ["Warning 1", "Warning 2"]);
    }

    #[test]
    fn test_default_rows() {
        let fallback = default_rows();
        assert_eq!(fallback, 100);
    }

    #[test]
    fn test_default_rag_context_length() {
        let fallback = default_rag_context_length();
        assert_eq!(fallback, 1000);
    }
}