mockforge_data/
lib.rs

1//! # MockForge Data
2//!
3//! Synthetic data generation engine with faker primitives and RAG (Retrieval-Augmented Generation).
4
5// Re-export error types from mockforge-core
6pub use mockforge_core::{Error, Result};
7
8pub mod dataset;
9pub mod domains;
10pub mod drift;
11pub mod faker;
12pub mod generator;
13pub mod intelligent_mock;
14pub mod provider;
15pub mod rag;
16pub mod replay_augmentation;
17pub mod schema;
18pub mod token_resolver;
19
20pub use dataset::{Dataset, DatasetValidationResult};
21pub use domains::{Domain, DomainGenerator, ParseDomainError};
22pub use drift::{DataDriftConfig, DataDriftEngine, DriftStrategy};
23pub use fake::Faker;
24pub use generator::DataGenerator;
25pub use intelligent_mock::{IntelligentMockConfig, IntelligentMockGenerator, ResponseMode};
26pub use rag::{EmbeddingProvider, LlmProvider, RagConfig, RagEngine, SearchResult};
27pub use replay_augmentation::{
28    EventStrategy, GeneratedEvent, ReplayAugmentationConfig, ReplayAugmentationEngine, ReplayMode,
29};
30pub use schema::{FieldDefinition, SchemaDefinition};
31pub use token_resolver::{resolve_tokens, resolve_tokens_with_rag, TokenResolver, TokenType};
32
33/// Data generation configuration
34#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
35pub struct DataConfig {
36    /// Number of rows to generate
37    #[serde(default = "default_rows")]
38    pub rows: usize,
39    /// Random seed for reproducible generation
40    pub seed: Option<u64>,
41    /// Enable RAG mode
42    pub rag_enabled: bool,
43    /// Maximum RAG context length
44    #[serde(default = "default_rag_context_length")]
45    pub rag_context_length: usize,
46    /// Output format
47    pub format: OutputFormat,
48}
49
/// Serde fallback for `DataConfig::rows`, used when the field is missing from the input.
fn default_rows() -> usize {
    const DEFAULT_ROW_COUNT: usize = 100;
    DEFAULT_ROW_COUNT
}
/// Serde fallback for `DataConfig::rag_context_length`, used when the field is missing from
/// the input.
fn default_rag_context_length() -> usize {
    const DEFAULT_RAG_CONTEXT_LENGTH: usize = 1000;
    DEFAULT_RAG_CONTEXT_LENGTH
}
56
57/// Output format for generated data
58#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, Default)]
59#[serde(rename_all = "lowercase")]
60pub enum OutputFormat {
61    /// JSON format
62    #[default]
63    Json,
64    /// JSON Lines format
65    JsonLines,
66    /// YAML format
67    Yaml,
68    /// CSV format
69    Csv,
70}
71
72/// Generation result
73#[derive(Debug)]
74pub struct GenerationResult {
75    /// Generated data
76    pub data: Vec<serde_json::Value>,
77    /// Number of rows generated
78    pub count: usize,
79    /// Generation time in milliseconds
80    pub generation_time_ms: u128,
81    /// Any warnings during generation
82    pub warnings: Vec<String>,
83}
84
85impl GenerationResult {
86    /// Create a new generation result
87    pub fn new(data: Vec<serde_json::Value>, generation_time_ms: u128) -> Self {
88        Self {
89            count: data.len(),
90            data,
91            generation_time_ms,
92            warnings: Vec::new(),
93        }
94    }
95
96    /// Add a warning
97    pub fn with_warning(mut self, warning: String) -> Self {
98        self.warnings.push(warning);
99        self
100    }
101
102    /// Get data as JSON string
103    pub fn to_json_string(&self) -> mockforge_core::Result<String> {
104        Ok(serde_json::to_string_pretty(&self.data)?)
105    }
106
107    /// Get data as JSON Lines string
108    pub fn to_jsonl_string(&self) -> mockforge_core::Result<String> {
109        let lines: Vec<String> = self
110            .data
111            .iter()
112            .map(serde_json::to_string)
113            .collect::<std::result::Result<_, _>>()?;
114        Ok(lines.join("\n"))
115    }
116}
117
118/// Quick data generation function
119pub async fn generate_data(
120    schema: SchemaDefinition,
121    config: DataConfig,
122) -> mockforge_core::Result<GenerationResult> {
123    let mut generator = DataGenerator::new(schema, config)?;
124    generator.generate().await
125}
126
127/// Generate sample data from a JSON schema
128pub async fn generate_from_json_schema(
129    json_schema: &serde_json::Value,
130    rows: usize,
131) -> mockforge_core::Result<GenerationResult> {
132    let schema = SchemaDefinition::from_json_schema(json_schema)?;
133    let config = DataConfig {
134        rows,
135        ..Default::default()
136    };
137    generate_data(schema, config).await
138}
139
140/// Generate sample data from an OpenAPI schema
141pub async fn generate_from_openapi(
142    openapi_spec: &serde_json::Value,
143    rows: usize,
144) -> mockforge_core::Result<GenerationResult> {
145    let schema = SchemaDefinition::from_openapi_spec(openapi_spec)?;
146    let config = DataConfig {
147        rows,
148        ..Default::default()
149    };
150    generate_data(schema, config).await
151}
152
#[cfg(test)]
mod tests {
    //! Unit tests for the crate-level configuration and result types.

    use super::*;
    use serde_json::json;

    /// The derived `Default` zero-initializes numeric fields; it does not go through the
    /// serde fallback helpers (`default_rows`, `default_rag_context_length`).
    #[test]
    fn test_data_config_default() {
        let config = DataConfig::default();
        assert_eq!(config.rows, 0);
        assert_eq!(config.seed, None);
        assert!(!config.rag_enabled);
        assert_eq!(config.rag_context_length, 0);
        assert!(matches!(config.format, OutputFormat::Json));
    }

    /// Explicitly supplied field values are stored unchanged.
    #[test]
    fn test_data_config_custom() {
        let config = DataConfig {
            rows: 50,
            seed: Some(42),
            rag_enabled: true,
            rag_context_length: 2000,
            format: OutputFormat::Csv,
        };

        assert_eq!(config.rows, 50);
        assert_eq!(config.seed, Some(42));
        assert!(config.rag_enabled);
        assert_eq!(config.rag_context_length, 2000);
        assert!(matches!(config.format, OutputFormat::Csv));
    }

    /// Every output format variant can be constructed and matched.
    #[test]
    fn test_output_format_variants() {
        assert!(matches!(OutputFormat::Json, OutputFormat::Json));
        assert!(matches!(OutputFormat::JsonLines, OutputFormat::JsonLines));
        assert!(matches!(OutputFormat::Yaml, OutputFormat::Yaml));
        assert!(matches!(OutputFormat::Csv, OutputFormat::Csv));
    }

    /// `new` derives `count` from the data and starts with no warnings.
    #[test]
    fn test_generation_result_new() {
        let data = vec![json!({"id": 1, "name": "test"})];
        let result = GenerationResult::new(data, 100);

        assert_eq!(result.count, 1);
        assert_eq!(result.data.len(), 1);
        assert_eq!(result.generation_time_ms, 100);
        assert!(result.warnings.is_empty());
    }

    /// A single `with_warning` call records exactly that warning.
    #[test]
    fn test_generation_result_with_warning() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 50).with_warning("Test warning".to_string());

        assert_eq!(result.warnings, ["Test warning"]);
    }

    /// The pretty JSON output contains every field name of the input row.
    #[test]
    fn test_generation_result_to_json_string() {
        let data = vec![json!({"id": 1, "name": "test"})];
        let result = GenerationResult::new(data, 10);

        let json_str = result
            .to_json_string()
            .expect("serializing in-memory JSON values should succeed");
        assert!(json_str.contains("\"id\""));
        assert!(json_str.contains("\"name\""));
    }

    /// JSON Lines output is one compact value per row, newline-separated, no trailing newline.
    #[test]
    fn test_generation_result_to_jsonl_string() {
        let data = vec![json!({"id": 1}), json!({"id": 2})];
        let result = GenerationResult::new(data, 10);

        let jsonl_str = result
            .to_jsonl_string()
            .expect("serializing in-memory JSON values should succeed");
        assert_eq!(jsonl_str, "{\"id\":1}\n{\"id\":2}");
    }

    /// Chained `with_warning` calls keep warnings in insertion order.
    #[test]
    fn test_generation_result_multiple_warnings() {
        let data = vec![json!({"id": 1})];
        let result = GenerationResult::new(data, 10)
            .with_warning("Warning 1".to_string())
            .with_warning("Warning 2".to_string());

        assert_eq!(result.warnings, ["Warning 1", "Warning 2"]);
    }

    /// The serde fallback for `rows` is 100.
    #[test]
    fn test_default_rows() {
        assert_eq!(default_rows(), 100);
    }

    /// The serde fallback for `rag_context_length` is 1000.
    #[test]
    fn test_default_rag_context_length() {
        assert_eq!(default_rag_context_length(), 1000);
    }
}