Skip to main content

datafake_rs/
generator.rs

1//! High-level API for generating fake data.
2//!
3//! This module provides the [`DataGenerator`] struct, which is the main entry point
4//! for generating fake JSON data from a configuration.
5
6use crate::config::ConfigParser;
7use crate::engine::Engine;
8use crate::error::Result;
9use crate::types::{DataFakeConfig, GenerationContext};
10use serde_json::Value;
11
12/// The main struct for generating fake JSON data.
13///
14/// `DataGenerator` takes a configuration and provides methods to generate
15/// single records or batches of fake data based on the schema.
16///
17/// # Example
18///
19/// ```rust
20/// use datafake_rs::DataGenerator;
21///
22/// let config = r#"{
23///     "schema": {
24///         "id": {"fake": ["uuid"]},
25///         "name": {"fake": ["name"]}
26///     }
27/// }"#;
28///
29/// let generator = DataGenerator::from_json(config).unwrap();
30/// let data = generator.generate().unwrap();
31/// ```
32pub struct DataGenerator {
33    config: DataFakeConfig,
34}
35
36impl DataGenerator {
37    /// Creates a new `DataGenerator` from an already-parsed configuration.
38    pub fn new(config: DataFakeConfig) -> Self {
39        Self { config }
40    }
41
42    /// Creates a new `DataGenerator` by parsing a JSON configuration string.
43    ///
44    /// # Errors
45    ///
46    /// Returns an error if the JSON is invalid or the configuration fails validation.
47    pub fn from_json(json_str: &str) -> Result<Self> {
48        let config = ConfigParser::parse(json_str)?;
49        Ok(Self::new(config))
50    }
51
52    /// Creates a new `DataGenerator` from a `serde_json::Value`.
53    ///
54    /// # Errors
55    ///
56    /// Returns an error if the value cannot be converted to a valid configuration.
57    pub fn from_value(json_value: Value) -> Result<Self> {
58        let config = ConfigParser::parse_value(json_value)?;
59        Ok(Self::new(config))
60    }
61
62    /// Generates a single fake data record based on the schema.
63    ///
64    /// Each call generates fresh random data. Variables are evaluated once
65    /// per call and can be referenced multiple times within the schema.
66    ///
67    /// # Errors
68    ///
69    /// Returns an error if data generation fails (e.g., invalid operator usage).
70    pub fn generate(&self) -> Result<Value> {
71        // First, convert HashMap to Map for engine
72        let variables_map: serde_json::Map<String, Value> = self
73            .config
74            .variables
75            .iter()
76            .map(|(k, v)| (k.clone(), v.clone()))
77            .collect();
78
79        // Generate all variables
80        let generated_vars = Engine::generate_variables(&variables_map)?;
81
82        // Create context with generated variables (convert back to HashMap)
83        let context = GenerationContext::with_variables(generated_vars.into_iter().collect());
84
85        // Process the schema with the context
86        Engine::process_schema(&self.config.schema, &context)
87    }
88
89    /// Generates multiple fake data records.
90    ///
91    /// Each record is independently generated with fresh random data.
92    ///
93    /// # Arguments
94    ///
95    /// * `count` - The number of records to generate.
96    ///
97    /// # Errors
98    ///
99    /// Returns an error if any record generation fails.
100    pub fn generate_batch(&self, count: usize) -> Result<Vec<Value>> {
101        let mut results = Vec::with_capacity(count);
102
103        for _ in 0..count {
104            results.push(self.generate()?);
105        }
106
107        Ok(results)
108    }
109
110    /// Returns a reference to the underlying configuration.
111    pub fn config(&self) -> &DataFakeConfig {
112        &self.config
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // A schema-only configuration yields a record with the requested fields.
    #[test]
    fn test_from_json() {
        let config_json = r#"{
            "schema": {
                "id": {"fake": ["uuid"]},
                "name": {"fake": ["name"]}
            }
        }"#;

        let generator = DataGenerator::from_json(config_json).unwrap();
        let result = generator.generate().unwrap();

        assert!(result["id"].is_string());
        assert!(result["name"].is_string());
    }

    // Each variable is referenced from two places in the schema; both places
    // must receive the same value within one generated record.
    #[test]
    fn test_with_variables() {
        let config_json = r#"{
            "variables": {
                "userId": {"fake": ["uuid"]},
                "country": {"fake": ["country_code"]}
            },
            "schema": {
                "id": {"var": "userId"},
                "ownerId": {"var": "userId"},
                "billingCountry": {"var": "country"},
                "location": {
                    "country": {"var": "country"},
                    "city": {"fake": ["city_name"]}
                }
            }
        }"#;

        let generator = DataGenerator::from_json(config_json).unwrap();
        let result = generator.generate().unwrap();

        assert!(result["id"].is_string());
        assert!(result["location"]["country"].is_string());
        assert!(result["location"]["city"].is_string());

        // Comparing two distinct references (rather than a value with itself)
        // is what actually exercises the "evaluated once per call" contract.
        assert_eq!(result["id"], result["ownerId"]);
        assert_eq!(result["location"]["country"], result["billingCountry"]);
    }

    // A batch contains the requested number of independently generated records.
    #[test]
    fn test_generate_batch() {
        let config_json = r#"{
            "schema": {
                "id": {"fake": ["uuid"]},
                "timestamp": {"fake": ["u64"]}
            }
        }"#;

        let generator = DataGenerator::from_json(config_json).unwrap();
        let results = generator.generate_batch(5).unwrap();

        assert_eq!(results.len(), 5);

        // Each result should have unique values
        let mut ids = std::collections::HashSet::new();
        for result in results {
            assert!(result["id"].is_string());
            assert!(result["timestamp"].is_number());
            ids.insert(result["id"].as_str().unwrap().to_string());
        }
        assert_eq!(ids.len(), 5); // All IDs should be unique
    }

    // A nested schema built from a `serde_json::Value`, with variables that
    // are referenced from more than one location.
    #[test]
    fn test_complex_schema() {
        let config = json!({
            "metadata": {
                "name": "User Profile Generator",
                "version": "1.0.0"
            },
            "variables": {
                "userId": {"fake": ["uuid"]},
                "createdAt": {"fake": ["u64", 1000000000, 1700000000]}
            },
            "schema": {
                "id": {"var": "userId"},
                "profile": {
                    "firstName": {"fake": ["first_name"]},
                    "lastName": {"fake": ["last_name"]},
                    "email": {"fake": ["email"]},
                    "age": {"fake": ["u8", 18, 65]}
                },
                "address": {
                    "street": {"fake": ["street_address"]},
                    "city": {"fake": ["city_name"]},
                    "zipCode": {"fake": ["zip_code"]}
                },
                "metadata": {
                    "createdAt": {"var": "createdAt"},
                    "updatedAt": {"fake": ["u64", 1700000000, 1800000000]}
                },
                "audit": {
                    "createdBy": {"var": "userId"},
                    "createdAt": {"var": "createdAt"}
                }
            }
        });

        let generator = DataGenerator::from_value(config).unwrap();
        let result = generator.generate().unwrap();

        // Verify structure
        assert!(result["id"].is_string());
        assert!(result["profile"]["firstName"].is_string());
        assert!(result["profile"]["email"].as_str().unwrap().contains('@'));
        assert!(result["address"]["street"].is_string());
        assert!(result["metadata"]["createdAt"].is_number());

        // Verify that variable references work: the same variable read from
        // two different schema locations must produce the same value.
        assert_eq!(result["id"], result["audit"]["createdBy"]);
        assert_eq!(
            result["metadata"]["createdAt"],
            result["audit"]["createdAt"]
        );
    }

    // One generator shared across threads through `Arc` keeps producing valid,
    // distinct records.
    #[test]
    fn test_concurrent_generation() {
        use std::sync::Arc;
        use std::thread;

        let config_json = r#"{
            "schema": {
                "id": {"fake": ["uuid"]},
                "name": {"fake": ["name"]},
                "email": {"fake": ["email"]},
                "age": {"fake": ["u8", 18, 65]}
            }
        }"#;

        let generator = Arc::new(DataGenerator::from_json(config_json).unwrap());
        let num_threads = 4;
        let generations_per_thread = 100;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                let data_gen = Arc::clone(&generator);
                thread::spawn(move || {
                    let mut results = Vec::with_capacity(generations_per_thread);
                    for _ in 0..generations_per_thread {
                        let result = data_gen.generate().expect("Generation should succeed");
                        assert!(result["id"].is_string());
                        assert!(result["name"].is_string());
                        assert!(result["email"].as_str().unwrap().contains('@'));
                        results.push(result["id"].as_str().unwrap().to_string());
                    }
                    results
                })
            })
            .collect();

        // Collect all generated IDs
        let mut all_ids: Vec<String> = Vec::new();
        for handle in handles {
            let ids = handle.join().expect("Thread should complete successfully");
            all_ids.extend(ids);
        }

        // Verify we got the expected number of results
        assert_eq!(all_ids.len(), num_threads * generations_per_thread);

        // Verify all IDs are unique (UUIDs should not collide)
        let unique_ids: std::collections::HashSet<_> = all_ids.iter().collect();
        assert_eq!(
            unique_ids.len(),
            all_ids.len(),
            "All generated UUIDs should be unique"
        );
    }

    // Batch generation from several threads returns full-sized batches.
    #[test]
    fn test_concurrent_batch_generation() {
        use std::sync::Arc;
        use std::thread;

        let config_json = r#"{
            "variables": {
                "baseId": {"fake": ["uuid"]}
            },
            "schema": {
                "id": {"var": "baseId"},
                "timestamp": {"fake": ["u64"]}
            }
        }"#;

        let generator = Arc::new(DataGenerator::from_json(config_json).unwrap());
        let num_threads = 4;
        let batch_size = 50;

        let handles: Vec<_> = (0..num_threads)
            .map(|_| {
                let data_gen = Arc::clone(&generator);
                thread::spawn(move || {
                    data_gen
                        .generate_batch(batch_size)
                        .expect("Batch generation should succeed")
                })
            })
            .collect();

        let mut total_count = 0;
        for handle in handles {
            let batch = handle.join().expect("Thread should complete successfully");
            assert_eq!(batch.len(), batch_size);
            total_count += batch.len();
        }

        assert_eq!(total_count, num_threads * batch_size);
    }
}