Skip to main content

dev_fixtures/
mock.rs

1//! Deterministic mock data generators.
2//!
3//! Generators are seeded by a `u64`. Same seed + same configuration
4//! produces byte-identical output across runs and machines. No
5//! external dependencies; deterministic via splitmix64.
6//!
7//! ## Generators
8//!
9//! - [`csv`] — CSV with a configurable schema and row count.
10//! - [`json_array`] — JSON array of records with a shape template.
11//! - [`bytes`] — raw byte streams: random, zeroed, patterned.
12
/// A deterministic pseudo-random number generator.
///
/// Implements splitmix64: the whole generator is a single `u64` of state,
/// so construction is free and nothing is allocated. Two generators built
/// from the same seed yield the same sequence.
///
/// # Example
///
/// ```
/// use dev_fixtures::mock::Rng;
/// let mut a = Rng::seeded(42);
/// let mut b = Rng::seeded(42);
/// assert_eq!(a.next_u64(), b.next_u64());
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rng {
    /// Current splitmix64 counter; advanced by a fixed odd increment on
    /// every call to [`Rng::next_u64`].
    state: u64,
}

impl Rng {
    /// Build a new RNG from a seed.
    ///
    /// The seed is used directly as the initial state.
    pub fn seeded(seed: u64) -> Self {
        Self { state: seed }
    }

    /// Step and return the next 64-bit value.
    pub fn next_u64(&mut self) -> u64 {
        // Advance the counter first, then run the splitmix64 finalizer over
        // it. All arithmetic wraps, so this never panics.
        self.state = self.state.wrapping_add(0x9E37_79B9_7F4A_7C15);
        let mut z = self.state;
        z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        z ^ (z >> 31)
    }

    /// Return a value in `[0, n)`.
    ///
    /// Computed as `next_u64() % n`, so the result is very slightly biased
    /// towards small values whenever `n` does not divide `2^64`. The
    /// relative bias is at most `n / 2^64` (below `1 / 2^32` for every `n`
    /// up to `2^32`), which is fine for fixture purposes.
    ///
    /// Returns `0` when `n == 0` instead of dividing by zero; in that case
    /// the generator is not advanced.
    pub fn range(&mut self, n: u64) -> u64 {
        if n == 0 {
            return 0;
        }
        self.next_u64() % n
    }
}
55
56/// Module containing CSV generation.
57pub mod csv {
58    use super::Rng;
59
60    /// Generate a CSV string with `rows` rows, one per `Vec<String>`
61    /// produced by `row_factory`. The first line is the header.
62    ///
63    /// Field values containing `,`, `"`, `\n`, or `\r` are escaped per
64    /// [RFC 4180](https://datatracker.ietf.org/doc/html/rfc4180):
65    /// the value is wrapped in double quotes and any internal `"` is
66    /// doubled. Values without those characters pass through verbatim.
67    ///
68    /// Header values are escaped the same way.
69    ///
70    /// # Example
71    ///
72    /// ```
73    /// use dev_fixtures::mock::csv::generate;
74    /// let csv = generate(
75    ///     &["id", "name"],
76    ///     3,
77    ///     42,
78    ///     |rng| vec![rng.range(1000).to_string(), format!("name_{}", rng.range(100))],
79    /// );
80    /// assert!(csv.starts_with("id,name\n"));
81    /// assert_eq!(csv.lines().count(), 4); // 1 header + 3 rows
82    /// ```
83    ///
84    /// # Escaping example
85    ///
86    /// ```
87    /// use dev_fixtures::mock::csv::generate;
88    /// let csv = generate(&["a", "b"], 1, 0, |_rng| {
89    ///     vec![r#"contains, comma"#.into(), r#"has "quotes""#.into()]
90    /// });
91    /// // Both fields are wrapped; the quote inside is doubled.
92    /// assert!(csv.contains("\"contains, comma\""));
93    /// assert!(csv.contains("\"has \"\"quotes\"\"\""));
94    /// ```
95    pub fn generate<F>(headers: &[&str], rows: usize, seed: u64, mut row_factory: F) -> String
96    where
97        F: FnMut(&mut Rng) -> Vec<String>,
98    {
99        let mut rng = Rng::seeded(seed);
100        let mut out = String::new();
101        let header_row: Vec<String> = headers.iter().map(|h| escape_field(h)).collect();
102        out.push_str(&header_row.join(","));
103        out.push('\n');
104        for _ in 0..rows {
105            let row = row_factory(&mut rng);
106            let escaped: Vec<String> = row.iter().map(|f| escape_field(f)).collect();
107            out.push_str(&escaped.join(","));
108            out.push('\n');
109        }
110        out
111    }
112
113    /// Escape a single CSV field per RFC 4180.
114    ///
115    /// Returns the field unchanged when no special characters are
116    /// present; otherwise returns the field wrapped in double quotes
117    /// with any internal `"` doubled.
118    pub fn escape_field(value: &str) -> String {
119        if value.contains(',')
120            || value.contains('"')
121            || value.contains('\n')
122            || value.contains('\r')
123        {
124            let escaped = value.replace('"', "\"\"");
125            format!("\"{}\"", escaped)
126        } else {
127            value.to_string()
128        }
129    }
130
131    /// Parse a CSV string back into a header row + data rows.
132    ///
133    /// Implements the same RFC 4180 escaping rules used by
134    /// [`generate`]: quoted fields support embedded `,`, `"` (doubled),
135    /// `\n`, and `\r`. Designed for round-trip with `generate`, not as
136    /// a general-purpose CSV parser.
137    ///
138    /// Returns `(headers, rows)`. Each row has the same length as
139    /// `headers` for well-formed input. Trailing empty lines are
140    /// ignored. Returns `Err` with a one-line message on malformed
141    /// input (e.g. unterminated quoted field).
142    ///
143    /// # Example
144    ///
145    /// ```
146    /// use dev_fixtures::mock::csv::{generate, parse};
147    ///
148    /// let csv = generate(&["id", "name"], 3, 0, |rng| {
149    ///     vec![rng.range(100).to_string(), format!("u{}", rng.range(10))]
150    /// });
151    /// let (headers, rows) = parse(&csv).unwrap();
152    /// assert_eq!(headers, vec!["id", "name"]);
153    /// assert_eq!(rows.len(), 3);
154    /// for row in &rows {
155    ///     assert_eq!(row.len(), 2);
156    /// }
157    /// ```
158    pub fn parse(input: &str) -> Result<(Vec<String>, Vec<Vec<String>>), String> {
159        let mut all_rows: Vec<Vec<String>> = Vec::new();
160        let mut chars = input.chars().peekable();
161        let mut current_field = String::new();
162        let mut current_row: Vec<String> = Vec::new();
163        let mut in_quotes = false;
164        let mut row_has_content = false;
165        loop {
166            match chars.next() {
167                None => {
168                    // EOF
169                    if in_quotes {
170                        return Err("unterminated quoted field at EOF".to_string());
171                    }
172                    if !current_field.is_empty() || row_has_content {
173                        current_row.push(std::mem::take(&mut current_field));
174                        all_rows.push(std::mem::take(&mut current_row));
175                    }
176                    break;
177                }
178                Some(c) => {
179                    if in_quotes {
180                        match c {
181                            '"' => {
182                                if matches!(chars.peek(), Some('"')) {
183                                    chars.next();
184                                    current_field.push('"');
185                                } else {
186                                    in_quotes = false;
187                                }
188                            }
189                            other => current_field.push(other),
190                        }
191                    } else {
192                        match c {
193                            '"' if current_field.is_empty() => {
194                                in_quotes = true;
195                                row_has_content = true;
196                            }
197                            ',' => {
198                                current_row.push(std::mem::take(&mut current_field));
199                                row_has_content = true;
200                            }
201                            '\r' => {
202                                // Eat following \n if present.
203                                if matches!(chars.peek(), Some('\n')) {
204                                    chars.next();
205                                }
206                                current_row.push(std::mem::take(&mut current_field));
207                                all_rows.push(std::mem::take(&mut current_row));
208                                row_has_content = false;
209                            }
210                            '\n' => {
211                                current_row.push(std::mem::take(&mut current_field));
212                                all_rows.push(std::mem::take(&mut current_row));
213                                row_has_content = false;
214                            }
215                            other => {
216                                current_field.push(other);
217                                row_has_content = true;
218                            }
219                        }
220                    }
221                }
222            }
223        }
224
225        if all_rows.is_empty() {
226            return Ok((Vec::new(), Vec::new()));
227        }
228        let headers = all_rows.remove(0);
229        Ok((headers, all_rows))
230    }
231}
232
233/// Module containing JSON array generation.
234pub mod json_array {
235    use super::Rng;
236
237    /// Generate a JSON array of `count` elements. Each element is
238    /// produced by `element_factory(rng)` and embedded verbatim in the
239    /// array (the factory MUST return valid JSON).
240    ///
241    /// # Example
242    ///
243    /// ```
244    /// use dev_fixtures::mock::{json_array::generate, Rng};
245    /// let json = generate(3, 7, |rng| {
246    ///     format!("{{\"id\": {}}}", rng.range(1000))
247    /// });
248    /// assert!(json.starts_with("["));
249    /// assert!(json.ends_with("]"));
250    /// ```
251    pub fn generate<F>(count: usize, seed: u64, mut element_factory: F) -> String
252    where
253        F: FnMut(&mut Rng) -> String,
254    {
255        let mut rng = Rng::seeded(seed);
256        let mut out = String::new();
257        out.push('[');
258        for i in 0..count {
259            if i > 0 {
260                out.push(',');
261            }
262            out.push_str(&element_factory(&mut rng));
263        }
264        out.push(']');
265        out
266    }
267
268    /// Like [`generate`] but validates the output as JSON before
269    /// returning it.
270    ///
271    /// Performs a structural validation pass: every byte of the
272    /// output must form a syntactically valid JSON value. Returns
273    /// `Err(message)` if validation fails — typically because the
274    /// `element_factory` returned malformed JSON.
275    ///
276    /// Useful as a defensive guard when the factory produces JSON
277    /// from external/untrusted templates. Adds linear-in-output-size
278    /// overhead.
279    ///
280    /// # Example
281    ///
282    /// ```
283    /// use dev_fixtures::mock::json_array::generate_validated;
284    ///
285    /// let json = generate_validated(3, 0, |rng| format!("{{\"v\":{}}}", rng.range(10))).unwrap();
286    /// assert!(json.starts_with("["));
287    ///
288    /// // A broken factory triggers a validation error.
289    /// let err = generate_validated(1, 0, |_| "not_json".to_string()).unwrap_err();
290    /// assert!(err.contains("invalid"));
291    /// ```
292    pub fn generate_validated<F>(
293        count: usize,
294        seed: u64,
295        element_factory: F,
296    ) -> Result<String, String>
297    where
298        F: FnMut(&mut Rng) -> String,
299    {
300        let out = generate(count, seed, element_factory);
301        validate_json(&out)?;
302        Ok(out)
303    }
304
305    /// Minimal JSON structural validator.
306    ///
307    /// Returns `Ok(())` if `s` is a syntactically valid JSON value
308    /// (object, array, string, number, true/false/null). Does NOT
309    /// validate semantics (e.g. duplicate keys are accepted).
310    ///
311    /// Hand-rolled, no `serde_json` dependency at the public surface.
312    ///
313    /// # Example
314    ///
315    /// ```
316    /// use dev_fixtures::mock::json_array::validate_json;
317    ///
318    /// assert!(validate_json("[]").is_ok());
319    /// assert!(validate_json("[{\"x\": 1}]").is_ok());
320    /// assert!(validate_json("[invalid]").is_err());
321    /// ```
322    pub fn validate_json(s: &str) -> Result<(), String> {
323        let mut parser = MiniJsonParser::new(s);
324        parser.skip_ws();
325        parser.parse_value()?;
326        parser.skip_ws();
327        if parser.pos < parser.bytes.len() {
328            return Err(format!(
329                "trailing characters after JSON value at position {}",
330                parser.pos
331            ));
332        }
333        Ok(())
334    }
335
336    struct MiniJsonParser<'a> {
337        bytes: &'a [u8],
338        pos: usize,
339    }
340
341    impl<'a> MiniJsonParser<'a> {
342        fn new(s: &'a str) -> Self {
343            Self {
344                bytes: s.as_bytes(),
345                pos: 0,
346            }
347        }
348
349        fn skip_ws(&mut self) {
350            while self.pos < self.bytes.len()
351                && matches!(self.bytes[self.pos], b' ' | b'\t' | b'\n' | b'\r')
352            {
353                self.pos += 1;
354            }
355        }
356
357        fn parse_value(&mut self) -> Result<(), String> {
358            self.skip_ws();
359            if self.pos >= self.bytes.len() {
360                return Err("unexpected end of input".to_string());
361            }
362            match self.bytes[self.pos] {
363                b'{' => self.parse_object(),
364                b'[' => self.parse_array(),
365                b'"' => self.parse_string(),
366                b't' | b'f' => self.parse_bool(),
367                b'n' => self.parse_null(),
368                b'-' | b'0'..=b'9' => self.parse_number(),
369                other => Err(format!(
370                    "invalid JSON: unexpected '{}' at position {}",
371                    other as char, self.pos
372                )),
373            }
374        }
375
376        fn parse_object(&mut self) -> Result<(), String> {
377            self.pos += 1; // consume '{'
378            self.skip_ws();
379            if self.peek() == Some(b'}') {
380                self.pos += 1;
381                return Ok(());
382            }
383            loop {
384                self.skip_ws();
385                self.parse_string()?;
386                self.skip_ws();
387                if self.peek() != Some(b':') {
388                    return Err(format!("expected ':' at position {}", self.pos));
389                }
390                self.pos += 1;
391                self.parse_value()?;
392                self.skip_ws();
393                match self.peek() {
394                    Some(b',') => {
395                        self.pos += 1;
396                    }
397                    Some(b'}') => {
398                        self.pos += 1;
399                        return Ok(());
400                    }
401                    _ => {
402                        return Err(format!(
403                            "expected ',' or '}}' in object at position {}",
404                            self.pos
405                        ));
406                    }
407                }
408            }
409        }
410
411        fn parse_array(&mut self) -> Result<(), String> {
412            self.pos += 1; // consume '['
413            self.skip_ws();
414            if self.peek() == Some(b']') {
415                self.pos += 1;
416                return Ok(());
417            }
418            loop {
419                self.parse_value()?;
420                self.skip_ws();
421                match self.peek() {
422                    Some(b',') => {
423                        self.pos += 1;
424                    }
425                    Some(b']') => {
426                        self.pos += 1;
427                        return Ok(());
428                    }
429                    _ => {
430                        return Err(format!(
431                            "expected ',' or ']' in array at position {}",
432                            self.pos
433                        ));
434                    }
435                }
436            }
437        }
438
439        fn parse_string(&mut self) -> Result<(), String> {
440            if self.peek() != Some(b'"') {
441                return Err(format!("expected string at position {}", self.pos));
442            }
443            self.pos += 1;
444            while self.pos < self.bytes.len() {
445                match self.bytes[self.pos] {
446                    b'"' => {
447                        self.pos += 1;
448                        return Ok(());
449                    }
450                    b'\\' => {
451                        self.pos += 1;
452                        if self.pos >= self.bytes.len() {
453                            return Err("unterminated escape in string".to_string());
454                        }
455                        self.pos += 1;
456                    }
457                    _ => self.pos += 1,
458                }
459            }
460            Err("unterminated string".to_string())
461        }
462
463        fn parse_bool(&mut self) -> Result<(), String> {
464            if self.bytes[self.pos..].starts_with(b"true") {
465                self.pos += 4;
466                Ok(())
467            } else if self.bytes[self.pos..].starts_with(b"false") {
468                self.pos += 5;
469                Ok(())
470            } else {
471                Err(format!("invalid bool at position {}", self.pos))
472            }
473        }
474
475        fn parse_null(&mut self) -> Result<(), String> {
476            if self.bytes[self.pos..].starts_with(b"null") {
477                self.pos += 4;
478                Ok(())
479            } else {
480                Err(format!("invalid null at position {}", self.pos))
481            }
482        }
483
484        fn parse_number(&mut self) -> Result<(), String> {
485            let start = self.pos;
486            if self.peek() == Some(b'-') {
487                self.pos += 1;
488            }
489            while self.pos < self.bytes.len() {
490                let c = self.bytes[self.pos];
491                if c.is_ascii_digit() || matches!(c, b'.' | b'e' | b'E' | b'+' | b'-') {
492                    self.pos += 1;
493                } else {
494                    break;
495                }
496            }
497            if self.pos == start || (self.peek_at(start) == Some(b'-') && self.pos == start + 1) {
498                return Err(format!("invalid number at position {}", start));
499            }
500            Ok(())
501        }
502
503        fn peek(&self) -> Option<u8> {
504            self.bytes.get(self.pos).copied()
505        }
506
507        fn peek_at(&self, idx: usize) -> Option<u8> {
508            self.bytes.get(idx).copied()
509        }
510    }
511}
512
513/// Module containing raw-byte generation.
514pub mod bytes {
515    use super::Rng;
516
517    /// `n` bytes of zeros.
518    pub fn zeros(n: usize) -> Vec<u8> {
519        vec![0u8; n]
520    }
521
522    /// `n` bytes of a repeating pattern.
523    ///
524    /// # Example
525    ///
526    /// ```
527    /// use dev_fixtures::mock::bytes::patterned;
528    /// let bytes = patterned(7, &[0xAB, 0xCD]);
529    /// assert_eq!(bytes, vec![0xAB, 0xCD, 0xAB, 0xCD, 0xAB, 0xCD, 0xAB]);
530    /// ```
531    pub fn patterned(n: usize, pattern: &[u8]) -> Vec<u8> {
532        if pattern.is_empty() {
533            return zeros(n);
534        }
535        let mut out = Vec::with_capacity(n);
536        while out.len() < n {
537            out.push(pattern[out.len() % pattern.len()]);
538        }
539        out
540    }
541
542    /// `n` deterministic random bytes from `seed`.
543    pub fn random(n: usize, seed: u64) -> Vec<u8> {
544        let mut rng = Rng::seeded(seed);
545        let mut out = Vec::with_capacity(n);
546        while out.len() < n {
547            let v = rng.next_u64();
548            for b in v.to_le_bytes() {
549                if out.len() < n {
550                    out.push(b);
551                }
552            }
553        }
554        out
555    }
556}
557
// Unit tests for the generators, the CSV parser, and the JSON validator
// defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // Two generators built from the same seed produce the same stream.
    #[test]
    fn rng_is_deterministic() {
        let mut a = Rng::seeded(42);
        let mut b = Rng::seeded(42);
        for _ in 0..16 {
            assert_eq!(a.next_u64(), b.next_u64());
        }
    }

    // Different seeds give different first outputs.
    #[test]
    fn rng_differs_with_seed() {
        let mut a = Rng::seeded(1);
        let mut b = Rng::seeded(2);
        assert_ne!(a.next_u64(), b.next_u64());
    }

    // `range(n)` stays below `n`, and `range(0)` returns 0 instead of panicking.
    #[test]
    fn rng_range_bounds() {
        let mut r = Rng::seeded(7);
        for _ in 0..1000 {
            let v = r.range(10);
            assert!(v < 10);
        }
        assert_eq!(Rng::seeded(0).range(0), 0);
    }

    // Same seed -> identical CSV; a different seed changes the output.
    #[test]
    fn csv_generate_is_deterministic() {
        let g = |seed| {
            csv::generate(&["a", "b"], 5, seed, |rng| {
                vec![rng.range(100).to_string(), rng.range(100).to_string()]
            })
        };
        assert_eq!(g(42), g(42));
        assert_ne!(g(42), g(43));
    }

    // The output starts with the header line and has one line per row after it.
    #[test]
    fn csv_has_header_and_row_count() {
        let csv = csv::generate(&["x", "y"], 3, 0, |rng| {
            vec![rng.range(10).to_string(), rng.range(10).to_string()]
        });
        assert!(csv.starts_with("x,y\n"));
        assert_eq!(csv.lines().count(), 4);
    }

    // Fields with commas, quotes, or newlines are quoted; inner quotes are doubled.
    #[test]
    fn csv_escapes_commas_quotes_and_newlines() {
        let csv = csv::generate(&["a", "b"], 1, 0, |_rng| {
            vec![
                "value, with comma".into(),
                "value with \"quote\" and\nnewline".into(),
            ]
        });
        assert!(csv.contains("\"value, with comma\""));
        assert!(csv.contains("\"value with \"\"quote\"\" and\nnewline\""));
    }

    // Header names are escaped with the same rules as data fields.
    #[test]
    fn csv_escapes_in_headers_too() {
        let csv = csv::generate(&["plain", "with, comma"], 0, 0, |_rng| vec![]);
        assert_eq!(csv.trim(), "plain,\"with, comma\"");
    }

    // Fields without special characters are written verbatim, unquoted.
    #[test]
    fn csv_unescaped_when_no_special_chars() {
        let csv = csv::generate(&["a", "b"], 1, 0, |_rng| {
            vec!["plain".into(), "also plain".into()]
        });
        assert!(csv.contains("plain,also plain"));
        // No quotes added.
        assert!(!csv.contains("\""));
    }

    // The array is bracketed and elements are separated by single commas.
    #[test]
    fn json_array_round_trip_shape() {
        let json = json_array::generate(3, 0, |rng| format!("{{\"id\":{}}}", rng.range(100)));
        assert!(json.starts_with("["));
        assert!(json.ends_with("]"));
        // 3 elements -> 2 commas at top level.
        assert_eq!(json.matches(',').count(), 2);
    }

    // A well-formed factory passes validation.
    #[test]
    fn json_array_validates_well_formed() {
        let json =
            json_array::generate_validated(3, 0, |rng| format!("{{\"v\":{}}}", rng.range(10)))
                .unwrap();
        assert!(json.starts_with("["));
    }

    // A factory returning non-JSON yields an error whose message contains "invalid".
    #[test]
    fn json_array_validation_rejects_garbage_factory_output() {
        let err = json_array::generate_validated(2, 0, |_| "not_json".to_string()).unwrap_err();
        assert!(err.contains("invalid"));
    }

    // Representative valid documents: containers, literals, escapes, numbers.
    #[test]
    fn json_validate_accepts_canonical_examples() {
        for s in &[
            "{}",
            "[]",
            "[1,2,3]",
            "{\"a\":1}",
            "[{\"k\":[true,false,null]}]",
            "[\"with \\\"quote\\\"\"]",
            "{\"n\": -3.14e2}",
        ] {
            assert!(json_array::validate_json(s).is_ok(), "should accept: {}", s);
        }
    }

    // Unclosed containers, stray/trailing commas, and non-string keys are rejected.
    #[test]
    fn json_validate_rejects_malformed() {
        for s in &["{", "[", "[,]", "{1:1}", "[true,]"] {
            assert!(
                json_array::validate_json(s).is_err(),
                "should reject: {}",
                s
            );
        }
    }

    // generate -> parse round trip keeps commas and newlines inside fields.
    #[test]
    fn csv_round_trip_with_special_chars() {
        let csv = csv::generate(&["id", "note"], 2, 0, |rng| {
            vec![
                rng.range(100).to_string(),
                "value, with comma\nand newline".into(),
            ]
        });
        let (headers, rows) = csv::parse(&csv).unwrap();
        assert_eq!(headers, vec!["id", "note"]);
        assert_eq!(rows.len(), 2);
        for row in rows {
            assert_eq!(row.len(), 2);
            assert!(row[1].contains("value, with comma"));
            assert!(row[1].contains('\n'));
        }
    }

    // A doubled quote inside a quoted field parses as one literal quote.
    #[test]
    fn csv_parse_quoted_doubled_quote() {
        let csv = "a,b\nplain,\"has \"\"quote\"\" inside\"\n";
        let (h, r) = csv::parse(csv).unwrap();
        assert_eq!(h, vec!["a", "b"]);
        assert_eq!(r[0][1], "has \"quote\" inside");
    }

    // Input that ends inside a quoted field is an error.
    #[test]
    fn csv_parse_rejects_unterminated_quote() {
        let csv = "a,b\n\"never closes,foo\n";
        assert!(csv::parse(csv).is_err());
    }

    // "\r\n" is treated as a single row terminator.
    #[test]
    fn csv_parse_handles_crlf() {
        let csv = "a,b\r\n1,2\r\n3,4\r\n";
        let (h, r) = csv::parse(csv).unwrap();
        assert_eq!(h, vec!["a", "b"]);
        assert_eq!(
            r,
            vec![
                vec!["1".to_string(), "2".to_string()],
                vec!["3".to_string(), "4".to_string()]
            ]
        );
    }

    // Zero buffers, truncated pattern repetition, and the empty-pattern fallback.
    #[test]
    fn bytes_zeros_and_patterned() {
        assert_eq!(bytes::zeros(4), vec![0, 0, 0, 0]);
        assert_eq!(bytes::patterned(5, &[1, 2]), vec![1, 2, 1, 2, 1]);
        assert_eq!(bytes::patterned(3, &[]), vec![0, 0, 0]);
    }

    // Same seed -> same bytes; a different seed changes them.
    #[test]
    fn bytes_random_is_deterministic() {
        assert_eq!(bytes::random(64, 7), bytes::random(64, 7));
        assert_ne!(bytes::random(64, 7), bytes::random(64, 8));
    }
}