/// Small deterministic pseudo-random number generator (SplitMix64).
///
/// The same seed always yields the same sequence, which is what makes the
/// generators in this file reproducible. It is not cryptographically secure.
///
/// `Clone` lets a caller fork the stream at its current position; `Debug`,
/// `PartialEq` and `Eq` expose and compare the internal state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rng {
    /// Counter that is advanced by a fixed odd increment on every draw.
    state: u64,
}

impl Rng {
    /// Increment added to the state before each output is mixed.
    const INCREMENT: u64 = 0x9E37_79B9_7F4A_7C15;

    /// Creates a generator whose internal state starts at `seed`.
    pub fn seeded(seed: u64) -> Self {
        Rng { state: seed }
    }

    /// Advances the state and returns the next 64-bit value.
    ///
    /// All arithmetic wraps, so this never panics.
    pub fn next_u64(&mut self) -> u64 {
        self.state = self.state.wrapping_add(Self::INCREMENT);
        // Two xor-shift/multiply rounds followed by a final xor-shift.
        let mixed = (self.state ^ (self.state >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        let mixed = (mixed ^ (mixed >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        mixed ^ (mixed >> 31)
    }

    /// Returns a value in `0..n`, or `0` when `n == 0`.
    ///
    /// When `n == 0` no value is drawn, so the generator state is left
    /// untouched. The result is a plain `next_u64() % n`, so it carries the
    /// usual slight modulo bias for `n` that does not divide 2^64.
    pub fn range(&mut self, n: u64) -> u64 {
        match n {
            0 => 0,
            bound => self.next_u64() % bound,
        }
    }
}
55
56pub mod csv {
58 use super::Rng;
59
60 pub fn generate<F>(headers: &[&str], rows: usize, seed: u64, mut row_factory: F) -> String
96 where
97 F: FnMut(&mut Rng) -> Vec<String>,
98 {
99 let mut rng = Rng::seeded(seed);
100 let mut out = String::new();
101 let header_row: Vec<String> = headers.iter().map(|h| escape_field(h)).collect();
102 out.push_str(&header_row.join(","));
103 out.push('\n');
104 for _ in 0..rows {
105 let row = row_factory(&mut rng);
106 let escaped: Vec<String> = row.iter().map(|f| escape_field(f)).collect();
107 out.push_str(&escaped.join(","));
108 out.push('\n');
109 }
110 out
111 }
112
113 pub fn escape_field(value: &str) -> String {
119 if value.contains(',')
120 || value.contains('"')
121 || value.contains('\n')
122 || value.contains('\r')
123 {
124 let escaped = value.replace('"', "\"\"");
125 format!("\"{}\"", escaped)
126 } else {
127 value.to_string()
128 }
129 }
130
131 pub fn parse(input: &str) -> Result<(Vec<String>, Vec<Vec<String>>), String> {
159 let mut all_rows: Vec<Vec<String>> = Vec::new();
160 let mut chars = input.chars().peekable();
161 let mut current_field = String::new();
162 let mut current_row: Vec<String> = Vec::new();
163 let mut in_quotes = false;
164 let mut row_has_content = false;
165 loop {
166 match chars.next() {
167 None => {
168 if in_quotes {
170 return Err("unterminated quoted field at EOF".to_string());
171 }
172 if !current_field.is_empty() || row_has_content {
173 current_row.push(std::mem::take(&mut current_field));
174 all_rows.push(std::mem::take(&mut current_row));
175 }
176 break;
177 }
178 Some(c) => {
179 if in_quotes {
180 match c {
181 '"' => {
182 if matches!(chars.peek(), Some('"')) {
183 chars.next();
184 current_field.push('"');
185 } else {
186 in_quotes = false;
187 }
188 }
189 other => current_field.push(other),
190 }
191 } else {
192 match c {
193 '"' if current_field.is_empty() => {
194 in_quotes = true;
195 row_has_content = true;
196 }
197 ',' => {
198 current_row.push(std::mem::take(&mut current_field));
199 row_has_content = true;
200 }
201 '\r' => {
202 if matches!(chars.peek(), Some('\n')) {
204 chars.next();
205 }
206 current_row.push(std::mem::take(&mut current_field));
207 all_rows.push(std::mem::take(&mut current_row));
208 row_has_content = false;
209 }
210 '\n' => {
211 current_row.push(std::mem::take(&mut current_field));
212 all_rows.push(std::mem::take(&mut current_row));
213 row_has_content = false;
214 }
215 other => {
216 current_field.push(other);
217 row_has_content = true;
218 }
219 }
220 }
221 }
222 }
223 }
224
225 if all_rows.is_empty() {
226 return Ok((Vec::new(), Vec::new()));
227 }
228 let headers = all_rows.remove(0);
229 Ok((headers, all_rows))
230 }
231}
232
233pub mod json_array {
235 use super::Rng;
236
237 pub fn generate<F>(count: usize, seed: u64, mut element_factory: F) -> String
252 where
253 F: FnMut(&mut Rng) -> String,
254 {
255 let mut rng = Rng::seeded(seed);
256 let mut out = String::new();
257 out.push('[');
258 for i in 0..count {
259 if i > 0 {
260 out.push(',');
261 }
262 out.push_str(&element_factory(&mut rng));
263 }
264 out.push(']');
265 out
266 }
267
268 pub fn generate_validated<F>(
293 count: usize,
294 seed: u64,
295 element_factory: F,
296 ) -> Result<String, String>
297 where
298 F: FnMut(&mut Rng) -> String,
299 {
300 let out = generate(count, seed, element_factory);
301 validate_json(&out)?;
302 Ok(out)
303 }
304
305 pub fn validate_json(s: &str) -> Result<(), String> {
323 let mut parser = MiniJsonParser::new(s);
324 parser.skip_ws();
325 parser.parse_value()?;
326 parser.skip_ws();
327 if parser.pos < parser.bytes.len() {
328 return Err(format!(
329 "trailing characters after JSON value at position {}",
330 parser.pos
331 ));
332 }
333 Ok(())
334 }
335
336 struct MiniJsonParser<'a> {
337 bytes: &'a [u8],
338 pos: usize,
339 }
340
341 impl<'a> MiniJsonParser<'a> {
342 fn new(s: &'a str) -> Self {
343 Self {
344 bytes: s.as_bytes(),
345 pos: 0,
346 }
347 }
348
349 fn skip_ws(&mut self) {
350 while self.pos < self.bytes.len()
351 && matches!(self.bytes[self.pos], b' ' | b'\t' | b'\n' | b'\r')
352 {
353 self.pos += 1;
354 }
355 }
356
357 fn parse_value(&mut self) -> Result<(), String> {
358 self.skip_ws();
359 if self.pos >= self.bytes.len() {
360 return Err("unexpected end of input".to_string());
361 }
362 match self.bytes[self.pos] {
363 b'{' => self.parse_object(),
364 b'[' => self.parse_array(),
365 b'"' => self.parse_string(),
366 b't' | b'f' => self.parse_bool(),
367 b'n' => self.parse_null(),
368 b'-' | b'0'..=b'9' => self.parse_number(),
369 other => Err(format!(
370 "invalid JSON: unexpected '{}' at position {}",
371 other as char, self.pos
372 )),
373 }
374 }
375
376 fn parse_object(&mut self) -> Result<(), String> {
377 self.pos += 1; self.skip_ws();
379 if self.peek() == Some(b'}') {
380 self.pos += 1;
381 return Ok(());
382 }
383 loop {
384 self.skip_ws();
385 self.parse_string()?;
386 self.skip_ws();
387 if self.peek() != Some(b':') {
388 return Err(format!("expected ':' at position {}", self.pos));
389 }
390 self.pos += 1;
391 self.parse_value()?;
392 self.skip_ws();
393 match self.peek() {
394 Some(b',') => {
395 self.pos += 1;
396 }
397 Some(b'}') => {
398 self.pos += 1;
399 return Ok(());
400 }
401 _ => {
402 return Err(format!(
403 "expected ',' or '}}' in object at position {}",
404 self.pos
405 ));
406 }
407 }
408 }
409 }
410
411 fn parse_array(&mut self) -> Result<(), String> {
412 self.pos += 1; self.skip_ws();
414 if self.peek() == Some(b']') {
415 self.pos += 1;
416 return Ok(());
417 }
418 loop {
419 self.parse_value()?;
420 self.skip_ws();
421 match self.peek() {
422 Some(b',') => {
423 self.pos += 1;
424 }
425 Some(b']') => {
426 self.pos += 1;
427 return Ok(());
428 }
429 _ => {
430 return Err(format!(
431 "expected ',' or ']' in array at position {}",
432 self.pos
433 ));
434 }
435 }
436 }
437 }
438
439 fn parse_string(&mut self) -> Result<(), String> {
440 if self.peek() != Some(b'"') {
441 return Err(format!("expected string at position {}", self.pos));
442 }
443 self.pos += 1;
444 while self.pos < self.bytes.len() {
445 match self.bytes[self.pos] {
446 b'"' => {
447 self.pos += 1;
448 return Ok(());
449 }
450 b'\\' => {
451 self.pos += 1;
452 if self.pos >= self.bytes.len() {
453 return Err("unterminated escape in string".to_string());
454 }
455 self.pos += 1;
456 }
457 _ => self.pos += 1,
458 }
459 }
460 Err("unterminated string".to_string())
461 }
462
463 fn parse_bool(&mut self) -> Result<(), String> {
464 if self.bytes[self.pos..].starts_with(b"true") {
465 self.pos += 4;
466 Ok(())
467 } else if self.bytes[self.pos..].starts_with(b"false") {
468 self.pos += 5;
469 Ok(())
470 } else {
471 Err(format!("invalid bool at position {}", self.pos))
472 }
473 }
474
475 fn parse_null(&mut self) -> Result<(), String> {
476 if self.bytes[self.pos..].starts_with(b"null") {
477 self.pos += 4;
478 Ok(())
479 } else {
480 Err(format!("invalid null at position {}", self.pos))
481 }
482 }
483
484 fn parse_number(&mut self) -> Result<(), String> {
485 let start = self.pos;
486 if self.peek() == Some(b'-') {
487 self.pos += 1;
488 }
489 while self.pos < self.bytes.len() {
490 let c = self.bytes[self.pos];
491 if c.is_ascii_digit() || matches!(c, b'.' | b'e' | b'E' | b'+' | b'-') {
492 self.pos += 1;
493 } else {
494 break;
495 }
496 }
497 if self.pos == start || (self.peek_at(start) == Some(b'-') && self.pos == start + 1) {
498 return Err(format!("invalid number at position {}", start));
499 }
500 Ok(())
501 }
502
503 fn peek(&self) -> Option<u8> {
504 self.bytes.get(self.pos).copied()
505 }
506
507 fn peek_at(&self, idx: usize) -> Option<u8> {
508 self.bytes.get(idx).copied()
509 }
510 }
511}
512
513pub mod bytes {
515 use super::Rng;
516
517 pub fn zeros(n: usize) -> Vec<u8> {
519 vec![0u8; n]
520 }
521
522 pub fn patterned(n: usize, pattern: &[u8]) -> Vec<u8> {
532 if pattern.is_empty() {
533 return zeros(n);
534 }
535 let mut out = Vec::with_capacity(n);
536 while out.len() < n {
537 out.push(pattern[out.len() % pattern.len()]);
538 }
539 out
540 }
541
542 pub fn random(n: usize, seed: u64) -> Vec<u8> {
544 let mut rng = Rng::seeded(seed);
545 let mut out = Vec::with_capacity(n);
546 while out.len() < n {
547 let v = rng.next_u64();
548 for b in v.to_le_bytes() {
549 if out.len() < n {
550 out.push(b);
551 }
552 }
553 }
554 out
555 }
556}
557
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned form the CSV parser returns.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    // ---- Rng ------------------------------------------------------------

    /// Generators created from the same seed emit the same stream.
    #[test]
    fn rng_is_deterministic() {
        let mut first = Rng::seeded(42);
        let mut second = Rng::seeded(42);
        for _ in 0..16 {
            let (x, y) = (first.next_u64(), second.next_u64());
            assert_eq!(x, y);
        }
    }

    /// Different seeds give different first outputs.
    #[test]
    fn rng_differs_with_seed() {
        let from_one = Rng::seeded(1).next_u64();
        let from_two = Rng::seeded(2).next_u64();
        assert_ne!(from_one, from_two);
    }

    /// `range(n)` stays below `n`, and `range(0)` is defined as zero.
    #[test]
    fn rng_range_bounds() {
        let mut rng = Rng::seeded(7);
        for _ in 0..1000 {
            assert!(rng.range(10) < 10);
        }
        let mut fresh = Rng::seeded(0);
        assert_eq!(fresh.range(0), 0);
    }

    // ---- csv::generate --------------------------------------------------

    /// The same seed reproduces the text; a different seed changes it.
    #[test]
    fn csv_generate_is_deterministic() {
        fn numeric_table(seed: u64) -> String {
            csv::generate(&["a", "b"], 5, seed, |rng| {
                vec![rng.range(100).to_string(), rng.range(100).to_string()]
            })
        }
        assert_eq!(numeric_table(42), numeric_table(42));
        assert_ne!(numeric_table(42), numeric_table(43));
    }

    /// Output starts with the header line and has one line per row after it.
    #[test]
    fn csv_has_header_and_row_count() {
        let text = csv::generate(&["x", "y"], 3, 0, |rng| {
            vec![rng.range(10).to_string(), rng.range(10).to_string()]
        });
        assert!(text.starts_with("x,y\n"));
        assert_eq!(text.lines().count(), 4);
    }

    /// Fields containing commas, quotes or newlines are quoted and escaped.
    #[test]
    fn csv_escapes_commas_quotes_and_newlines() {
        let text = csv::generate(&["a", "b"], 1, 0, |_rng| {
            vec![
                String::from("value, with comma"),
                String::from("value with \"quote\" and\nnewline"),
            ]
        });
        assert!(text.contains("\"value, with comma\""));
        assert!(text.contains("\"value with \"\"quote\"\" and\nnewline\""));
    }

    /// Header fields get the same escaping as data fields.
    #[test]
    fn csv_escapes_in_headers_too() {
        let text = csv::generate(&["plain", "with, comma"], 0, 0, |_rng| Vec::new());
        assert_eq!(text.trim(), "plain,\"with, comma\"");
    }

    /// Plain fields are written without any quoting.
    #[test]
    fn csv_unescaped_when_no_special_chars() {
        let text = csv::generate(&["a", "b"], 1, 0, |_rng| {
            vec![String::from("plain"), String::from("also plain")]
        });
        assert!(text.contains("plain,also plain"));
        assert!(!text.contains('"'));
    }

    // ---- json_array -----------------------------------------------------

    /// The array is bracketed and has one comma between adjacent elements.
    #[test]
    fn json_array_round_trip_shape() {
        let text = json_array::generate(3, 0, |rng| format!("{{\"id\":{}}}", rng.range(100)));
        assert!(text.starts_with('['));
        assert!(text.ends_with(']'));
        let commas = text.matches(',').count();
        assert_eq!(commas, 2);
    }

    /// Well-formed elements pass validation.
    #[test]
    fn json_array_validates_well_formed() {
        let outcome =
            json_array::generate_validated(3, 0, |rng| format!("{{\"v\":{}}}", rng.range(10)));
        let text = outcome.unwrap();
        assert!(text.starts_with('['));
    }

    /// Elements that are not JSON make validation fail with an "invalid" message.
    #[test]
    fn json_array_validation_rejects_garbage_factory_output() {
        let outcome = json_array::generate_validated(2, 0, |_| String::from("not_json"));
        let message = outcome.unwrap_err();
        assert!(message.contains("invalid"));
    }

    /// A handful of representative valid documents are accepted.
    #[test]
    fn json_validate_accepts_canonical_examples() {
        let valid = [
            "{}",
            "[]",
            "[1,2,3]",
            "{\"a\":1}",
            "[{\"k\":[true,false,null]}]",
            "[\"with \\\"quote\\\"\"]",
            "{\"n\": -3.14e2}",
        ];
        for doc in valid.iter() {
            assert!(
                json_array::validate_json(doc).is_ok(),
                "should accept: {}",
                doc
            );
        }
    }

    /// Truncated or structurally broken documents are rejected.
    #[test]
    fn json_validate_rejects_malformed() {
        let broken = ["{", "[", "[,]", "{1:1}", "[true,]"];
        for doc in broken.iter() {
            assert!(
                json_array::validate_json(doc).is_err(),
                "should reject: {}",
                doc
            );
        }
    }

    // ---- csv::parse -----------------------------------------------------

    /// Generated text with commas and newlines in a field parses back intact.
    #[test]
    fn csv_round_trip_with_special_chars() {
        let text = csv::generate(&["id", "note"], 2, 0, |rng| {
            vec![
                rng.range(100).to_string(),
                String::from("value, with comma\nand newline"),
            ]
        });
        let (headers, rows) = csv::parse(&text).unwrap();
        assert_eq!(headers, vec!["id", "note"]);
        assert_eq!(rows.len(), 2);
        for fields in rows.iter() {
            assert_eq!(fields.len(), 2);
            let note = &fields[1];
            assert!(note.contains("value, with comma"));
            assert!(note.contains('\n'));
        }
    }

    /// A doubled quote inside a quoted field decodes to a single quote.
    #[test]
    fn csv_parse_quoted_doubled_quote() {
        let input = "a,b\nplain,\"has \"\"quote\"\" inside\"\n";
        let (headers, rows) = csv::parse(input).unwrap();
        assert_eq!(headers, vec!["a", "b"]);
        assert_eq!(rows[0][1], "has \"quote\" inside");
    }

    /// Input that ends inside a quoted field is an error.
    #[test]
    fn csv_parse_rejects_unterminated_quote() {
        let outcome = csv::parse("a,b\n\"never closes,foo\n");
        assert!(outcome.is_err());
    }

    /// CRLF line endings are treated as single record terminators.
    #[test]
    fn csv_parse_handles_crlf() {
        let (headers, rows) = csv::parse("a,b\r\n1,2\r\n3,4\r\n").unwrap();
        assert_eq!(headers, vec!["a", "b"]);
        assert_eq!(rows, vec![strings(&["1", "2"]), strings(&["3", "4"])]);
    }

    // ---- bytes ----------------------------------------------------------

    /// Zero fill, pattern repetition, and the empty-pattern fallback.
    #[test]
    fn bytes_zeros_and_patterned() {
        assert_eq!(bytes::zeros(4), vec![0, 0, 0, 0]);
        assert_eq!(bytes::patterned(5, &[1, 2]), vec![1, 2, 1, 2, 1]);
        assert_eq!(bytes::patterned(3, &[]), vec![0, 0, 0]);
    }

    /// Random buffers depend only on length and seed.
    #[test]
    fn bytes_random_is_deterministic() {
        let baseline = bytes::random(64, 7);
        assert_eq!(baseline, bytes::random(64, 7));
        assert_ne!(baseline, bytes::random(64, 8));
    }
}