Skip to main content

seedfaker_core/
pipeline.rs

1use crate::ctx::{GenContext, Identity};
2use crate::field::{self, Field, Transform};
3use crate::locale::Locale;
4use crate::rng::Rng;
5use crate::script::Ctx;
6use crate::{DOMAIN_CORRUPT, DOMAIN_IDENTITY, DOMAIN_LOCALE};
7
/// Per-field specification for batch generation.
///
/// One `FieldSpec` describes a single output column: which field to generate and how
/// its value is seeded, constrained, transformed, and optionally omitted.
pub struct FieldSpec<'a> {
    /// Field definition whose `generate` method produces the raw value.
    pub field: &'static Field,
    /// Modifier string handed to the generator through `GenContext::modifier`.
    pub modifier: &'a str,
    /// Seed material for this column's RNG streams (the value draw and the omit draw);
    /// it is combined with the record serial when the RNG is derived.
    /// Presumably produced by `field_domain_hash` — confirm at the call sites.
    pub domain_hash: u64,
    /// Optional pair forwarded unchanged to the generator as `GenContext::range`.
    /// NOTE(review): looks like `(min, max)` bounds; inclusivity is decided by the
    /// individual generators — confirm there.
    pub range: Option<(i64, i64)>,
    /// Post-processing applied to the generated value; `Transform::None` leaves the
    /// value untouched.
    pub transform: Transform,
    /// When set, a per-record draw from `Rng::range(0, 100)` is compared against this
    /// value and the field is emitted as an empty string if the draw is smaller.
    pub omit_pct: Option<u8>,
}
17
/// Options shared across all records in a batch.
pub struct RecordOpts<'a> {
    /// Seed from which the per-record locale, identity, and corruption RNGs are derived
    /// (together with the record serial and a domain constant).
    pub master_seed: u64,
    /// Candidate locales. When a ctx mode locks a record to one locale, that record is
    /// generated with a single-element slice instead of this full list.
    pub locales: &'a [&'a Locale],
    /// Context mode. `Ctx::Strict` locks every record to one locale, `Ctx::Loose` locks a
    /// record only when `Rng::maybe(0.7)` succeeds, and `Ctx::None` never locks. Any mode
    /// other than `Ctx::None` also creates one shared `Identity` per record.
    pub ctx: Ctx,
    /// When set, every generated row is passed through `corrupt::corrupt_values` with
    /// this rate.
    pub corrupt_rate: Option<f64>,
    /// Forwarded unchanged to each field's `GenContext`.
    /// NOTE(review): presumably the offset from UTC in minutes — confirm in the generators.
    pub tz_offset_minutes: i32,
    /// Lower time bound forwarded to `Identity::new` and to each field's `GenContext`.
    /// NOTE(review): unit (epoch seconds?) is not visible here — confirm.
    pub since: i64,
    /// Upper time bound forwarded alongside `since`; same unit caveat applies.
    pub until: i64,
}
28
29/// Compute domain hash for a field + modifier combination.
30pub fn field_domain_hash(master_seed: u64, field: &Field, modifier: &str) -> u64 {
31    let domain =
32        if modifier.is_empty() { field.id.to_string() } else { format!("{}_{modifier}", field.id) };
33    crate::rng::domain_hash(master_seed, &domain)
34}
35
36/// Generate `n` records starting from `start_serial`.
37///
38/// Handles locale locking (ctx strict/loose), identity creation,
39/// per-field generation, transforms, and corruption.
40///
41/// This is the canonical pipeline for batch generation in bindings
42/// (`PyO3`, NAPI, FFI) and MCP. The CLI engine uses its own optimized
43/// loop with streaming output, aggregators, and template rendering.
44pub fn generate_records(
45    opts: &RecordOpts<'_>,
46    specs: &[FieldSpec<'_>],
47    n: u64,
48    start_serial: u64,
49) -> Vec<Vec<String>> {
50    let needs_ctx = opts.ctx != Ctx::None;
51    let mut records = Vec::with_capacity(n as usize);
52
53    for i in 0..n {
54        let serial = start_serial + i;
55
56        let locked_locale: Option<&Locale> = match opts.ctx {
57            Ctx::Strict => {
58                let mut lr = Rng::derive(opts.master_seed, serial, DOMAIN_LOCALE);
59                Some(*lr.choice(opts.locales))
60            }
61            Ctx::Loose => {
62                let mut lr = Rng::derive(opts.master_seed, serial, DOMAIN_LOCALE);
63                if lr.maybe(0.7) {
64                    Some(*lr.choice(opts.locales))
65                } else {
66                    None
67                }
68            }
69            Ctx::None => None,
70        };
71        let locked_arr: [&Locale; 1];
72        let effective_locales: &[&Locale] = if let Some(loc) = locked_locale {
73            locked_arr = [loc];
74            &locked_arr
75        } else {
76            opts.locales
77        };
78
79        let identity = if needs_ctx {
80            let mut ir = Rng::derive(opts.master_seed, serial, DOMAIN_IDENTITY);
81            Some(Identity::new(&mut ir, effective_locales, None, opts.since, opts.until))
82        } else {
83            None
84        };
85
86        let mut values: Vec<String> = specs
87            .iter()
88            .map(|spec| {
89                if let Some(pct) = spec.omit_pct {
90                    let mut or = Rng::derive(spec.domain_hash, serial, "omit");
91                    if or.range(0, 100) < i64::from(pct) {
92                        return String::new();
93                    }
94                }
95                let mut ctx = GenContext {
96                    rng: Rng::derive_fast(spec.domain_hash, serial),
97                    locales: effective_locales,
98                    modifier: spec.modifier,
99                    identity: identity.as_ref(),
100                    tz_offset_minutes: opts.tz_offset_minutes,
101                    since: opts.since,
102                    until: opts.until,
103                    range: spec.range,
104                    ordering: field::Ordering::None,
105                    zipf: None,
106                    numeric: None,
107                };
108                let mut buf = String::new();
109                spec.field.generate(&mut ctx, &mut buf);
110                if spec.transform == Transform::None {
111                    buf
112                } else {
113                    spec.transform.apply(&buf)
114                }
115            })
116            .collect();
117
118        if let Some(rate) = opts.corrupt_rate {
119            let mut cr = Rng::derive(opts.master_seed, serial, DOMAIN_CORRUPT);
120            crate::corrupt::corrupt_values(&mut cr, &mut values, rate);
121        }
122
123        records.push(values);
124    }
125
126    records
127}
128
129/// Generate N values for a single parsed field spec.
130///
131/// Used by binding `field()` methods to avoid duplicating the generation loop.
132pub fn generate_field_values(
133    spec: &FieldSpec<'_>,
134    n: usize,
135    record_counter: &mut u64,
136    locales: &[&Locale],
137    tz_offset_minutes: i32,
138    since: i64,
139    until: i64,
140) -> Vec<String> {
141    (0..n)
142        .map(|_| {
143            let serial = *record_counter;
144            *record_counter += 1;
145            if let Some(pct) = spec.omit_pct {
146                let mut or = Rng::derive(spec.domain_hash, serial, "omit");
147                if or.range(0, 100) < i64::from(pct) {
148                    return String::new();
149                }
150            }
151            let mut ctx = GenContext {
152                rng: Rng::derive_fast(spec.domain_hash, serial),
153                locales,
154                modifier: spec.modifier,
155                identity: None,
156                tz_offset_minutes,
157                since,
158                until,
159                range: spec.range,
160                ordering: field::Ordering::None,
161                zipf: None,
162                numeric: None,
163            };
164            let mut buf = String::new();
165            spec.field.generate(&mut ctx, &mut buf);
166            if spec.transform == Transform::None {
167                buf
168            } else {
169                spec.transform.apply(&buf)
170            }
171        })
172        .collect()
173}
174
175/// Validate field specs and options without generating data.
176pub fn validate(fields: &[String], ctx: Option<&str>, corrupt: Option<&str>) -> Result<(), String> {
177    crate::field::validate_specs(fields)?;
178    if let Some(c) = ctx {
179        match c {
180            "strict" | "loose" => {}
181            other => return Err(format!("unknown ctx mode: '{other}'")),
182        }
183    }
184    if let Some(c) = corrupt {
185        if crate::script::Corrupt::parse_level(c).is_none() {
186            return Err(format!("unknown corrupt level: '{c}'"));
187        }
188    }
189    Ok(())
190}