Skip to main content

seedfaker_core/
field.rs

1use crate::ctx::GenContext;
2
3#[path = "field_gen.rs"]
4mod field_gen;
5
6pub use field_gen::{field_capabilities, field_modifiers, GROUPS, REGISTRY};
7
8pub type GenFn = for<'a> fn(&mut GenContext<'a>, &mut String);
9
/// Zipf distribution setting for a field, written as a `zipf` segment
/// (e.g. `integer:1..1000:zipf`) or with an explicit exponent
/// (e.g. `integer:1..1000:zipf=0.8`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ZipfSpec {
    /// The exponent `s`. Defaults to 1.0 and must be > 0.
    pub s: f64,
}

impl ZipfSpec {
    /// Spec used when no exponent is given: `s = 1.0`.
    pub const DEFAULT: ZipfSpec = ZipfSpec { s: 1.0 };
}
20
21/// Parsed field specification.
22pub type ParsedSpec<'a> =
23    (&'a str, &'a str, Transform, Option<RangeSpec>, Ordering, Option<u8>, Option<ZipfSpec>);
24
25pub struct Field {
26    pub id: &'static str,
27    pub name: &'static str,
28    pub group: &'static str,
29    pub description: &'static str,
30    pub gen: GenFn,
31}
32
33impl std::fmt::Debug for Field {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        f.debug_struct("Field")
36            .field("id", &self.id)
37            .field("name", &self.name)
38            .field("group", &self.group)
39            .field("description", &self.description)
40            .finish_non_exhaustive()
41    }
42}
43
44impl Field {
45    pub const fn new(
46        id: &'static str,
47        name: &'static str,
48        group: &'static str,
49        description: &'static str,
50        gen: GenFn,
51    ) -> Self {
52        Self { id, name, group, description, gen }
53    }
54
55    #[inline]
56    pub fn generate(&self, ctx: &mut GenContext<'_>, buf: &mut String) -> Option<f64> {
57        ctx.numeric = None;
58        (self.gen)(ctx, buf);
59        ctx.numeric
60    }
61}
62
/// Range constraint attached to a single field, e.g. `integer:1..100` or
/// `date:2020..2025`.
///
/// A bound left as `None` is filled in later from field defaults or from the
/// global `since`/`until` values.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RangeSpec {
    /// Lower bound, if one was written.
    pub from: Option<i64>,
    /// Upper bound, if one was written.
    pub to: Option<i64>,
}
70
/// Names of the fields that accept the `from..to` range syntax.
const RANGE_FIELDS: &[&str] = &[
    "integer",
    "float",
    "amount",
    "date",
    "birthdate",
    "timestamp",
    "age",
    "digits",
];
74
75fn parse_range(s: &str) -> Result<RangeSpec, String> {
76    if let Some(to_str) = s.strip_prefix("..") {
77        let to = to_str.parse::<i64>().map_err(|_| format!("invalid range bound: '{to_str}'"))?;
78        Ok(RangeSpec { from: None, to: Some(to) })
79    } else if let Some(from_str) = s.strip_suffix("..") {
80        let from =
81            from_str.parse::<i64>().map_err(|_| format!("invalid range bound: '{from_str}'"))?;
82        Ok(RangeSpec { from: Some(from), to: None })
83    } else if let Some((from_str, to_str)) = s.split_once("..") {
84        let from =
85            from_str.parse::<i64>().map_err(|_| format!("invalid range bound: '{from_str}'"))?;
86        let to = to_str.parse::<i64>().map_err(|_| format!("invalid range bound: '{to_str}'"))?;
87        if from >= to {
88            return Err(format!("invalid range: {from}..{to}"));
89        }
90        Ok(RangeSpec { from: Some(from), to: Some(to) })
91    } else {
92        Err(format!("invalid range: '{s}'"))
93    }
94}
95
/// Reports whether a descriptor segment should be treated as a range, i.e.
/// whether it contains `..` anywhere.
fn is_range_segment(s: &str) -> bool {
    s.find("..").is_some()
}
99
/// Case transform that can be applied to a generated value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Transform {
    /// Leave the value unchanged.
    None,
    /// Convert the whole value to upper case.
    Upper,
    /// Convert the whole value to lower case.
    Lower,
    /// Upper-case the first character and lower-case the rest.
    Capitalize,
}
107
108const TRANSFORMS: &[(&str, Transform)] = &[
109    ("upper", Transform::Upper),
110    ("lower", Transform::Lower),
111    ("capitalize", Transform::Capitalize),
112];
113
114fn parse_transform(s: &str) -> Option<Transform> {
115    TRANSFORMS.iter().find(|&&(k, _)| k == s).map(|&(_, t)| t)
116}
117
118impl Transform {
119    pub fn apply(self, s: &str) -> String {
120        match self {
121            Transform::None => s.to_string(),
122            Transform::Upper => s.to_uppercase(),
123            Transform::Lower => s.to_lowercase(),
124            Transform::Capitalize => {
125                let mut chars = s.chars();
126                match chars.next() {
127                    None => String::new(),
128                    Some(c) => {
129                        let mut out = c.to_uppercase().to_string();
130                        for ch in chars {
131                            out.extend(ch.to_lowercase());
132                        }
133                        out
134                    }
135                }
136            }
137        }
138    }
139}
140
/// Ordering requested for a field through an `asc` or `desc` segment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Ordering {
    /// No ordering segment was given.
    None,
    /// The `asc` segment was given.
    Asc,
    /// The `desc` segment was given.
    Desc,
}
147
148pub struct ResolvedField {
149    pub field: &'static Field,
150    pub modifier: String,
151    pub transform: Transform,
152    pub range: Option<RangeSpec>,
153    pub ordering: Ordering,
154    /// Explicit column name from `name=field` syntax. Overrides `display_name()`.
155    pub alias: Option<String>,
156    /// Percentage of rows where this field is omitted (outputs NULL). 0–100.
157    pub omit_pct: Option<u8>,
158    /// Zipf distribution over the range instead of uniform.
159    pub zipf: Option<ZipfSpec>,
160}
161
162impl ResolvedField {
163    /// Column name: explicit alias if set, otherwise auto-derived.
164    pub fn column_name(&self) -> String {
165        if let Some(ref a) = self.alias {
166            return a.clone();
167        }
168        self.display_name()
169    }
170
171    /// Auto-derived name for CSV/JSON headers.
172    pub fn display_name(&self) -> String {
173        let base = self.field.name.replace('-', "_");
174        if self.modifier.is_empty() {
175            base
176        } else {
177            format!("{base}_{}", self.modifier)
178        }
179    }
180
181    /// Stable domain key for RNG derivation — uses field id, never changes.
182    pub fn domain_key(&self) -> String {
183        if self.modifier.is_empty() {
184            self.field.id.to_string()
185        } else {
186            format!("{}_{}", self.field.id, self.modifier)
187        }
188    }
189}
190
191pub fn lookup(name: &str) -> Option<&'static Field> {
192    REGISTRY.iter().find(|f| f.name == name)
193}
194
195pub fn all_names() -> Vec<&'static str> {
196    REGISTRY.iter().map(|f| f.name).collect()
197}
198
199fn is_group(name: &str) -> bool {
200    name == "all" || GROUPS.contains(&name)
201}
202
203fn expand_group(name: &str) -> Vec<ResolvedField> {
204    let fields: Vec<&Field> = if name == "all" {
205        REGISTRY.iter().collect()
206    } else {
207        REGISTRY.iter().filter(|f| f.group == name).collect()
208    };
209    fields
210        .into_iter()
211        .map(|f| ResolvedField {
212            field: f,
213            modifier: String::new(),
214            transform: Transform::None,
215            range: None,
216            ordering: Ordering::None,
217            alias: None,
218            omit_pct: None,
219            zipf: None,
220        })
221        .collect()
222}
223
/// Names of the fields whose numeric modifier is taken as a length
/// (`digits:4`, `hex:8`, ...).
const LENGTH_FIELDS: &[&str] = &[
    "digits",
    "letters",
    "alnum",
    "base64",
    "hex",
    "password",
];
226
227fn validate_modifier(field: &Field, m: &str) -> Result<(), String> {
228    if m.is_empty() {
229        return Ok(());
230    }
231    if !m.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') {
232        return Err(format!("modifier '{m}' must contain only a-z, 0-9 and -"));
233    }
234    // Numeric length modifier for text fields (digits:4, hex:8)
235    if LENGTH_FIELDS.contains(&field.name) && m.parse::<usize>().is_ok() {
236        return Ok(());
237    }
238    let known = field_modifiers(field.id);
239    if !known.is_empty() {
240        let valid: Vec<&str> = known.split(", ").collect();
241        if !valid.contains(&m) {
242            return Err(format!("unknown modifier '{}:{m}'; available: {known}", field.name));
243        }
244    } else if parse_transform(m).is_none() {
245        return Err(format!(
246            "field '{}' has no modifiers; did you mean a transform? available: upper, lower, capitalize",
247            field.name
248        ));
249    }
250    Ok(())
251}
252
253fn parse_ordering(s: &str) -> Option<Ordering> {
254    match s {
255        "asc" => Some(Ordering::Asc),
256        "desc" => Some(Ordering::Desc),
257        _ => None,
258    }
259}
260
261fn parse_zipf(s: &str) -> Option<Result<ZipfSpec, String>> {
262    if s == "zipf" {
263        return Some(Ok(ZipfSpec::DEFAULT));
264    }
265    let rest = s.strip_prefix("zipf=")?;
266    let v: f64 = match rest.parse() {
267        Ok(v) => v,
268        Err(_) => return Some(Err(format!("invalid zipf exponent: '{rest}'"))),
269    };
270    if v <= 0.0 || !v.is_finite() {
271        return Some(Err(format!("zipf exponent must be > 0, got {v}")));
272    }
273    Some(Ok(ZipfSpec { s: v }))
274}
275
276pub fn parse_field_spec(token: &str) -> Result<ParsedSpec<'_>, String> {
277    let mut parts = token.splitn(8, ':');
278    let name = parts.next().unwrap_or("");
279    let mut modifier: Option<&str> = None;
280    let mut transform = Transform::None;
281    let mut range: Option<RangeSpec> = None;
282    let mut ordering = Ordering::None;
283    let mut omit_pct: Option<u8> = None;
284    let mut zipf: Option<ZipfSpec> = None;
285
286    for seg in parts {
287        if let Some(result) = parse_zipf(seg) {
288            if zipf.is_some() {
289                return Err("duplicate zipf in field descriptor".into());
290            }
291            zipf = Some(result?);
292        } else if let Some(pct) = parse_omit_pct(seg) {
293            if omit_pct.is_some() {
294                return Err("duplicate omit in field descriptor".into());
295            }
296            omit_pct = Some(pct);
297        } else if is_range_segment(seg) {
298            if range.is_some() {
299                return Err("duplicate range in field descriptor".into());
300            }
301            range = Some(parse_range(seg)?);
302        } else if let Some(t) = parse_transform(seg) {
303            if transform != Transform::None {
304                return Err("duplicate transform in field descriptor".into());
305            }
306            transform = t;
307        } else if let Some(o) = parse_ordering(seg) {
308            if ordering != Ordering::None {
309                return Err("duplicate ordering in field descriptor".into());
310            }
311            ordering = o;
312        } else {
313            if modifier.is_some() {
314                return Err("duplicate modifier in field descriptor".into());
315            }
316            modifier = Some(seg);
317        }
318    }
319
320    Ok((name, modifier.unwrap_or(""), transform, range, ordering, omit_pct, zipf))
321}
322
/// Parses an `omit=<n>` segment into a percentage.
///
/// Returns `None` when the prefix is missing, when `<n>` does not parse as a
/// `u8`, or when it is greater than 100.
fn parse_omit_pct(s: &str) -> Option<u8> {
    s.strip_prefix("omit=")
        .and_then(|digits| digits.parse::<u8>().ok())
        .filter(|pct| *pct <= 100)
}
331
332fn validate_range(field: &Field, range: &Option<RangeSpec>) -> Result<(), String> {
333    if let Some(r) = range {
334        if !RANGE_FIELDS.contains(&field.name) {
335            return Err(format!("field '{}' does not support range", field.name));
336        }
337        if let (Some(from), Some(to)) = (r.from, r.to) {
338            if from >= to {
339                return Err(format!("invalid range: {from}..{to}"));
340            }
341        }
342    }
343    Ok(())
344}
345
346pub fn resolve_range(
347    range: &Option<RangeSpec>,
348    field_name: &str,
349    since: i64,
350    until: i64,
351) -> Option<(i64, i64)> {
352    let r = range.as_ref()?;
353    let is_date = matches!(field_name, "date" | "birthdate" | "timestamp");
354    let (default_min, default_max) = if is_date { (since, until) } else { (0, 999_999) };
355    let from = r.from.unwrap_or(default_min);
356    let to = r.to.unwrap_or(default_max);
357    // For date fields: small values (<=9999) are years → convert to epoch
358    if is_date {
359        let from_e = if from > 0 && from <= 9999 {
360            crate::temporal::parse(&from.to_string()).unwrap_or(from)
361        } else {
362            from
363        };
364        let to_e = if to > 0 && to <= 9999 {
365            crate::temporal::parse_until(&to.to_string()).unwrap_or(to)
366        } else {
367            to
368        };
369        Some((from_e, to_e))
370    } else {
371        Some((from, to))
372    }
373}
374
375pub fn resolve(tokens: &[String]) -> Result<Vec<ResolvedField>, String> {
376    let mut result = Vec::new();
377    for token in tokens {
378        // Split on first `=` for `name=field_spec` syntax.
379        let (alias, spec) = if let Some(eq_pos) = token.find('=') {
380            // Exclude enum values (enum:a=3,b=1) and range (1..100)
381            // by checking the `=` is before any `:` — i.e. it's a column alias.
382            let colon_pos = token.find(':').unwrap_or(token.len());
383            if eq_pos < colon_pos {
384                let (a, s) = token.split_at(eq_pos);
385                (Some(a.to_string()), &s[1..])
386            } else {
387                (None, token.as_str())
388            }
389        } else {
390            (None, token.as_str())
391        };
392
393        let (name, modifier, transform, range, ordering, omit_pct, zipf) = parse_field_spec(spec)?;
394
395        if let Some(field) = lookup(name) {
396            if name == "enum" {
397                super::gen::validate_enum(modifier)?;
398            } else {
399                validate_modifier(field, modifier)?;
400                validate_range(field, &range)?;
401            }
402            if zipf.is_some() && range.is_none() {
403                return Err(format!(
404                    "field '{name}': zipf requires a range (e.g. {name}:1..1000:zipf)"
405                ));
406            }
407            result.push(ResolvedField {
408                field,
409                modifier: modifier.to_string(),
410                transform,
411                range,
412                ordering,
413                alias,
414                omit_pct,
415                zipf,
416            });
417        } else if is_group(name) {
418            if alias.is_some() {
419                return Err(format!("alias not supported on groups: '{token}'"));
420            }
421            if !modifier.is_empty() || transform != Transform::None {
422                return Err(format!("modifiers and transforms not supported on groups: '{token}'"));
423            }
424            result.extend(expand_group(name));
425        } else {
426            return Err(format!("unknown field or group '{name}'; run 'seedfaker --list'"));
427        }
428    }
429    if result.is_empty() {
430        return Err("no fields specified".into());
431    }
432    Ok(result)
433}
434
435/// Validate a single field spec string without resolving it.
436/// Checks field name, modifier, range, zipf, and enum syntax.
437pub fn validate_spec(spec: &str) -> Result<(), String> {
438    let (name, modifier, _transform, range, _ordering, _omit_pct, zipf) = parse_field_spec(spec)?;
439    let field = lookup(name).ok_or_else(|| format!("unknown field '{name}'"))?;
440    if name == "enum" {
441        super::gen::validate_enum(modifier)?;
442    } else {
443        validate_modifier(field, modifier)?;
444        validate_range(field, &range)?;
445    }
446    if zipf.is_some() && range.is_none() {
447        return Err(format!("field '{name}': zipf requires a range (e.g. {name}:1..1000:zipf)"));
448    }
449    Ok(())
450}
451
452/// Validate a batch of field spec strings.
453pub fn validate_specs(specs: &[String]) -> Result<(), String> {
454    for spec in specs {
455        validate_spec(spec)?;
456    }
457    Ok(())
458}