Skip to main content

sonda_core/compiler/
parse.rs

1//! YAML parsing, schema validation, and version detection for v2 scenario files.
2//!
3//! The primary entry point is [`parse`], which deserializes a YAML string
4//! into a [`ScenarioFile`] and runs structural validation (version check,
5//! id uniqueness, signal type validity, generator/pack mutual exclusion).
6//!
7//! [`detect_version`] is a lightweight helper that peeks at the `version` field
8//! without fully parsing the file. It will be used by the version dispatch layer
9//! (PR 6) to route between v1 and v2 parsing paths.
10
11use std::collections::HashSet;
12
13use super::{Entry, ScenarioFile};
14
15// ---------------------------------------------------------------------------
16// Error type
17// ---------------------------------------------------------------------------
18
19/// Errors produced during v2 scenario parsing and validation.
20#[derive(Debug, thiserror::Error)]
21#[non_exhaustive]
22pub enum ParseError {
23    /// The YAML could not be deserialized into the expected structure.
24    #[error("YAML parse error: {0}")]
25    Yaml(#[from] serde_yaml_ng::Error),
26
27    /// The `version` field is present but is not `2`.
28    #[error("version must be 2, got {0}")]
29    InvalidVersion(u32),
30
31    /// Two or more entries share the same `id`.
32    #[error("duplicate entry id: '{0}'")]
33    DuplicateId(String),
34
35    /// An entry has an unrecognized `signal_type`.
36    #[error("entry {index}: invalid signal_type '{signal_type}', must be one of: metrics, logs, histogram, summary")]
37    InvalidSignalType {
38        /// Zero-based index of the offending entry.
39        index: usize,
40        /// The invalid signal type string.
41        signal_type: String,
42    },
43
44    /// An entry specifies both `generator` and `pack`.
45    #[error("entry {index}: must have either 'generator' or 'pack', not both")]
46    GeneratorAndPack {
47        /// Zero-based index of the offending entry.
48        index: usize,
49    },
50
51    /// An entry specifies neither `generator`/`distribution` nor `pack`.
52    #[error("entry {index}: must have either 'generator' (or 'distribution' for histogram/summary) or 'pack'")]
53    MissingGeneratorOrPack {
54        /// Zero-based index of the offending entry.
55        index: usize,
56    },
57
58    /// An inline entry (non-pack) is missing the required `name` field.
59    #[error("entry {index}: inline signal must have 'name'")]
60    MissingName {
61        /// Zero-based index of the offending entry.
62        index: usize,
63    },
64
65    /// A pack entry has a `signal_type` other than `"metrics"`.
66    #[error("entry {index}: pack entries must have signal_type 'metrics'")]
67    PackNotMetrics {
68        /// Zero-based index of the offending entry.
69        index: usize,
70    },
71
72    /// An entry `id` does not match the allowed pattern `[a-zA-Z_][a-zA-Z0-9_]*`.
73    #[error("entry id '{0}' is invalid: must match [a-zA-Z_][a-zA-Z0-9_]*")]
74    InvalidId(String),
75
76    /// An entry has a generator field that is incompatible with its `signal_type`.
77    ///
78    /// For example, a `signal_type: metrics` entry must not have `log_generator`
79    /// or `distribution`.
80    #[error("entry {index}: signal_type '{signal_type}' must not have '{field}' field")]
81    UnexpectedField {
82        /// Zero-based index of the offending entry.
83        index: usize,
84        /// The signal type of the entry.
85        signal_type: String,
86        /// The field name that is not allowed for this signal type.
87        field: String,
88    },
89}
90
91// ---------------------------------------------------------------------------
92// Recognized signal types
93// ---------------------------------------------------------------------------
94
/// Every `signal_type` value a v2 entry is allowed to declare.
const VALID_SIGNAL_TYPES: &[&str] = &["metrics", "logs", "histogram", "summary"];

/// The subset of signal types whose inline source is `distribution` rather
/// than `generator`.
const DISTRIBUTION_SIGNAL_TYPES: &[&str] = &["histogram", "summary"];
100
101// ---------------------------------------------------------------------------
102// Version detection
103// ---------------------------------------------------------------------------
104
105/// Peek at the `version` field in a YAML string without fully parsing it.
106///
107/// Returns `Some(n)` when the top-level mapping contains a `version` key with
108/// an integer value, or `None` when the field is absent or cannot be parsed.
109/// This is intentionally cheap — it deserializes into a minimal struct.
110///
111/// # Examples
112///
113/// ```
114/// use sonda_core::compiler::parse::detect_version;
115///
116/// assert_eq!(detect_version("version: 2\nscenarios: []"), Some(2));
117/// assert_eq!(detect_version("version: 1"), Some(1));
118/// assert_eq!(detect_version("name: cpu_usage\nrate: 1"), None);
119/// ```
120pub fn detect_version(yaml: &str) -> Option<u32> {
121    #[derive(serde::Deserialize)]
122    struct VersionProbe {
123        version: Option<u32>,
124    }
125
126    let probe: VersionProbe = serde_yaml_ng::from_str(yaml).ok()?;
127    probe.version
128}
129
130// ---------------------------------------------------------------------------
131// Single-signal shorthand support
132// ---------------------------------------------------------------------------
133
134/// A flat representation of a single-entry v2 file (no `scenarios:` key).
135///
136/// This is an internal deserialization target used to support the shorthand
137/// format where the top-level YAML mapping contains entry fields directly.
138#[derive(serde::Deserialize)]
139#[serde(deny_unknown_fields)]
140struct FlatFile {
141    version: u32,
142
143    // Defaults-level fields (also allowed at top level in shorthand)
144    #[serde(default)]
145    rate: Option<f64>,
146    #[serde(default)]
147    duration: Option<String>,
148    #[serde(default)]
149    encoder: Option<crate::encoder::EncoderConfig>,
150    #[serde(default)]
151    sink: Option<crate::sink::SinkConfig>,
152    #[serde(default)]
153    labels: Option<std::collections::BTreeMap<String, String>>,
154
155    // Entry-level fields
156    #[serde(default)]
157    id: Option<String>,
158    #[serde(default)]
159    signal_type: Option<String>,
160    #[serde(default)]
161    name: Option<String>,
162    #[serde(default)]
163    generator: Option<crate::generator::GeneratorConfig>,
164    #[serde(default)]
165    log_generator: Option<crate::generator::LogGeneratorConfig>,
166    #[serde(default)]
167    dynamic_labels: Option<Vec<crate::config::DynamicLabelConfig>>,
168    #[serde(default)]
169    jitter: Option<f64>,
170    #[serde(default)]
171    jitter_seed: Option<u64>,
172    #[serde(default)]
173    gaps: Option<crate::config::GapConfig>,
174    #[serde(default)]
175    bursts: Option<crate::config::BurstConfig>,
176    #[serde(default)]
177    cardinality_spikes: Option<Vec<crate::config::CardinalitySpikeConfig>>,
178    #[serde(default)]
179    phase_offset: Option<String>,
180    #[serde(default)]
181    clock_group: Option<String>,
182    #[serde(default)]
183    after: Option<super::AfterClause>,
184    #[serde(default, rename = "while")]
185    while_clause: Option<super::WhileClause>,
186    #[serde(default, rename = "delay")]
187    delay_clause: Option<super::DelayClause>,
188
189    // Pack fields
190    #[serde(default)]
191    pack: Option<String>,
192    #[serde(default)]
193    overrides: Option<std::collections::BTreeMap<String, crate::packs::MetricOverride>>,
194
195    // Histogram / summary fields
196    #[serde(default)]
197    distribution: Option<crate::config::DistributionConfig>,
198    #[serde(default)]
199    buckets: Option<Vec<f64>>,
200    #[serde(default)]
201    quantiles: Option<Vec<f64>>,
202    #[serde(default)]
203    observations_per_tick: Option<u32>,
204    #[serde(default)]
205    mean_shift_per_sec: Option<f64>,
206    #[serde(default)]
207    seed: Option<u64>,
208    #[serde(default)]
209    on_sink_error: Option<crate::config::OnSinkError>,
210}
211
212impl FlatFile {
213    /// Convert the flat representation into a [`ScenarioFile`] with a single entry.
214    fn into_scenario_file(self) -> ScenarioFile {
215        let signal_type = self.signal_type.unwrap_or_else(|| {
216            if self.distribution.is_some() {
217                if self.quantiles.is_some() {
218                    "summary".to_string()
219                } else {
220                    "histogram".to_string()
221                }
222            } else if self.log_generator.is_some() {
223                "logs".to_string()
224            } else {
225                "metrics".to_string()
226            }
227        });
228
229        let entry = Entry {
230            id: self.id,
231            signal_type,
232            name: self.name,
233            rate: self.rate,
234            duration: self.duration,
235            generator: self.generator,
236            log_generator: self.log_generator,
237            labels: self.labels,
238            dynamic_labels: self.dynamic_labels,
239            encoder: self.encoder,
240            sink: self.sink,
241            jitter: self.jitter,
242            jitter_seed: self.jitter_seed,
243            gaps: self.gaps,
244            bursts: self.bursts,
245            cardinality_spikes: self.cardinality_spikes,
246            phase_offset: self.phase_offset,
247            clock_group: self.clock_group,
248            after: self.after,
249            while_clause: self.while_clause,
250            delay_clause: self.delay_clause,
251            pack: self.pack,
252            overrides: self.overrides,
253            distribution: self.distribution,
254            buckets: self.buckets,
255            quantiles: self.quantiles,
256            observations_per_tick: self.observations_per_tick,
257            mean_shift_per_sec: self.mean_shift_per_sec,
258            seed: self.seed,
259            on_sink_error: self.on_sink_error,
260        };
261
262        // Flat-form files deliberately do NOT expose the top-level metadata
263        // fields (`scenario_name` / `category` / `description`). The shorthand
264        // is the terse single-signal authoring shape; metadata belongs on the
265        // canonical `scenarios:` form consumed by the CLI catalog probe.
266        ScenarioFile {
267            version: self.version,
268            scenario_name: None,
269            category: None,
270            description: None,
271            defaults: None,
272            scenarios: vec![entry],
273        }
274    }
275}
276
277// ---------------------------------------------------------------------------
278// Main parser
279// ---------------------------------------------------------------------------
280
281/// Parse a YAML string as a v2 scenario file.
282///
283/// Performs deserialization followed by structural validation:
284///
285/// 1. Version must be exactly `2`.
286/// 2. Single-signal shorthand (no `scenarios:` key) is promoted to a
287///    one-entry file.
288/// 3. Entry `id` values must be unique and match `[a-zA-Z_][a-zA-Z0-9_]*`.
289/// 4. `signal_type` must be one of `metrics`, `logs`, `histogram`, `summary`.
290/// 5. Each entry has either `generator`/`distribution` or `pack`, not both.
291/// 6. Cross-generator mutual exclusion: each signal type may only carry its
292///    expected generator field (`generator` for metrics, `log_generator` for
293///    logs, `distribution` for histogram/summary). The other fields must be
294///    absent.
295/// 7. Pack entries must have `signal_type: metrics`.
296/// 8. Inline (non-pack) entries must have `name`.
297///
298/// Note: `after.ref` references are not resolved during parsing. Reference
299/// resolution, threshold validation, and timing computation happen during
300/// compilation (see the `after` compiler).
301///
302/// Note: `after.op` is deserialized as an [`AfterOp`](super::AfterOp) enum.
303/// Invalid operator values (anything other than `"<"` or `">"`) are rejected
304/// by serde during deserialization.
305///
306/// # Errors
307///
308/// Returns [`ParseError`] describing the first validation failure found.
309pub fn parse(yaml: &str) -> Result<ScenarioFile, ParseError> {
310    let file = deserialize(yaml)?;
311
312    if file.version != 2 {
313        return Err(ParseError::InvalidVersion(file.version));
314    }
315
316    validate_entries(&file.scenarios)?;
317    Ok(file)
318}
319
320/// Determine the file shape and deserialize accordingly.
321///
322/// Instead of trying canonical parsing and falling back to flat on failure (which
323/// produces confusing errors when a canonical file has a structural mistake), we
324/// peek for the `scenarios` key first. If present, we parse as canonical. If
325/// absent, we parse as flat shorthand. No fallback.
326fn deserialize(yaml: &str) -> Result<ScenarioFile, ParseError> {
327    /// Minimal probe to detect whether the YAML contains a `scenarios` key.
328    /// Intentionally does NOT use `deny_unknown_fields`.
329    #[derive(serde::Deserialize)]
330    struct ShapeProbe {
331        scenarios: Option<serde_yaml_ng::Value>,
332    }
333
334    let probe: ShapeProbe = serde_yaml_ng::from_str(yaml)?;
335
336    if probe.scenarios.is_some() {
337        // Canonical format: top-level `scenarios` array.
338        let file: ScenarioFile = serde_yaml_ng::from_str(yaml)?;
339        Ok(file)
340    } else {
341        // Flat single-signal shorthand: no `scenarios` key.
342        let flat: FlatFile = serde_yaml_ng::from_str(yaml)?;
343        Ok(flat.into_scenario_file())
344    }
345}
346
347// ---------------------------------------------------------------------------
348// Validation helpers
349// ---------------------------------------------------------------------------
350
351/// Validate all entries in a parsed scenario file.
352fn validate_entries(entries: &[Entry]) -> Result<(), ParseError> {
353    let mut seen_ids = HashSet::new();
354
355    for (index, entry) in entries.iter().enumerate() {
356        // Validate id format and uniqueness.
357        if let Some(ref id) = entry.id {
358            if !is_valid_id(id) {
359                return Err(ParseError::InvalidId(id.clone()));
360            }
361            if !seen_ids.insert(id.clone()) {
362                return Err(ParseError::DuplicateId(id.clone()));
363            }
364        }
365
366        // Validate signal_type.
367        if !VALID_SIGNAL_TYPES.contains(&entry.signal_type.as_str()) {
368            return Err(ParseError::InvalidSignalType {
369                index,
370                signal_type: entry.signal_type.clone(),
371            });
372        }
373
374        // Validate generator/pack mutual exclusion.
375        let has_generator = entry.generator.is_some();
376        let has_log_generator = entry.log_generator.is_some();
377        let has_pack = entry.pack.is_some();
378        let has_distribution = entry.distribution.is_some();
379        let is_distribution_type = DISTRIBUTION_SIGNAL_TYPES.contains(&entry.signal_type.as_str());
380        let is_logs = entry.signal_type == "logs";
381
382        if (has_generator || has_log_generator || has_distribution) && has_pack {
383            return Err(ParseError::GeneratorAndPack { index });
384        }
385
386        // For non-pack entries, validate the correct generator variant is present.
387        if !has_pack {
388            if is_distribution_type {
389                if !has_distribution {
390                    return Err(ParseError::MissingGeneratorOrPack { index });
391                }
392            } else if is_logs {
393                if !has_log_generator {
394                    return Err(ParseError::MissingGeneratorOrPack { index });
395                }
396            } else if !has_generator {
397                return Err(ParseError::MissingGeneratorOrPack { index });
398            }
399        }
400
401        // Cross-generator mutual exclusion: ensure only the expected generator
402        // field is set for each signal_type. The wrong fields must be absent.
403        validate_no_unexpected_generator_fields(entry, index)?;
404
405        // Pack entries must be metrics.
406        if has_pack && entry.signal_type != "metrics" {
407            return Err(ParseError::PackNotMetrics { index });
408        }
409
410        // Inline (non-pack) entries must have name.
411        if !has_pack && entry.name.is_none() {
412            return Err(ParseError::MissingName { index });
413        }
414    }
415
416    Ok(())
417}
418
419/// Ensure that an entry does not carry generator fields incompatible with its
420/// `signal_type`.
421///
422/// - `metrics`: allows `generator`, forbids `log_generator` and `distribution`
423/// - `logs`: allows `log_generator`, forbids `generator` and `distribution`
424/// - `histogram`/`summary`: allows `distribution`, forbids `generator` and `log_generator`
425/// - `pack` (any signal_type with `pack`): forbids all three generator fields
426///   (already checked upstream, but pack entries also pass through here safely
427///   since they must be `metrics` and having no extra fields is fine)
428fn validate_no_unexpected_generator_fields(entry: &Entry, index: usize) -> Result<(), ParseError> {
429    let st = entry.signal_type.as_str();
430
431    // Build list of fields that must NOT be present for this signal_type.
432    let forbidden: &[(&str, bool)] = match st {
433        "metrics" => &[
434            ("log_generator", entry.log_generator.is_some()),
435            ("distribution", entry.distribution.is_some()),
436        ],
437        "logs" => &[
438            ("generator", entry.generator.is_some()),
439            ("distribution", entry.distribution.is_some()),
440        ],
441        "histogram" | "summary" => &[
442            ("generator", entry.generator.is_some()),
443            ("log_generator", entry.log_generator.is_some()),
444        ],
445        // Pack-only or unknown signal_type (caught by earlier validation) —
446        // all three generator fields should be absent.
447        _ => &[
448            ("generator", entry.generator.is_some()),
449            ("log_generator", entry.log_generator.is_some()),
450            ("distribution", entry.distribution.is_some()),
451        ],
452    };
453
454    for &(field, present) in forbidden {
455        if present {
456            return Err(ParseError::UnexpectedField {
457                index,
458                signal_type: entry.signal_type.clone(),
459                field: field.to_string(),
460            });
461        }
462    }
463
464    Ok(())
465}
466
/// Report whether `id` matches `[a-zA-Z_][a-zA-Z0-9_]*`.
///
/// The empty string is rejected. The check works on bytes: a non-ASCII
/// character is encoded only with bytes that fail the ASCII predicates, so it
/// is rejected wherever it appears.
fn is_valid_id(id: &str) -> bool {
    match id.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            let head_ok = head.is_ascii_alphabetic() || *head == b'_';
            head_ok
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'_')
        }
    }
}
476
477// ---------------------------------------------------------------------------
478// Tests
479// ---------------------------------------------------------------------------
480
481#[cfg(test)]
482mod tests {
483    use super::super::{AfterClause, AfterOp, Defaults};
484    use super::*;
485
486    // ======================================================================
487    // Valid parse cases
488    // ======================================================================
489
490    #[test]
491    fn multi_scenario_with_three_entries() {
492        let yaml = r#"
493version: 2
494scenarios:
495  - signal_type: metrics
496    name: cpu_usage
497    rate: 1
498    generator:
499      type: sine
500      amplitude: 50
501      period_secs: 60
502      offset: 50
503
504  - signal_type: logs
505    name: syslog
506    rate: 5
507    log_generator:
508      type: template
509      templates:
510        - message: "host={hostname} cpu={value}"
511          field_pools:
512            hostname: ["rtr-01", "rtr-02"]
513            value: ["50", "90"]
514      seed: 42
515
516  - signal_type: metrics
517    pack: telegraf_snmp_interface
518    rate: 1
519    labels:
520      device: rtr-01
521"#;
522
523        let file = parse(yaml).expect("must parse valid multi-scenario file");
524        assert_eq!(file.version, 2);
525        assert_eq!(file.scenarios.len(), 3);
526        assert_eq!(file.scenarios[0].signal_type, "metrics");
527        assert_eq!(file.scenarios[0].name.as_deref(), Some("cpu_usage"));
528        assert_eq!(file.scenarios[1].signal_type, "logs");
529        assert_eq!(
530            file.scenarios[2].pack.as_deref(),
531            Some("telegraf_snmp_interface")
532        );
533    }
534
535    #[test]
536    fn single_signal_shorthand_inline() {
537        let yaml = r#"
538version: 2
539name: cpu_usage
540signal_type: metrics
541rate: 1
542duration: 30s
543generator:
544  type: sine
545  amplitude: 50
546  period_secs: 60
547  offset: 50
548"#;
549
550        let file = parse(yaml).expect("must parse single-signal shorthand");
551        assert_eq!(file.version, 2);
552        assert!(file.defaults.is_none());
553        assert_eq!(file.scenarios.len(), 1);
554
555        let entry = &file.scenarios[0];
556        assert_eq!(entry.signal_type, "metrics");
557        assert_eq!(entry.name.as_deref(), Some("cpu_usage"));
558        assert!(entry.generator.is_some());
559        assert_eq!(entry.duration.as_deref(), Some("30s"));
560    }
561
562    #[test]
563    fn single_signal_shorthand_pack() {
564        let yaml = r#"
565version: 2
566pack: telegraf_snmp_interface
567rate: 1
568duration: 10s
569labels:
570  device: rtr-01
571"#;
572
573        let file = parse(yaml).expect("must parse pack shorthand");
574        assert_eq!(file.version, 2);
575        assert_eq!(file.scenarios.len(), 1);
576
577        let entry = &file.scenarios[0];
578        assert_eq!(entry.signal_type, "metrics");
579        assert_eq!(entry.pack.as_deref(), Some("telegraf_snmp_interface"));
580        let labels = entry.labels.as_ref().expect("must have labels");
581        assert_eq!(labels.get("device").map(String::as_str), Some("rtr-01"));
582    }
583
584    /// Locks the invariant that the flat shorthand shape never produces
585    /// top-level `scenario_name`, `category`, or `description` on the
586    /// resulting [`ScenarioFile`]. Those fields are only reachable via the
587    /// canonical `scenarios:` form (see `metadata_unknown_field_is_rejected_
588    /// by_deny_unknown_fields` and friends for the canonical-side tests).
589    #[test]
590    fn flat_shorthand_never_carries_top_level_metadata() {
591        let yaml = r#"
592version: 2
593name: cpu_usage
594signal_type: metrics
595rate: 1
596generator:
597  type: sine
598  amplitude: 50
599  period_secs: 60
600  offset: 50
601"#;
602
603        let file = parse(yaml).expect("must parse flat shorthand");
604        assert!(
605            file.scenario_name.is_none(),
606            "flat shorthand must not carry scenario_name; got {:?}",
607            file.scenario_name
608        );
609        assert!(
610            file.category.is_none(),
611            "flat shorthand must not carry category; got {:?}",
612            file.category
613        );
614        assert!(
615            file.description.is_none(),
616            "flat shorthand must not carry description; got {:?}",
617            file.description
618        );
619    }
620
621    #[test]
622    fn entry_with_after_clause() {
623        let yaml = r#"
624version: 2
625scenarios:
626  - signal_type: metrics
627    name: cpu_usage
628    id: cpu_signal
629    rate: 1
630    generator:
631      type: sine
632      amplitude: 50
633      period_secs: 60
634      offset: 50
635
636  - signal_type: metrics
637    name: alert_metric
638    rate: 1
639    generator:
640      type: constant
641      value: 1.0
642    after:
643      ref: cpu_signal
644      op: ">"
645      value: 90.0
646"#;
647
648        let file = parse(yaml).expect("must parse after clause");
649        assert_eq!(file.scenarios.len(), 2);
650
651        let after = file.scenarios[1]
652            .after
653            .as_ref()
654            .expect("second entry must have after clause");
655        assert_eq!(after.ref_id, "cpu_signal");
656        assert_eq!(after.op, AfterOp::GreaterThan);
657        assert!((after.value - 90.0).abs() < f64::EPSILON);
658        assert!(after.delay.is_none());
659    }
660
661    #[test]
662    fn entry_with_after_clause_and_delay() {
663        let yaml = r#"
664version: 2
665scenarios:
666  - signal_type: metrics
667    name: source
668    id: src
669    rate: 1
670    generator:
671      type: constant
672      value: 100.0
673
674  - signal_type: metrics
675    name: dependent
676    rate: 1
677    generator:
678      type: constant
679      value: 1.0
680    after:
681      ref: src
682      op: "<"
683      value: 50.0
684      delay: "5s"
685"#;
686
687        let file = parse(yaml).expect("must parse after with delay");
688        let after = file.scenarios[1]
689            .after
690            .as_ref()
691            .expect("must have after clause");
692        assert_eq!(after.op, AfterOp::LessThan);
693        assert_eq!(after.delay.as_deref(), Some("5s"));
694    }
695
696    #[test]
697    fn histogram_entry_with_distribution_and_buckets() {
698        let yaml = r#"
699version: 2
700scenarios:
701  - signal_type: histogram
702    name: http_request_duration_seconds
703    rate: 1
704    distribution:
705      type: exponential
706      rate: 10.0
707    buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
708    observations_per_tick: 100
709    seed: 42
710"#;
711
712        let file = parse(yaml).expect("must parse histogram entry");
713        assert_eq!(file.scenarios.len(), 1);
714
715        let entry = &file.scenarios[0];
716        assert_eq!(entry.signal_type, "histogram");
717        assert!(entry.distribution.is_some());
718        let buckets = entry.buckets.as_ref().expect("must have buckets");
719        assert_eq!(buckets.len(), 11);
720        assert_eq!(entry.observations_per_tick, Some(100));
721        assert_eq!(entry.seed, Some(42));
722    }
723
724    #[test]
725    fn summary_entry_with_distribution_and_quantiles() {
726        let yaml = r#"
727version: 2
728scenarios:
729  - signal_type: summary
730    name: rpc_duration_seconds
731    rate: 1
732    distribution:
733      type: normal
734      mean: 0.1
735      stddev: 0.02
736    quantiles: [0.5, 0.9, 0.99]
737    observations_per_tick: 200
738    seed: 99
739"#;
740
741        let file = parse(yaml).expect("must parse summary entry");
742        assert_eq!(file.scenarios.len(), 1);
743
744        let entry = &file.scenarios[0];
745        assert_eq!(entry.signal_type, "summary");
746        assert!(entry.distribution.is_some());
747        let quantiles = entry.quantiles.as_ref().expect("must have quantiles");
748        assert_eq!(quantiles.len(), 3);
749    }
750
751    #[test]
752    fn file_with_defaults_block() {
753        let yaml = r#"
754version: 2
755defaults:
756  rate: 10
757  duration: "60s"
758  encoder:
759    type: prometheus_text
760  sink:
761    type: stdout
762  labels:
763    env: staging
764scenarios:
765  - signal_type: metrics
766    name: cpu
767    generator:
768      type: constant
769      value: 50.0
770"#;
771
772        let file = parse(yaml).expect("must parse file with defaults");
773        let defaults = file.defaults.as_ref().expect("must have defaults");
774        assert!((defaults.rate.expect("must have rate") - 10.0).abs() < f64::EPSILON);
775        assert_eq!(defaults.duration.as_deref(), Some("60s"));
776        assert!(defaults.encoder.is_some());
777        assert!(defaults.sink.is_some());
778        let labels = defaults.labels.as_ref().expect("must have labels");
779        assert_eq!(labels.get("env").map(String::as_str), Some("staging"));
780    }
781
782    #[test]
783    fn entry_with_all_optional_fields() {
784        let yaml = r#"
785version: 2
786scenarios:
787  - signal_type: metrics
788    id: full_entry
789    name: fully_loaded_metric
790    rate: 5
791    duration: "120s"
792    generator:
793      type: sine
794      amplitude: 10
795      period_secs: 30
796      offset: 50
797    labels:
798      job: test
799      env: dev
800    dynamic_labels:
801      - key: hostname
802        prefix: "host-"
803        cardinality: 10
804    encoder:
805      type: prometheus_text
806    sink:
807      type: stdout
808    jitter: 2.5
809    jitter_seed: 12345
810    gaps:
811      every: "2m"
812      for: "20s"
813    bursts:
814      every: "10s"
815      for: "2s"
816      multiplier: 3.0
817    cardinality_spikes:
818      - label: pod_name
819        every: "2m"
820        for: "30s"
821        cardinality: 500
822    phase_offset: "5s"
823    clock_group: group_a
824"#;
825
826        let file = parse(yaml).expect("must parse entry with all optional fields");
827        let entry = &file.scenarios[0];
828
829        assert_eq!(entry.id.as_deref(), Some("full_entry"));
830        assert_eq!(entry.name.as_deref(), Some("fully_loaded_metric"));
831        assert!(entry.rate.is_some());
832        assert!(entry.duration.is_some());
833        assert!(entry.generator.is_some());
834        assert!(entry.labels.is_some());
835        assert!(entry.dynamic_labels.is_some());
836        assert!(entry.encoder.is_some());
837        assert!(entry.sink.is_some());
838        assert!(entry.jitter.is_some());
839        assert!(entry.jitter_seed.is_some());
840        assert!(entry.gaps.is_some());
841        assert!(entry.bursts.is_some());
842        assert!(entry.cardinality_spikes.is_some());
843        assert_eq!(entry.phase_offset.as_deref(), Some("5s"));
844        assert_eq!(entry.clock_group.as_deref(), Some("group_a"));
845    }
846
847    // ======================================================================
848    // Invalid cases
849    // ======================================================================
850
851    #[rustfmt::skip]
852    #[rstest::rstest]
853    #[case::version_1(r#"
854version: 1
855scenarios:
856  - signal_type: metrics
857    name: cpu
858    generator:
859      type: constant
860      value: 1.0
861"#, 1)]
862    #[case::version_0(r#"
863version: 0
864scenarios:
865  - signal_type: metrics
866    name: cpu
867    generator:
868      type: constant
869      value: 1.0
870"#, 0)]
871    fn unsupported_version_returns_invalid_version(#[case] yaml: &str, #[case] expected: u32) {
872        let err = parse(yaml).expect_err("unsupported version must fail");
873        assert!(
874            matches!(err, ParseError::InvalidVersion(v) if v == expected),
875            "expected InvalidVersion({expected}), got: {err}"
876        );
877    }
878
879    #[test]
880    fn missing_version_returns_yaml_error() {
881        let yaml = r#"
882scenarios:
883  - signal_type: metrics
884    name: cpu
885    generator:
886      type: constant
887      value: 1.0
888"#;
889
890        let err = parse(yaml).expect_err("missing version must fail");
891        assert!(
892            matches!(err, ParseError::Yaml(_)),
893            "expected Yaml error, got: {err}"
894        );
895    }
896
897    #[test]
898    fn duplicate_ids_returns_error() {
899        let yaml = r#"
900version: 2
901scenarios:
902  - signal_type: metrics
903    id: same_id
904    name: metric_a
905    generator:
906      type: constant
907      value: 1.0
908  - signal_type: metrics
909    id: same_id
910    name: metric_b
911    generator:
912      type: constant
913      value: 2.0
914"#;
915
916        let err = parse(yaml).expect_err("duplicate ids must fail");
917        assert!(
918            matches!(err, ParseError::DuplicateId(ref id) if id == "same_id"),
919            "expected DuplicateId('same_id'), got: {err}"
920        );
921    }
922
923    #[test]
924    fn invalid_signal_type_returns_error() {
925        let yaml = r#"
926version: 2
927scenarios:
928  - signal_type: traces
929    name: some_trace
930    generator:
931      type: constant
932      value: 1.0
933"#;
934
935        let err = parse(yaml).expect_err("invalid signal_type must fail");
936        assert!(
937            matches!(err, ParseError::InvalidSignalType { index: 0, ref signal_type } if signal_type == "traces"),
938            "expected InvalidSignalType at index 0, got: {err}"
939        );
940    }
941
942    #[test]
943    fn both_generator_and_pack_returns_error() {
944        let yaml = r#"
945version: 2
946scenarios:
947  - signal_type: metrics
948    name: mixed
949    generator:
950      type: constant
951      value: 1.0
952    pack: some_pack
953"#;
954
955        let err = parse(yaml).expect_err("generator + pack must fail");
956        assert!(
957            matches!(err, ParseError::GeneratorAndPack { index: 0 }),
958            "expected GeneratorAndPack at index 0, got: {err}"
959        );
960    }
961
962    #[test]
963    fn neither_generator_nor_pack_returns_error() {
964        let yaml = r#"
965version: 2
966scenarios:
967  - signal_type: metrics
968    name: bare_entry
969"#;
970
971        let err = parse(yaml).expect_err("missing generator/pack must fail");
972        assert!(
973            matches!(err, ParseError::MissingGeneratorOrPack { index: 0 }),
974            "expected MissingGeneratorOrPack at index 0, got: {err}"
975        );
976    }
977
978    #[test]
979    fn pack_with_logs_signal_type_returns_error() {
980        let yaml = r#"
981version: 2
982scenarios:
983  - signal_type: logs
984    pack: some_log_pack
985"#;
986
987        let err = parse(yaml).expect_err("pack + logs must fail");
988        assert!(
989            matches!(err, ParseError::PackNotMetrics { index: 0 }),
990            "expected PackNotMetrics at index 0, got: {err}"
991        );
992    }
993
994    #[test]
995    fn logs_without_log_generator_returns_error() {
996        let yaml = r#"
997version: 2
998scenarios:
999  - signal_type: logs
1000    name: bare_log
1001"#;
1002
1003        let err = parse(yaml).expect_err("logs without log_generator must fail");
1004        assert!(
1005            matches!(err, ParseError::MissingGeneratorOrPack { index: 0 }),
1006            "expected MissingGeneratorOrPack at index 0, got: {err}"
1007        );
1008    }
1009
1010    #[test]
1011    fn inline_without_name_returns_error() {
1012        let yaml = r#"
1013version: 2
1014scenarios:
1015  - signal_type: metrics
1016    generator:
1017      type: constant
1018      value: 1.0
1019"#;
1020
1021        let err = parse(yaml).expect_err("inline without name must fail");
1022        assert!(
1023            matches!(err, ParseError::MissingName { index: 0 }),
1024            "expected MissingName at index 0, got: {err}"
1025        );
1026    }
1027
1028    #[rustfmt::skip]
1029    #[rstest::rstest]
1030    #[case::starts_with_digit(r#"
1031version: 2
1032scenarios:
1033  - signal_type: metrics
1034    id: 123abc
1035    name: metric_a
1036    generator:
1037      type: constant
1038      value: 1.0
1039"#, "123abc")]
1040    #[case::contains_dot(r#"
1041version: 2
1042scenarios:
1043  - signal_type: metrics
1044    id: my.id
1045    name: metric_a
1046    generator:
1047      type: constant
1048      value: 1.0
1049"#, "my.id")]
1050    #[case::empty_string(r#"
1051version: 2
1052scenarios:
1053  - signal_type: metrics
1054    id: ""
1055    name: metric_a
1056    generator:
1057      type: constant
1058      value: 1.0
1059"#, "")]
1060    fn invalid_id_returns_invalid_id_error(#[case] yaml: &str, #[case] expected_id: &str) {
1061        let err = parse(yaml).expect_err("invalid id must fail");
1062        assert!(
1063            matches!(err, ParseError::InvalidId(ref id) if id == expected_id),
1064            "expected InvalidId({expected_id:?}), got: {err}"
1065        );
1066    }
1067
1068    #[test]
1069    fn invalid_after_op_returns_yaml_error() {
1070        let yaml = r#"
1071version: 2
1072scenarios:
1073  - signal_type: metrics
1074    name: source
1075    id: src
1076    generator:
1077      type: constant
1078      value: 1.0
1079
1080  - signal_type: metrics
1081    name: dependent
1082    generator:
1083      type: constant
1084      value: 1.0
1085    after:
1086      ref: src
1087      op: "=="
1088      value: 50.0
1089"#;
1090
1091        let err = parse(yaml).expect_err("invalid after op must fail");
1092        assert!(
1093            matches!(err, ParseError::Yaml(_)),
1094            "expected Yaml error for invalid op, got: {err}"
1095        );
1096        let msg = err.to_string();
1097        assert!(
1098            msg.contains("=="),
1099            "error message should mention the invalid op '==', got: {msg}"
1100        );
1101    }
1102
1103    // ======================================================================
1104    // Version detection tests
1105    // ======================================================================
1106
1107    #[rustfmt::skip]
1108    #[rstest::rstest]
1109    #[case::v2("version: 2\nscenarios: []",  Some(2))]
1110    #[case::v1_explicit("version: 1\nname: test", Some(1))]
1111    #[case::absent("name: cpu_usage\nrate: 1",    None)]
1112    // Malformed YAML must surface as `None` rather than panicking — callers
1113    // rely on `detect_version` as a lightweight pre-flight probe.
1114    #[case::unparseable("not valid yaml {",       None)]
1115    fn detect_version_cases(#[case] yaml: &str, #[case] expected: Option<u32>) {
1116        assert_eq!(detect_version(yaml), expected);
1117    }
1118
1119    // ======================================================================
1120    // ID validation unit tests
1121    // ======================================================================
1122
1123    #[rustfmt::skip]
1124    #[rstest::rstest]
1125    #[case::simple_snake("cpu_signal",            true)]
1126    #[case::leading_underscore("_private",        true)]
1127    #[case::single_upper("A",                     true)]
1128    #[case::alphanumeric("a1b2c3",                true)]
1129    #[case::double_underscore("__double_underscore__", true)]
1130    #[case::empty("",                             false)]
1131    #[case::starts_with_digit("123abc",           false)]
1132    #[case::contains_dot("my.id",                 false)]
1133    #[case::contains_hyphen("has-hyphen",         false)]
1134    #[case::contains_space("has space",           false)]
1135    #[case::single_digit("0",                     false)]
1136    fn id_validation_cases(#[case] id: &str, #[case] expected: bool) {
1137        assert_eq!(is_valid_id(id), expected, "is_valid_id({id:?})");
1138    }
1139
1140    // ======================================================================
1141    // Error display tests
1142    // ======================================================================
1143
1144    #[test]
1145    fn error_display_messages() {
1146        let err = ParseError::InvalidVersion(3);
1147        assert_eq!(err.to_string(), "version must be 2, got 3");
1148
1149        let err = ParseError::DuplicateId("foo".to_string());
1150        assert_eq!(err.to_string(), "duplicate entry id: 'foo'");
1151
1152        let err = ParseError::InvalidSignalType {
1153            index: 2,
1154            signal_type: "traces".to_string(),
1155        };
1156        assert!(err.to_string().contains("entry 2"));
1157        assert!(err.to_string().contains("traces"));
1158
1159        let err = ParseError::GeneratorAndPack { index: 0 };
1160        assert!(err.to_string().contains("entry 0"));
1161        assert!(err.to_string().contains("not both"));
1162
1163        let err = ParseError::MissingName { index: 1 };
1164        assert!(err.to_string().contains("entry 1"));
1165        assert!(err.to_string().contains("name"));
1166
1167        let err = ParseError::PackNotMetrics { index: 0 };
1168        assert!(err.to_string().contains("metrics"));
1169
1170        let err = ParseError::InvalidId("bad.id".to_string());
1171        assert!(err.to_string().contains("bad.id"));
1172    }
1173
1174    // ======================================================================
1175    // Contract tests
1176    // ======================================================================
1177
1178    #[test]
1179    fn error_type_is_send_and_sync() {
1180        fn assert_send_sync<T: Send + Sync>() {}
1181        assert_send_sync::<ParseError>();
1182    }
1183
1184    #[test]
1185    fn v2_scenario_file_is_send_and_sync() {
1186        fn assert_send_sync<T: Send + Sync>() {}
1187        assert_send_sync::<ScenarioFile>();
1188        assert_send_sync::<Defaults>();
1189        assert_send_sync::<Entry>();
1190        assert_send_sync::<AfterClause>();
1191    }
1192
1193    // ======================================================================
1194    // Histogram without distribution fails
1195    // ======================================================================
1196
1197    #[test]
1198    fn histogram_without_distribution_fails() {
1199        let yaml = r#"
1200version: 2
1201scenarios:
1202  - signal_type: histogram
1203    name: bad_histogram
1204    buckets: [0.1, 0.5, 1.0]
1205"#;
1206
1207        let err = parse(yaml).expect_err("histogram without distribution must fail");
1208        assert!(
1209            matches!(err, ParseError::MissingGeneratorOrPack { index: 0 }),
1210            "expected MissingGeneratorOrPack, got: {err}"
1211        );
1212    }
1213
1214    // ======================================================================
1215    // Pack with overrides
1216    // ======================================================================
1217
1218    #[test]
1219    fn pack_entry_with_overrides() {
1220        let yaml = r#"
1221version: 2
1222scenarios:
1223  - signal_type: metrics
1224    pack: telegraf_snmp_interface
1225    rate: 1
1226    overrides:
1227      ifOperStatus:
1228        generator:
1229          type: constant
1230          value: 0.0
1231        labels:
1232          alert: down
1233"#;
1234
1235        let file = parse(yaml).expect("must parse pack with overrides");
1236        let entry = &file.scenarios[0];
1237        let overrides = entry.overrides.as_ref().expect("must have overrides");
1238        assert!(overrides.contains_key("ifOperStatus"));
1239    }
1240
1241    // ======================================================================
1242    // Cross-generator mutual exclusion tests
1243    // ======================================================================
1244
1245    #[rustfmt::skip]
1246    #[rstest::rstest]
1247    #[case::metrics_with_log_generator(r#"
1248version: 2
1249scenarios:
1250  - signal_type: metrics
1251    name: cpu
1252    generator:
1253      type: constant
1254      value: 1.0
1255    log_generator:
1256      type: template
1257      templates:
1258        - message: "hello"
1259      seed: 1
1260"#, "metrics", "log_generator")]
1261    #[case::metrics_with_distribution(r#"
1262version: 2
1263scenarios:
1264  - signal_type: metrics
1265    name: cpu
1266    generator:
1267      type: constant
1268      value: 1.0
1269    distribution:
1270      type: normal
1271      mean: 0.1
1272      stddev: 0.02
1273"#, "metrics", "distribution")]
1274    #[case::logs_with_generator(r#"
1275version: 2
1276scenarios:
1277  - signal_type: logs
1278    name: syslog
1279    log_generator:
1280      type: template
1281      templates:
1282        - message: "hello"
1283      seed: 1
1284    generator:
1285      type: constant
1286      value: 1.0
1287"#, "logs", "generator")]
1288    #[case::logs_with_distribution(r#"
1289version: 2
1290scenarios:
1291  - signal_type: logs
1292    name: syslog
1293    log_generator:
1294      type: template
1295      templates:
1296        - message: "hello"
1297      seed: 1
1298    distribution:
1299      type: normal
1300      mean: 0.1
1301      stddev: 0.02
1302"#, "logs", "distribution")]
1303    #[case::histogram_with_generator(r#"
1304version: 2
1305scenarios:
1306  - signal_type: histogram
1307    name: request_duration
1308    distribution:
1309      type: exponential
1310      rate: 10.0
1311    buckets: [0.1, 0.5, 1.0]
1312    generator:
1313      type: constant
1314      value: 1.0
1315"#, "histogram", "generator")]
1316    #[case::histogram_with_log_generator(r#"
1317version: 2
1318scenarios:
1319  - signal_type: histogram
1320    name: request_duration
1321    distribution:
1322      type: exponential
1323      rate: 10.0
1324    buckets: [0.1, 0.5, 1.0]
1325    log_generator:
1326      type: template
1327      templates:
1328        - message: "hello"
1329      seed: 1
1330"#, "histogram", "log_generator")]
1331    #[case::summary_with_generator(r#"
1332version: 2
1333scenarios:
1334  - signal_type: summary
1335    name: rpc_duration
1336    distribution:
1337      type: normal
1338      mean: 0.1
1339      stddev: 0.02
1340    quantiles: [0.5, 0.9, 0.99]
1341    generator:
1342      type: constant
1343      value: 1.0
1344"#, "summary", "generator")]
1345    fn mismatched_generator_family_returns_unexpected_field(
1346        #[case] yaml: &str,
1347        #[case] expected_signal_type: &str,
1348        #[case] expected_field: &str,
1349    ) {
1350        let err = parse(yaml).expect_err("mismatched generator family must fail");
1351        assert!(
1352            matches!(
1353                err,
1354                ParseError::UnexpectedField { index: 0, ref signal_type, ref field }
1355                if signal_type == expected_signal_type && field == expected_field
1356            ),
1357            "expected UnexpectedField for {expected_field} on {expected_signal_type}, got: {err}"
1358        );
1359    }
1360
1361    // ======================================================================
1362    // Fallback parse error clarity
1363    // ======================================================================
1364
1365    #[test]
1366    fn malformed_canonical_file_does_not_produce_misleading_error() {
1367        // A canonical file (has `scenarios:`) with a structural error (unknown
1368        // field `bogus` inside an entry). The old fallback approach would try
1369        // flat parsing and produce a confusing "unknown field `scenarios`" error.
1370        // With the ShapeProbe approach, we should get a clear error about the
1371        // actual problem inside the canonical parse path.
1372        let yaml = r#"
1373version: 2
1374scenarios:
1375  - signal_type: metrics
1376    name: cpu
1377    generator:
1378      type: constant
1379      value: 1.0
1380    bogus: unexpected_field
1381"#;
1382
1383        let err = parse(yaml).expect_err("malformed canonical file must fail");
1384        let msg = err.to_string();
1385        // The error should mention the actual problem (unknown field `bogus`),
1386        // not the misleading "unknown field `scenarios`".
1387        assert!(
1388            !msg.contains("unknown field `scenarios`"),
1389            "error must not mention 'unknown field scenarios', got: {msg}"
1390        );
1391        assert!(
1392            msg.contains("bogus"),
1393            "error should reference the actual unknown field 'bogus', got: {msg}"
1394        );
1395    }
1396
1397    #[test]
1398    fn unexpected_field_error_display_message() {
1399        let err = ParseError::UnexpectedField {
1400            index: 1,
1401            signal_type: "metrics".to_string(),
1402            field: "log_generator".to_string(),
1403        };
1404        assert_eq!(
1405            err.to_string(),
1406            "entry 1: signal_type 'metrics' must not have 'log_generator' field"
1407        );
1408    }
1409
1410    // ======================================================================
1411    // Edge cases (NOTE 2)
1412    // ======================================================================
1413
1414    #[test]
1415    fn empty_scenarios_list_parses_successfully() {
1416        // An empty scenarios array is syntactically valid at the parse level.
1417        // Semantic rejection (no runnable entries) is deferred to compilation.
1418        let yaml = r#"
1419version: 2
1420scenarios: []
1421"#;
1422
1423        let file = parse(yaml).expect("empty scenarios list should parse");
1424        assert_eq!(file.version, 2);
1425        assert!(file.scenarios.is_empty());
1426    }
1427
1428    #[test]
1429    fn deny_unknown_fields_rejects_typo() {
1430        // A misspelling of `signal_type` as `signal_typ` must produce a YAML
1431        // parse error (via deny_unknown_fields), not silently default to None.
1432        let yaml = r#"
1433version: 2
1434scenarios:
1435  - signal_typ: metrics
1436    name: cpu
1437    generator:
1438      type: constant
1439      value: 1.0
1440"#;
1441
1442        let err = parse(yaml).expect_err("typo in field name must fail");
1443        assert!(
1444            matches!(err, ParseError::Yaml(_)),
1445            "expected Yaml error for unknown field, got: {err}"
1446        );
1447        let msg = err.to_string();
1448        assert!(
1449            msg.contains("signal_typ"),
1450            "error should mention the typo 'signal_typ', got: {msg}"
1451        );
1452    }
1453
1454    // ======================================================================
1455    // Shorthand signal_type inference tests
1456    //
1457    // When `signal_type` is omitted from a flat (single-signal) file,
1458    // `FlatFile::into_scenario_file` infers it from which generator-family
1459    // field is present:
1460    //   - `distribution` + `quantiles`   → "summary"
1461    //   - `distribution` (no quantiles)  → "histogram"
1462    //   - `log_generator`                → "logs"
1463    //   - else                           → "metrics"
1464    // ======================================================================
1465
1466    #[test]
1467    fn shorthand_infers_histogram_from_distribution_and_buckets() {
1468        // No explicit `signal_type` — presence of `distribution` without
1469        // `quantiles` must infer `histogram`.
1470        let yaml = r#"
1471version: 2
1472name: http_request_duration_seconds
1473rate: 1
1474distribution:
1475  type: exponential
1476  rate: 10.0
1477buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
1478observations_per_tick: 100
1479seed: 42
1480"#;
1481
1482        let file = parse(yaml).expect("must parse histogram shorthand");
1483        assert_eq!(file.scenarios.len(), 1);
1484        let entry = &file.scenarios[0];
1485        assert_eq!(entry.signal_type, "histogram");
1486        assert_eq!(entry.name.as_deref(), Some("http_request_duration_seconds"));
1487        assert!(entry.distribution.is_some());
1488        assert!(entry.buckets.is_some());
1489        assert!(entry.quantiles.is_none());
1490    }
1491
1492    #[test]
1493    fn shorthand_infers_summary_from_distribution_and_quantiles() {
1494        // No explicit `signal_type` — presence of `distribution` with
1495        // `quantiles` must infer `summary`.
1496        let yaml = r#"
1497version: 2
1498name: rpc_duration_seconds
1499rate: 1
1500distribution:
1501  type: normal
1502  mean: 0.1
1503  stddev: 0.02
1504quantiles: [0.5, 0.9, 0.99]
1505observations_per_tick: 200
1506seed: 99
1507"#;
1508
1509        let file = parse(yaml).expect("must parse summary shorthand");
1510        assert_eq!(file.scenarios.len(), 1);
1511        let entry = &file.scenarios[0];
1512        assert_eq!(entry.signal_type, "summary");
1513        assert!(entry.distribution.is_some());
1514        assert!(entry.quantiles.is_some());
1515    }
1516
1517    #[test]
1518    fn shorthand_infers_logs_from_log_generator() {
1519        // No explicit `signal_type` — presence of `log_generator` must
1520        // infer `logs`.
1521        let yaml = r#"
1522version: 2
1523name: syslog
1524rate: 5
1525log_generator:
1526  type: template
1527  templates:
1528    - message: "host={hostname} value={value}"
1529      field_pools:
1530        hostname: ["rtr-01", "rtr-02"]
1531        value: ["50", "90"]
1532  seed: 42
1533"#;
1534
1535        let file = parse(yaml).expect("must parse logs shorthand");
1536        assert_eq!(file.scenarios.len(), 1);
1537        let entry = &file.scenarios[0];
1538        assert_eq!(entry.signal_type, "logs");
1539        assert_eq!(entry.name.as_deref(), Some("syslog"));
1540        assert!(entry.log_generator.is_some());
1541        assert!(entry.generator.is_none());
1542    }
1543
1544    #[test]
1545    fn shorthand_with_defaults_key_is_rejected() {
1546        // The flat (shorthand) format does not have a `defaults` field.
1547        // Since FlatFile uses deny_unknown_fields, including `defaults:`
1548        // in a flat file must produce a YAML parse error.
1549        let yaml = r#"
1550version: 2
1551name: cpu_usage
1552signal_type: metrics
1553generator:
1554  type: constant
1555  value: 1.0
1556defaults:
1557  rate: 10
1558"#;
1559
1560        let err = parse(yaml).expect_err("defaults in shorthand must fail");
1561        assert!(
1562            matches!(err, ParseError::Yaml(_)),
1563            "expected Yaml error for defaults in shorthand, got: {err}"
1564        );
1565        let msg = err.to_string();
1566        assert!(
1567            msg.contains("defaults"),
1568            "error should mention 'defaults', got: {msg}"
1569        );
1570    }
1571
    // ======================================================================
    // Catalog metadata roundtrip tests
    //
    // `scenario_name`, `category`, and `description` are optional top-level
    // fields on [`ScenarioFile`]. They are metadata consumed by the CLI
    // catalog probe (v1↔v2 parity) and ignored by every compiler phase.
    // The parser must preserve them verbatim.
    // ======================================================================
1579
1580    #[test]
1581    fn metadata_all_fields_present_roundtrip() {
1582        // All three metadata fields at the root are preserved on the parsed
1583        // AST exactly as written in the YAML.
1584        let yaml = r#"
1585version: 2
1586scenario_name: steady-state
1587category: infrastructure
1588description: "Normal oscillating baseline (sine + jitter)"
1589scenarios:
1590  - signal_type: metrics
1591    name: node_cpu_usage_idle_percent
1592    rate: 1
1593    generator:
1594      type: constant
1595      value: 1.0
1596"#;
1597
1598        let file = parse(yaml).expect("must parse file with full metadata");
1599        assert_eq!(file.scenario_name.as_deref(), Some("steady-state"));
1600        assert_eq!(file.category.as_deref(), Some("infrastructure"));
1601        assert_eq!(
1602            file.description.as_deref(),
1603            Some("Normal oscillating baseline (sine + jitter)")
1604        );
1605        // Compiler input remains untouched.
1606        assert_eq!(file.scenarios.len(), 1);
1607        assert_eq!(file.scenarios[0].signal_type, "metrics");
1608    }
1609
1610    #[test]
1611    fn metadata_absent_leaves_fields_none() {
1612        // A v2 file without any metadata fields parses cleanly and the AST
1613        // reports `None` for all three. This is the shape every v2 fixture
1614        // and test file written before PR 8a will continue to produce, so
1615        // existing v2 callers are unaffected by the field additions.
1616        let yaml = r#"
1617version: 2
1618scenarios:
1619  - signal_type: metrics
1620    name: cpu
1621    rate: 1
1622    generator:
1623      type: constant
1624      value: 1.0
1625"#;
1626
1627        let file = parse(yaml).expect("must parse file without metadata");
1628        assert!(file.scenario_name.is_none());
1629        assert!(file.category.is_none());
1630        assert!(file.description.is_none());
1631    }
1632
1633    #[rustfmt::skip]
1634    #[rstest::rstest]
1635    #[case::only_scenario_name(r#"
1636version: 2
1637scenario_name: solo-name
1638scenarios:
1639  - signal_type: metrics
1640    name: cpu
1641    rate: 1
1642    generator:
1643      type: constant
1644      value: 1.0
1645"#, Some("solo-name"), None,                None)]
1646    #[case::only_category(r#"
1647version: 2
1648category: network
1649scenarios:
1650  - signal_type: metrics
1651    name: cpu
1652    rate: 1
1653    generator:
1654      type: constant
1655      value: 1.0
1656"#, None,              Some("network"),     None)]
1657    #[case::only_description(r#"
1658version: 2
1659description: "terse one-liner"
1660scenarios:
1661  - signal_type: metrics
1662    name: cpu
1663    rate: 1
1664    generator:
1665      type: constant
1666      value: 1.0
1667"#, None,              None,                Some("terse one-liner"))]
1668    #[case::name_and_category(r#"
1669version: 2
1670scenario_name: partial
1671category: application
1672scenarios:
1673  - signal_type: metrics
1674    name: cpu
1675    rate: 1
1676    generator:
1677      type: constant
1678      value: 1.0
1679"#, Some("partial"),   Some("application"), None)]
1680    fn metadata_partial_roundtrip(
1681        #[case] yaml: &str,
1682        #[case] expected_name: Option<&str>,
1683        #[case] expected_category: Option<&str>,
1684        #[case] expected_description: Option<&str>,
1685    ) {
1686        let file = parse(yaml).expect("must parse partial-metadata file");
1687        assert_eq!(file.scenario_name.as_deref(), expected_name);
1688        assert_eq!(file.category.as_deref(), expected_category);
1689        assert_eq!(file.description.as_deref(), expected_description);
1690    }
1691
1692    #[test]
1693    fn metadata_unknown_field_is_rejected_by_deny_unknown_fields() {
1694        // `deny_unknown_fields` stays on `ScenarioFile` after the metadata
1695        // additions. A typo on an adjacent metadata key (e.g. `descripton`)
1696        // must still surface as a YAML parse error, not silently default
1697        // to `None`.
1698        let yaml = r#"
1699version: 2
1700scenario_name: typo-test
1701descripton: "misspelled — must be rejected"
1702scenarios:
1703  - signal_type: metrics
1704    name: cpu
1705    rate: 1
1706    generator:
1707      type: constant
1708      value: 1.0
1709"#;
1710
1711        let err = parse(yaml).expect_err("unknown metadata field must fail");
1712        assert!(
1713            matches!(err, ParseError::Yaml(_)),
1714            "expected Yaml error for unknown field, got: {err}"
1715        );
1716        let msg = err.to_string();
1717        assert!(
1718            msg.contains("descripton"),
1719            "error should mention the misspelled field, got: {msg}"
1720        );
1721    }
1722
1723    #[test]
1724    fn metadata_on_entry_is_rejected() {
1725        // Metadata lives at the top level only. Placing `category` inside a
1726        // scenario entry must be rejected by `Entry`'s
1727        // `deny_unknown_fields` — metadata is not per-entry and must not
1728        // silently leak through.
1729        let yaml = r#"
1730version: 2
1731scenarios:
1732  - signal_type: metrics
1733    name: cpu
1734    rate: 1
1735    category: infrastructure
1736    generator:
1737      type: constant
1738      value: 1.0
1739"#;
1740
1741        let err = parse(yaml).expect_err("metadata on entry must fail");
1742        assert!(
1743            matches!(err, ParseError::Yaml(_)),
1744            "expected Yaml error for entry-level metadata, got: {err}"
1745        );
1746        let msg = err.to_string();
1747        assert!(
1748            msg.contains("category"),
1749            "error should mention the misplaced field, got: {msg}"
1750        );
1751    }
1752}