Skip to main content

sonda_core/compiler/
mod.rs

1//! Version 2 scenario format: AST types and parser.
2//!
3//! This module defines the parsed representation of a v2 scenario file before
4//! any compilation (defaults resolution, pack expansion, or after-clause
5//! evaluation). The [`ScenarioFile`] is a direct, faithful representation of
6//! the YAML on disk.
7//!
8//! All types use `deny_unknown_fields` to reject YAML typos at parse time.
9//! This is a deliberate strictness choice — adding new schema fields requires
10//! updating these types.
11//!
12//! # Submodules
13//!
14//! - [`env_interpolate`] — Phase 0: pre-parse `${VAR}` / `${VAR:-default}`
15//!   substitution against the process environment.
16//! - [`parse`] — YAML deserialization, schema validation, and version detection.
17//! - [`normalize`] — `defaults:` resolution and entry-level normalization.
18//! - [`expand`] — pack expansion inside `scenarios:` (Phase 3).
19//! - [`timing`] — pure threshold-crossing math for every supported generator.
20//! - [`compile_after`] — `after` clause resolution, dependency graph, and
21//!   clock-group assignment (Phases 4 and 5).
22//! - [`prepare`] — translation from [`compile_after::CompiledFile`] into the
23//!   runtime's `Vec<ScenarioEntry>` input shape (Phase 6).
24
25#[cfg(feature = "config")]
26pub mod env_interpolate;
27
28#[cfg(feature = "config")]
29pub mod parse;
30
31#[cfg(feature = "config")]
32pub mod normalize;
33
34#[cfg(feature = "config")]
35pub mod expand;
36
37pub mod timing;
38
39#[cfg(feature = "config")]
40pub mod compile_after;
41
42#[cfg(feature = "config")]
43pub mod prepare;
44
45use std::collections::BTreeMap;
46
47use crate::config::{
48    BurstConfig, CardinalitySpikeConfig, DistributionConfig, DynamicLabelConfig, GapConfig,
49    OnSinkError,
50};
51use crate::encoder::EncoderConfig;
52use crate::generator::{GeneratorConfig, LogGeneratorConfig};
53use crate::packs::MetricOverride;
54use crate::sink::SinkConfig;
55
56// ---------------------------------------------------------------------------
57// Compiler AST types
58// ---------------------------------------------------------------------------
59
/// A parsed v2 scenario file.
///
/// This is the top-level AST node produced by [`parse::parse`]. It captures
/// the exact structure of the YAML input without resolving defaults, expanding
/// packs, or compiling after-clauses.
///
/// With the `config` feature enabled the type derives `Serialize` and
/// `Deserialize` with `deny_unknown_fields`, so an unrecognised top-level key
/// is a deserialization error rather than being silently dropped. Every
/// `Option` field is marked `serde(default)` and therefore may be omitted
/// from the input; `version` and `scenarios` carry no default and must be
/// present.
///
/// # Catalog metadata
///
/// The three optional fields [`scenario_name`](Self::scenario_name),
/// [`category`](Self::category), and [`description`](Self::description)
/// mirror the v1 top-level metadata shape so the CLI catalog probe
/// (`sonda::scenarios::read_scenario_metadata`) reads v1 and v2 files
/// through the same `Deserialize` struct. The compiler pipeline itself
/// (normalize → expand → compile_after → prepare) does **not** consume
/// these fields — they are pure metadata, not compile input.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "config",
    derive(serde::Serialize, serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct ScenarioFile {
    /// Schema version. Must be `2`.
    ///
    /// The field type accepts any `u32`; the `2` requirement is not enforced
    /// by this struct itself.
    pub version: u32,
    /// Catalog display name (kebab-case). When present it overrides the
    /// filename-derived name in the CLI catalog probe. Pure metadata —
    /// ignored by every compiler phase.
    #[cfg_attr(feature = "config", serde(default))]
    pub scenario_name: Option<String>,
    /// Catalog category used by `scenarios list --category <name>` and
    /// `catalog list --category <name>`. Allowed values are enforced by
    /// the CLI CI validation (`infrastructure`, `network`, `application`,
    /// `observability`); the AST itself does not constrain the string.
    /// Pure metadata — ignored by every compiler phase.
    #[cfg_attr(feature = "config", serde(default))]
    pub category: Option<String>,
    /// One-line human-readable description surfaced by
    /// `scenarios list` / `catalog list` and `scenarios show`. Pure
    /// metadata — ignored by every compiler phase.
    #[cfg_attr(feature = "config", serde(default))]
    pub description: Option<String>,
    /// Optional shared defaults inherited by all entries.
    #[cfg_attr(feature = "config", serde(default))]
    pub defaults: Option<Defaults>,
    /// One or more scenario entries (inline signals or pack references).
    ///
    /// The `Vec` type permits an empty list; a non-empty requirement, if
    /// any, is not expressed here.
    pub scenarios: Vec<Entry>,
}
107
/// Shared defaults inherited by all entries in a v2 scenario file.
///
/// Fields set here act as fallbacks for entries that omit the corresponding
/// field. Defaults resolution is not performed during parsing; per the module
/// documentation it belongs to the `normalize` submodule.
///
/// Every field is optional. With the `config` feature enabled, unknown keys
/// under `defaults:` are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "config",
    derive(serde::Serialize, serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct Defaults {
    /// Default event rate in events per second.
    #[cfg_attr(feature = "config", serde(default))]
    pub rate: Option<f64>,
    /// Default total run duration (e.g. `"30s"`, `"5m"`). Applied per entry —
    /// each entry runs for this long from its own resolved start, so a cascade's
    /// total wall-clock is `max(phase_offset + duration)`, not `duration`.
    ///
    /// Held as the raw string from the input; it is not parsed into a
    /// duration by this type.
    #[cfg_attr(feature = "config", serde(default))]
    pub duration: Option<String>,
    /// Default encoder configuration.
    #[cfg_attr(feature = "config", serde(default))]
    pub encoder: Option<EncoderConfig>,
    /// Default sink configuration.
    #[cfg_attr(feature = "config", serde(default))]
    pub sink: Option<SinkConfig>,
    /// Default static labels merged into every entry.
    #[cfg_attr(feature = "config", serde(default))]
    pub labels: Option<BTreeMap<String, String>>,
    /// Default sink-error policy inherited by every entry.
    #[cfg_attr(feature = "config", serde(default))]
    pub on_sink_error: Option<OnSinkError>,
    /// Default `while:` clause inherited by every entry.
    ///
    /// Spelled `while` on the wire (`while` is a Rust keyword, hence the
    /// different field name) and left out of serialized output when `None`.
    #[cfg_attr(
        feature = "config",
        serde(default, rename = "while", skip_serializing_if = "Option::is_none")
    )]
    pub while_clause: Option<WhileClause>,
    /// Default `delay:` clause inherited by every entry.
    ///
    /// Spelled `delay` on the wire and left out of serialized output when
    /// `None`.
    #[cfg_attr(
        feature = "config",
        serde(default, rename = "delay", skip_serializing_if = "Option::is_none")
    )]
    pub delay_clause: Option<DelayClause>,
}
153
/// A single scenario entry in a v2 file.
///
/// An entry is either an **inline signal** (has `generator` and `name`) or a
/// **pack reference** (has `pack`). The two forms are mutually exclusive,
/// enforced at parse time.
///
/// Every field except [`signal_type`](Self::signal_type) is optional in the
/// struct to support flexible YAML authoring; `signal_type` is a plain
/// `String` without a serde default and must always be present. Semantic
/// validation (required fields, mutual exclusion) is performed by
/// [`parse::parse`].
///
/// With the `config` feature enabled, unknown keys on an entry are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "config",
    derive(serde::Serialize, serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct Entry {
    /// Unique identifier for causal dependency references (`after.ref`).
    #[cfg_attr(feature = "config", serde(default))]
    pub id: Option<String>,
    /// Signal type: `"metrics"`, `"logs"`, `"histogram"`, or `"summary"`.
    ///
    /// Stored as a free-form string; the type does not restrict it to the
    /// values listed above.
    pub signal_type: String,
    /// Metric or scenario name. Required for inline entries.
    #[cfg_attr(feature = "config", serde(default))]
    pub name: Option<String>,
    /// Event rate in events per second.
    #[cfg_attr(feature = "config", serde(default))]
    pub rate: Option<f64>,
    /// Total run duration (e.g. `"30s"`, `"5m"`).
    #[cfg_attr(feature = "config", serde(default))]
    pub duration: Option<String>,
    /// Value generator configuration (for metrics).
    #[cfg_attr(feature = "config", serde(default))]
    pub generator: Option<GeneratorConfig>,
    /// Log generator configuration (for logs signal type).
    ///
    /// Mutually exclusive with `generator` — an entry uses one or the other
    /// depending on `signal_type`.
    #[cfg_attr(feature = "config", serde(default))]
    pub log_generator: Option<LogGeneratorConfig>,
    /// Static labels attached to every emitted event.
    #[cfg_attr(feature = "config", serde(default))]
    pub labels: Option<BTreeMap<String, String>>,
    /// Dynamic (rotating) label configurations.
    #[cfg_attr(feature = "config", serde(default))]
    pub dynamic_labels: Option<Vec<DynamicLabelConfig>>,
    /// Encoder configuration for this entry.
    #[cfg_attr(feature = "config", serde(default))]
    pub encoder: Option<EncoderConfig>,
    /// Sink configuration for this entry.
    #[cfg_attr(feature = "config", serde(default))]
    pub sink: Option<SinkConfig>,
    /// Jitter amplitude applied to generated values.
    #[cfg_attr(feature = "config", serde(default))]
    pub jitter: Option<f64>,
    /// Deterministic seed for jitter RNG.
    #[cfg_attr(feature = "config", serde(default))]
    pub jitter_seed: Option<u64>,
    /// Recurring silent-period configuration.
    #[cfg_attr(feature = "config", serde(default))]
    pub gaps: Option<GapConfig>,
    /// Recurring high-rate burst configuration.
    #[cfg_attr(feature = "config", serde(default))]
    pub bursts: Option<BurstConfig>,
    /// Cardinality spike configurations.
    #[cfg_attr(feature = "config", serde(default))]
    pub cardinality_spikes: Option<Vec<CardinalitySpikeConfig>>,
    /// Phase offset for staggered start within a clock group.
    ///
    /// Held as the raw string from the input; not parsed by this type.
    #[cfg_attr(feature = "config", serde(default))]
    pub phase_offset: Option<String>,
    /// Clock group for coordinated timing across entries.
    #[cfg_attr(feature = "config", serde(default))]
    pub clock_group: Option<String>,
    /// Causal dependency on another signal's value.
    #[cfg_attr(feature = "config", serde(default))]
    pub after: Option<AfterClause>,
    /// Continuous lifecycle gate on another signal's value.
    ///
    /// Spelled `while` on the wire and left out of serialized output when
    /// `None`.
    #[cfg_attr(
        feature = "config",
        serde(default, rename = "while", skip_serializing_if = "Option::is_none")
    )]
    pub while_clause: Option<WhileClause>,
    /// Open / close debounce windows applied to `while_clause` transitions.
    ///
    /// Spelled `delay` on the wire and left out of serialized output when
    /// `None`.
    #[cfg_attr(
        feature = "config",
        serde(default, rename = "delay", skip_serializing_if = "Option::is_none")
    )]
    pub delay_clause: Option<DelayClause>,

    // -- Pack-backed entry fields --
    /// Pack name or file path. Mutually exclusive with `generator`.
    #[cfg_attr(feature = "config", serde(default))]
    pub pack: Option<String>,
    /// Per-metric overrides within the referenced pack.
    ///
    /// NOTE(review): presumably keyed by metric name — confirm against the
    /// pack expansion code.
    #[cfg_attr(feature = "config", serde(default))]
    pub overrides: Option<BTreeMap<String, MetricOverride>>,

    // -- Histogram / summary fields --
    /// Distribution model for histogram or summary observations.
    #[cfg_attr(feature = "config", serde(default))]
    pub distribution: Option<DistributionConfig>,
    /// Histogram bucket boundaries (histogram only).
    #[cfg_attr(feature = "config", serde(default))]
    pub buckets: Option<Vec<f64>>,
    /// Summary quantile boundaries (summary only).
    #[cfg_attr(feature = "config", serde(default))]
    pub quantiles: Option<Vec<f64>>,
    /// Number of observations sampled per tick.
    #[cfg_attr(feature = "config", serde(default))]
    pub observations_per_tick: Option<u32>,
    /// Linear drift applied to the distribution mean each second.
    #[cfg_attr(feature = "config", serde(default))]
    pub mean_shift_per_sec: Option<f64>,
    /// Deterministic seed for histogram/summary sampling.
    #[cfg_attr(feature = "config", serde(default))]
    pub seed: Option<u64>,
    /// Per-entry sink-error policy (overrides defaults).
    #[cfg_attr(feature = "config", serde(default))]
    pub on_sink_error: Option<OnSinkError>,
}
273
/// Comparison operator for an [`AfterClause`] threshold check.
///
/// Serde maps `"<"` to [`LessThan`](AfterOp::LessThan) and `">"` to
/// [`GreaterThan`](AfterOp::GreaterThan). Any other value is rejected at
/// deserialization time.
///
/// Deserialization is derived here (unlike [`WhileOp`], which has a
/// hand-written impl), so the rejection message is whatever serde produces
/// for an unknown enum variant.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "config", derive(serde::Serialize, serde::Deserialize))]
pub enum AfterOp {
    /// The referenced signal's value must be less than the threshold.
    #[cfg_attr(feature = "config", serde(rename = "<"))]
    LessThan,
    /// The referenced signal's value must be greater than the threshold.
    #[cfg_attr(feature = "config", serde(rename = ">"))]
    GreaterThan,
}
289
/// Structured after-clause expressing a causal dependency on another signal.
///
/// When present on an [`Entry`], the entry will not start emitting until the
/// referenced signal's latest value satisfies the comparison. This type only
/// carries the parsed clause; per the module documentation, resolving
/// after-clauses into runtime timing belongs to the `compile_after`
/// submodule.
///
/// With the `config` feature enabled, unknown keys inside `after:` are
/// rejected (`deny_unknown_fields`). `ref`, `op` and `value` are required;
/// `delay` may be omitted.
///
/// # YAML example
///
/// ```yaml
/// after:
///   ref: cpu_signal
///   op: ">"
///   value: 90.0
///   delay: "5s"
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "config",
    derive(serde::Serialize, serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct AfterClause {
    /// Target signal id to observe.
    ///
    /// Serialized as `"ref"` in YAML because `ref` is a Rust keyword.
    #[cfg_attr(feature = "config", serde(rename = "ref"))]
    pub ref_id: String,
    /// Comparison operator: `"<"` or `">"`.
    pub op: AfterOp,
    /// Threshold value for the comparison.
    pub value: f64,
    /// Optional additional delay after the condition is met.
    ///
    /// Held as the raw string from the input (e.g. `"5s"`); not parsed into
    /// a duration by this type.
    #[cfg_attr(feature = "config", serde(default))]
    pub delay: Option<String>,
}
325
/// Strict comparison operator for a [`WhileClause`].
///
/// Only `<` and `>` are accepted. Non-strict operators (`<=`, `>=`, `==`,
/// `!=`) are rejected at deserialize time with a hint pointing to the
/// strict alternatives — equality on `f64` over a continuous gate is
/// numerically unsafe and forbidden by design.
///
/// `Serialize` is derived (feature `config`); `Deserialize` is implemented by
/// hand for this type so the rejection can carry that hint.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "config", derive(serde::Serialize))]
pub enum WhileOp {
    /// Strict "less than"; written as `"<"` when serialized.
    #[cfg_attr(feature = "config", serde(rename = "<"))]
    LessThan,
    /// Strict "greater than"; written as `">"` when serialized.
    #[cfg_attr(feature = "config", serde(rename = ">"))]
    GreaterThan,
}
340
#[cfg(feature = "config")]
impl<'de> serde::Deserialize<'de> for WhileOp {
    /// Reads the operator as a string and maps `"<"` / `">"` onto the two
    /// variants.
    ///
    /// # Errors
    ///
    /// Fails if the input is not a string, or if the string is anything other
    /// than `"<"` or `">"`; in the latter case the error names the offending
    /// operator and lists the two accepted ones.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let symbol = String::deserialize(deserializer)?;

        if symbol == "<" {
            return Ok(WhileOp::LessThan);
        }
        if symbol == ">" {
            return Ok(WhileOp::GreaterThan);
        }

        // Anything else (including `<=`, `>=`, `==`, `!=`) is refused with an
        // explicit pointer to the strict operators.
        let message = format!(
            "unsupported operator '{symbol}' on while: — only strict \
             comparisons '<' and '>' are accepted"
        );
        Err(<D::Error as serde::de::Error>::custom(message))
    }
}
358
/// Continuous lifecycle gate on another signal's value.
///
/// All three keys are required, and with the `config` feature enabled any
/// other key inside `while:` is rejected (`deny_unknown_fields`).
///
/// ```yaml
/// while:
///   ref: link_state
///   op: ">"
///   value: 0
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "config",
    derive(serde::Serialize, serde::Deserialize),
    serde(deny_unknown_fields)
)]
pub struct WhileClause {
    /// Reference to the signal whose value is compared; spelled `ref` on the
    /// wire because `ref` is a Rust keyword.
    ///
    /// NOTE(review): presumably an entry `id`, as for `after.ref` — confirm.
    #[cfg_attr(feature = "config", serde(rename = "ref"))]
    pub ref_id: String,
    /// Strict comparison operator (`"<"` or `">"`).
    pub op: WhileOp,
    /// Right-hand side of the comparison.
    pub value: f64,
}
379
/// Open / close debounce windows applied to a [`WhileClause`] transition.
///
/// `open` debounces a `false → true` transition; `close` debounces
/// `true → false`. Either may be omitted (treated as `0s`). Validation
/// requires `delay:` to be paired with `while:`; standalone `delay:`
/// rejects at normalize time.
///
/// Durations are parsed from human-readable strings (`"250ms"`, `"5s"`)
/// at YAML deserialization time, so the runtime never re-parses.
///
/// `close` accepts two shapes for backward compatibility:
/// - `close: 5s` — legacy duration shorthand (carries no extra fields).
/// - `close: { duration: 5s, snap_to: 1, stale_marker: false }` — extended
///   form for [`PROMETHEUS_STALE_NAN`](crate::encoder::remote_write::PROMETHEUS_STALE_NAN)
///   recovery control on `running → paused`.
///
/// # Serialization
///
/// `Serialize` is derived and `Deserialize` is hand-written, and the two do
/// not use the same layout. The derive emits `open`, `close`,
/// `close_stale_marker` and `close_snap_to` as sibling keys (each skipped
/// when `None`), with the two durations rendered as strings.
///
/// NOTE(review): the hand-written `Deserialize` impl only accepts the keys
/// `open` and `close` and denies unknown fields, so a value with
/// `close_stale_marker` or `close_snap_to` set does not appear to
/// deserialize from its own serialized form. Confirm whether round-tripping
/// is expected before relying on it.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "config", derive(serde::Serialize))]
pub struct DelayClause {
    /// Debounce window for the `false → true` transition; `None` when the
    /// `open` key was omitted.
    #[cfg_attr(
        feature = "config",
        serde(
            default,
            skip_serializing_if = "Option::is_none",
            with = "delay_duration_opt"
        )
    )]
    pub open: Option<std::time::Duration>,
    /// Debounce window for the `true → false` transition. Comes from either
    /// the `close: 5s` shorthand or the `duration` key of the extended form;
    /// `None` when neither supplied a duration.
    #[cfg_attr(
        feature = "config",
        serde(
            default,
            skip_serializing_if = "Option::is_none",
            with = "delay_duration_opt"
        )
    )]
    pub close: Option<std::time::Duration>,
    /// Value of the `stale_marker` key of the extended `close` form; always
    /// `None` for the shorthand form.
    #[cfg_attr(
        feature = "config",
        serde(default, skip_serializing_if = "Option::is_none")
    )]
    pub close_stale_marker: Option<bool>,
    /// Value of the `snap_to` key of the extended `close` form; always `None`
    /// for the shorthand form.
    #[cfg_attr(
        feature = "config",
        serde(default, skip_serializing_if = "Option::is_none")
    )]
    pub close_snap_to: Option<f64>,
}
427
#[cfg(feature = "config")]
impl<'de> serde::Deserialize<'de> for DelayClause {
    /// Deserializes a `delay:` mapping, accepting both the shorthand
    /// (`close: 5s`) and the extended (`close: { duration, snap_to,
    /// stale_marker }`) spellings of `close`.
    ///
    /// Duration strings are converted with
    /// `crate::config::validate::parse_delay_duration`; `open` is converted
    /// before `close`, so an invalid `open` is the error reported when both
    /// are bad.
    ///
    /// # Errors
    ///
    /// Fails on unknown keys (at either level), on a `close` value that fits
    /// neither shape, and on any duration string the parser rejects.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // The helper type names below surface in serde's own error messages,
        // so they are part of the observable behavior.

        /// Extended `close:` mapping; every key is optional.
        #[derive(serde::Deserialize)]
        #[serde(deny_unknown_fields)]
        struct CloseStruct {
            #[serde(default)]
            duration: Option<String>,
            #[serde(default)]
            snap_to: Option<f64>,
            #[serde(default)]
            stale_marker: Option<bool>,
        }

        /// The two accepted spellings of `close:`. Untagged, so serde tries
        /// the bare string first and the mapping second.
        #[derive(serde::Deserialize)]
        #[serde(untagged)]
        enum CloseShape {
            Duration(String),
            Extended(CloseStruct),
        }

        /// On-the-wire layout of `delay:` with durations still as text.
        #[derive(serde::Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Raw {
            #[serde(default)]
            open: Option<String>,
            #[serde(default)]
            close: Option<CloseShape>,
        }

        // Single place where duration text becomes a `Duration`, with parser
        // failures converted into the deserializer's error type.
        let to_duration = |text: String| {
            crate::config::validate::parse_delay_duration(&text)
                .map_err(<D::Error as serde::de::Error>::custom)
        };

        let Raw { open, close } = Raw::deserialize(deserializer)?;

        let open = open.map(&to_duration).transpose()?;

        let (close, close_snap_to, close_stale_marker) = match close {
            Some(CloseShape::Extended(CloseStruct {
                duration,
                snap_to,
                stale_marker,
            })) => (
                duration.map(&to_duration).transpose()?,
                snap_to,
                stale_marker,
            ),
            // The shorthand carries a duration and nothing else.
            Some(CloseShape::Duration(text)) => (Some(to_duration(text)?), None, None),
            None => (None, None, None),
        };

        Ok(DelayClause {
            open,
            close,
            close_stale_marker,
            close_snap_to,
        })
    }
}
498
/// Serializer half of `#[serde(with = "delay_duration_opt")]`: writes an
/// `Option<Duration>` as a compact human-readable string such as `"5s"`.
///
/// Only `serialize` is provided; there is no matching `deserialize` in this
/// module.
#[cfg(feature = "config")]
mod delay_duration_opt {
    use std::time::Duration;

    use serde::Serializer;

    /// Serializes `Some(d)` as the text produced by `format_duration` and
    /// `None` via `serialize_none`.
    pub fn serialize<S>(value: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Some(duration) = value else {
            return serializer.serialize_none();
        };
        serializer.serialize_str(&format_duration(*duration))
    }

    /// Renders a duration using the largest of `h`, `m`, `s` that divides it
    /// exactly, falling back to `ms`.
    ///
    /// The value is reduced to whole milliseconds first, so any
    /// sub-millisecond remainder is dropped; a duration shorter than one
    /// millisecond renders as `"0ms"`.
    fn format_duration(d: Duration) -> String {
        // Milliseconds per unit, largest first so the coarsest exact unit wins.
        const UNITS: [(u128, &str); 3] = [(3_600_000, "h"), (60_000, "m"), (1_000, "s")];

        let millis = d.as_millis();
        // Zero is a multiple of every unit; pin it to `ms` explicitly.
        if millis == 0 {
            return "0ms".to_string();
        }

        for (scale, suffix) in UNITS {
            if millis.is_multiple_of(scale) {
                return format!("{}{suffix}", millis / scale);
            }
        }
        format!("{millis}ms")
    }
}
532
/// Discriminator labeling an edge or diagnostic as `after:` vs `while:`.
///
/// Used as the edge label in the dependency graph and as a field on
/// [`compile_after::CompileAfterError`] variants that span both clause
/// families. `#[non_exhaustive]` so future clause types extend without a
/// breaking change.
///
/// With the `config` feature enabled the variants serialize as lowercase
/// strings (`"after"`, `"while"`); the type is serialize-only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "config", derive(serde::Serialize))]
#[cfg_attr(feature = "config", serde(rename_all = "lowercase"))]
#[non_exhaustive]
pub enum ClauseKind {
    /// The edge or diagnostic originates from an `after:` clause.
    After,
    /// The edge or diagnostic originates from a `while:` clause.
    While,
}
547
548impl std::fmt::Display for ClauseKind {
549    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
550        f.write_str(match self {
551            ClauseKind::After => "after",
552            ClauseKind::While => "while",
553        })
554    }
555}