Skip to main content

grex_core/pack/
mod.rs

1//! Pack-manifest (`pack.yaml`) parser.
2//!
3//! Stage A of M3: **pure parse + round-trip, zero side effects**. This
4//! module turns a YAML string into a typed [`PackManifest`] and fails
5//! loudly on any shape violation. It does not read from disk, walk
6//! children, expand variables, or detect duplicates across actions — all
7//! of those are later stages.
8//!
9//! # Key design points
10//!
11//! * `schema_version` is validated at parse (only `"1"` accepted) so that
12//!   future-schema packs fail with an actionable error rather than a
13//!   cryptic deserialize mismatch.
14//! * Action dispatch is key-based (not `#[serde(untagged)]`) — see
15//!   [`action`] for rationale.
16//! * Predicates use a recursive grammar capped at
17//!   [`error::MAX_REQUIRE_DEPTH`] to bound worst-case nesting cost.
18//! * YAML anchors/aliases are **rejected** at parse time as a security
19//!   policy (cycle + billion-laughs mitigation).
20//! * The omitted-vs-empty distinction on `teardown:` is preserved — `None`
21//!   means "default to reverse(actions) at execute time", `Some(vec![])`
22//!   means "explicit no-op".
23
24pub mod action;
25pub mod error;
26pub mod predicate;
27pub mod validate;
28
29use std::collections::BTreeMap;
30
31use serde::{Deserialize, Serialize};
32
33pub use action::{
34    Action, EnvArgs, EnvScope, ExecSpec, MkdirArgs, RequireSpec, RmdirArgs, SymlinkArgs,
35    SymlinkKind, UnlinkArgs, WhenSpec, VALID_ACTION_KEYS,
36};
37pub use error::{PackParseError, MAX_REQUIRE_DEPTH};
38pub use predicate::{Combiner, ExecOnFail, OsKind, Predicate, RequireOnFail};
39pub use validate::{run_all, PackValidationError, Validator};
40
/// The one `schema_version` literal this build understands. Changing it
/// implies a backwards-incompatible migration of the YAML format.
pub const SUPPORTED_SCHEMA_VERSION: &str = "1";
44
45/// Newtype wrapping the schema-version literal.
46///
47/// Parses only the exact string `"1"`. Any other value yields
48/// [`PackParseError::InvalidSchemaVersion`] so consumers can emit an
49/// actionable "upgrade grex" or "downgrade pack" message.
50#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
51#[serde(transparent)]
52pub struct SchemaVersion(String);
53
54impl SchemaVersion {
55    /// The single supported schema version literal.
56    #[must_use]
57    pub fn current() -> Self {
58        Self(SUPPORTED_SCHEMA_VERSION.to_string())
59    }
60
61    /// Borrow the wrapped literal.
62    #[must_use]
63    pub fn as_str(&self) -> &str {
64        &self.0
65    }
66}
67
68impl<'de> Deserialize<'de> for SchemaVersion {
69    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
70    where
71        D: serde::Deserializer<'de>,
72    {
73        // Spec requires the literal string `"1"`. Bare YAML integers (e.g.
74        // `schema_version: 1`) are rejected: YAML's implicit typing would
75        // make the authored value ambiguous, and the manifest schema is
76        // explicitly string-typed so future versions can be non-numeric.
77        let raw = serde_yaml::Value::deserialize(deserializer)?;
78        let got = match &raw {
79            serde_yaml::Value::String(s) => s.clone(),
80            serde_yaml::Value::Number(n) => {
81                return Err(serde::de::Error::custom(format!(
82                    "schema_version must be the quoted string \"1\", got bare number {n} \
83                     (quote it as \"{n}\")"
84                )));
85            }
86            other => {
87                return Err(serde::de::Error::custom(format!(
88                    "schema_version must be the quoted string \"1\", got {other:?}"
89                )));
90            }
91        };
92        if got == SUPPORTED_SCHEMA_VERSION {
93            Ok(Self(got))
94        } else {
95            Err(serde::de::Error::custom(format!(
96                "unsupported pack schema_version {got:?}: this grex build only understands \"1\""
97            )))
98        }
99    }
100}
101
102/// Pack type discriminator.
103///
104/// * [`PackType::Meta`] — composes child packs only (no actions).
105/// * [`PackType::Declarative`] — idempotent actions with automatic rollback.
106/// * [`PackType::Scripted`] — freeform actions with author-defined teardown.
107///
108/// Marked `#[non_exhaustive]` so new pack shapes (e.g. plugin-contributed
109/// kinds in M4+) can land without breaking external match sites.
110#[non_exhaustive]
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
112#[serde(rename_all = "snake_case")]
113pub enum PackType {
114    /// Composition-only pack (no actions allowed in strict validation).
115    Meta,
116    /// Idempotent actions; grex runs rollback on failure.
117    Declarative,
118    /// Author-defined actions + teardown.
119    Scripted,
120}
121
122impl PackType {
123    /// Stable snake_case tag matching the `type:` discriminator in
124    /// `pack.yaml`. Used by the pack-type plugin registry to look up the
125    /// driver for a given pack.
126    #[must_use]
127    pub fn as_str(&self) -> &'static str {
128        match self {
129            Self::Meta => "meta",
130            Self::Declarative => "declarative",
131            Self::Scripted => "scripted",
132        }
133    }
134}
135
136/// Reference to a child pack from a `children:` entry.
137///
138/// `path` is intentionally left `None` at parse time — callers that need
139/// the on-disk directory name should invoke [`ChildRef::effective_path`]
140/// which extracts the last URL segment as the default.
141///
142/// Marked `#[non_exhaustive]` so spec growth (e.g. `pin`, `shallow`) does
143/// not break library consumers who destructure the struct.
144#[non_exhaustive]
145#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
146pub struct ChildRef {
147    /// Upstream git URL (any scheme `gix` can resolve).
148    pub url: String,
149    /// Optional override for the on-disk directory name. Preserved as
150    /// `None` when absent so callers can distinguish "defaulted" from
151    /// "explicitly set to the default value".
152    #[serde(default, skip_serializing_if = "Option::is_none")]
153    pub path: Option<String>,
154    /// Optional git ref (branch / tag / commit). Serialized as `ref:` via
155    /// the raw-identifier field.
156    #[serde(default, rename = "ref", skip_serializing_if = "Option::is_none")]
157    pub r#ref: Option<String>,
158}
159
160impl ChildRef {
161    /// Resolve the on-disk directory name. When `path` is explicitly set it
162    /// wins; otherwise the last path segment of `url` (stripped of a
163    /// trailing `.git`) is used.
164    ///
165    /// # Safety precondition (callers MUST validate first)
166    ///
167    /// This method returns its input verbatim — it does **not** check
168    /// for path separators, `.` / `..`, the empty string, or other
169    /// path-traversal shapes. **Callers using the returned string for
170    /// any filesystem operation MUST first run plan-phase validation
171    /// via [`PackManifest::validate_plan`] (which invokes
172    /// `validate::run_all`, including the internal bare-name
173    /// validator).** The sync orchestrator and the tree walker do this
174    /// before dispatch; in-crate callers that reach `ChildRef`
175    /// directly must follow the same discipline.
176    ///
177    /// Validation lives at plan phase — not here — to keep this method
178    /// side-effect-free and out of the hot dispatch path. Re-running
179    /// the regex on every call would amount to per-step paranoia for
180    /// zero added safety, since plan validation already short-circuits
181    /// the entire walk on a bad child path.
182    #[must_use]
183    pub fn effective_path(&self) -> String {
184        if let Some(p) = &self.path {
185            return p.clone();
186        }
187        let url = self.url.trim_end_matches('/');
188        let tail = url.rsplit_once('/').map_or(url, |(_, t)| t);
189        tail.strip_suffix(".git").unwrap_or(tail).to_string()
190    }
191}
192
193/// Top-level representation of a `pack.yaml` manifest.
194///
195/// Post-parse invariants:
196///
197/// * `schema_version` == `"1"`.
198/// * `name` matches `^[a-z][a-z0-9-]*$`.
199/// * Unknown top-level keys are absent unless prefixed with `x-`.
200/// * Predicate trees within any action are depth-bounded by
201///   [`MAX_REQUIRE_DEPTH`].
202#[derive(Debug, Clone, PartialEq, Eq)]
203pub struct PackManifest {
204    /// Schema-version literal. Always [`SchemaVersion::current`] at v1.
205    pub schema_version: SchemaVersion,
206    /// Pack name (validated).
207    pub name: String,
208    /// Pack-type discriminator.
209    pub r#type: PackType,
210    /// Optional semver-ish string; grex does not parse it further in Stage A.
211    pub version: Option<String>,
212    /// Names of packs this pack depends on. Empty default.
213    pub depends_on: Vec<String>,
214    /// Child-pack references. Empty default.
215    pub children: Vec<ChildRef>,
216    /// Ordered actions to run. Empty-default (valid no-op).
217    pub actions: Vec<Action>,
218    /// Explicit teardown.
219    ///
220    /// `None` means the pack-type driver should default to
221    /// `reverse(actions)` at execute time; `Some(vec![])` means the
222    /// author explicitly opted into a no-op teardown. Preserving that
223    /// distinction matters for audit trails — Stage A does not execute but
224    /// must round-trip it.
225    pub teardown: Option<Vec<Action>>,
226    /// Unknown `x-*` extension keys. Preserved verbatim for downstream
227    /// plugins.
228    pub extensions: BTreeMap<String, serde_yaml::Value>,
229}
230
231impl PackManifest {
232    /// Walk every action (including those nested inside `when` blocks),
233    /// yielding `(global_index, &symlink)` pairs.
234    ///
235    /// `global_index` is a 0-based counter across the flattened action-walk
236    /// — it is **not** the top-level index into [`PackManifest::actions`].
237    /// Two symlinks at the same top-level index but at different nesting
238    /// depths receive distinct global indices. This is the index space
239    /// [`PackValidationError`] variants refer to.
240    pub fn iter_all_symlinks(&self) -> impl Iterator<Item = (usize, &SymlinkArgs)> {
241        self.actions.iter().flat_map(Action::iter_symlinks).enumerate()
242    }
243
244    /// Run every default [`Validator`] over this manifest.
245    ///
246    /// Returns `Ok(())` when no validator emits an error; otherwise returns
247    /// `Err(Vec<_>)` carrying every error across every validator (not
248    /// fail-first — downstream consumers can decide whether to abort on the
249    /// first or surface the full batch).
250    ///
251    /// # Errors
252    ///
253    /// Returns [`PackValidationError`] variants aggregated across the
254    /// validator set. See [`validate::run_all`] for the exact default set.
255    ///
256    /// # Example
257    ///
258    /// ```no_run
259    /// use grex_core::pack::parse;
260    ///
261    /// let src = "schema_version: \"1\"\nname: ok\ntype: declarative\n";
262    /// let pack = parse(src).unwrap();
263    /// pack.validate_plan().unwrap();
264    /// ```
265    pub fn validate_plan(&self) -> Result<(), Vec<PackValidationError>> {
266        let errs = validate::run_all(self);
267        if errs.is_empty() {
268            Ok(())
269        } else {
270            Err(errs)
271        }
272    }
273}
274
275/// Parse a `pack.yaml` buffer into a [`PackManifest`].
276///
277/// The entry point:
278///
279/// 1. Pre-scans for YAML anchor / alias events and rejects them.
280/// 2. Deserializes into a permissive raw map.
281/// 3. Validates `schema_version` and `name`.
282/// 4. Segregates known fields from `x-*` extensions; rejects any other
283///    unknown top-level key.
284/// 5. Key-dispatches actions and teardown via [`Action::from_yaml`].
285///
286/// # Errors
287///
288/// Any structural violation surfaces as a [`PackParseError`] variant with
289/// enough context for a CLI consumer to point at the offending key.
290pub fn parse(yaml: &str) -> Result<PackManifest, PackParseError> {
291    reject_yaml_aliases(yaml)?;
292    let mapping = parse_root_mapping(yaml)?;
293    let extensions = segregate_extensions(&mapping)?;
294
295    let schema_version = parse_schema_version(&mapping)?;
296    let name = parse_name(&mapping)?;
297    let r#type = parse_type(&mapping)?;
298    let version = parse_version(&mapping);
299    let depends_on = parse_depends_on(&mapping)?;
300    let children = parse_children(&mapping)?;
301    let actions = Action::parse_list(mapping.get(s("actions")))?;
302    let teardown = parse_teardown(&mapping)?;
303
304    Ok(PackManifest {
305        schema_version,
306        name,
307        r#type,
308        version,
309        depends_on,
310        children,
311        actions,
312        teardown,
313        extensions,
314    })
315}
316
/// Every top-level key the parser itself interprets. A key outside this list
/// is only tolerated when it starts with `x-`; anything else is reported as
/// [`PackParseError::UnknownTopLevelKey`].
const KNOWN_TOP_LEVEL_KEYS: &[&str] =
    &["schema_version", "name", "type", "version", "depends_on", "children", "actions", "teardown"];
321
322/// Parse the raw YAML into a top-level mapping, failing with a clear error
323/// for null / non-mapping roots.
324fn parse_root_mapping(yaml: &str) -> Result<serde_yaml::Mapping, PackParseError> {
325    let root: serde_yaml::Value = serde_yaml::from_str(yaml)?;
326    match root {
327        serde_yaml::Value::Mapping(m) => Ok(m),
328        serde_yaml::Value::Null => Err(PackParseError::InvalidName { got: String::new() }),
329        other => Err(PackParseError::InvalidPredicate {
330            detail: format!("pack.yaml root must be a mapping, got {other:?}"),
331        }),
332    }
333}
334
335/// Walk the mapping, separating `x-*` extension keys from rejected unknowns.
336fn segregate_extensions(
337    mapping: &serde_yaml::Mapping,
338) -> Result<BTreeMap<String, serde_yaml::Value>, PackParseError> {
339    let mut extensions: BTreeMap<String, serde_yaml::Value> = BTreeMap::new();
340    for (k, v) in mapping.iter() {
341        let Some(key) = k.as_str() else {
342            return Err(PackParseError::UnknownTopLevelKey { key: format!("{k:?}") });
343        };
344        if KNOWN_TOP_LEVEL_KEYS.contains(&key) {
345            continue;
346        }
347        if key.starts_with("x-") {
348            extensions.insert(key.to_string(), v.clone());
349            continue;
350        }
351        return Err(PackParseError::UnknownTopLevelKey { key: key.to_string() });
352    }
353    Ok(extensions)
354}
355
356fn parse_schema_version(mapping: &serde_yaml::Mapping) -> Result<SchemaVersion, PackParseError> {
357    match mapping.get(s("schema_version")) {
358        // Propagate the custom Deserialize error as PackParseError::Inner so
359        // its precise diagnostic (e.g. "got bare number 1 — quote it as
360        // \"1\"") surfaces to CLI consumers verbatim. Only string-typed
361        // mismatches fall through to InvalidSchemaVersion.
362        Some(v) => match serde_yaml::from_value::<SchemaVersion>(v.clone()) {
363            Ok(sv) => Ok(sv),
364            Err(e) => {
365                if matches!(v, serde_yaml::Value::String(_)) {
366                    Err(PackParseError::InvalidSchemaVersion { got: render_scalar(v) })
367                } else {
368                    Err(PackParseError::Inner(e))
369                }
370            }
371        },
372        None => Err(PackParseError::InvalidSchemaVersion { got: "<missing>".to_string() }),
373    }
374}
375
376fn parse_name(mapping: &serde_yaml::Mapping) -> Result<String, PackParseError> {
377    let name = match mapping.get(s("name")) {
378        Some(v) => v
379            .as_str()
380            .map(str::to_owned)
381            .ok_or_else(|| PackParseError::InvalidName { got: render_scalar(v) })?,
382        None => return Err(PackParseError::InvalidName { got: "<missing>".to_string() }),
383    };
384    if !is_valid_pack_name(&name) {
385        return Err(PackParseError::InvalidName { got: name });
386    }
387    Ok(name)
388}
389
390fn parse_type(mapping: &serde_yaml::Mapping) -> Result<PackType, PackParseError> {
391    match mapping.get(s("type")) {
392        Some(v) => Ok(serde_yaml::from_value(v.clone())?),
393        None => Err(PackParseError::UnknownTopLevelKey {
394            key: "<missing required field `type`>".to_string(),
395        }),
396    }
397}
398
399fn parse_version(mapping: &serde_yaml::Mapping) -> Option<String> {
400    match mapping.get(s("version")) {
401        Some(v) if v.is_null() => None,
402        Some(v) => Some(v.as_str().map(str::to_owned).unwrap_or_else(|| render_scalar(v))),
403        None => None,
404    }
405}
406
407fn parse_depends_on(mapping: &serde_yaml::Mapping) -> Result<Vec<String>, PackParseError> {
408    match mapping.get(s("depends_on")) {
409        Some(v) if v.is_null() => Ok(Vec::new()),
410        Some(v) => Ok(serde_yaml::from_value(v.clone())?),
411        None => Ok(Vec::new()),
412    }
413}
414
415fn parse_children(mapping: &serde_yaml::Mapping) -> Result<Vec<ChildRef>, PackParseError> {
416    match mapping.get(s("children")) {
417        Some(v) if v.is_null() => Ok(Vec::new()),
418        Some(v) => Ok(serde_yaml::from_value(v.clone())?),
419        None => Ok(Vec::new()),
420    }
421}
422
423fn parse_teardown(mapping: &serde_yaml::Mapping) -> Result<Option<Vec<Action>>, PackParseError> {
424    match mapping.get(s("teardown")) {
425        None => Ok(None),
426        Some(v) if v.is_null() => Ok(None),
427        Some(v) => Ok(Some(Action::parse_list(Some(v))?)),
428    }
429}
430
431/// Borrow-friendly shorthand for `serde_yaml::Value::String(key.into())`.
432fn s(key: &str) -> serde_yaml::Value {
433    serde_yaml::Value::String(key.to_string())
434}
435
436/// Render a scalar YAML value as a display string for error messages.
437fn render_scalar(v: &serde_yaml::Value) -> String {
438    match v {
439        serde_yaml::Value::String(s) => s.clone(),
440        serde_yaml::Value::Number(n) => n.to_string(),
441        serde_yaml::Value::Bool(b) => b.to_string(),
442        serde_yaml::Value::Null => "null".to_string(),
443        other => format!("{other:?}"),
444    }
445}
446
/// Check `name` against the grammar `^[a-z][a-z0-9-]*$`.
///
/// Works on bytes: every byte of a non-ASCII character is >= 0x80 and
/// therefore fails the ASCII checks, which gives the same verdict as a
/// per-`char` scan.
fn is_valid_pack_name(name: &str) -> bool {
    match name.as_bytes() {
        // Empty names are never valid.
        [] => false,
        [head, tail @ ..] => {
            head.is_ascii_lowercase()
                && tail
                    .iter()
                    .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || *b == b'-')
        }
    }
}
458
459/// Pre-scan the raw YAML text for anchor (`&`) or alias (`*`) events.
460///
461/// We drive `serde_yaml::Deserializer` purely for its token stream — no
462/// typed structure is built. Any [`serde_yaml::Value`] that contains an
463/// anchored or aliased node would round-trip without warning through a
464/// typed parse, so we reject here before structural parsing runs.
465fn reject_yaml_aliases(yaml: &str) -> Result<(), PackParseError> {
466    // serde_yaml does not expose a public event stream, but a YAML alias
467    // node always manifests as repeated structure sharing. A cheap but
468    // correct detector: scan for anchor/alias sigils outside of string
469    // scalars. A full YAML tokenizer would be heavier than warranted for
470    // Stage A; the lightweight scanner below is deliberately conservative
471    // (false-positive preferred over false-negative for a security gate).
472    let mut in_single = false;
473    let mut in_double = false;
474    let mut prev: char = '\n';
475    for ch in yaml.chars() {
476        match ch {
477            '\'' if !in_double => in_single = !in_single,
478            '"' if !in_single && prev != '\\' => in_double = !in_double,
479            '&' | '*' if !in_single && !in_double => {
480                // Anchors/aliases begin at token-start positions: after
481                // whitespace, `:`, `-`, `[`, `,`, `{`, or at start of
482                // line. A bare `*` in a flow scalar is unlikely but we
483                // err toward rejecting.
484                if matches!(prev, ' ' | '\t' | '\n' | ':' | '-' | '[' | ',' | '{') {
485                    // Require at least one name char to avoid rejecting
486                    // `* ` used as a literal bullet in a folded scalar —
487                    // though inside a YAML mapping this is itself
488                    // unusual. Accepts a single false-positive window.
489                    return Err(PackParseError::YamlAliasRejected);
490                }
491            }
492            _ => {}
493        }
494        prev = ch;
495    }
496    Ok(())
497}