Skip to main content

omne_cli/
pipe.rs

1//! Pipe YAML loader + validator.
2//!
3//! Parses pipe definitions from `dist/pipes/<name>.md` — markdown
4//! files whose YAML frontmatter holds the DAG schema described in
5//! the v2 design doc. The runner consumes the parsed `Pipe` for DAG
6//! dispatch (Unit 6) and node execution (Unit 11).
7//!
//! Three layers:
//! - `parse_str` / `load_from_path` — pure YAML→struct deserialization.
10//! - `validate_structure` — pure structural rules (exactly-one-of,
11//!   cycle detection, loop body shape, scope rules on bash/AI nodes).
12//! - `validate_with_volume` — adds filesystem-aware checks (command
13//!   resolution against `dist/skills/` + `core/skills/`, gate hook
14//!   existence at the platform-current variant).
15//!
16//! `load(md_path, volume_root)` is the high-level entry that runs all
17//! three in sequence. `commands::validate` calls it for every
18//! `dist/pipes/*.md` it finds.
19//!
20//! Error grain: every variant carries the offending node id (or
21//! pipe name + path) so `omne validate` can surface actionable
22//! messages without re-scanning the YAML.
23
24#![allow(dead_code)]
25
26use std::collections::{BTreeMap, BTreeSet, HashMap};
27use std::path::{Path, PathBuf};
28
29use serde::Deserialize;
30use thiserror::Error;
31
32use crate::manifest;
33use crate::volume;
34
35// ── Schema ──────────────────────────────────────────────────────────
36
37/// Top-level pipe definition. Deserialized from the markdown
38/// frontmatter in `dist/pipes/<name>.md`.
39#[derive(Debug, Clone, Deserialize)]
40pub struct Pipe {
41    pub name: String,
42    pub version: u32,
43    #[serde(default)]
44    pub default_model: Option<String>,
45    #[serde(default)]
46    pub inputs: BTreeMap<String, InputSpec>,
47    pub nodes: Vec<Node>,
48}
49
50/// A single named pipe input. `type:` is required; other fields
51/// optional. Defaults to non-required so simple `inputs: { x: { type:
52/// string } }` works.
53#[derive(Debug, Clone, Deserialize)]
54pub struct InputSpec {
55    #[serde(rename = "type")]
56    pub kind: String,
57    #[serde(default)]
58    pub required: bool,
59    #[serde(default)]
60    pub default: Option<String>,
61}
62
63/// One DAG node. Exactly one of `command` / `prompt` / `bash` /
64/// `loop_` must be populated — enforced by `validate_structure`,
65/// not by serde, so we can produce a typed error rather than a
66/// generic deserialization failure. `loop_` carries the YAML field
67/// name `loop` via serde rename.
68#[derive(Debug, Clone, Deserialize)]
69pub struct Node {
70    pub id: String,
71    #[serde(default)]
72    pub depends_on: Vec<String>,
73    #[serde(default)]
74    pub model: Option<String>,
75    #[serde(default)]
76    pub allowed_tools: Vec<String>,
77    #[serde(default)]
78    pub gate: Option<String>,
79    #[serde(default)]
80    pub timeout: Option<u64>,
81    #[serde(default)]
82    pub trigger_rule: TriggerRule,
83
84    // Execution kind — exactly one must be Some.
85    #[serde(default)]
86    pub command: Option<String>,
87    #[serde(default)]
88    pub prompt: Option<String>,
89    #[serde(default)]
90    pub bash: Option<String>,
91    #[serde(default, rename = "loop")]
92    pub loop_: Option<LoopBody>,
93}
94
95/// Loop body. Exactly one of `command` / `prompt` must be populated
96/// (validator-enforced). `until` is the sentinel that terminates the
97/// loop cleanly; `max_iterations` is a hard cap.
98#[derive(Debug, Clone, Deserialize)]
99pub struct LoopBody {
100    #[serde(default)]
101    pub command: Option<String>,
102    #[serde(default)]
103    pub prompt: Option<String>,
104    pub until: String,
105    pub max_iterations: u32,
106    #[serde(default)]
107    pub fresh_context: bool,
108}
109
110impl Pipe {
111    pub fn needs_claude(&self) -> bool {
112        self.nodes
113            .iter()
114            .any(|n| n.command.is_some() || n.prompt.is_some() || n.loop_.is_some())
115    }
116}
117
118/// How a node with multiple `depends_on` predecessors is triggered.
119#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
120#[serde(rename_all = "snake_case")]
121pub enum TriggerRule {
122    #[default]
123    AllSuccess,
124    OneSuccess,
125}
126
/// The mutually exclusive execution kinds a node may carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExecutionKind {
    Command,
    Prompt,
    Bash,
    Loop,
}

impl ExecutionKind {
    /// Returns the YAML key that selects this execution kind.
    pub fn as_str(self) -> &'static str {
        use ExecutionKind::{Bash, Command, Loop, Prompt};
        match self {
            Command => "command",
            Prompt => "prompt",
            Bash => "bash",
            Loop => "loop",
        }
    }
}
146
147impl Node {
148    /// Returns the populated execution kind, or `None` when the node
149    /// has zero or multiple kinds (validator surfaces both as errors).
150    pub fn execution_kind(&self) -> Option<ExecutionKind> {
151        let kinds = self.populated_kinds();
152        if kinds.len() == 1 {
153            Some(kinds[0])
154        } else {
155            None
156        }
157    }
158
159    fn populated_kinds(&self) -> Vec<ExecutionKind> {
160        let mut out = Vec::with_capacity(1);
161        if self.command.is_some() {
162            out.push(ExecutionKind::Command);
163        }
164        if self.prompt.is_some() {
165            out.push(ExecutionKind::Prompt);
166        }
167        if self.bash.is_some() {
168            out.push(ExecutionKind::Bash);
169        }
170        if self.loop_.is_some() {
171            out.push(ExecutionKind::Loop);
172        }
173        out
174    }
175
176    /// True when the node spawns an AI subprocess (`claude -p`).
177    pub fn is_ai(&self) -> bool {
178        matches!(
179            self.execution_kind(),
180            Some(ExecutionKind::Command) | Some(ExecutionKind::Prompt) | Some(ExecutionKind::Loop)
181        )
182    }
183}
184
185// ── Errors ──────────────────────────────────────────────────────────
186
187/// Errors produced while parsing the markdown frontmatter into a
188/// `Pipe`. Validator errors live in `ValidationError` so the parse
189/// surface is small and unambiguous.
190#[derive(Debug, Error)]
191pub enum ParseError {
192    #[error("pipe file is missing its `---...---` YAML frontmatter")]
193    MissingFrontmatter,
194
195    #[error("pipe YAML is malformed: {0}")]
196    Yaml(#[from] serde_yml::Error),
197
198    #[error("cannot read pipe file {path}: {source}")]
199    Io {
200        path: PathBuf,
201        #[source]
202        source: std::io::Error,
203    },
204}
205
/// Sentinel token owned by the runner; `loop.until` is not allowed to
/// use it.
pub const RESERVED_BLOCKED: &str = "BLOCKED";

/// Largest accepted `loop.max_iterations`.
///
/// A claude subprocess can return very quickly (around 1ms per call in
/// tests), so without a cap a huge `max_iterations` together with the
/// default 1800s wall-clock budget could run millions of iterations
/// and fill the disk with capture output before the deadline triggers.
/// 1000 leaves room for well-tuned loops while rejecting impossible
/// definitions at validate time.
pub const MAX_ITERATIONS_CAP: u32 = 1000;
217
218/// Validation issues. Plural `Vec<ValidationError>` is the public
219/// return shape so the validator can surface every issue in one pass
220/// instead of forcing the user to fix-and-rerun.
221#[derive(Debug, Error)]
222pub enum ValidationError {
223    #[error("pipe `{pipe}` has empty `nodes:` list")]
224    EmptyNodes { pipe: String },
225
226    #[error("pipe `{pipe}` declares duplicate node id `{id}`")]
227    DuplicateNodeId { pipe: String, id: String },
228
229    #[error("node `{node_id}` declares no execution kind (need exactly one of command/prompt/bash/loop)")]
230    NoExecutionKind { node_id: String },
231
232    #[error("node `{node_id}` declares multiple execution kinds: {kinds:?}")]
233    MultipleExecutionKinds {
234        node_id: String,
235        kinds: Vec<&'static str>,
236    },
237
238    #[error("node `{node_id}` depends on unknown node `{missing}`")]
239    UnknownDepends { node_id: String, missing: String },
240
241    #[error("DAG cycle: {ids:?}")]
242    Cycle { ids: Vec<String> },
243
244    #[error("node `{node_id}` is a bash node — `model:` is only allowed on AI nodes (command/prompt/loop)")]
245    ModelOnBash { node_id: String },
246
247    #[error("node `{node_id}` is a bash node — `allowed_tools:` is only allowed on AI nodes")]
248    AllowedToolsOnBash { node_id: String },
249
250    #[error("node `{node_id}` loop.until is empty — must name a sentinel token")]
251    EmptyLoopUntil { node_id: String },
252
253    #[error("node `{node_id}` loop.until uses reserved sentinel `BLOCKED` — pick any other token")]
254    ReservedSentinelInLoopUntil { node_id: String },
255
256    #[error("node `{node_id}` loop.max_iterations must be > 0")]
257    ZeroMaxIterations { node_id: String },
258
259    #[error("node `{node_id}` loop.max_iterations `{max}` exceeds cap `{cap}` — set a smaller value or split the loop into gated chunks")]
260    MaxIterationsTooLarge { node_id: String, max: u32, cap: u32 },
261
262    #[error("node `{node_id}` loop has no body (need exactly one of command/prompt)")]
263    LoopNoBody { node_id: String },
264
265    #[error("node `{node_id}` loop has both command and prompt — pick one")]
266    LoopMultipleBodies { node_id: String },
267
268    #[error("node `{node_id}` references unknown command `{name}` — not found at dist/skills/{name}.md or core/skills/{name}.md")]
269    UnknownCommand { node_id: String, name: String },
270
271    #[error("node `{node_id}` references gate `{gate}` but the platform-current hook script is missing: {expected_path}")]
272    GateMissing {
273        node_id: String,
274        gate: String,
275        expected_path: PathBuf,
276    },
277
278    #[error("node `{node_id}` has invalid gate name `{gate}`: {reason}")]
279    InvalidGateName {
280        node_id: String,
281        gate: String,
282        reason: &'static str,
283    },
284
285    #[error("pipe input `{name}` is missing its `type:` field")]
286    InputMissingType { name: String },
287
288    #[error("pipe input `{name}` contains invalid characters — keys must match `[A-Za-z0-9_]+` so they can be exported as `OMNE_INPUT_<KEY>` env vars")]
289    InvalidInputKey { name: String },
290}
291
292// ── Parsing ────────────────────────────────────────────────────────
293
294/// Parse a markdown document containing pipe frontmatter into a
295/// `Pipe`. Body content after the frontmatter is ignored.
296pub fn parse_str(md: &str) -> Result<Pipe, ParseError> {
297    let yaml =
298        manifest::extract_frontmatter_block(md).map_err(|_| ParseError::MissingFrontmatter)?;
299    let pipe: Pipe = serde_yml::from_str(&yaml)?;
300    Ok(pipe)
301}
302
303/// Read a pipe file, extract frontmatter, and parse into a `Pipe`.
304pub fn load_from_path(path: &Path) -> Result<Pipe, ParseError> {
305    let md = std::fs::read_to_string(path).map_err(|source| ParseError::Io {
306        path: path.to_path_buf(),
307        source,
308    })?;
309    parse_str(&md)
310}
311
312/// High-level entry: read + parse + structural validation + volume
313/// validation. Surfaces all validation errors in one shot.
314pub fn load(md_path: &Path, volume_root: &Path) -> Result<Pipe, LoadError> {
315    let pipe = load_from_path(md_path).map_err(LoadError::Parse)?;
316    let mut errors = Vec::new();
317    if let Err(mut e) = validate_structure(&pipe) {
318        errors.append(&mut e);
319    }
320    if let Err(mut e) = validate_with_volume(&pipe, volume_root) {
321        errors.append(&mut e);
322    }
323    if errors.is_empty() {
324        Ok(pipe)
325    } else {
326        Err(LoadError::Invalid(errors))
327    }
328}
329
330/// Combined error returned by `load`. Either the markdown didn't
331/// parse, or the parsed pipe failed validation (with one or more
332/// issues).
333#[derive(Debug, Error)]
334pub enum LoadError {
335    #[error(transparent)]
336    Parse(ParseError),
337
338    #[error("pipe failed validation with {} issue(s)", .0.len())]
339    Invalid(Vec<ValidationError>),
340}
341
342// ── Validation ─────────────────────────────────────────────────────
343
344/// Structural validation: every check that does not need a volume
345/// root. Pure function over the parsed `Pipe`.
346pub fn validate_structure(pipe: &Pipe) -> Result<(), Vec<ValidationError>> {
347    let mut errors = Vec::new();
348
349    if pipe.nodes.is_empty() {
350        errors.push(ValidationError::EmptyNodes {
351            pipe: pipe.name.clone(),
352        });
353        return Err(errors);
354    }
355
356    for (name, spec) in &pipe.inputs {
357        if spec.kind.is_empty() {
358            errors.push(ValidationError::InputMissingType { name: name.clone() });
359        }
360        if !is_valid_input_key(name) {
361            errors.push(ValidationError::InvalidInputKey { name: name.clone() });
362        }
363    }
364
365    let mut seen: BTreeSet<&str> = BTreeSet::new();
366    for node in &pipe.nodes {
367        if !seen.insert(node.id.as_str()) {
368            errors.push(ValidationError::DuplicateNodeId {
369                pipe: pipe.name.clone(),
370                id: node.id.clone(),
371            });
372        }
373    }
374
375    for node in &pipe.nodes {
376        check_one_of(node, &mut errors);
377        check_scope_rules(node, &mut errors);
378        check_loop_body(node, &mut errors);
379    }
380
381    check_depends_on(pipe, &mut errors);
382
383    if let Err(cycle) = detect_cycle(pipe) {
384        errors.push(cycle);
385    }
386
387    if errors.is_empty() {
388        Ok(())
389    } else {
390        Err(errors)
391    }
392}
393
/// Decides whether `key` may be exported as `OMNE_INPUT_<KEY>`.
///
/// Accepts only non-empty strings made entirely of ASCII letters,
/// ASCII digits, and `_`. That rules out `=`, NUL bytes, whitespace,
/// and every non-ASCII character.
fn is_valid_input_key(key: &str) -> bool {
    if key.is_empty() {
        return false;
    }
    key.chars()
        .all(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '0'..='9' | '_'))
}
403
404fn check_one_of(node: &Node, errors: &mut Vec<ValidationError>) {
405    let kinds = node.populated_kinds();
406    match kinds.len() {
407        0 => errors.push(ValidationError::NoExecutionKind {
408            node_id: node.id.clone(),
409        }),
410        1 => {}
411        _ => errors.push(ValidationError::MultipleExecutionKinds {
412            node_id: node.id.clone(),
413            kinds: kinds.into_iter().map(ExecutionKind::as_str).collect(),
414        }),
415    }
416}
417
418fn check_scope_rules(node: &Node, errors: &mut Vec<ValidationError>) {
419    // Scope rules only apply when the node has an unambiguous kind;
420    // multi-kind nodes already produced an error above.
421    if node.execution_kind() != Some(ExecutionKind::Bash) {
422        return;
423    }
424    if node.model.is_some() {
425        errors.push(ValidationError::ModelOnBash {
426            node_id: node.id.clone(),
427        });
428    }
429    if !node.allowed_tools.is_empty() {
430        errors.push(ValidationError::AllowedToolsOnBash {
431            node_id: node.id.clone(),
432        });
433    }
434}
435
436fn check_loop_body(node: &Node, errors: &mut Vec<ValidationError>) {
437    let Some(body) = &node.loop_ else {
438        return;
439    };
440    let bodies = [body.command.is_some(), body.prompt.is_some()]
441        .iter()
442        .filter(|p| **p)
443        .count();
444    match bodies {
445        0 => errors.push(ValidationError::LoopNoBody {
446            node_id: node.id.clone(),
447        }),
448        1 => {}
449        _ => errors.push(ValidationError::LoopMultipleBodies {
450            node_id: node.id.clone(),
451        }),
452    }
453    if body.until.trim().is_empty() {
454        errors.push(ValidationError::EmptyLoopUntil {
455            node_id: node.id.clone(),
456        });
457    } else if body.until == RESERVED_BLOCKED {
458        errors.push(ValidationError::ReservedSentinelInLoopUntil {
459            node_id: node.id.clone(),
460        });
461    }
462    if body.max_iterations == 0 {
463        errors.push(ValidationError::ZeroMaxIterations {
464            node_id: node.id.clone(),
465        });
466    } else if body.max_iterations > MAX_ITERATIONS_CAP {
467        errors.push(ValidationError::MaxIterationsTooLarge {
468            node_id: node.id.clone(),
469            max: body.max_iterations,
470            cap: MAX_ITERATIONS_CAP,
471        });
472    }
473}
474
475fn check_depends_on(pipe: &Pipe, errors: &mut Vec<ValidationError>) {
476    let known: BTreeSet<&str> = pipe.nodes.iter().map(|n| n.id.as_str()).collect();
477    for node in &pipe.nodes {
478        for dep in &node.depends_on {
479            if !known.contains(dep.as_str()) {
480                errors.push(ValidationError::UnknownDepends {
481                    node_id: node.id.clone(),
482                    missing: dep.clone(),
483                });
484            }
485        }
486    }
487}
488
489/// DFS three-color cycle detection. Returns the participating node
490/// ids in discovery order on the first cycle found.
491fn detect_cycle(pipe: &Pipe) -> Result<(), ValidationError> {
492    #[derive(Clone, Copy, PartialEq)]
493    enum Color {
494        White,
495        Gray,
496        Black,
497    }
498    let edges: HashMap<&str, Vec<&str>> = pipe
499        .nodes
500        .iter()
501        .map(|n| {
502            (
503                n.id.as_str(),
504                n.depends_on.iter().map(String::as_str).collect(),
505            )
506        })
507        .collect();
508    let mut color: HashMap<&str, Color> = pipe
509        .nodes
510        .iter()
511        .map(|n| (n.id.as_str(), Color::White))
512        .collect();
513    let mut stack: Vec<&str> = Vec::new();
514
515    fn visit<'a>(
516        id: &'a str,
517        edges: &HashMap<&'a str, Vec<&'a str>>,
518        color: &mut HashMap<&'a str, Color>,
519        stack: &mut Vec<&'a str>,
520    ) -> Option<Vec<String>> {
521        match color.get(id).copied().unwrap_or(Color::White) {
522            Color::Black => return None,
523            Color::Gray => {
524                let mut cycle: Vec<String> = stack.iter().map(|s| s.to_string()).collect();
525                cycle.push(id.to_string());
526                return Some(cycle);
527            }
528            Color::White => {}
529        }
530        color.insert(id, Color::Gray);
531        stack.push(id);
532        if let Some(deps) = edges.get(id) {
533            for dep in deps {
534                if let Some(c) = visit(dep, edges, color, stack) {
535                    return Some(c);
536                }
537            }
538        }
539        stack.pop();
540        color.insert(id, Color::Black);
541        None
542    }
543
544    for node in &pipe.nodes {
545        if color.get(node.id.as_str()).copied() == Some(Color::White) {
546            if let Some(cycle) = visit(node.id.as_str(), &edges, &mut color, &mut stack) {
547                return Err(ValidationError::Cycle { ids: cycle });
548            }
549        }
550    }
551    Ok(())
552}
553
554/// Volume-aware validation: command resolution against the volume's
555/// `dist/skills/` + `core/skills/`; gate hook existence at the
556/// platform-current variant.
557pub fn validate_with_volume(pipe: &Pipe, volume_root: &Path) -> Result<(), Vec<ValidationError>> {
558    let mut errors = Vec::new();
559    let dist_skills = volume::dist_dir(volume_root).join("skills");
560    let core_skills = volume::core_dir(volume_root).join("skills");
561    let dist_hooks = volume::dist_dir(volume_root).join("hooks");
562
563    for node in &pipe.nodes {
564        // Command resolution — covers both top-level `command:` and
565        // `loop.command:`.
566        if let Some(name) = &node.command {
567            check_command_exists(node, name, &dist_skills, &core_skills, &mut errors);
568        }
569        if let Some(body) = &node.loop_ {
570            if let Some(name) = &body.command {
571                check_command_exists(node, name, &dist_skills, &core_skills, &mut errors);
572            }
573        }
574
575        // Gate resolution — platform-current variant only. The
576        // pre-v2 cross-platform fallback is gone; distro authors
577        // ship both `.sh` and `.ps1` if they want portability.
578        if let Some(gate) = &node.gate {
579            if let Some(reason) = bad_gate_name_reason(gate) {
580                errors.push(ValidationError::InvalidGateName {
581                    node_id: node.id.clone(),
582                    gate: gate.clone(),
583                    reason,
584                });
585                continue;
586            }
587            let expected = dist_hooks.join(format!("{gate}.{}", platform_hook_extension()));
588            if !expected.is_file() {
589                errors.push(ValidationError::GateMissing {
590                    node_id: node.id.clone(),
591                    gate: gate.clone(),
592                    expected_path: expected,
593                });
594            }
595        }
596    }
597
598    if errors.is_empty() {
599        Ok(())
600    } else {
601        Err(errors)
602    }
603}
604
605fn check_command_exists(
606    node: &Node,
607    name: &str,
608    dist_skills: &Path,
609    core_skills: &Path,
610    errors: &mut Vec<ValidationError>,
611) {
612    let dist_match = dist_skills.join(format!("{name}.md"));
613    let core_match = core_skills.join(format!("{name}.md"));
614    if !dist_match.is_file() && !core_match.is_file() {
615        errors.push(ValidationError::UnknownCommand {
616            node_id: node.id.clone(),
617            name: name.to_string(),
618        });
619    }
620}
621
/// Explains why `gate` is not usable as a bare hook file name, or
/// returns `None` when it is fine.
///
/// Rejected, in this priority order: the empty string; any `/` or
/// `\`; any empty dot-separated component (which covers `.`, `..`,
/// leading/trailing dots, and `a..b`); NUL bytes.
///
/// This is defence-in-depth: the executor also canonicalizes and
/// boundary-asserts at dispatch time, but failing at validate time
/// gives the distro author a clear error before any subprocess runs.
fn bad_gate_name_reason(gate: &str) -> Option<&'static str> {
    let has_separator = gate.chars().any(|ch| matches!(ch, '/' | '\\'));
    // "." splits into two empty parts and ".." into three, so both
    // are caught here without a dedicated comparison.
    let has_empty_component = gate.split('.').any(str::is_empty);

    if gate.is_empty() {
        Some("empty gate name")
    } else if has_separator {
        Some("gate name must not contain path separators")
    } else if has_empty_component {
        Some("gate name must not contain `..` or leading/trailing dots")
    } else if gate.contains('\0') {
        Some("gate name must not contain NUL bytes")
    } else {
        None
    }
}
644
/// File extension of gate hook scripts on the platform this binary was
/// compiled for: `ps1` on Windows, `sh` everywhere else.
fn platform_hook_extension() -> &'static str {
    if cfg!(windows) {
        "ps1"
    } else {
        "sh"
    }
}
654
655// ── Warnings ───────────────────────────────────────────────────────
656
/// Advisory findings that do not fail validation. Kept separate from
/// [`ValidationError`] so a warning never blocks `omne validate`: the
/// command prints these as yellow lines and still exits zero.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationWarning {
    /// A bash body mentions `$OMNE_INPUT_<KEY>` (Unix) or
    /// `%OMNE_INPUT_<KEY>%` (Windows) without a `"` right before it.
    /// Plan R9 treats this as an injection-mitigation advisory: when
    /// the reference is unquoted, an input value with shell
    /// metacharacters (`; rm -rf /`) gets word-split by the shell.
    UnquotedOmneInputRef {
        /// Id of the bash node containing the reference.
        node_id: String,
        /// The reference exactly as written, e.g. `$OMNE_INPUT_FOO`.
        reference: String,
    },
}

impl std::fmt::Display for ValidationWarning {
    /// Renders the human-readable advisory line for this warning.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnquotedOmneInputRef { node_id, reference } => f.write_fmt(format_args!(
                "node `{node_id}` bash body references {reference} without surrounding double quotes — an input value containing shell metacharacters will be word-split. Wrap the reference as \"{reference}\" to mitigate injection.",
            )),
        }
    }
}
685
686/// Collect non-fatal advisories from a parsed `Pipe`. Callable
687/// independently of [`validate_structure`] so consumers that only want
688/// warnings (e.g. IDE integrations) do not pay for the full validation
689/// pass.
690pub fn collect_warnings(pipe: &Pipe) -> Vec<ValidationWarning> {
691    let mut out = Vec::new();
692    for node in &pipe.nodes {
693        if let Some(body) = &node.bash {
694            for reference in find_unquoted_omne_input_refs(body) {
695                out.push(ValidationWarning::UnquotedOmneInputRef {
696                    node_id: node.id.clone(),
697                    reference,
698                });
699            }
700        }
701    }
702    out
703}
704
/// Finds `$OMNE_INPUT_<KEY>` and `%OMNE_INPUT_<KEY>%` references in
/// `body` that are not directly preceded by a `"`, returning each
/// match as written.
///
/// The check is intentionally coarse (plan R9): only the single byte
/// before the sigil is inspected, so one `"` immediately in front
/// suppresses the warning regardless of the wider quoting context.
///
/// Details visible in the scan:
/// * `<KEY>` is the longest run of `[A-Z0-9_]` after the prefix; an
///   empty key is never reported.
/// * The `%…%` form must have its closing `%` right after the key,
///   otherwise the occurrence is skipped.
/// * All `$` matches are returned first, then all `%` matches.
/// * The braced form `${OMNE_INPUT_<KEY>}` does not match the `$`
///   prefix and is therefore not reported.
fn find_unquoted_omne_input_refs(body: &str) -> Vec<String> {
    const UNIX_PREFIX: &str = "$OMNE_INPUT_";
    const WIN_PREFIX: &str = "%OMNE_INPUT_";

    let raw = body.as_bytes();
    let forms: [(&str, Option<u8>); 2] = [(UNIX_PREFIX, None), (WIN_PREFIX, Some(b'%'))];
    let mut found = Vec::new();

    for &(prefix, terminator) in forms.iter() {
        let mut cursor = 0;
        while let Some(offset) = body[cursor..].find(prefix) {
            let start = cursor + offset;
            let name_start = start + prefix.len();
            let name_len = raw[name_start..]
                .iter()
                .take_while(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || **b == b'_')
                .count();
            let name_end = name_start + name_len;

            let match_end = match terminator {
                None => name_end,
                // Windows form: the closing `%` belongs to the match.
                Some(close) if raw.get(name_end) == Some(&close) => name_end + 1,
                Some(_) => {
                    // No closing `%`, so this is not a Windows env-var
                    // reference; resume scanning after it.
                    cursor = name_end.max(start + 1);
                    continue;
                }
            };

            let quoted = start > 0 && raw[start - 1] == b'"';
            if !quoted && name_len > 0 {
                found.push(body[start..match_end].to_string());
            }
            cursor = match_end.max(start + 1);
        }
    }
    found
}
754
755// ── Tests ──────────────────────────────────────────────────────────
756
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `yaml` in `---` fences plus a dummy markdown body and
    /// parses the result.
    fn yaml_to_pipe(yaml: &str) -> Result<Pipe, ParseError> {
        parse_str(&format!("---\n{yaml}---\n\n# pipe body\n"))
    }

    /// Parses `yaml` and returns the structural validation errors.
    /// Panics when parsing fails or validation unexpectedly succeeds.
    fn structure_errors(yaml: &str) -> Vec<ValidationError> {
        let pipe = yaml_to_pipe(yaml).unwrap();
        validate_structure(&pipe).unwrap_err()
    }

    #[test]
    fn parses_minimal_pipe() {
        let pipe = yaml_to_pipe(
            r#"name: feature
version: 1
nodes:
  - id: only
    bash: echo hi
"#,
        )
        .unwrap();
        assert_eq!(pipe.name, "feature");
        assert_eq!(pipe.nodes.len(), 1);
        assert_eq!(pipe.nodes[0].execution_kind(), Some(ExecutionKind::Bash));
    }

    #[test]
    fn structural_validator_accepts_minimal_bash_pipe() {
        let yaml = r#"name: feature
version: 1
nodes:
  - id: only
    bash: echo hi
"#;
        let pipe = yaml_to_pipe(yaml).unwrap();
        validate_structure(&pipe).unwrap();
    }

    #[test]
    fn input_key_with_equals_sign_rejected() {
        let errs = structure_errors(
            r#"name: bad_input_key
version: 1
inputs:
  "foo=bar":
    type: string
nodes:
  - id: only
    bash: echo hi
"#,
        );
        let flagged = errs
            .iter()
            .any(|e| matches!(e, ValidationError::InvalidInputKey { name } if name == "foo=bar"));
        assert!(
            flagged,
            "expected InvalidInputKey for `foo=bar`, got {errs:?}"
        );
    }

    #[test]
    fn input_key_with_space_rejected() {
        let errs = structure_errors(
            r#"name: spacey_key
version: 1
inputs:
  "has space":
    type: string
nodes:
  - id: only
    bash: echo hi
"#,
        );
        let flagged = errs
            .iter()
            .any(|e| matches!(e, ValidationError::InvalidInputKey { .. }));
        assert!(flagged, "spaces rejected");
    }

    #[test]
    fn input_key_allows_underscores_and_digits() {
        let yaml = r#"name: ok_keys
version: 1
inputs:
  task_1:
    type: string
  OMNE_VERSION:
    type: string
nodes:
  - id: only
    bash: echo hi
"#;
        let pipe = yaml_to_pipe(yaml).unwrap();
        validate_structure(&pipe).expect("alphanumeric + underscore keys valid");
    }

    #[test]
    fn cycle_detected_in_two_node_loop() {
        let errs = structure_errors(
            r#"name: cyclic
version: 1
nodes:
  - id: a
    bash: echo a
    depends_on: [b]
  - id: b
    bash: echo b
    depends_on: [a]
"#,
        );
        let has_cycle = errs
            .iter()
            .any(|e| matches!(e, ValidationError::Cycle { .. }));
        assert!(has_cycle);
    }

    #[test]
    fn max_iterations_above_cap_rejected() {
        let errs = structure_errors(
            r#"name: unbounded_loop
version: 1
nodes:
  - id: l1
    loop:
      prompt: keep going
      until: DONE
      max_iterations: 999999999
"#,
        );
        let flagged = errs.iter().any(|e| {
            matches!(
                e,
                ValidationError::MaxIterationsTooLarge { cap, .. } if *cap == MAX_ITERATIONS_CAP
            )
        });
        assert!(flagged, "expected MaxIterationsTooLarge, got {errs:?}");
    }

    #[test]
    fn max_iterations_at_cap_allowed() {
        let yaml = format!(
            "name: at_cap
version: 1
nodes:
  - id: l1
    loop:
      prompt: keep going
      until: DONE
      max_iterations: {MAX_ITERATIONS_CAP}
"
        );
        let pipe = yaml_to_pipe(&yaml).unwrap();
        validate_structure(&pipe).expect("value exactly at cap is allowed");
    }

    #[test]
    fn unquoted_dollar_omne_input_emits_warning() {
        let pipe = yaml_to_pipe(
            r#"name: unquoted
version: 1
inputs:
  task:
    type: string
nodes:
  - id: only
    bash: "echo value=$OMNE_INPUT_TASK"
"#,
        )
        .unwrap();
        let warnings = collect_warnings(&pipe);
        assert_eq!(warnings.len(), 1, "one warning expected: {warnings:?}");
        let ValidationWarning::UnquotedOmneInputRef { node_id, reference } = &warnings[0];
        assert_eq!(node_id, "only");
        assert_eq!(reference, "$OMNE_INPUT_TASK");
    }

    #[test]
    fn quoted_dollar_omne_input_no_warning() {
        let pipe = yaml_to_pipe(
            r#"name: quoted
version: 1
inputs:
  task:
    type: string
nodes:
  - id: only
    bash: "echo \"$OMNE_INPUT_TASK\""
"#,
        )
        .unwrap();
        assert!(collect_warnings(&pipe).is_empty());
    }

    #[test]
    fn percent_omne_input_unquoted_emits_warning() {
        let pipe = yaml_to_pipe(
            r#"name: win_unquoted
version: 1
inputs:
  task:
    type: string
nodes:
  - id: only
    bash: "echo %OMNE_INPUT_TASK%"
"#,
        )
        .unwrap();
        let warnings = collect_warnings(&pipe);
        assert_eq!(warnings.len(), 1);
        let ValidationWarning::UnquotedOmneInputRef { reference, .. } = &warnings[0];
        assert_eq!(reference, "%OMNE_INPUT_TASK%");
    }

    #[test]
    fn bad_gate_name_reason_rejects_traversal_and_separators() {
        let rejected = [
            "",
            "../evil",
            "..\\evil",
            "foo/bar",
            "foo\\bar",
            "..",
            ".",
            ".hidden",
            "trailing.",
            "with\0nul",
        ];
        for name in rejected.iter() {
            assert!(bad_gate_name_reason(name).is_some());
        }

        let accepted = ["pre_commit", "research_ok", "foo.bar"];
        for name in accepted.iter() {
            assert!(bad_gate_name_reason(name).is_none());
        }
    }
}