Skip to main content

lowfat_core/
lf.rs

1//! lf — the lowfat filter DSL parser.
2//!
3//! Parses `.lf` files into a [`RuleSet`]. Execution lives elsewhere
4//! (Task 2+). The DSL is line-oriented and indentation-sensitive; we
5//! avoid INDENT/DEDENT tokens by working directly on `(indent, text)`
6//! pairs, which keeps the parser short and the error messages tied to
7//! source line numbers.
8
9use crate::level::Level;
10use anyhow::{Context, Result, anyhow, bail};
11use regex::Regex;
12
13// ──────────────────────────────────────────────────────────────────
14// AST
15// ──────────────────────────────────────────────────────────────────
16
/// The parsed contents of one `.lf` file: macro definitions and rules.
///
/// Both vectors are filled by the top-level parser in source order.
#[derive(Debug, Default)]
pub struct RuleSet {
    /// Every `define` block, in the order it appears in the file.
    pub defines: Vec<Define>,
    /// Every rule, in source order; `select` returns the first match.
    pub rules: Vec<Rule>,
}
22
/// A `define` block: a named op sequence that other bodies reference
/// through [`Op::MacroCall`].
#[derive(Debug, Clone)]
pub struct Define {
    /// Macro name as written after the `define` keyword.
    pub name: String,
    /// Parameter names taken from the define header.
    pub params: Vec<String>,
    /// Ops of the indented body; the parser rejects an empty body.
    pub ops: Vec<Op>,
}
29
/// One rule: a `<sub>[, <level>]:` selector header plus the ops it runs.
#[derive(Debug, Clone)]
pub struct Rule {
    /// Subcommand half of the selector.
    pub sub: SubPattern,
    /// Level half of the selector (`*` when the header omits it).
    pub level: LevelPattern,
    /// Ops from the inline chain and/or indented body; never empty.
    pub ops: Vec<Op>,
    /// 1-based source line of the rule header.
    pub line_no: usize,
}
37
/// Subcommand half of a rule selector.
#[derive(Debug, Clone)]
pub enum SubPattern {
    /// A bare `*`: accepts every subcommand.
    Star,
    /// `a|b|c`: accepts a subcommand matching any alternative. Each
    /// alternative is a glob in which `*` is the only metacharacter.
    Alt(Vec<String>),
}
43
/// Level half of a rule selector.
#[derive(Debug, Clone)]
pub enum LevelPattern {
    /// `*`, or no level given in the header: accepts every level.
    Star,
    /// Accepts only the named level (compared with `==`).
    Specific(Level),
}
49
/// One parsed operation in a rule, `define`, or cascade-arm body.
///
/// Variants mirror the DSL's op keywords. This module only constructs
/// them; what each op does at run time is defined by the executor.
#[derive(Debug, Clone)]
pub enum Op {
    /// `keep <regex literal>`.
    Keep(PatternRegex),
    /// `drop <regex literal>`.
    Drop(PatternRegex),
    /// `head <arg>`.
    Head(HeadArg),
    /// `tail <arg>`.
    Tail(HeadArg),
    /// `or <string literal>` (also spelled `else <string literal>`).
    Or(String),
    /// `or-shell: <command>` (also spelled `else-shell:`); holds the
    /// rest of the line verbatim.
    OrShell(String),
    /// `shell:` body, either the rest of the line or a `|` block.
    Shell(String),
    /// `python:` body, either the rest of the line or a `|` block.
    Python(String),
    /// `raw` (legacy alias: `passthrough`).
    Raw,
    /// Invocation of a name introduced by a `define` line.
    MacroCall {
        name: String,
        args: Vec<MacroArg>,
    },
    /// `split <regex literal>` followed by sibling `pre:` / `post:`
    /// blocks; at least one of the two is non-empty.
    Split {
        delimiter: PatternRegex,
        pre: Vec<Op>,
        post: Vec<Op>,
    },
    /// `if` / `elif` / `else` cascade — first matching branch runs.
    Cascade(Vec<Branch>),
}
73
/// One arm of an [`Op::Cascade`]. `guard: None` is the `else` arm.
#[derive(Debug, Clone)]
pub struct Branch {
    /// Condition for this arm; `None` marks the unconditional `else`.
    pub guard: Option<Guard>,
    /// Ops of the arm; the parser rejects an arm with no ops.
    pub ops: Vec<Op>,
}
80
/// A guard is an AND of atoms — `if level ultra and --stat:`.
#[derive(Debug, Clone)]
pub struct Guard {
    /// The conjuncts, in the order written; never empty when produced
    /// by the parser.
    pub atoms: Vec<Atom>,
}
86
/// One closed-vocabulary condition inside a [`Guard`].
#[derive(Debug, Clone)]
pub enum Atom {
    /// `exit ok` / `exit failed`.
    Exit(ExitMatch),
    /// `level <name>`.
    Level(Level),
    /// Any atom starting with `-` (e.g. `--stat`, `-x`), stored verbatim.
    Flag(String),
}
94
/// Value of an `exit` guard atom.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitMatch {
    /// Written `exit ok`.
    Ok,
    /// Written `exit failed`.
    Failed,
}
100
/// A regex literal from the DSL, kept alongside its compiled form.
#[derive(Debug, Clone)]
pub struct PatternRegex {
    /// Pattern text — presumably as written in the `.lf` file; confirm
    /// against the regex-literal parser.
    pub source: String,
    /// The compiled `regex::Regex` for the pattern.
    pub compiled: Regex,
}
106
/// Argument of a `head` / `tail` op.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadArg {
    /// An explicit numeric argument.
    Number(usize),
    /// The non-numeric form; NOTE(review): exact spelling and meaning are
    /// defined by the head-argument parser and executor — confirm there.
    Auto,
}
112
/// One argument passed in an [`Op::MacroCall`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MacroArg {
    /// A numeric argument.
    Number(usize),
    /// A string argument.
    String(String),
}
118
119// ──────────────────────────────────────────────────────────────────
120// Selection
121// ──────────────────────────────────────────────────────────────────
122
123impl RuleSet {
124    /// First-match-wins. Returns `None` when no rule matches.
125    pub fn select(&self, sub: &str, level: Level) -> Option<&Rule> {
126        self.rules.iter().find(|r| r.matches(sub, level))
127    }
128
129    pub fn find_define(&self, name: &str) -> Option<&Define> {
130        self.defines.iter().find(|d| d.name == name)
131    }
132}
133
134impl Rule {
135    pub fn matches(&self, sub: &str, level: Level) -> bool {
136        let sub_ok = match &self.sub {
137            SubPattern::Star => true,
138            SubPattern::Alt(alts) => alts.iter().any(|a| glob_match(a, sub)),
139        };
140        let lvl_ok = match &self.level {
141            LevelPattern::Star => true,
142            LevelPattern::Specific(l) => *l == level,
143        };
144        sub_ok && lvl_ok
145    }
146}
147
148// ──────────────────────────────────────────────────────────────────
149// Line preprocessing
150// ──────────────────────────────────────────────────────────────────
151
/// One physical line of the input, pre-digested for the parser.
#[derive(Debug, Clone)]
struct Line {
    /// Number of leading-whitespace bytes in `raw`.
    indent: usize,
    text: String, // trimmed of leading/trailing whitespace; "" if blank
    raw: String,  // original line, no trailing newline
    /// 1-based line number in the input.
    line_no: usize,
    /// Blank or starts with `#` at top-level. Meta lines are skipped by
    /// the structural parser but preserved as-is in block bodies.
    is_meta: bool,
}
162
163fn split_lines(input: &str) -> Vec<Line> {
164    input
165        .split('\n')
166        .enumerate()
167        .map(|(i, raw_line)| {
168            let raw = raw_line.trim_end_matches('\r').to_string();
169            let stripped = raw.trim_start();
170            let indent = raw.len() - stripped.len();
171            let text = stripped.trim_end().to_string();
172            let is_meta = text.is_empty() || text.starts_with('#');
173            Line {
174                indent,
175                text,
176                raw,
177                line_no: i + 1,
178                is_meta,
179            }
180        })
181        .collect()
182}
183
184// ──────────────────────────────────────────────────────────────────
185// Parser
186// ──────────────────────────────────────────────────────────────────
187
/// Reserved words of the op grammar, spelled the way they appear as the
/// first whitespace-delimited word of an op line (block-style ops keep
/// their trailing `:`).
///
/// NOTE(review): not referenced by the parser code visible next to this
/// table; presumably consulted by the inline macro-argument scanner to
/// find where the next op starts — confirm against its use site.
const OP_KEYWORDS: &[&str] = &[
    "keep",
    "drop",
    "head",
    "tail",
    "or",
    "or-shell:",
    "else",
    "else-shell:",
    "shell:",
    "python:",
    "split",
    "raw",
    "passthrough",
    "if",
    "elif",
    "match",
];
206
207pub fn parse(input: &str) -> Result<RuleSet> {
208    let lines = split_lines(input);
209    let macro_names = collect_macro_names(&lines);
210    let mut p = Parser {
211        lines: &lines,
212        pos: 0,
213        macro_names,
214    };
215    p.parse_ruleset()
216}
217
218fn collect_macro_names(lines: &[Line]) -> Vec<String> {
219    let mut names = Vec::new();
220    for l in lines {
221        if l.is_meta {
222            continue;
223        }
224        if let Some(rest) = l.text.strip_prefix("define ") {
225            let end = rest
226                .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
227                .unwrap_or(rest.len());
228            let name = rest[..end].trim().to_string();
229            if !name.is_empty() {
230                names.push(name);
231            }
232        }
233    }
234    names
235}
236
/// Cursor over the pre-split lines of one input, plus the macro names
/// known for that input.
struct Parser<'a> {
    /// All lines of the input, including meta lines.
    lines: &'a [Line],
    /// Index into `lines` of the next line to be consumed.
    pos: usize,
    /// Names introduced by `define` lines; consulted by `is_macro`.
    macro_names: Vec<String>,
}
242
243impl<'a> Parser<'a> {
244    /// Advance past meta lines and return the next structural line without
245    /// consuming it.
246    fn peek_significant(&mut self) -> Option<&'a Line> {
247        while let Some(l) = self.lines.get(self.pos) {
248            if l.is_meta {
249                self.pos += 1;
250            } else {
251                return Some(l);
252            }
253        }
254        None
255    }
256
257    fn advance(&mut self) -> Option<&'a Line> {
258        let l = self.lines.get(self.pos);
259        if l.is_some() {
260            self.pos += 1;
261        }
262        l
263    }
264
265    fn is_macro(&self, name: &str) -> bool {
266        self.macro_names.iter().any(|n| n == name)
267    }
268
269    // ── top-level ────────────────────────────────────────────────
270
271    fn parse_ruleset(&mut self) -> Result<RuleSet> {
272        let mut rs = RuleSet::default();
273        while let Some(line) = self.peek_significant() {
274            if line.indent != 0 {
275                bail!("line {}: unexpected indent at top level", line.line_no);
276            }
277            if line.text.starts_with("define ") {
278                let d = self.parse_define()?;
279                rs.defines.push(d);
280            } else {
281                let r = self.parse_rule()?;
282                rs.rules.push(r);
283            }
284        }
285        Ok(rs)
286    }
287
288    fn parse_define(&mut self) -> Result<Define> {
289        let header = self.advance().unwrap();
290        let line_no = header.line_no;
291        let rest = header
292            .text
293            .strip_prefix("define ")
294            .ok_or_else(|| anyhow!("line {}: expected `define`", line_no))?;
295        let (name, params, after_paren) =
296            parse_define_header(rest).with_context(|| format!("line {line_no}"))?;
297        if !after_paren.starts_with(':') {
298            bail!(
299                "line {}: expected `:` after define header, got `{}`",
300                line_no,
301                after_paren
302            );
303        }
304        let trailing = after_paren[1..].trim();
305        if !trailing.is_empty() {
306            bail!(
307                "line {}: one-line `define` body not supported (use indented body)",
308                line_no
309            );
310        }
311        let ops = self.parse_indented_ops(header.indent)?;
312        if ops.is_empty() {
313            bail!("line {}: `define {}` has empty body", line_no, name);
314        }
315        Ok(Define { name, params, ops })
316    }
317
318    fn parse_rule(&mut self) -> Result<Rule> {
319        let header = self.advance().unwrap();
320        let line_no = header.line_no;
321        let parent_indent = header.indent;
322        let colon_pos = header
323            .text
324            .find(':')
325            .ok_or_else(|| anyhow!("line {}: missing `:` in rule header", line_no))?;
326        let selector = &header.text[..colon_pos];
327        let after = &header.text[colon_pos + 1..];
328        let (sub, level) =
329            parse_selector(selector).with_context(|| format!("line {line_no}"))?;
330
331        let mut ops = Vec::new();
332        let inline = after.trim();
333        if !inline.is_empty() {
334            // Inline ops after `:` are always a pipeline (v1 form).
335            ops.extend(self.parse_inline_ops(inline, line_no)?);
336            ops.extend(self.parse_indented_ops(parent_indent)?);
337        } else {
338            // An indented body may be a pipeline or an if/elif/else cascade.
339            ops = self.parse_body(parent_indent)?;
340        }
341
342        if ops.is_empty() {
343            bail!("line {}: rule has no ops", line_no);
344        }
345        Ok(Rule {
346            sub,
347            level,
348            ops,
349            line_no,
350        })
351    }
352
353    // ── op chains ────────────────────────────────────────────────
354
355    /// Parse op-lines strictly deeper-indented than `parent_indent`.
356    /// Stops at first significant line whose indent <= parent_indent.
357    fn parse_indented_ops(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
358        let mut ops = Vec::new();
359        loop {
360            let Some(line) = self.peek_significant() else {
361                break;
362            };
363            if line.indent <= parent_indent {
364                break;
365            }
366            let op = self.parse_op_line()?;
367            ops.push(op);
368        }
369        Ok(ops)
370    }
371
372    /// An indented rule body: a plain pipeline, or a cascade when the
373    /// first significant line opens with `if` (full cascade) or `match`
374    /// (single-dimension sugar). Both desugar to `Op::Cascade`.
375    fn parse_body(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
376        if let Some(line) = self.peek_significant() {
377            if line.indent > parent_indent {
378                if is_body_opener(&line.text, "if") {
379                    let branches = self.parse_cascade(parent_indent)?;
380                    return Ok(vec![Op::Cascade(branches)]);
381                }
382                if is_body_opener(&line.text, "match") {
383                    let branches = self.parse_match(parent_indent)?;
384                    return Ok(vec![Op::Cascade(branches)]);
385                }
386            }
387        }
388        self.parse_indented_ops(parent_indent)
389    }
390
    /// Parse `if` / `elif`* / `else`? arms — all share one indent.
    ///
    /// Reads arms while the next significant line is deeper than
    /// `parent_indent` and sits at the indent of the first arm. Stops
    /// (without consuming the line) at a dedent, at a line with a
    /// different indent, at a line that does not start with a cascade
    /// keyword, or right after an `else` arm.
    ///
    /// Errors when a second `if` appears inside the open cascade, when
    /// `elif` / `else` comes before any `if`, or when an arm fails to
    /// parse.
    fn parse_cascade(&mut self, parent_indent: usize) -> Result<Vec<Branch>> {
        let mut branches: Vec<Branch> = Vec::new();
        // Indent of the first arm; fixed once seen.
        let mut arm_indent: Option<usize> = None;
        loop {
            let Some(line) = self.peek_significant() else {
                break;
            };
            if line.indent <= parent_indent {
                break;
            }
            match arm_indent {
                None => arm_indent = Some(line.indent),
                Some(ai) if line.indent != ai => break,
                Some(_) => {}
            }
            let line_no = line.line_no;
            // `else` is glued to its colon (`else:`), so take the leading
            // alphabetic run rather than the whitespace-delimited word.
            let kw: String = line
                .text
                .chars()
                .take_while(|c| c.is_ascii_alphabetic())
                .collect();
            // Arm order matters: the guarded arms accept a keyword in a
            // valid position, the unguarded repeats below reject the same
            // keyword everywhere else.
            match kw.as_str() {
                "if" if branches.is_empty() => {}
                "elif" | "else" if !branches.is_empty() => {}
                "if" => bail!("line {}: unexpected `if` — cascade already open", line_no),
                "elif" | "else" => {
                    bail!("line {}: `{}` without a leading `if`", line_no, kw)
                }
                // Not a cascade keyword: the cascade ends here.
                _ => break,
            }
            let branch = self.parse_branch(&kw)?;
            let is_else = branch.guard.is_none();
            branches.push(branch);
            if is_else {
                break; // `else` is always the last arm
            }
        }
        Ok(branches)
    }
433
434    /// Parse one cascade arm: `<if|elif|else> <guard>:` then inline or
435    /// indented ops.
436    fn parse_branch(&mut self, head: &str) -> Result<Branch> {
437        let line = self.advance().unwrap();
438        let line_no = line.line_no;
439        let indent = line.indent;
440        let rest = line.text[head.len()..].trim_start();
441        let colon = rest
442            .find(':')
443            .ok_or_else(|| anyhow!("line {}: missing `:` in `{}` arm", line_no, head))?;
444        let guard_str = rest[..colon].trim();
445        let after = rest[colon + 1..].trim();
446        let guard = if head == "else" {
447            if !guard_str.is_empty() {
448                bail!("line {}: `else` takes no guard", line_no);
449            }
450            None
451        } else {
452            Some(parse_guard(guard_str, line_no)?)
453        };
454        let ops = self.parse_arm_body(after, indent, line_no)?;
455        if ops.is_empty() {
456            bail!("line {}: `{}` arm has no ops", line_no, head);
457        }
458        Ok(Branch { guard, ops })
459    }
460
461    /// Body of one arm — used by `if`/`elif`/`else` and by `match` arms.
462    /// Inline ops after `:` force a pipeline body; otherwise the body may
463    /// be a nested cascade (`if` or `match`) or a plain indented pipeline.
464    fn parse_arm_body(
465        &mut self,
466        inline: &str,
467        indent: usize,
468        line_no: usize,
469    ) -> Result<Vec<Op>> {
470        let mut ops = Vec::new();
471        if !inline.is_empty() {
472            ops.extend(self.parse_inline_ops(inline, line_no)?);
473        }
474        if ops.is_empty() {
475            if let Some(child) = self.peek_significant() {
476                if child.indent > indent {
477                    if is_body_opener(&child.text, "if") {
478                        return Ok(vec![Op::Cascade(self.parse_cascade(indent)?)]);
479                    }
480                    if is_body_opener(&child.text, "match") {
481                        return Ok(vec![Op::Cascade(self.parse_match(indent)?)]);
482                    }
483                }
484            }
485        }
486        ops.extend(self.parse_indented_ops(indent)?);
487        Ok(ops)
488    }
489
490    /// Sugar for a single-dimension cascade.
491    ///   match level:
492    ///       ultra: head 30
493    ///       lite:  head 200
494    ///       else:  head 80
495    /// desugars to `if level ultra: … elif level lite: … else: …`.
496    /// The dimension is `level` or `exit`; flags require the full `if` form.
497    fn parse_match(&mut self, parent_indent: usize) -> Result<Vec<Branch>> {
498        let header = self.advance().unwrap();
499        let line_no = header.line_no;
500        // Accept `match`, `match:`, or `match <dim>:` uniformly. The
501        // is_body_opener gate above guarantees text starts with "match".
502        let rest = header
503            .text
504            .strip_prefix("match")
505            .ok_or_else(|| anyhow!("line {}: expected `match`", line_no))?
506            .trim_start();
507        let colon = rest
508            .find(':')
509            .ok_or_else(|| anyhow!("line {}: missing `:` after match dimension", line_no))?;
510        let dim_str = rest[..colon].trim();
511        let trailing = rest[colon + 1..].trim();
512        if !trailing.is_empty() {
513            bail!(
514                "line {}: `match` header doesn't take inline ops (got `{}`)",
515                line_no,
516                trailing
517            );
518        }
519        let dim = parse_match_dim(dim_str, line_no)?;
520
521        let mut branches: Vec<Branch> = Vec::new();
522        let mut arm_indent: Option<usize> = None;
523        loop {
524            let Some(line) = self.peek_significant() else {
525                break;
526            };
527            if line.indent <= parent_indent {
528                break;
529            }
530            match arm_indent {
531                None => arm_indent = Some(line.indent),
532                Some(ai) if line.indent != ai => break,
533                Some(_) => {}
534            }
535            let branch = self.parse_match_arm(dim)?;
536            let is_else = branch.guard.is_none();
537            branches.push(branch);
538            if is_else {
539                break;
540            }
541        }
542
543        if branches.is_empty() {
544            bail!("line {}: `match` has no arms", line_no);
545        }
546        Ok(branches)
547    }
548
549    /// One `match` arm: `<value>: <ops>` or `else: <ops>`. Builds the
550    /// guard atom by interpreting `<value>` against the captured `dim`.
551    fn parse_match_arm(&mut self, dim: MatchDim) -> Result<Branch> {
552        let line = self.advance().unwrap();
553        let line_no = line.line_no;
554        let indent = line.indent;
555        let colon = line
556            .text
557            .find(':')
558            .ok_or_else(|| anyhow!("line {}: missing `:` in match arm", line_no))?;
559        let value = line.text[..colon].trim();
560        let after = line.text[colon + 1..].trim();
561
562        let guard = if value == "else" {
563            None
564        } else {
565            let atom = build_match_atom(dim, value, line_no)?;
566            Some(Guard { atoms: vec![atom] })
567        };
568
569        let ops = self.parse_arm_body(after, indent, line_no)?;
570        if ops.is_empty() {
571            bail!("line {}: match arm `{}` has no ops", line_no, value);
572        }
573        Ok(Branch { guard, ops })
574    }
575
    /// Parse a single op from the current significant line, advancing
    /// past any block bodies and sub-blocks the op consumes.
    ///
    /// Dispatches on the first whitespace-delimited word of the line.
    /// Built-in keywords are checked before macro names, so a macro that
    /// shares a keyword's spelling is never reached here. The caller
    /// guarantees a line is available at the cursor.
    ///
    /// Errors on an unknown first word or when the op's argument fails
    /// to parse.
    fn parse_op_line(&mut self) -> Result<Op> {
        let line = self.advance().unwrap();
        let line_no = line.line_no;
        let indent = line.indent;
        let text = line.text.as_str();
        // `text` is already trimmed, so `head` is a prefix of it and
        // `text[head.len()..]` is everything after the keyword.
        let (head, _) = split_first_word(text);

        match head {
            "keep" => {
                let rest = text[head.len()..].trim_start();
                Ok(Op::Keep(parse_regex_literal(rest, line_no)?))
            }
            "drop" => {
                let rest = text[head.len()..].trim_start();
                Ok(Op::Drop(parse_regex_literal(rest, line_no)?))
            }
            "head" => {
                let rest = text[head.len()..].trim();
                Ok(Op::Head(parse_head_arg(rest, line_no)?))
            }
            "tail" => {
                let rest = text[head.len()..].trim();
                Ok(Op::Tail(parse_head_arg(rest, line_no)?))
            }
            // As an op, `else` followed by a string literal is an alias
            // of `or`; the cascade `else:` arm never reaches this match
            // because its first word is `else:`.
            "or" | "else" => {
                let rest = text[head.len()..].trim_start();
                Ok(Op::Or(parse_string_literal(rest, line_no)?))
            }
            "or-shell:" | "else-shell:" => {
                let body = text[head.len()..].trim_start().to_string();
                if body.is_empty() {
                    bail!("line {}: `{}` requires a command", line_no, head);
                }
                Ok(Op::OrShell(body))
            }
            // `raw` is canonical; `passthrough` is a v0.5.0 legacy alias.
            "raw" | "passthrough" => Ok(Op::Raw),
            // `shell:` / `python:` accept an inline command or a `|`
            // block; the block form consumes the following body lines.
            "shell:" => Ok(Op::Shell(self.parse_block_body(
                text,
                head,
                indent,
                line_no,
            )?)),
            "python:" => Ok(Op::Python(self.parse_block_body(
                text,
                head,
                indent,
                line_no,
            )?)),
            "split" => {
                let rest = text[head.len()..].trim_start();
                let delim = parse_regex_literal(rest, line_no)?;
                // `pre:` / `post:` are siblings at the `split` line's
                // own indent.
                let (pre, post) = self.parse_split_branches(indent)?;
                if pre.is_empty() && post.is_empty() {
                    bail!(
                        "line {}: `split` needs at least one `pre:` or `post:` block",
                        line_no
                    );
                }
                Ok(Op::Split {
                    delimiter: delim,
                    pre,
                    post,
                })
            }
            name if self.is_macro(name) => {
                let rest = text[head.len()..].trim();
                let args = parse_macro_args(rest, line_no)?;
                Ok(Op::MacroCall {
                    name: name.to_string(),
                    args,
                })
            }
            _ => bail!("line {}: unknown op `{}`", line_no, head),
        }
    }
654
655    /// Parse a `shell:` or `python:` body. Two forms:
656    ///   inline: `shell: <command on rest of line>`
657    ///   block:  `shell: |` then indented body lines until dedent.
658    /// Body lines preserve internal blank lines and relative indentation.
659    fn parse_block_body(
660        &mut self,
661        line_text: &str,
662        head: &str,
663        parent_indent: usize,
664        line_no: usize,
665    ) -> Result<String> {
666        let after = line_text[head.len()..].trim_start();
667        if after != "|" {
668            if after.is_empty() {
669                bail!(
670                    "line {}: empty `{}` body (use `| <newline>` for block form)",
671                    line_no,
672                    head
673                );
674            }
675            return Ok(after.to_string());
676        }
677
678        // Block form: scan lines until indent drops back to parent_indent.
679        // Include blank lines that fall between body lines.
680        let mut collected: Vec<&'a Line> = Vec::new();
681        let mut base: Option<usize> = None;
682        while let Some(l) = self.lines.get(self.pos) {
683            if l.text.is_empty() {
684                collected.push(l);
685                self.pos += 1;
686                continue;
687            }
688            if l.indent <= parent_indent {
689                break;
690            }
691            if base.is_none() {
692                base = Some(l.indent);
693            }
694            collected.push(l);
695            self.pos += 1;
696        }
697        // Trim trailing blank lines (they belong to the gap, not the body).
698        while collected.last().map_or(false, |l| l.text.is_empty()) {
699            collected.pop();
700        }
701        if collected.is_empty() {
702            bail!("line {}: `{}` block is empty", line_no, head);
703        }
704        let base = base.unwrap_or(parent_indent + 4);
705        let dedented: Vec<String> = collected
706            .iter()
707            .map(|l| {
708                if l.text.is_empty() {
709                    String::new()
710                } else if l.raw.len() >= base {
711                    l.raw[base..].to_string()
712                } else {
713                    l.raw.trim_start().to_string()
714                }
715            })
716            .collect();
717        Ok(dedented.join("\n"))
718    }
719
720    /// After a `split /regex/`, consume any sibling `pre:` / `post:`
721    /// blocks at the same indent.
722    fn parse_split_branches(&mut self, parent_indent: usize) -> Result<(Vec<Op>, Vec<Op>)> {
723        let mut pre = Vec::new();
724        let mut post = Vec::new();
725        loop {
726            let Some(line) = self.peek_significant() else {
727                break;
728            };
729            if line.indent != parent_indent {
730                break;
731            }
732            match line.text.as_str() {
733                "pre:" => {
734                    self.advance();
735                    pre = self.parse_indented_ops(parent_indent)?;
736                }
737                "post:" => {
738                    self.advance();
739                    post = self.parse_indented_ops(parent_indent)?;
740                }
741                _ => break,
742            }
743        }
744        Ok((pre, post))
745    }
746
747    /// Parse multiple ops appearing on the same line (after a rule
748    /// header's `:`). `shell:` / `python:` / `else-shell:` greedily
749    /// consume rest of line; other ops yield to the next op keyword
750    /// or macro name.
751    fn parse_inline_ops(&self, text: &str, line_no: usize) -> Result<Vec<Op>> {
752        let mut ops = Vec::new();
753        let mut remaining = text.trim();
754        while !remaining.is_empty() {
755            let (head, _) = split_first_word(remaining);
756            match head {
757                "shell:" => {
758                    let body = remaining[head.len()..].trim_start().to_string();
759                    if body.is_empty() {
760                        bail!("line {}: inline `shell:` needs a command", line_no);
761                    }
762                    ops.push(Op::Shell(body));
763                    remaining = "";
764                }
765                "python:" => {
766                    let body = remaining[head.len()..].trim_start().to_string();
767                    if body.is_empty() {
768                        bail!("line {}: inline `python:` needs a command", line_no);
769                    }
770                    ops.push(Op::Python(body));
771                    remaining = "";
772                }
773                "or-shell:" | "else-shell:" => {
774                    let body = remaining[head.len()..].trim_start().to_string();
775                    if body.is_empty() {
776                        bail!("line {}: inline `{}` needs a command", line_no, head);
777                    }
778                    ops.push(Op::OrShell(body));
779                    remaining = "";
780                }
781                "raw" | "passthrough" => {
782                    ops.push(Op::Raw);
783                    remaining = remaining[head.len()..].trim_start();
784                }
785                "keep" | "drop" => {
786                    let rest = remaining[head.len()..].trim_start();
787                    let (re, after) = parse_regex_literal_and_rest(rest, line_no)?;
788                    ops.push(if head == "keep" {
789                        Op::Keep(re)
790                    } else {
791                        Op::Drop(re)
792                    });
793                    remaining = after.trim_start();
794                }
795                "head" | "tail" => {
796                    let rest = remaining[head.len()..].trim_start();
797                    let (arg_word, after) = take_word(rest);
798                    let h = parse_head_arg(arg_word, line_no)?;
799                    ops.push(if head == "head" {
800                        Op::Head(h)
801                    } else {
802                        Op::Tail(h)
803                    });
804                    remaining = after.trim_start();
805                }
806                "or" | "else" => {
807                    let rest = remaining[head.len()..].trim_start();
808                    let (s, after) = parse_string_literal_and_rest(rest, line_no)?;
809                    ops.push(Op::Or(s));
810                    remaining = after.trim_start();
811                }
812                "split" => {
813                    bail!(
814                        "line {}: `split` cannot appear inline (needs pre:/post: blocks)",
815                        line_no
816                    )
817                }
818                name if self.is_macro(name) => {
819                    let rest = remaining[head.len()..].trim_start();
820                    let (args, after) =
821                        parse_macro_args_until_op(rest, &self.macro_names, line_no)?;
822                    ops.push(Op::MacroCall {
823                        name: name.to_string(),
824                        args,
825                    });
826                    remaining = after.trim_start();
827                }
828                _ => bail!("line {}: unknown op `{}` in inline chain", line_no, head),
829            }
830        }
831        Ok(ops)
832    }
833}
834
835// ──────────────────────────────────────────────────────────────────
836// Sub-parsers (free functions, no Parser state)
837// ──────────────────────────────────────────────────────────────────
838
/// Reports whether `text` begins with the keyword `kw` as a whole word:
/// `kw` must be followed by nothing, by whitespace, or by `:`. A longer
/// word that merely starts with `kw` (`matching`, `iffy`) does not count.
fn is_body_opener(text: &str, kw: &str) -> bool {
    let Some(rest) = text.strip_prefix(kw) else {
        return false;
    };
    match rest.chars().next() {
        None => true,
        Some(next) => next == ':' || next.is_whitespace(),
    }
}
848
/// Splits `s`, after dropping leading whitespace, into its first
/// whitespace-delimited word and everything that follows it (the
/// remainder keeps its leading whitespace).
fn split_first_word(s: &str) -> (&str, &str) {
    let trimmed = s.trim_start();
    match trimmed.find(char::is_whitespace) {
        Some(cut) => trimmed.split_at(cut),
        None => (trimmed, ""),
    }
}
854
/// Takes the first whitespace-delimited word off `s` (leading whitespace
/// ignored) and returns it together with the untouched remainder.
fn take_word(s: &str) -> (&str, &str) {
    let body = s.trim_start();
    let word_len: usize = body
        .chars()
        .take_while(|c| !c.is_whitespace())
        .map(char::len_utf8)
        .sum();
    body.split_at(word_len)
}
860
861fn parse_selector(s: &str) -> Result<(SubPattern, LevelPattern)> {
862    let s = s.trim();
863    if s.is_empty() {
864        bail!("empty selector");
865    }
866    let mut parts = s.splitn(2, ',');
867    let sub_str = parts.next().unwrap().trim();
868    let level_str = parts.next().map(|s| s.trim()).unwrap_or("*");
869
870    let sub = if sub_str == "*" {
871        SubPattern::Star
872    } else {
873        let alts: Vec<String> = sub_str
874            .split('|')
875            .map(|s| s.trim().to_string())
876            .collect();
877        if alts.iter().any(|a| a.is_empty()) {
878            bail!("empty alternative in sub pattern `{}`", sub_str);
879        }
880        SubPattern::Alt(alts)
881    };
882
883    let level = if level_str == "*" {
884        LevelPattern::Star
885    } else {
886        let lvl: Level = level_str.parse().map_err(|e: String| anyhow!(e))?;
887        LevelPattern::Specific(lvl)
888    };
889
890    Ok((sub, level))
891}
892
/// Glob match for subcommand selectors. `*` matches any run of chars
/// (including empty); no other metacharacters. With no `*` it is an
/// exact compare, so plain selectors behave exactly as in v1.
///
/// The pattern is cut into literal segments at each `*`:
/// the first segment must be a prefix of `text`, the last must be a
/// suffix of what remains, and every segment in between is located
/// left-to-right at its earliest occurrence. Taking the earliest
/// occurrence leaves the most text for later segments, so no
/// backtracking is needed and the work stays proportional to
/// `pat.len() * text.len()` even for patterns with many stars.
fn glob_match(pat: &str, text: &str) -> bool {
    let Some((head, rest)) = pat.split_once('*') else {
        return pat == text;
    };
    let Some(mut remaining) = text.strip_prefix(head) else {
        return false;
    };

    // `rest` is `mid1*mid2*...*last`; with a single star it is just `last`.
    let (middle, last) = match rest.rsplit_once('*') {
        Some((middle, last)) => (Some(middle), last),
        None => (None, rest),
    };

    if let Some(middle) = middle {
        for segment in middle.split('*') {
            // `find` returns a byte offset on a char boundary, and
            // `segment` is valid UTF-8, so the slice below cannot split
            // a character.
            match remaining.find(segment) {
                Some(at) => remaining = &remaining[at + segment.len()..],
                None => return false,
            }
        }
    }

    remaining.ends_with(last)
}
914
915/// Parse a guard — an AND of atoms joined by ` and `.
916fn parse_guard(s: &str, line_no: usize) -> Result<Guard> {
917    let mut atoms = Vec::new();
918    for part in s.split(" and ") {
919        let part = part.trim();
920        if part.is_empty() {
921            bail!("line {}: empty guard", line_no);
922        }
923        atoms.push(parse_atom(part, line_no)?);
924    }
925    if atoms.is_empty() {
926        bail!("line {}: empty guard", line_no);
927    }
928    Ok(Guard { atoms })
929}
930
931/// Parse one guard atom: `exit ok|failed`, `level ultra|full|lite`, or a
932/// `--flag` / `-x`.
933fn parse_atom(s: &str, line_no: usize) -> Result<Atom> {
934    if s.starts_with('-') {
935        return Ok(Atom::Flag(s.to_string()));
936    }
937    let mut words = s.split_whitespace();
938    let dim = words.next().unwrap_or("");
939    let val = words.next();
940    if words.next().is_some() {
941        bail!("line {}: guard `{}` has too many words", line_no, s);
942    }
943    match (dim, val) {
944        ("exit", Some("ok")) => Ok(Atom::Exit(ExitMatch::Ok)),
945        ("exit", Some("failed")) => Ok(Atom::Exit(ExitMatch::Failed)),
946        ("exit", Some(v)) => {
947            bail!("line {}: unknown exit value `{}` (expected ok|failed)", line_no, v)
948        }
949        ("exit", None) => bail!("line {}: `exit` guard needs a value (ok|failed)", line_no),
950        ("level", Some(v)) => {
951            let lvl: Level = v.parse().map_err(|e: String| anyhow!("line {line_no}: {e}"))?;
952            Ok(Atom::Level(lvl))
953        }
954        ("level", None) => bail!("line {}: `level` guard needs a value", line_no),
955        (other, _) => bail!(
956            "line {}: unknown guard `{}` (expected `exit ...`, `level ...`, or a --flag)",
957            line_no,
958            other
959        ),
960    }
961}
962
/// The dimensions a `match` header is allowed to switch on.
///
/// Flags are deliberately absent: a flag is either present or not, so
/// there are no values to enumerate — flag checks are written as
/// `if --flag: ...` instead.
#[derive(Clone, Copy)]
enum MatchDim {
    /// `match level`
    Level,
    /// `match exit`
    Exit,
}
971
972fn parse_match_dim(s: &str, line_no: usize) -> Result<MatchDim> {
973    match s {
974        "level" => Ok(MatchDim::Level),
975        "exit" => Ok(MatchDim::Exit),
976        "" => bail!("line {}: `match` needs a dimension (level|exit)", line_no),
977        other => bail!(
978            "line {}: unknown match dimension `{}` (expected level|exit; flags must use `if --flag:`)",
979            line_no,
980            other
981        ),
982    }
983}
984
985fn build_match_atom(dim: MatchDim, value: &str, line_no: usize) -> Result<Atom> {
986    match dim {
987        MatchDim::Level => {
988            let lvl: Level = value
989                .parse()
990                .map_err(|e: String| anyhow!("line {line_no}: {e}"))?;
991            Ok(Atom::Level(lvl))
992        }
993        MatchDim::Exit => match value {
994            "ok" => Ok(Atom::Exit(ExitMatch::Ok)),
995            "failed" => Ok(Atom::Exit(ExitMatch::Failed)),
996            other => bail!(
997                "line {}: unknown exit value `{}` (expected ok|failed)",
998                line_no,
999                other
1000            ),
1001        },
1002    }
1003}
1004
1005fn parse_define_header(s: &str) -> Result<(String, Vec<String>, &str)> {
1006    let s = s.trim_start();
1007    let end = s
1008        .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
1009        .unwrap_or(s.len());
1010    let name = s[..end].to_string();
1011    if name.is_empty() {
1012        bail!("define needs a name");
1013    }
1014    let rest = s[end..].trim_start();
1015    if let Some(rest) = rest.strip_prefix('(') {
1016        let close = rest
1017            .find(')')
1018            .ok_or_else(|| anyhow!("missing `)` in define params"))?;
1019        let params: Vec<String> = rest[..close]
1020            .split(',')
1021            .map(|p| p.trim().to_string())
1022            .filter(|p| !p.is_empty())
1023            .collect();
1024        Ok((name, params, rest[close + 1..].trim_start()))
1025    } else {
1026        Ok((name, Vec::new(), rest))
1027    }
1028}
1029
1030fn parse_regex_literal(s: &str, line_no: usize) -> Result<PatternRegex> {
1031    let (re, after) = parse_regex_literal_and_rest(s, line_no)?;
1032    let after = after.trim();
1033    if !after.is_empty() {
1034        bail!(
1035            "line {}: unexpected trailing input after regex: `{}`",
1036            line_no,
1037            after
1038        );
1039    }
1040    Ok(re)
1041}
1042
1043fn parse_regex_literal_and_rest(s: &str, line_no: usize) -> Result<(PatternRegex, &str)> {
1044    let s = s.trim_start();
1045    if !s.starts_with('/') {
1046        bail!(
1047            "line {}: expected `/regex/`, got `{}`",
1048            line_no,
1049            preview(s)
1050        );
1051    }
1052    let body = &s[1..];
1053    let mut src = String::new();
1054    let mut chars = body.char_indices().peekable();
1055    let mut end_byte: Option<usize> = None;
1056    while let Some((i, c)) = chars.next() {
1057        if c == '\\' {
1058            if let Some((_, n)) = chars.next() {
1059                if n == '/' {
1060                    src.push('/');
1061                } else {
1062                    src.push('\\');
1063                    src.push(n);
1064                }
1065            } else {
1066                bail!("line {}: trailing backslash in regex", line_no);
1067            }
1068        } else if c == '/' {
1069            end_byte = Some(i);
1070            break;
1071        } else {
1072            src.push(c);
1073        }
1074    }
1075    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated regex", line_no))?;
1076    let after = &body[end_byte + 1..];
1077    let compiled = Regex::new(&src)
1078        .map_err(|e| anyhow!("line {}: invalid regex `{}`: {}", line_no, src, e))?;
1079    Ok((
1080        PatternRegex {
1081            source: src,
1082            compiled,
1083        },
1084        after,
1085    ))
1086}
1087
1088fn parse_string_literal(s: &str, line_no: usize) -> Result<String> {
1089    let (s, after) = parse_string_literal_and_rest(s, line_no)?;
1090    let after = after.trim();
1091    if !after.is_empty() {
1092        bail!(
1093            "line {}: unexpected trailing input after string: `{}`",
1094            line_no,
1095            after
1096        );
1097    }
1098    Ok(s)
1099}
1100
1101fn parse_string_literal_and_rest(s: &str, line_no: usize) -> Result<(String, &str)> {
1102    let s = s.trim_start();
1103    if !s.starts_with('"') {
1104        bail!(
1105            "line {}: expected `\"...\"`, got `{}`",
1106            line_no,
1107            preview(s)
1108        );
1109    }
1110    let body = &s[1..];
1111    let mut out = String::new();
1112    let mut chars = body.char_indices();
1113    let mut end_byte: Option<usize> = None;
1114    while let Some((i, c)) = chars.next() {
1115        if c == '\\' {
1116            if let Some((_, n)) = chars.next() {
1117                match n {
1118                    'n' => out.push('\n'),
1119                    't' => out.push('\t'),
1120                    'r' => out.push('\r'),
1121                    '\\' => out.push('\\'),
1122                    '"' => out.push('"'),
1123                    other => {
1124                        out.push('\\');
1125                        out.push(other);
1126                    }
1127                }
1128            } else {
1129                bail!("line {}: trailing backslash in string", line_no);
1130            }
1131        } else if c == '"' {
1132            end_byte = Some(i);
1133            break;
1134        } else {
1135            out.push(c);
1136        }
1137    }
1138    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated string", line_no))?;
1139    let after = &body[end_byte + 1..];
1140    Ok((out, after))
1141}
1142
1143fn parse_head_arg(s: &str, line_no: usize) -> Result<HeadArg> {
1144    let s = s.trim();
1145    if s == "auto" {
1146        return Ok(HeadArg::Auto);
1147    }
1148    s.parse::<usize>().map(HeadArg::Number).map_err(|_| {
1149        anyhow!(
1150            "line {}: expected number or `auto`, got `{}`",
1151            line_no,
1152            s
1153        )
1154    })
1155}
1156
1157fn parse_macro_args(s: &str, line_no: usize) -> Result<Vec<MacroArg>> {
1158    let mut out = Vec::new();
1159    let mut rest = s.trim();
1160    while !rest.is_empty() {
1161        if rest.starts_with('"') {
1162            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
1163            out.push(MacroArg::String(sv));
1164            rest = after.trim_start();
1165        } else {
1166            let (word, after) = take_word(rest);
1167            out.push(match word.parse::<usize>() {
1168                Ok(n) => MacroArg::Number(n),
1169                Err(_) => MacroArg::String(word.to_string()),
1170            });
1171            rest = after.trim_start();
1172        }
1173    }
1174    Ok(out)
1175}
1176
1177fn parse_macro_args_until_op<'a>(
1178    s: &'a str,
1179    macro_names: &[String],
1180    line_no: usize,
1181) -> Result<(Vec<MacroArg>, &'a str)> {
1182    let mut out = Vec::new();
1183    let mut rest = s.trim_start();
1184    while !rest.is_empty() {
1185        let (word, _) = take_word(rest);
1186        if OP_KEYWORDS.contains(&word) || macro_names.iter().any(|n| n == word) {
1187            break;
1188        }
1189        if rest.starts_with('"') {
1190            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
1191            out.push(MacroArg::String(sv));
1192            rest = after.trim_start();
1193        } else {
1194            let (w, after) = take_word(rest);
1195            out.push(match w.parse::<usize>() {
1196                Ok(n) => MacroArg::Number(n),
1197                Err(_) => MacroArg::String(w.to_string()),
1198            });
1199            rest = after.trim_start();
1200        }
1201    }
1202    Ok((out, rest))
1203}
1204
/// Returns at most the first 40 characters of `s`, cut on a character
/// boundary. Used to keep error messages short.
fn preview(s: &str) -> &str {
    match s.char_indices().nth(40) {
        Some((cut, _)) => &s[..cut],
        None => s,
    }
}
1209
1210// ──────────────────────────────────────────────────────────────────
1211// Execution
1212// ──────────────────────────────────────────────────────────────────
1213
1214use std::io::Write;
1215use std::process::{Command, Stdio};
1216
1217/// Per-invocation context passed to the executor and propagated as env
1218/// vars to `shell:` / `python:` subprocesses.
1219#[derive(Debug, Clone)]
1220pub struct ExecCtx<'a> {
1221    pub sub: &'a str,
1222    pub level: Level,
1223    pub exit_code: i32,
1224    pub args: &'a [String],
1225}
1226
1227/// Run the matching rule against `input` and return the filtered output.
1228/// If no rule matches, the input is returned unchanged (passthrough).
1229///
1230/// Non-empty output always ends in a newline, matching the convention
1231/// of shell tools like `echo` and `grep`.
1232pub fn execute(rs: &RuleSet, ctx: &ExecCtx, input: &str) -> Result<String> {
1233    let Some(rule) = rs.select(ctx.sub, ctx.level) else {
1234        return Ok(input.to_string());
1235    };
1236    let out = run_ops(&rule.ops, ctx, input, rs, &[])?;
1237    Ok(ensure_trailing_newline(out))
1238}
1239
/// Appends `\n` to a non-empty string that does not already end with
/// one. Empty strings are returned as they are.
fn ensure_trailing_newline(mut s: String) -> String {
    // `\n` is a single ASCII byte, so inspecting the last byte is enough.
    match s.as_bytes().last() {
        None | Some(b'\n') => {}
        Some(_) => s.push('\n'),
    }
    s
}
1246
/// Measurements for a single top-level op, as collected by
/// [`execute_explain`].
#[derive(Clone, Debug)]
pub struct StageRecord {
    /// Short human-readable description of the op.
    pub op_desc: String,
    /// Line count of the stream handed to the op.
    pub stdin_lines: usize,
    /// Byte length of the stream handed to the op.
    pub stdin_bytes: usize,
    /// Line count of the stream the op produced.
    pub stdout_lines: usize,
    /// Byte length of the stream the op produced.
    pub stdout_bytes: usize,
    /// Wall-clock time spent applying the op, in microseconds.
    pub elapsed_us: u128,
}
1257
1258#[derive(Debug, Default, Clone)]
1259pub struct ExplainTrace {
1260    /// Index into `RuleSet::rules` of the matched rule (None if no match).
1261    pub matched_rule: Option<usize>,
1262    pub stages: Vec<StageRecord>,
1263}
1264
1265/// Like [`execute`] but records per-op stats. Only top-level ops are
1266/// recorded — macros and split sub-chains run silently. Adds ~µs of
1267/// overhead per op for line/byte counting; safe for interactive use,
1268/// avoid in tight loops.
1269pub fn execute_explain(
1270    rs: &RuleSet,
1271    ctx: &ExecCtx,
1272    input: &str,
1273) -> Result<(String, ExplainTrace)> {
1274    let mut trace = ExplainTrace::default();
1275    let Some((idx, rule)) = rs
1276        .rules
1277        .iter()
1278        .enumerate()
1279        .find(|(_, r)| r.matches(ctx.sub, ctx.level))
1280    else {
1281        return Ok((input.to_string(), trace));
1282    };
1283    trace.matched_rule = Some(idx);
1284
1285    let raw = input.to_string();
1286    let mut state = input.to_string();
1287    for op in &rule.ops {
1288        let stdin_lines = state.lines().count();
1289        let stdin_bytes = state.len();
1290        let start = std::time::Instant::now();
1291        let new_state = apply_op(op, &state, &raw, ctx, rs, &[])?;
1292        let elapsed_us = start.elapsed().as_micros();
1293        trace.stages.push(StageRecord {
1294            op_desc: describe_op(op),
1295            stdin_lines,
1296            stdin_bytes,
1297            stdout_lines: new_state.lines().count(),
1298            stdout_bytes: new_state.len(),
1299            elapsed_us,
1300        });
1301        state = new_state;
1302    }
1303    Ok((ensure_trailing_newline(state), trace))
1304}
1305
1306fn describe_op(op: &Op) -> String {
1307    match op {
1308        Op::Keep(p) => format!("keep /{}/", p.source),
1309        Op::Drop(p) => format!("drop /{}/", p.source),
1310        Op::Head(arg) => format!("head {}", describe_head(arg)),
1311        Op::Tail(arg) => format!("tail {}", describe_head(arg)),
1312        Op::Or(s) => format!("or {s:?}"),
1313        Op::OrShell(s) => format!("or-shell: {}", first_line(s)),
1314        Op::Raw => "raw".to_string(),
1315        Op::Cascade(branches) => format!("cascade ({} arms)", branches.len()),
1316        Op::Shell(s) => format!("shell: {}", first_line(s)),
1317        Op::Python(s) => {
1318            if has_pep723_header(s) {
1319                format!("python (uv): {}", first_line(s))
1320            } else {
1321                format!("python: {}", first_line(s))
1322            }
1323        }
1324        Op::MacroCall { name, args } => {
1325            let parts: Vec<String> = args
1326                .iter()
1327                .map(|a| match a {
1328                    MacroArg::Number(n) => n.to_string(),
1329                    MacroArg::String(s) => s.clone(),
1330                })
1331                .collect();
1332            if parts.is_empty() {
1333                name.clone()
1334            } else {
1335                format!("{name} {}", parts.join(" "))
1336            }
1337        }
1338        Op::Split { delimiter, .. } => format!("split /{}/", delimiter.source),
1339    }
1340}
1341
1342fn describe_head(a: &HeadArg) -> String {
1343    match a {
1344        HeadArg::Number(n) => n.to_string(),
1345        HeadArg::Auto => "auto".into(),
1346    }
1347}
1348
/// First line of `s`, truncated to at most 60 characters. Returns an
/// empty string when `s` has no lines.
fn first_line(s: &str) -> String {
    let line = s.lines().next().unwrap_or("");
    match line.char_indices().nth(60) {
        Some((cut, _)) => line[..cut].to_string(),
        None => line.to_string(),
    }
}
1352
1353fn run_ops(
1354    ops: &[Op],
1355    ctx: &ExecCtx,
1356    input: &str,
1357    rs: &RuleSet,
1358    macro_args: &[MacroArg],
1359) -> Result<String> {
1360    let raw = input.to_string();
1361    let mut state = input.to_string();
1362    for op in ops {
1363        state = apply_op(op, &state, &raw, ctx, rs, macro_args)?;
1364    }
1365    Ok(state)
1366}
1367
1368fn apply_op(
1369    op: &Op,
1370    state: &str,
1371    raw: &str,
1372    ctx: &ExecCtx,
1373    rs: &RuleSet,
1374    macro_args: &[MacroArg],
1375) -> Result<String> {
1376    match op {
1377        Op::Keep(pat) => Ok(filter_lines(state, |l| pat.compiled.is_match(l))),
1378        Op::Drop(pat) => Ok(filter_lines(state, |l| !pat.compiled.is_match(l))),
1379        Op::Head(arg) => Ok(take_head(state, resolve_head(arg, ctx.level))),
1380        Op::Tail(arg) => Ok(take_tail(state, resolve_head(arg, ctx.level))),
1381        Op::Or(s) => Ok(if state.trim().is_empty() {
1382            s.clone()
1383        } else {
1384            state.to_string()
1385        }),
1386        Op::OrShell(cmd) => {
1387            if state.trim().is_empty() {
1388                let expanded = expand_args(cmd, macro_args);
1389                run_shell(&expanded, raw, ctx)
1390            } else {
1391                Ok(state.to_string())
1392            }
1393        }
1394        Op::Raw => Ok(state.to_string()),
1395        Op::Cascade(branches) => {
1396            for br in branches {
1397                let hit = match &br.guard {
1398                    None => true,
1399                    Some(g) => guard_matches(g, ctx),
1400                };
1401                if hit {
1402                    return run_ops(&br.ops, ctx, state, rs, macro_args);
1403                }
1404            }
1405            // No arm matched and no `else` — leave the stream untouched.
1406            Ok(state.to_string())
1407        }
1408        Op::Shell(cmd) => {
1409            let expanded = expand_args(cmd, macro_args);
1410            run_shell(&expanded, state, ctx)
1411        }
1412        Op::Python(body) => {
1413            let expanded = expand_args(body, macro_args);
1414            run_python(&expanded, state, ctx)
1415        }
1416        Op::MacroCall { name, args } => {
1417            let def = rs
1418                .find_define(name)
1419                .ok_or_else(|| anyhow!("undefined macro `{}`", name))?;
1420            if args.len() != def.params.len() {
1421                bail!(
1422                    "macro `{}` expects {} arg(s), got {}",
1423                    name,
1424                    def.params.len(),
1425                    args.len()
1426                );
1427            }
1428            run_ops(&def.ops, ctx, state, rs, args)
1429        }
1430        Op::Split {
1431            delimiter,
1432            pre,
1433            post,
1434        } => {
1435            let (a, b) = split_at_first_match(state, &delimiter.compiled);
1436            let pre_out = if pre.is_empty() {
1437                a
1438            } else {
1439                run_ops(pre, ctx, &a, rs, macro_args)?
1440            };
1441            let post_out = if post.is_empty() {
1442                b
1443            } else {
1444                run_ops(post, ctx, &b, rs, macro_args)?
1445            };
1446            Ok(join_nonempty(&pre_out, &post_out))
1447        }
1448    }
1449}
1450
1451/// A guard holds when every atom holds (AND).
1452fn guard_matches(g: &Guard, ctx: &ExecCtx) -> bool {
1453    g.atoms.iter().all(|a| atom_matches(a, ctx))
1454}
1455
1456fn atom_matches(a: &Atom, ctx: &ExecCtx) -> bool {
1457    match a {
1458        Atom::Exit(ExitMatch::Ok) => ctx.exit_code == 0,
1459        Atom::Exit(ExitMatch::Failed) => ctx.exit_code != 0,
1460        Atom::Level(l) => *l == ctx.level,
1461        Atom::Flag(f) => flag_matches(f, ctx.args),
1462    }
1463}
1464
/// Decide whether the flag guard `spec` is satisfied by `args`.
///
/// `spec` comes in two shapes:
/// - presence (`--stat`, `-o`): satisfied when some arg equals the flag,
///   or has the form `flag=value` with that exact flag name
///   (`--output=json` satisfies `--output`).
/// - flag plus value (`-o yaml`, `--output json`): satisfied when the
///   flag carries that value, written as two consecutive args
///   (`-o yaml`), as `flag=value` (`-o=yaml`), or — for two-character
///   flags only — glued together (`-oyaml`).
///
/// Names are compared whole (the arg is split at `=`), never by prefix,
/// so `--stat` does not match `--statistics`.
fn flag_matches(spec: &str, args: &[String]) -> bool {
    let Some((flag, value)) = spec.split_once(char::is_whitespace) else {
        // Presence check: compare against the part before any `=`.
        return args.iter().any(|arg| {
            let name = arg.split_once('=').map_or(arg.as_str(), |(name, _)| name);
            arg == spec || name == spec
        });
    };

    let value = value.trim();
    if args
        .windows(2)
        .any(|pair| pair[0] == flag && pair[1] == value)
    {
        return true;
    }

    let with_equals = format!("{flag}={value}");
    if args.iter().any(|arg| *arg == with_equals) {
        return true;
    }

    // The glued form is only recognised for short flags such as `-o`.
    if flag.len() != 2 {
        return false;
    }
    let glued = format!("{flag}{value}");
    args.iter().any(|arg| *arg == glued)
}
1489
1490fn resolve_head(arg: &HeadArg, level: Level) -> usize {
1491    match arg {
1492        HeadArg::Number(n) => *n,
1493        HeadArg::Auto => level.head_limit(30),
1494    }
1495}
1496
/// Keep only the lines of `s` for which `keep` returns true, joined
/// with `\n`. The result carries no trailing newline; `keep` is called
/// once per line, in order.
fn filter_lines(s: &str, mut keep: impl FnMut(&str) -> bool) -> String {
    let mut kept = s.lines().filter(|l| keep(l));
    let Some(first) = kept.next() else {
        return String::new();
    };
    let mut out = first.to_string();
    for line in kept {
        out.push('\n');
        out.push_str(line);
    }
    out
}
1503
/// First `n` lines of `s`, joined with `\n` (no trailing newline).
fn take_head(s: &str, n: usize) -> String {
    let mut out = String::new();
    for (idx, line) in s.lines().take(n).enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out
}
1507
/// Last `n` lines of `s`, joined with `\n` (no trailing newline). When
/// `s` has fewer than `n` lines, all of them are returned.
fn take_tail(s: &str, n: usize) -> String {
    let total = s.lines().count();
    let skipped = total.saturating_sub(n);
    s.lines().skip(skipped).collect::<Vec<_>>().join("\n")
}
1513
1514/// Split input at the first line matching `re`. The matching line goes
1515/// into `post`. If no line matches, everything is `pre` and `post` is
1516/// empty.
1517fn split_at_first_match(s: &str, re: &Regex) -> (String, String) {
1518    let mut pre = String::new();
1519    let mut post = String::new();
1520    let mut in_post = false;
1521    for line in s.lines() {
1522        if !in_post && re.is_match(line) {
1523            in_post = true;
1524        }
1525        let buf = if in_post { &mut post } else { &mut pre };
1526        if !buf.is_empty() {
1527            buf.push('\n');
1528        }
1529        buf.push_str(line);
1530    }
1531    (pre, post)
1532}
1533
/// Concatenate `a` and `b` with a single `\n` between them, unless one
/// side is empty — then the other side is returned on its own.
fn join_nonempty(a: &str, b: &str) -> String {
    if a.is_empty() {
        return b.to_string();
    }
    if b.is_empty() {
        return a.to_string();
    }
    let mut joined = String::with_capacity(a.len() + 1 + b.len());
    joined.push_str(a);
    joined.push('\n');
    joined.push_str(b);
    joined
}
1542
1543/// Replace `$1`..`$9` with macro positional args. Other `$NAME` tokens
1544/// (e.g. `$level`, `$sub`) are left intact so shell can expand them
1545/// from env vars.
1546fn expand_args(body: &str, args: &[MacroArg]) -> String {
1547    if args.is_empty() {
1548        return body.to_string();
1549    }
1550    let mut out = String::with_capacity(body.len());
1551    let bytes = body.as_bytes();
1552    let mut i = 0;
1553    while i < bytes.len() {
1554        let c = bytes[i];
1555        if c == b'$' && i + 1 < bytes.len() {
1556            let n = bytes[i + 1];
1557            if n.is_ascii_digit() && n != b'0' {
1558                let idx = (n - b'0') as usize;
1559                if idx <= args.len() {
1560                    match &args[idx - 1] {
1561                        MacroArg::Number(v) => out.push_str(&v.to_string()),
1562                        MacroArg::String(v) => out.push_str(v),
1563                    }
1564                    i += 2;
1565                    continue;
1566                }
1567            }
1568        }
1569        out.push(c as char);
1570        i += 1;
1571    }
1572    out
1573}
1574
1575fn run_shell(cmd: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1576    let mut child = Command::new("sh")
1577        .arg("-c")
1578        .arg(cmd)
1579        .env("level", ctx.level.to_string())
1580        .env("sub", ctx.sub)
1581        .env("exit", ctx.exit_code.to_string())
1582        .env("args", ctx.args.join(" "))
1583        .stdin(Stdio::piped())
1584        .stdout(Stdio::piped())
1585        .stderr(Stdio::piped())
1586        .spawn()
1587        .context("spawning sh")?;
1588
1589    if let Some(mut stdin) = child.stdin.take() {
1590        stdin
1591            .write_all(stdin_data.as_bytes())
1592            .context("writing to sh stdin")?;
1593    }
1594
1595    let output = child.wait_with_output().context("waiting for sh")?;
1596    if !output.status.success() {
1597        let stderr = String::from_utf8_lossy(&output.stderr);
1598        bail!(
1599            "shell exited {}: {}",
1600            output.status.code().unwrap_or(-1),
1601            stderr.trim()
1602        );
1603    }
1604    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1605}
1606
1607fn run_python(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1608    if has_pep723_header(body) {
1609        run_python_uv(body, stdin_data, ctx)
1610    } else {
1611        run_python_plain(body, stdin_data, ctx)
1612    }
1613}
1614
/// True when some line of `body`, ignoring leading whitespace, starts
/// with `# /// script` — the opening marker of a PEP 723 metadata block.
fn has_pep723_header(body: &str) -> bool {
    for line in body.lines() {
        if line.trim_start().starts_with("# /// script") {
            return true;
        }
    }
    false
}
1619
1620fn run_python_plain(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1621    let mut child = Command::new("python3")
1622        .arg("-c")
1623        .arg(body)
1624        .env("level", ctx.level.to_string())
1625        .env("sub", ctx.sub)
1626        .env("exit", ctx.exit_code.to_string())
1627        .env("args", ctx.args.join(" "))
1628        .stdin(Stdio::piped())
1629        .stdout(Stdio::piped())
1630        .stderr(Stdio::piped())
1631        .spawn()
1632        .context("spawning python3")?;
1633
1634    if let Some(mut stdin) = child.stdin.take() {
1635        stdin
1636            .write_all(stdin_data.as_bytes())
1637            .context("writing to python stdin")?;
1638    }
1639    let output = child.wait_with_output().context("waiting for python")?;
1640    if !output.status.success() {
1641        let stderr = String::from_utf8_lossy(&output.stderr);
1642        bail!(
1643            "python exited {}: {}",
1644            output.status.code().unwrap_or(-1),
1645            stderr.trim()
1646        );
1647    }
1648    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1649}
1650
1651/// PEP 723: write the body to a temp file and let `uv run --script` resolve
1652/// inline dependencies. Data flows via stdin to the script.
1653fn run_python_uv(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1654    let mut script = tempfile::Builder::new()
1655        .prefix("lowfat-lf-")
1656        .suffix(".py")
1657        .tempfile()
1658        .context("creating temp script file")?;
1659    script
1660        .write_all(body.as_bytes())
1661        .context("writing temp script")?;
1662    script.flush().ok();
1663
1664    let path = script
1665        .path()
1666        .to_str()
1667        .ok_or_else(|| anyhow!("non-UTF8 temp path"))?
1668        .to_string();
1669
1670    let mut child = Command::new("uv")
1671        .args(["run", "--script", &path])
1672        .env("level", ctx.level.to_string())
1673        .env("sub", ctx.sub)
1674        .env("exit", ctx.exit_code.to_string())
1675        .env("args", ctx.args.join(" "))
1676        .stdin(Stdio::piped())
1677        .stdout(Stdio::piped())
1678        .stderr(Stdio::piped())
1679        .spawn()
1680        .context("spawning uv (is `uv` installed?)")?;
1681
1682    if let Some(mut stdin) = child.stdin.take() {
1683        stdin
1684            .write_all(stdin_data.as_bytes())
1685            .context("writing to uv stdin")?;
1686    }
1687    let output = child.wait_with_output().context("waiting for uv")?;
1688    if !output.status.success() {
1689        let stderr = String::from_utf8_lossy(&output.stderr);
1690        bail!(
1691            "uv exited {}: {}",
1692            output.status.code().unwrap_or(-1),
1693            stderr.trim()
1694        );
1695    }
1696    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1697}
1698
1699// ──────────────────────────────────────────────────────────────────
1700// Tests
1701// ──────────────────────────────────────────────────────────────────
1702
1703#[cfg(test)]
1704mod tests {
1705    use super::*;
1706
1707    fn parse_ok(src: &str) -> RuleSet {
1708        parse(src).unwrap_or_else(|e| panic!("parse failed: {e}\n--- src ---\n{src}"))
1709    }
1710
1711    #[test]
1712    fn empty_input() {
1713        let rs = parse_ok("");
1714        assert!(rs.rules.is_empty());
1715        assert!(rs.defines.is_empty());
1716    }
1717
1718    #[test]
1719    fn comments_and_blanks_only() {
1720        let rs = parse_ok("# hi\n\n# more\n");
1721        assert!(rs.rules.is_empty());
1722    }
1723
1724    #[test]
1725    fn simple_rule() {
1726        let rs = parse_ok(
1727            r#"
1728status:
1729    keep /foo/
1730    head 10
1731"#,
1732        );
1733        assert_eq!(rs.rules.len(), 1);
1734        let r = &rs.rules[0];
1735        assert!(matches!(&r.sub, SubPattern::Alt(a) if a == &["status".to_string()]));
1736        assert!(matches!(r.level, LevelPattern::Star));
1737        assert_eq!(r.ops.len(), 2);
1738        match &r.ops[0] {
1739            Op::Keep(p) => assert_eq!(p.source, "foo"),
1740            _ => panic!("expected Keep"),
1741        }
1742        assert!(matches!(r.ops[1], Op::Head(HeadArg::Number(10))));
1743    }
1744
1745    #[test]
1746    fn sub_with_alternation_and_level() {
1747        let rs = parse_ok(
1748            r#"
1749build|check, ultra:
1750    head 15
1751"#,
1752        );
1753        let r = &rs.rules[0];
1754        match &r.sub {
1755            SubPattern::Alt(a) => assert_eq!(a, &["build".to_string(), "check".to_string()]),
1756            _ => panic!("expected Alt"),
1757        }
1758        assert!(matches!(r.level, LevelPattern::Specific(Level::Ultra)));
1759    }
1760
1761    #[test]
1762    fn star_wildcards() {
1763        let rs = parse_ok(
1764            r#"
1765*:
1766    head 30
1767"#,
1768        );
1769        assert!(matches!(rs.rules[0].sub, SubPattern::Star));
1770        assert!(matches!(rs.rules[0].level, LevelPattern::Star));
1771    }
1772
1773    #[test]
1774    fn else_string_fallback() {
1775        let rs = parse_ok(
1776            r#"
1777status:
1778    keep /^M /
1779    head 5
1780    else "clean"
1781"#,
1782        );
1783        match &rs.rules[0].ops[2] {
1784            Op::Or(s) => assert_eq!(s, "clean"),
1785            _ => panic!("expected Or"),
1786        }
1787    }
1788
1789    #[test]
1790    fn shell_inline_and_block() {
1791        let rs = parse_ok(
1792            r#"
1793define a:
1794    shell: sed -E 's/x/y/'
1795
1796define b:
1797    shell: |
1798        awk '
1799          BEGIN { n=0 }
1800          { print; n++ }
1801        '
1802"#,
1803        );
1804        match &rs.defines[0].ops[0] {
1805            Op::Shell(s) => assert_eq!(s, "sed -E 's/x/y/'"),
1806            _ => panic!("expected inline Shell"),
1807        }
1808        match &rs.defines[1].ops[0] {
1809            Op::Shell(s) => {
1810                assert!(s.starts_with("awk '"));
1811                assert!(s.contains("BEGIN { n=0 }"));
1812                assert!(s.contains("{ print; n++ }"));
1813            }
1814            _ => panic!("expected block Shell"),
1815        }
1816    }
1817
1818    #[test]
1819    fn python_block_preserves_pep723_and_blanks() {
1820        let rs = parse_ok(
1821            r#"
1822define clean:
1823    python: |
1824        # /// script
1825        # dependencies = ["pyyaml>=6"]
1826        # ///
1827        import sys, yaml
1828
1829        for d in yaml.safe_load_all(sys.stdin):
1830            print(d)
1831"#,
1832        );
1833        match &rs.defines[0].ops[0] {
1834            Op::Python(s) => {
1835                assert!(s.contains("# /// script"));
1836                assert!(s.contains("# dependencies = [\"pyyaml>=6\"]"));
1837                assert!(s.contains("import sys, yaml"));
1838                // Blank line between imports and loop preserved
1839                assert!(s.contains("yaml\n\nfor"));
1840                // Internal indent preserved (4 spaces under `for`)
1841                assert!(s.contains("    print(d)"));
1842            }
1843            _ => panic!("expected Python"),
1844        }
1845    }
1846
1847    #[test]
1848    fn macro_call_with_args() {
1849        let rs = parse_ok(
1850            r#"
1851define compact(n):
1852    head 1
1853
1854diff, ultra:
1855    compact 30
1856"#,
1857        );
1858        match &rs.rules[0].ops[0] {
1859            Op::MacroCall { name, args } => {
1860                assert_eq!(name, "compact");
1861                assert_eq!(args, &[MacroArg::Number(30)]);
1862            }
1863            _ => panic!("expected MacroCall"),
1864        }
1865    }
1866
1867    #[test]
1868    fn inline_ops_after_rule_header() {
1869        let rs = parse_ok(
1870            r#"
1871define compact(n):
1872    head 1
1873
1874diff, ultra:  compact 30  else-shell: awk 'NF' | head -50
1875"#,
1876        );
1877        let ops = &rs.rules[0].ops;
1878        assert_eq!(ops.len(), 2);
1879        assert!(matches!(&ops[0], Op::MacroCall { name, .. } if name == "compact"));
1880        match &ops[1] {
1881            Op::OrShell(s) => assert_eq!(s, "awk 'NF' | head -50"),
1882            _ => panic!("expected OrShell, got {:?}", &ops[1]),
1883        }
1884    }
1885
1886    #[test]
1887    fn split_with_pre_and_post() {
1888        let rs = parse_ok(
1889            r#"
1890define ah:
1891    shell: cat
1892
1893show:
1894    split /^diff /
1895    pre:
1896        keep /^commit /
1897        ah
1898    post:
1899        head 10
1900    head 100
1901"#,
1902        );
1903        let ops = &rs.rules[0].ops;
1904        assert_eq!(ops.len(), 2);
1905        match &ops[0] {
1906            Op::Split {
1907                delimiter,
1908                pre,
1909                post,
1910            } => {
1911                assert_eq!(delimiter.source, "^diff ");
1912                assert_eq!(pre.len(), 2);
1913                assert_eq!(post.len(), 1);
1914                assert!(matches!(&pre[0], Op::Keep(_)));
1915                assert!(matches!(&pre[1], Op::MacroCall { name, .. } if name == "ah"));
1916                assert!(matches!(post[0], Op::Head(HeadArg::Number(10))));
1917            }
1918            _ => panic!("expected Split"),
1919        }
1920        assert!(matches!(ops[1], Op::Head(HeadArg::Number(100))));
1921    }
1922
1923    #[test]
1924    fn first_match_wins_selection() {
1925        let rs = parse_ok(
1926            r#"
1927diff, ultra:
1928    head 5
1929
1930diff:
1931    head 20
1932
1933*:
1934    head 30
1935"#,
1936        );
1937        let r = rs.select("diff", Level::Ultra).unwrap();
1938        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(5))));
1939        let r = rs.select("diff", Level::Full).unwrap();
1940        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(20))));
1941        let r = rs.select("status", Level::Ultra).unwrap();
1942        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(30))));
1943    }
1944
1945    #[test]
1946    fn alternation_in_selector_matches() {
1947        let rs = parse_ok(
1948            r#"
1949build|check, ultra:
1950    head 15
1951"#,
1952        );
1953        assert!(rs.select("build", Level::Ultra).is_some());
1954        assert!(rs.select("check", Level::Ultra).is_some());
1955        assert!(rs.select("test", Level::Ultra).is_none());
1956        assert!(rs.select("build", Level::Full).is_none());
1957    }
1958
1959    #[test]
1960    fn head_auto_keyword() {
1961        let rs = parse_ok(
1962            r#"
1963foo:
1964    head auto
1965"#,
1966        );
1967        assert!(matches!(rs.rules[0].ops[0], Op::Head(HeadArg::Auto)));
1968    }
1969
1970    #[test]
1971    fn regex_with_escaped_slash() {
1972        let rs = parse_ok(
1973            r#"
1974foo:
1975    keep /a\/b/
1976"#,
1977        );
1978        match &rs.rules[0].ops[0] {
1979            Op::Keep(p) => assert_eq!(p.source, "a/b"),
1980            _ => panic!(),
1981        }
1982    }
1983
1984    #[test]
1985    fn errors_on_unterminated_regex() {
1986        let err = parse("foo:\n    keep /abc\n").unwrap_err();
1987        assert!(err.to_string().contains("unterminated regex"), "got: {err}");
1988    }
1989
1990    #[test]
1991    fn errors_on_unknown_op() {
1992        let err = parse("foo:\n    nonsense 1\n").unwrap_err();
1993        assert!(err.to_string().contains("unknown op"), "got: {err}");
1994    }
1995
1996    #[test]
1997    fn errors_on_invalid_level() {
1998        let err = parse("foo, gigamax:\n    head 5\n").unwrap_err();
1999        // anyhow only renders the outermost message via Display; use {:#}
2000        // to walk the cause chain.
2001        let chain = format!("{err:#}");
2002        assert!(chain.contains("unknown level"), "got: {chain}");
2003    }
2004
2005    #[test]
2006    fn errors_on_empty_rule_body() {
2007        let err = parse("foo:\nbar:\n    head 5\n").unwrap_err();
2008        assert!(err.to_string().contains("rule has no ops"), "got: {err}");
2009    }
2010
2011    // ── full plugin files parse cleanly ──────────────────────────
2012
2013    #[test]
2014    fn git_compact_plugin_parses() {
2015        let src = include_str!(
2016            "../../lowfat-plugin/embedded/git/git-compact/filter.lf"
2017        );
2018        let rs = parse_ok(src);
2019        // Defines: strip-trailers, abbrev-hash, compact-diff, drop-index-meta
2020        assert_eq!(rs.defines.len(), 4);
2021        let names: Vec<&str> = rs.defines.iter().map(|d| d.name.as_str()).collect();
2022        assert_eq!(names, ["strip-trailers", "abbrev-hash", "compact-diff", "drop-index-meta"]);
2023        assert_eq!(rs.defines[2].params, vec!["limit".to_string()]);
2024
2025        // Selection sanity
2026        assert!(rs.select("status", Level::Full).is_some());
2027        assert!(rs.select("diff", Level::Ultra).is_some());
2028        assert!(rs.select("diff", Level::Lite).is_some());
2029        assert!(rs.select("diff", Level::Full).is_some());
2030        assert!(rs.select("log", Level::Ultra).is_some());
2031        assert!(rs.select("show", Level::Ultra).is_some());
2032        assert!(rs.select("show", Level::Full).is_some());
2033        // Catch-all
2034        assert!(rs.select("nothing", Level::Full).is_some());
2035
2036        // Show rule is now a level cascade.
2037        let show_full = rs.select("show", Level::Full).unwrap();
2038        assert!(matches!(&show_full.ops[0], Op::Cascade(_)));
2039    }
2040
2041    // ── executor ─────────────────────────────────────────────────
2042
2043    fn ctx<'a>(sub: &'a str, level: Level) -> ExecCtx<'a> {
2044        ExecCtx {
2045            sub,
2046            level,
2047            exit_code: 0,
2048            args: &[],
2049        }
2050    }
2051
2052    #[test]
2053    fn exec_keep_drop_head_tail() {
2054        let rs = parse_ok(
2055            r#"
2056foo:
2057    keep /^a/
2058    drop /skip/
2059    head 3
2060"#,
2061        );
2062        let input = "alpha\nbeta\na-skip\namber\naxe\nakira\n";
2063        let out = execute(&rs, &ctx("foo", Level::Full), input).unwrap();
2064        assert_eq!(out, "alpha\namber\naxe\n");
2065    }
2066
2067    #[test]
2068    fn exec_tail() {
2069        let rs = parse_ok(
2070            r#"
2071foo:
2072    tail 2
2073"#,
2074        );
2075        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd").unwrap();
2076        assert_eq!(out, "c\nd\n");
2077    }
2078
2079    #[test]
2080    fn exec_else_string_when_empty() {
2081        let rs = parse_ok(
2082            r#"
2083status:
2084    keep /^M /
2085    else "clean"
2086"#,
2087        );
2088        let out = execute(&rs, &ctx("status", Level::Full), "?? new.txt\n").unwrap();
2089        assert_eq!(out, "clean\n");
2090    }
2091
2092    #[test]
2093    fn exec_else_string_passthrough_when_nonempty() {
2094        let rs = parse_ok(
2095            r#"
2096status:
2097    keep /^M /
2098    else "clean"
2099"#,
2100        );
2101        let out = execute(&rs, &ctx("status", Level::Full), "M file.txt\n").unwrap();
2102        assert_eq!(out, "M file.txt\n");
2103    }
2104
2105    #[test]
2106    fn exec_no_match_passes_through() {
2107        let rs = parse_ok(
2108            r#"
2109foo:
2110    head 1
2111"#,
2112        );
2113        let input = "x\ny\nz";
2114        let out = execute(&rs, &ctx("other", Level::Full), input).unwrap();
2115        assert_eq!(out, input);
2116    }
2117
2118    #[test]
2119    fn exec_first_match_wins() {
2120        let rs = parse_ok(
2121            r#"
2122diff, ultra:
2123    head 1
2124diff:
2125    head 3
2126"#,
2127        );
2128        let input = "a\nb\nc\nd\n";
2129        let u = execute(&rs, &ctx("diff", Level::Ultra), input).unwrap();
2130        let f = execute(&rs, &ctx("diff", Level::Full), input).unwrap();
2131        assert_eq!(u, "a\n");
2132        assert_eq!(f, "a\nb\nc\n");
2133    }
2134
2135    #[test]
2136    fn exec_head_auto_uses_level() {
2137        let rs = parse_ok(
2138            r#"
2139foo:
2140    head auto
2141"#,
2142        );
2143        let input: String = (1..=80).map(|i| format!("{i}\n")).collect();
2144        let u = execute(&rs, &ctx("foo", Level::Ultra), &input).unwrap();
2145        let f = execute(&rs, &ctx("foo", Level::Full), &input).unwrap();
2146        let l = execute(&rs, &ctx("foo", Level::Lite), &input).unwrap();
2147        assert_eq!(u.lines().count(), 15);
2148        assert_eq!(f.lines().count(), 30);
2149        assert_eq!(l.lines().count(), 60);
2150    }
2151
2152    #[test]
2153    fn exec_shell_inline() {
2154        let rs = parse_ok(
2155            r#"
2156foo:
2157    shell: tr a-z A-Z
2158"#,
2159        );
2160        let out = execute(&rs, &ctx("foo", Level::Full), "hello\n").unwrap();
2161        assert_eq!(out.trim_end(), "HELLO");
2162    }
2163
2164    #[test]
2165    fn exec_shell_block() {
2166        let rs = parse_ok(
2167            r#"
2168foo:
2169    shell: |
2170        awk '{ print NR, $0 }'
2171"#,
2172        );
2173        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\n").unwrap();
2174        assert_eq!(out.trim_end(), "1 a\n2 b");
2175    }
2176
2177    #[test]
2178    fn exec_shell_sees_env_vars() {
2179        let rs = parse_ok(
2180            r#"
2181build:
2182    shell: printf '%s:%s' "$sub" "$level"
2183"#,
2184        );
2185        let out = execute(&rs, &ctx("build", Level::Ultra), "").unwrap();
2186        // ensure_trailing_newline normalizes shell output without a final \n
2187        assert_eq!(out, "build:ultra\n");
2188    }
2189
2190    #[test]
2191    fn exec_else_shell_uses_raw_input() {
2192        let rs = parse_ok(
2193            r#"
2194diff:
2195    keep /^IMPOSSIBLE/
2196    else-shell: head -2
2197"#,
2198        );
2199        let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\nz\n").unwrap();
2200        assert_eq!(out, "x\ny\n");
2201    }
2202
2203    #[test]
2204    fn exec_macro_expansion_with_args() {
2205        let rs = parse_ok(
2206            r#"
2207define n-up(count):
2208    shell: head -$1
2209
2210foo:
2211    n-up 2
2212"#,
2213        );
2214        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\n").unwrap();
2215        assert_eq!(out, "a\nb\n");
2216    }
2217
2218    #[test]
2219    fn exec_split_pre_post() {
2220        let rs = parse_ok(
2221            r#"
2222show:
2223    split /^diff /
2224    pre:
2225        head 1
2226    post:
2227        head 2
2228"#,
2229        );
2230        let input = "commit abc\nAuthor: x\nDate: y\ndiff --git a b\n+line1\n+line2\n+line3\n";
2231        let out = execute(&rs, &ctx("show", Level::Full), input).unwrap();
2232        assert_eq!(out, "commit abc\ndiff --git a b\n+line1\n");
2233    }
2234
2235    #[test]
2236    fn exec_split_no_match() {
2237        let rs = parse_ok(
2238            r#"
2239show:
2240    split /^diff /
2241    pre:
2242        head 2
2243    post:
2244        head 10
2245"#,
2246        );
2247        // No `diff ` line — everything goes to pre, post is empty.
2248        let out = execute(&rs, &ctx("show", Level::Full), "a\nb\nc\nd\n").unwrap();
2249        assert_eq!(out, "a\nb\n");
2250    }
2251
2252    #[test]
2253    fn exec_macro_arg_count_mismatch_errors() {
2254        let rs = parse_ok(
2255            r#"
2256define needs-two(a, b):
2257    head 1
2258
2259foo:
2260    needs-two 5
2261"#,
2262        );
2263        let err = execute(&rs, &ctx("foo", Level::Full), "x").unwrap_err();
2264        assert!(err.to_string().contains("expects 2 arg"), "got: {err}");
2265    }
2266
2267    #[test]
2268    fn exec_python_plain_when_no_pep723() {
2269        // Skip if python3 not on PATH.
2270        if Command::new("python3").arg("--version").output().is_err() {
2271            eprintln!("skipping: python3 not available");
2272            return;
2273        }
2274        let rs = parse_ok(
2275            r#"
2276foo:
2277    python: |
2278        import sys
2279        for line in sys.stdin:
2280            print(line.upper(), end="")
2281"#,
2282        );
2283        let out = execute(&rs, &ctx("foo", Level::Full), "hello\nworld\n").unwrap();
2284        assert_eq!(out, "HELLO\nWORLD\n");
2285    }
2286
2287    #[test]
2288    fn exec_macro_arg_substitution_in_shell() {
2289        let rs = parse_ok(
2290            r#"
2291define grab(limit):
2292    shell: |
2293        awk -v lim=$1 '{ if (NR<=lim) print }'
2294
2295foo:
2296    grab 3
2297"#,
2298        );
2299        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\ne\n").unwrap();
2300        assert_eq!(out, "a\nb\nc\n");
2301    }
2302
2303    #[test]
2304    fn pep723_detection() {
2305        assert!(has_pep723_header(
2306            "# /// script\n# dependencies = []\n# ///\nimport sys"
2307        ));
2308        assert!(has_pep723_header(
2309            "    # /// script\n    # ///\nimport sys"
2310        ));
2311        assert!(!has_pep723_header("import sys\nprint('hi')"));
2312        assert!(!has_pep723_header("# not pep 723\nprint('hi')"));
2313    }
2314
2315    #[test]
2316    fn kubectl_compact_plugin_parses() {
2317        let src = include_str!(
2318            "../../../test-fixtures/plugins/kubectl/kubectl-compact/filter.lf"
2319        );
2320        let rs = parse_ok(src);
2321        // Define: clean-yaml (with PEP 723 body)
2322        assert_eq!(rs.defines.len(), 1);
2323        assert_eq!(rs.defines[0].name, "clean-yaml");
2324        match &rs.defines[0].ops[0] {
2325            Op::Python(body) => {
2326                assert!(body.contains("# /// script"));
2327                assert!(body.contains("dependencies = [\"pyyaml>=6\"]"));
2328                assert!(body.contains("yaml.safe_load_all"));
2329            }
2330            other => panic!("expected Python op, got {other:?}"),
2331        }
2332        // get/logs/events/* selection
2333        assert!(rs.select("get", Level::Full).is_some());
2334        assert!(rs.select("logs", Level::Ultra).is_some());
2335        assert!(rs.select("logs", Level::Full).is_some());
2336        assert!(rs.select("events", Level::Ultra).is_some());
2337        assert!(rs.select("describe", Level::Full).is_some()); // catch-all
2338    }
2339
2340    // ── v2: cascades, guards, globs ───────────────────────────────
2341
2342    #[test]
2343    fn parse_cascade_arms() {
2344        let rs = parse_ok(
2345            r#"
2346diff:
2347    if exit failed: raw
2348    elif level ultra: head 5
2349    else: head 99
2350"#,
2351        );
2352        match &rs.rules[0].ops[..] {
2353            [Op::Cascade(branches)] => {
2354                assert_eq!(branches.len(), 3);
2355                assert!(branches[0].guard.is_some());
2356                assert!(branches[1].guard.is_some());
2357                assert!(branches[2].guard.is_none());
2358            }
2359            other => panic!("expected one Cascade op, got {other:?}"),
2360        }
2361    }
2362
2363    #[test]
2364    fn exec_cascade_branches_on_exit() {
2365        let rs = parse_ok(
2366            r#"
2367diff:
2368    if exit failed: raw
2369    else: head 1
2370"#,
2371        );
2372        let input = "a\nb\nc\n";
2373        let failed = ExecCtx { sub: "diff", level: Level::Full, exit_code: 1, args: &[] };
2374        let ok = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2375        assert_eq!(execute(&rs, &failed, input).unwrap(), "a\nb\nc\n");
2376        assert_eq!(execute(&rs, &ok, input).unwrap(), "a\n");
2377    }
2378
2379    #[test]
2380    fn exec_cascade_level_and_flag_guards() {
2381        let rs = parse_ok(
2382            r#"
2383diff:
2384    if level ultra and --stat: head 1
2385    elif --stat: head 2
2386    else: head 3
2387"#,
2388        );
2389        let input = "1\n2\n3\n4\n";
2390        let stat = vec!["--stat".to_string()];
2391        let ultra_stat = ExecCtx { sub: "diff", level: Level::Ultra, exit_code: 0, args: &stat };
2392        let full_stat = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &stat };
2393        let plain = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2394        assert_eq!(execute(&rs, &ultra_stat, input).unwrap(), "1\n");
2395        assert_eq!(execute(&rs, &full_stat, input).unwrap(), "1\n2\n");
2396        assert_eq!(execute(&rs, &plain, input).unwrap(), "1\n2\n3\n");
2397    }
2398
2399    #[test]
2400    fn flag_guard_matches_equals_value_form() {
2401        // A `--output` guard must fire on both `--output json` (two tokens)
2402        // and `--output=json` (one token). Used so kubectl `get -o json`
2403        // bypasses line-truncation that would corrupt the JSON for jq.
2404        let rs = parse_ok("get:\n    if --output: raw\n    else: head 1\n");
2405        let input = "{\n  \"a\": 1\n}\n";
2406        let split = vec!["--output".to_string(), "json".to_string()];
2407        let glued = vec!["--output=json".to_string()];
2408        let none = vec!["pods".to_string()];
2409        let split_ctx = ExecCtx { sub: "get", level: Level::Full, exit_code: 0, args: &split };
2410        let glued_ctx = ExecCtx { sub: "get", level: Level::Full, exit_code: 0, args: &glued };
2411        let none_ctx = ExecCtx { sub: "get", level: Level::Full, exit_code: 0, args: &none };
2412        assert_eq!(execute(&rs, &split_ctx, input).unwrap(), input);
2413        assert_eq!(execute(&rs, &glued_ctx, input).unwrap(), input);
2414        // No output flag → compaction (head 1) still applies.
2415        assert_eq!(execute(&rs, &none_ctx, input).unwrap(), "{\n");
2416    }
2417
2418    #[test]
2419    fn flag_guard_equals_does_not_prefix_match() {
2420        // `--stat` must NOT match `--statistics` — the `=` split guards this.
2421        let rs = parse_ok("diff:\n    if --stat: head 1\n    else: head 2\n");
2422        let stats = vec!["--statistics".to_string()];
2423        let ctx = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &stats };
2424        assert_eq!(execute(&rs, &ctx, "1\n2\n3\n").unwrap(), "1\n2\n");
2425    }
2426
2427    #[test]
2428    fn flag_guard_matches_flag_with_value() {
2429        // `-o yaml` must fire on the two-token form, `-o=yaml`, and glued
2430        // `-oyaml` — but NOT on `-o json`. This lets the kubectl get rule
2431        // prune `-o yaml` while passing `-o json` through byte-exact.
2432        let rs = parse_ok("get:\n    if -o yaml: head 1\n    else: raw\n");
2433        let input = "a\nb\nc\n";
2434        let cases = [
2435            (vec!["-o".to_string(), "yaml".to_string()], "a\n"),
2436            (vec!["-o=yaml".to_string()], "a\n"),
2437            (vec!["-oyaml".to_string()], "a\n"),
2438            (vec!["-o".to_string(), "json".to_string()], input), // else → raw
2439        ];
2440        for (args, want) in cases {
2441            let ctx = ExecCtx { sub: "get", level: Level::Full, exit_code: 0, args: &args };
2442            assert_eq!(execute(&rs, &ctx, input).unwrap(), want, "args={args:?}");
2443        }
2444    }
2445
2446    #[test]
2447    fn exec_cascade_no_match_no_else_passes_through() {
2448        let rs = parse_ok("diff:\n    if exit failed: head 1\n");
2449        let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\n").unwrap();
2450        assert_eq!(out, "x\ny\n");
2451    }
2452
2453    #[test]
2454    fn exec_raw_is_identity() {
2455        // `raw` is canonical; `passthrough` is a legacy alias for the same op.
2456        for kw in ["raw", "passthrough"] {
2457            let rs = parse_ok(&format!("diff:\n    {kw}\n"));
2458            let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\n").unwrap();
2459            assert_eq!(out, "x\ny\n");
2460        }
2461    }
2462
2463    #[test]
2464    fn glob_selector_matches_prefix() {
2465        let rs = parse_ok("apply*:\n    head 1\n");
2466        assert!(rs.select("apply", Level::Full).is_some());
2467        assert!(rs.select("apply-set", Level::Full).is_some());
2468        assert!(rs.select("delete", Level::Full).is_none());
2469    }
2470
2471    #[test]
2472    fn or_is_alias_of_else() {
2473        let new = parse_ok("s:\n    keep /Z/\n    or \"clean\"\n");
2474        let old = parse_ok("s:\n    keep /Z/\n    else \"clean\"\n");
2475        assert_eq!(execute(&new, &ctx("s", Level::Full), "nope\n").unwrap(), "clean\n");
2476        assert_eq!(execute(&old, &ctx("s", Level::Full), "nope\n").unwrap(), "clean\n");
2477    }
2478
2479    #[test]
2480    fn errors_on_unknown_guard_value() {
2481        let chain = format!("{:#}", parse("diff:\n    if exit boom: head 1\n").unwrap_err());
2482        assert!(chain.contains("unknown exit value"), "got: {chain}");
2483    }
2484
2485    // ── match: single-dimension cascade sugar ─────────────────────
2486
2487    #[test]
2488    fn parse_match_level_desugars_to_cascade() {
2489        let rs = parse_ok(
2490            r#"
2491state:
2492    match level:
2493        ultra: head 1
2494        lite:  head 3
2495        else:  head 2
2496"#,
2497        );
2498        match &rs.rules[0].ops[..] {
2499            [Op::Cascade(branches)] => {
2500                assert_eq!(branches.len(), 3);
2501                assert!(matches!(
2502                    branches[0].guard.as_ref().unwrap().atoms.as_slice(),
2503                    [Atom::Level(Level::Ultra)]
2504                ));
2505                assert!(matches!(
2506                    branches[1].guard.as_ref().unwrap().atoms.as_slice(),
2507                    [Atom::Level(Level::Lite)]
2508                ));
2509                assert!(branches[2].guard.is_none());
2510            }
2511            other => panic!("expected one Cascade op, got {other:?}"),
2512        }
2513    }
2514
2515    #[test]
2516    fn exec_match_level_matches_equivalent_cascade() {
2517        let m = parse_ok(
2518            r#"
2519state:
2520    match level:
2521        ultra: head 1
2522        lite:  head 3
2523        else:  head 2
2524"#,
2525        );
2526        let c = parse_ok(
2527            r#"
2528state:
2529    if level ultra: head 1
2530    elif level lite: head 3
2531    else: head 2
2532"#,
2533        );
2534        let input = "a\nb\nc\nd\n";
2535        for level in [Level::Ultra, Level::Full, Level::Lite] {
2536            let mc = execute(&m, &ctx("state", level), input).unwrap();
2537            let cc = execute(&c, &ctx("state", level), input).unwrap();
2538            assert_eq!(mc, cc, "level {level:?}");
2539        }
2540    }
2541
2542    #[test]
2543    fn exec_match_exit() {
2544        let rs = parse_ok(
2545            r#"
2546diff:
2547    match exit:
2548        failed: raw
2549        ok: head 1
2550"#,
2551        );
2552        let input = "a\nb\nc\n";
2553        let failed = ExecCtx { sub: "diff", level: Level::Full, exit_code: 1, args: &[] };
2554        let okctx = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2555        assert_eq!(execute(&rs, &failed, input).unwrap(), "a\nb\nc\n");
2556        assert_eq!(execute(&rs, &okctx, input).unwrap(), "a\n");
2557    }
2558
2559    #[test]
2560    fn exec_nested_match_inside_else_arm() {
2561        let rs = parse_ok(
2562            r#"
2563plan:
2564    if exit failed:
2565        raw
2566    else:
2567        match level:
2568            ultra: head 1
2569            lite:  head 3
2570            else:  head 2
2571"#,
2572        );
2573        let input = "a\nb\nc\nd\n";
2574        let failed = ExecCtx { sub: "plan", level: Level::Full, exit_code: 1, args: &[] };
2575        let ok_full = ExecCtx { sub: "plan", level: Level::Full, exit_code: 0, args: &[] };
2576        let ok_ultra = ExecCtx { sub: "plan", level: Level::Ultra, exit_code: 0, args: &[] };
2577        let ok_lite = ExecCtx { sub: "plan", level: Level::Lite, exit_code: 0, args: &[] };
2578        assert_eq!(execute(&rs, &failed, input).unwrap(), input);
2579        assert_eq!(execute(&rs, &ok_full, input).unwrap(), "a\nb\n");
2580        assert_eq!(execute(&rs, &ok_ultra, input).unwrap(), "a\n");
2581        assert_eq!(execute(&rs, &ok_lite, input).unwrap(), "a\nb\nc\n");
2582    }
2583
2584    #[test]
2585    fn match_missing_dimension_errors() {
2586        let chain = format!("{:#}", parse("plan:\n    match:\n        ultra: head 1\n").unwrap_err());
2587        assert!(chain.contains("needs a dimension"), "got: {chain}");
2588    }
2589
2590    #[test]
2591    fn match_unknown_dimension_errors() {
2592        let chain = format!(
2593            "{:#}",
2594            parse("plan:\n    match flag:\n        x: head 1\n").unwrap_err()
2595        );
2596        assert!(chain.contains("unknown match dimension"), "got: {chain}");
2597    }
2598
2599    #[test]
2600    fn match_unknown_value_errors() {
2601        let chain = format!(
2602            "{:#}",
2603            parse("plan:\n    match exit:\n        boom: head 1\n").unwrap_err()
2604        );
2605        assert!(chain.contains("unknown exit value"), "got: {chain}");
2606    }
2607
2608    #[test]
2609    fn match_inline_after_header_errors() {
2610        let chain = format!(
2611            "{:#}",
2612            parse("plan:\n    match level: head 1\n").unwrap_err()
2613        );
2614        assert!(
2615            chain.contains("doesn't take inline ops"),
2616            "got: {chain}"
2617        );
2618    }
2619}