Skip to main content

lowfat_core/
lf.rs

1//! lf — the lowfat filter DSL parser.
2//!
3//! Parses `.lf` files into a [`RuleSet`]. Execution lives elsewhere
4//! (Task 2+). The DSL is line-oriented and indentation-sensitive; we
5//! avoid INDENT/DEDENT tokens by working directly on `(indent, text)`
6//! pairs, which keeps the parser short and the error messages tied to
7//! source line numbers.
8
9use crate::level::Level;
10use anyhow::{Context, Result, anyhow, bail};
11use regex::Regex;
12
13// ──────────────────────────────────────────────────────────────────
14// AST
15// ──────────────────────────────────────────────────────────────────
16
/// Everything parsed out of one `.lf` source: its `define` blocks and
/// its rules.
#[derive(Debug, Default)]
pub struct RuleSet {
    /// `define` blocks, in the order the parser encountered them.
    pub defines: Vec<Define>,
    /// Rules in source order; `RuleSet::select` returns the first match.
    pub rules: Vec<Rule>,
}
22
/// A named op sequence introduced by a top-level `define …:` header.
#[derive(Debug, Clone)]
pub struct Define {
    /// Name written after the `define` keyword.
    pub name: String,
    /// Parameter names from the header (produced by `parse_define_header`).
    pub params: Vec<String>,
    /// Ops from the indented body; the parser rejects an empty body.
    pub ops: Vec<Op>,
}
29
/// One rule: a `<sub>[, <level>]:` selector and the ops it selects.
#[derive(Debug, Clone)]
pub struct Rule {
    /// Subcommand half of the selector (text before the first `,`).
    pub sub: SubPattern,
    /// Level half of the selector; `*` when the header has no `,` part.
    pub level: LevelPattern,
    /// Ops from the header line and/or the indented body. The parser
    /// rejects a rule with no ops.
    pub ops: Vec<Op>,
    /// 1-based source line number of the rule header.
    pub line_no: usize,
}
37
/// Subcommand half of a rule selector.
#[derive(Debug, Clone)]
pub enum SubPattern {
    /// `*` — accepts every subcommand.
    Star,
    /// `a|b|c` — accepts a subcommand when any alternative glob-matches it.
    Alt(Vec<String>),
}
43
/// Level half of a rule selector.
#[derive(Debug, Clone)]
pub enum LevelPattern {
    /// `*` (or no level given) — accepts every level.
    Star,
    /// Accepts only this exact level.
    Specific(Level),
}
49
/// One parsed operation of a rule or `define` body. This module only
/// builds the tree; what each op does at run time is defined by the
/// executor, which is not part of this file.
#[derive(Debug, Clone)]
pub enum Op {
    /// `keep <regex>`.
    Keep(PatternRegex),
    /// `drop <regex>`.
    Drop(PatternRegex),
    /// `head <arg>`.
    Head(HeadArg),
    /// `tail <arg>`.
    Tail(HeadArg),
    /// `or <string literal>`; the keyword `else` followed by a string
    /// literal is parsed to the same variant.
    Or(String),
    /// `or-shell: <command>` / `else-shell: <command>` — the rest of the
    /// line is stored verbatim.
    OrShell(String),
    /// `shell:` with an inline command or a `|` block body.
    Shell(String),
    /// `python:` with an inline body or a `|` block body.
    Python(String),
    /// `raw` (legacy alias: `passthrough`).
    Raw,
    /// Invocation of a name declared by some `define` in the same file.
    MacroCall {
        name: String,
        args: Vec<MacroArg>,
    },
    /// `split <regex>` followed by sibling `pre:` / `post:` blocks; at
    /// least one of the two blocks is non-empty after parsing.
    Split {
        delimiter: PatternRegex,
        pre: Vec<Op>,
        post: Vec<Op>,
    },
    /// `if` / `elif` / `else` cascade — first matching branch runs.
    Cascade(Vec<Branch>),
}
73
/// One arm of an [`Op::Cascade`]. `guard: None` is the `else` arm.
#[derive(Debug, Clone)]
pub struct Branch {
    /// Condition for this arm; `None` marks the unconditional `else`.
    pub guard: Option<Guard>,
    /// Ops of this arm; the parser rejects an arm with no ops.
    pub ops: Vec<Op>,
}
80
/// A guard is an AND of atoms — `if level ultra and --stat:`.
#[derive(Debug, Clone)]
pub struct Guard {
    /// The conjuncts, in source order. Never empty when built by the parser.
    pub atoms: Vec<Atom>,
}
86
/// One closed-vocabulary condition inside a [`Guard`].
#[derive(Debug, Clone)]
pub enum Atom {
    /// `exit ok` / `exit failed`.
    Exit(ExitMatch),
    /// `level <name>`.
    Level(Level),
    /// Any atom whose text starts with `-`; the text is stored verbatim.
    Flag(String),
}
94
/// Value of an `exit …` guard atom.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitMatch {
    /// Written `exit ok`.
    Ok,
    /// Written `exit failed`.
    Failed,
}
100
/// A regex literal from the DSL, kept in both textual and compiled form.
#[derive(Debug, Clone)]
pub struct PatternRegex {
    /// Pattern text — presumably as written in the `.lf` file; confirm
    /// against `parse_regex_literal`, which builds this value.
    pub source: String,
    /// The compiled `regex::Regex`.
    pub compiled: Regex,
}
106
/// Argument of a `head` or `tail` op.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadArg {
    /// An explicit number.
    Number(usize),
    /// No explicit number. NOTE(review): the accepted spelling is decided
    /// by `parse_head_arg`; confirm there.
    Auto,
}
112
/// One argument passed in an [`Op::MacroCall`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MacroArg {
    /// A numeric argument.
    Number(usize),
    /// A string argument.
    String(String),
}
118
119// ──────────────────────────────────────────────────────────────────
120// Selection
121// ──────────────────────────────────────────────────────────────────
122
123impl RuleSet {
124    /// First-match-wins. Returns `None` when no rule matches.
125    pub fn select(&self, sub: &str, level: Level) -> Option<&Rule> {
126        self.rules.iter().find(|r| r.matches(sub, level))
127    }
128
129    pub fn find_define(&self, name: &str) -> Option<&Define> {
130        self.defines.iter().find(|d| d.name == name)
131    }
132}
133
134impl Rule {
135    pub fn matches(&self, sub: &str, level: Level) -> bool {
136        let sub_ok = match &self.sub {
137            SubPattern::Star => true,
138            SubPattern::Alt(alts) => alts.iter().any(|a| glob_match(a, sub)),
139        };
140        let lvl_ok = match &self.level {
141            LevelPattern::Star => true,
142            LevelPattern::Specific(l) => *l == level,
143        };
144        sub_ok && lvl_ok
145    }
146}
147
148// ──────────────────────────────────────────────────────────────────
149// Line preprocessing
150// ──────────────────────────────────────────────────────────────────
151
/// One physical source line, pre-digested for the indentation-driven
/// parser.
#[derive(Debug, Clone)]
struct Line {
    /// Number of leading whitespace bytes in `raw`.
    indent: usize,
    text: String, // trimmed of leading/trailing whitespace; "" if blank
    raw: String,  // original line, no trailing newline
    /// 1-based position of the line in the input.
    line_no: usize,
    /// Blank, or the first non-whitespace character is `#` (at any
    /// indent). Meta lines are skipped by the structural parser but
    /// preserved as-is in block bodies.
    is_meta: bool,
}
162
163fn split_lines(input: &str) -> Vec<Line> {
164    input
165        .split('\n')
166        .enumerate()
167        .map(|(i, raw_line)| {
168            let raw = raw_line.trim_end_matches('\r').to_string();
169            let stripped = raw.trim_start();
170            let indent = raw.len() - stripped.len();
171            let text = stripped.trim_end().to_string();
172            let is_meta = text.is_empty() || text.starts_with('#');
173            Line {
174                indent,
175                text,
176                raw,
177                line_no: i + 1,
178                is_meta,
179            }
180        })
181        .collect()
182}
183
184// ──────────────────────────────────────────────────────────────────
185// Parser
186// ──────────────────────────────────────────────────────────────────
187
/// Spellings that begin an op or a body construct. Entries ending in
/// `:` are matched together with their colon.
///
/// NOTE(review): nothing in the part of the file reviewed here reads this
/// table; presumably it tells an inline macro call where its arguments
/// stop — confirm against `parse_macro_args_until_op`.
const OP_KEYWORDS: &[&str] = &[
    "keep",
    "drop",
    "head",
    "tail",
    "or",
    "or-shell:",
    "else",
    "else-shell:",
    "shell:",
    "python:",
    "split",
    "raw",
    "passthrough",
    "if",
    "elif",
    "match",
];
206
207pub fn parse(input: &str) -> Result<RuleSet> {
208    let lines = split_lines(input);
209    let macro_names = collect_macro_names(&lines);
210    let mut p = Parser {
211        lines: &lines,
212        pos: 0,
213        macro_names,
214    };
215    p.parse_ruleset()
216}
217
218fn collect_macro_names(lines: &[Line]) -> Vec<String> {
219    let mut names = Vec::new();
220    for l in lines {
221        if l.is_meta {
222            continue;
223        }
224        if let Some(rest) = l.text.strip_prefix("define ") {
225            let end = rest
226                .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
227                .unwrap_or(rest.len());
228            let name = rest[..end].trim().to_string();
229            if !name.is_empty() {
230                names.push(name);
231            }
232        }
233    }
234    names
235}
236
/// Cursor over the pre-split lines plus the set of known macro names.
struct Parser<'a> {
    /// All lines of the input, meta lines included.
    lines: &'a [Line],
    /// Index into `lines` of the next line to look at.
    pos: usize,
    /// Names declared by `define` anywhere in the input.
    macro_names: Vec<String>,
}
242
243impl<'a> Parser<'a> {
244    /// Advance past meta lines and return the next structural line without
245    /// consuming it.
246    fn peek_significant(&mut self) -> Option<&'a Line> {
247        while let Some(l) = self.lines.get(self.pos) {
248            if l.is_meta {
249                self.pos += 1;
250            } else {
251                return Some(l);
252            }
253        }
254        None
255    }
256
257    fn advance(&mut self) -> Option<&'a Line> {
258        let l = self.lines.get(self.pos);
259        if l.is_some() {
260            self.pos += 1;
261        }
262        l
263    }
264
265    fn is_macro(&self, name: &str) -> bool {
266        self.macro_names.iter().any(|n| n == name)
267    }
268
269    // ── top-level ────────────────────────────────────────────────
270
271    fn parse_ruleset(&mut self) -> Result<RuleSet> {
272        let mut rs = RuleSet::default();
273        while let Some(line) = self.peek_significant() {
274            if line.indent != 0 {
275                bail!("line {}: unexpected indent at top level", line.line_no);
276            }
277            if line.text.starts_with("define ") {
278                let d = self.parse_define()?;
279                rs.defines.push(d);
280            } else {
281                let r = self.parse_rule()?;
282                rs.rules.push(r);
283            }
284        }
285        Ok(rs)
286    }
287
288    fn parse_define(&mut self) -> Result<Define> {
289        let header = self.advance().unwrap();
290        let line_no = header.line_no;
291        let rest = header
292            .text
293            .strip_prefix("define ")
294            .ok_or_else(|| anyhow!("line {}: expected `define`", line_no))?;
295        let (name, params, after_paren) =
296            parse_define_header(rest).with_context(|| format!("line {line_no}"))?;
297        if !after_paren.starts_with(':') {
298            bail!(
299                "line {}: expected `:` after define header, got `{}`",
300                line_no,
301                after_paren
302            );
303        }
304        let trailing = after_paren[1..].trim();
305        if !trailing.is_empty() {
306            bail!(
307                "line {}: one-line `define` body not supported (use indented body)",
308                line_no
309            );
310        }
311        let ops = self.parse_indented_ops(header.indent)?;
312        if ops.is_empty() {
313            bail!("line {}: `define {}` has empty body", line_no, name);
314        }
315        Ok(Define { name, params, ops })
316    }
317
318    fn parse_rule(&mut self) -> Result<Rule> {
319        let header = self.advance().unwrap();
320        let line_no = header.line_no;
321        let parent_indent = header.indent;
322        let colon_pos = header
323            .text
324            .find(':')
325            .ok_or_else(|| anyhow!("line {}: missing `:` in rule header", line_no))?;
326        let selector = &header.text[..colon_pos];
327        let after = &header.text[colon_pos + 1..];
328        let (sub, level) =
329            parse_selector(selector).with_context(|| format!("line {line_no}"))?;
330
331        let mut ops = Vec::new();
332        let inline = after.trim();
333        if !inline.is_empty() {
334            // Inline ops after `:` are always a pipeline (v1 form).
335            ops.extend(self.parse_inline_ops(inline, line_no)?);
336            ops.extend(self.parse_indented_ops(parent_indent)?);
337        } else {
338            // An indented body may be a pipeline or an if/elif/else cascade.
339            ops = self.parse_body(parent_indent)?;
340        }
341
342        if ops.is_empty() {
343            bail!("line {}: rule has no ops", line_no);
344        }
345        Ok(Rule {
346            sub,
347            level,
348            ops,
349            line_no,
350        })
351    }
352
353    // ── op chains ────────────────────────────────────────────────
354
355    /// Parse op-lines strictly deeper-indented than `parent_indent`.
356    /// Stops at first significant line whose indent <= parent_indent.
357    fn parse_indented_ops(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
358        let mut ops = Vec::new();
359        loop {
360            let Some(line) = self.peek_significant() else {
361                break;
362            };
363            if line.indent <= parent_indent {
364                break;
365            }
366            let op = self.parse_op_line()?;
367            ops.push(op);
368        }
369        Ok(ops)
370    }
371
372    /// An indented rule body: a plain pipeline, or a cascade when the
373    /// first significant line opens with `if` (full cascade) or `match`
374    /// (single-dimension sugar). Both desugar to `Op::Cascade`.
375    fn parse_body(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
376        if let Some(line) = self.peek_significant() {
377            if line.indent > parent_indent {
378                if is_body_opener(&line.text, "if") {
379                    let branches = self.parse_cascade(parent_indent)?;
380                    return Ok(vec![Op::Cascade(branches)]);
381                }
382                if is_body_opener(&line.text, "match") {
383                    let branches = self.parse_match(parent_indent)?;
384                    return Ok(vec![Op::Cascade(branches)]);
385                }
386            }
387        }
388        self.parse_indented_ops(parent_indent)
389    }
390
391    /// Parse `if` / `elif`* / `else`? arms — all share one indent.
392    fn parse_cascade(&mut self, parent_indent: usize) -> Result<Vec<Branch>> {
393        let mut branches: Vec<Branch> = Vec::new();
394        let mut arm_indent: Option<usize> = None;
395        loop {
396            let Some(line) = self.peek_significant() else {
397                break;
398            };
399            if line.indent <= parent_indent {
400                break;
401            }
402            match arm_indent {
403                None => arm_indent = Some(line.indent),
404                Some(ai) if line.indent != ai => break,
405                Some(_) => {}
406            }
407            let line_no = line.line_no;
408            // `else` is glued to its colon (`else:`), so take the leading
409            // alphabetic run rather than the whitespace-delimited word.
410            let kw: String = line
411                .text
412                .chars()
413                .take_while(|c| c.is_ascii_alphabetic())
414                .collect();
415            match kw.as_str() {
416                "if" if branches.is_empty() => {}
417                "elif" | "else" if !branches.is_empty() => {}
418                "if" => bail!("line {}: unexpected `if` — cascade already open", line_no),
419                "elif" | "else" => {
420                    bail!("line {}: `{}` without a leading `if`", line_no, kw)
421                }
422                _ => break,
423            }
424            let branch = self.parse_branch(&kw)?;
425            let is_else = branch.guard.is_none();
426            branches.push(branch);
427            if is_else {
428                break; // `else` is always the last arm
429            }
430        }
431        Ok(branches)
432    }
433
434    /// Parse one cascade arm: `<if|elif|else> <guard>:` then inline or
435    /// indented ops.
436    fn parse_branch(&mut self, head: &str) -> Result<Branch> {
437        let line = self.advance().unwrap();
438        let line_no = line.line_no;
439        let indent = line.indent;
440        let rest = line.text[head.len()..].trim_start();
441        let colon = rest
442            .find(':')
443            .ok_or_else(|| anyhow!("line {}: missing `:` in `{}` arm", line_no, head))?;
444        let guard_str = rest[..colon].trim();
445        let after = rest[colon + 1..].trim();
446        let guard = if head == "else" {
447            if !guard_str.is_empty() {
448                bail!("line {}: `else` takes no guard", line_no);
449            }
450            None
451        } else {
452            Some(parse_guard(guard_str, line_no)?)
453        };
454        let ops = self.parse_arm_body(after, indent, line_no)?;
455        if ops.is_empty() {
456            bail!("line {}: `{}` arm has no ops", line_no, head);
457        }
458        Ok(Branch { guard, ops })
459    }
460
461    /// Body of one arm — used by `if`/`elif`/`else` and by `match` arms.
462    /// Inline ops after `:` force a pipeline body; otherwise the body may
463    /// be a nested cascade (`if` or `match`) or a plain indented pipeline.
464    fn parse_arm_body(
465        &mut self,
466        inline: &str,
467        indent: usize,
468        line_no: usize,
469    ) -> Result<Vec<Op>> {
470        let mut ops = Vec::new();
471        if !inline.is_empty() {
472            ops.extend(self.parse_inline_ops(inline, line_no)?);
473        }
474        if ops.is_empty() {
475            if let Some(child) = self.peek_significant() {
476                if child.indent > indent {
477                    if is_body_opener(&child.text, "if") {
478                        return Ok(vec![Op::Cascade(self.parse_cascade(indent)?)]);
479                    }
480                    if is_body_opener(&child.text, "match") {
481                        return Ok(vec![Op::Cascade(self.parse_match(indent)?)]);
482                    }
483                }
484            }
485        }
486        ops.extend(self.parse_indented_ops(indent)?);
487        Ok(ops)
488    }
489
490    /// Sugar for a single-dimension cascade.
491    ///   match level:
492    ///       ultra: head 30
493    ///       lite:  head 200
494    ///       else:  head 80
495    /// desugars to `if level ultra: … elif level lite: … else: …`.
496    /// The dimension is `level` or `exit`; flags require the full `if` form.
497    fn parse_match(&mut self, parent_indent: usize) -> Result<Vec<Branch>> {
498        let header = self.advance().unwrap();
499        let line_no = header.line_no;
500        // Accept `match`, `match:`, or `match <dim>:` uniformly. The
501        // is_body_opener gate above guarantees text starts with "match".
502        let rest = header
503            .text
504            .strip_prefix("match")
505            .ok_or_else(|| anyhow!("line {}: expected `match`", line_no))?
506            .trim_start();
507        let colon = rest
508            .find(':')
509            .ok_or_else(|| anyhow!("line {}: missing `:` after match dimension", line_no))?;
510        let dim_str = rest[..colon].trim();
511        let trailing = rest[colon + 1..].trim();
512        if !trailing.is_empty() {
513            bail!(
514                "line {}: `match` header doesn't take inline ops (got `{}`)",
515                line_no,
516                trailing
517            );
518        }
519        let dim = parse_match_dim(dim_str, line_no)?;
520
521        let mut branches: Vec<Branch> = Vec::new();
522        let mut arm_indent: Option<usize> = None;
523        loop {
524            let Some(line) = self.peek_significant() else {
525                break;
526            };
527            if line.indent <= parent_indent {
528                break;
529            }
530            match arm_indent {
531                None => arm_indent = Some(line.indent),
532                Some(ai) if line.indent != ai => break,
533                Some(_) => {}
534            }
535            let branch = self.parse_match_arm(dim)?;
536            let is_else = branch.guard.is_none();
537            branches.push(branch);
538            if is_else {
539                break;
540            }
541        }
542
543        if branches.is_empty() {
544            bail!("line {}: `match` has no arms", line_no);
545        }
546        Ok(branches)
547    }
548
549    /// One `match` arm: `<value>: <ops>` or `else: <ops>`. Builds the
550    /// guard atom by interpreting `<value>` against the captured `dim`.
551    fn parse_match_arm(&mut self, dim: MatchDim) -> Result<Branch> {
552        let line = self.advance().unwrap();
553        let line_no = line.line_no;
554        let indent = line.indent;
555        let colon = line
556            .text
557            .find(':')
558            .ok_or_else(|| anyhow!("line {}: missing `:` in match arm", line_no))?;
559        let value = line.text[..colon].trim();
560        let after = line.text[colon + 1..].trim();
561
562        let guard = if value == "else" {
563            None
564        } else {
565            let atom = build_match_atom(dim, value, line_no)?;
566            Some(Guard { atoms: vec![atom] })
567        };
568
569        let ops = self.parse_arm_body(after, indent, line_no)?;
570        if ops.is_empty() {
571            bail!("line {}: match arm `{}` has no ops", line_no, value);
572        }
573        Ok(Branch { guard, ops })
574    }
575
576    /// Parse a single op from the current significant line, advancing
577    /// past any block bodies and sub-blocks the op consumes.
578    fn parse_op_line(&mut self) -> Result<Op> {
579        let line = self.advance().unwrap();
580        let line_no = line.line_no;
581        let indent = line.indent;
582        let text = line.text.as_str();
583        let (head, _) = split_first_word(text);
584
585        match head {
586            "keep" => {
587                let rest = text[head.len()..].trim_start();
588                Ok(Op::Keep(parse_regex_literal(rest, line_no)?))
589            }
590            "drop" => {
591                let rest = text[head.len()..].trim_start();
592                Ok(Op::Drop(parse_regex_literal(rest, line_no)?))
593            }
594            "head" => {
595                let rest = text[head.len()..].trim();
596                Ok(Op::Head(parse_head_arg(rest, line_no)?))
597            }
598            "tail" => {
599                let rest = text[head.len()..].trim();
600                Ok(Op::Tail(parse_head_arg(rest, line_no)?))
601            }
602            "or" | "else" => {
603                let rest = text[head.len()..].trim_start();
604                Ok(Op::Or(parse_string_literal(rest, line_no)?))
605            }
606            "or-shell:" | "else-shell:" => {
607                let body = text[head.len()..].trim_start().to_string();
608                if body.is_empty() {
609                    bail!("line {}: `{}` requires a command", line_no, head);
610                }
611                Ok(Op::OrShell(body))
612            }
613            // `raw` is canonical; `passthrough` is a v0.5.0 legacy alias.
614            "raw" | "passthrough" => Ok(Op::Raw),
615            "shell:" => Ok(Op::Shell(self.parse_block_body(
616                text,
617                head,
618                indent,
619                line_no,
620            )?)),
621            "python:" => Ok(Op::Python(self.parse_block_body(
622                text,
623                head,
624                indent,
625                line_no,
626            )?)),
627            "split" => {
628                let rest = text[head.len()..].trim_start();
629                let delim = parse_regex_literal(rest, line_no)?;
630                let (pre, post) = self.parse_split_branches(indent)?;
631                if pre.is_empty() && post.is_empty() {
632                    bail!(
633                        "line {}: `split` needs at least one `pre:` or `post:` block",
634                        line_no
635                    );
636                }
637                Ok(Op::Split {
638                    delimiter: delim,
639                    pre,
640                    post,
641                })
642            }
643            name if self.is_macro(name) => {
644                let rest = text[head.len()..].trim();
645                let args = parse_macro_args(rest, line_no)?;
646                Ok(Op::MacroCall {
647                    name: name.to_string(),
648                    args,
649                })
650            }
651            _ => bail!("line {}: unknown op `{}`", line_no, head),
652        }
653    }
654
655    /// Parse a `shell:` or `python:` body. Two forms:
656    ///   inline: `shell: <command on rest of line>`
657    ///   block:  `shell: |` then indented body lines until dedent.
658    /// Body lines preserve internal blank lines and relative indentation.
659    fn parse_block_body(
660        &mut self,
661        line_text: &str,
662        head: &str,
663        parent_indent: usize,
664        line_no: usize,
665    ) -> Result<String> {
666        let after = line_text[head.len()..].trim_start();
667        if after != "|" {
668            if after.is_empty() {
669                bail!(
670                    "line {}: empty `{}` body (use `| <newline>` for block form)",
671                    line_no,
672                    head
673                );
674            }
675            return Ok(after.to_string());
676        }
677
678        // Block form: scan lines until indent drops back to parent_indent.
679        // Include blank lines that fall between body lines.
680        let mut collected: Vec<&'a Line> = Vec::new();
681        let mut base: Option<usize> = None;
682        while let Some(l) = self.lines.get(self.pos) {
683            if l.text.is_empty() {
684                collected.push(l);
685                self.pos += 1;
686                continue;
687            }
688            if l.indent <= parent_indent {
689                break;
690            }
691            if base.is_none() {
692                base = Some(l.indent);
693            }
694            collected.push(l);
695            self.pos += 1;
696        }
697        // Trim trailing blank lines (they belong to the gap, not the body).
698        while collected.last().map_or(false, |l| l.text.is_empty()) {
699            collected.pop();
700        }
701        if collected.is_empty() {
702            bail!("line {}: `{}` block is empty", line_no, head);
703        }
704        let base = base.unwrap_or(parent_indent + 4);
705        let dedented: Vec<String> = collected
706            .iter()
707            .map(|l| {
708                if l.text.is_empty() {
709                    String::new()
710                } else if l.raw.len() >= base {
711                    l.raw[base..].to_string()
712                } else {
713                    l.raw.trim_start().to_string()
714                }
715            })
716            .collect();
717        Ok(dedented.join("\n"))
718    }
719
720    /// After a `split /regex/`, consume any sibling `pre:` / `post:`
721    /// blocks at the same indent.
722    fn parse_split_branches(&mut self, parent_indent: usize) -> Result<(Vec<Op>, Vec<Op>)> {
723        let mut pre = Vec::new();
724        let mut post = Vec::new();
725        loop {
726            let Some(line) = self.peek_significant() else {
727                break;
728            };
729            if line.indent != parent_indent {
730                break;
731            }
732            match line.text.as_str() {
733                "pre:" => {
734                    self.advance();
735                    pre = self.parse_indented_ops(parent_indent)?;
736                }
737                "post:" => {
738                    self.advance();
739                    post = self.parse_indented_ops(parent_indent)?;
740                }
741                _ => break,
742            }
743        }
744        Ok((pre, post))
745    }
746
747    /// Parse multiple ops appearing on the same line (after a rule
748    /// header's `:`). `shell:` / `python:` / `else-shell:` greedily
749    /// consume rest of line; other ops yield to the next op keyword
750    /// or macro name.
751    fn parse_inline_ops(&self, text: &str, line_no: usize) -> Result<Vec<Op>> {
752        let mut ops = Vec::new();
753        let mut remaining = text.trim();
754        while !remaining.is_empty() {
755            let (head, _) = split_first_word(remaining);
756            match head {
757                "shell:" => {
758                    let body = remaining[head.len()..].trim_start().to_string();
759                    if body.is_empty() {
760                        bail!("line {}: inline `shell:` needs a command", line_no);
761                    }
762                    ops.push(Op::Shell(body));
763                    remaining = "";
764                }
765                "python:" => {
766                    let body = remaining[head.len()..].trim_start().to_string();
767                    if body.is_empty() {
768                        bail!("line {}: inline `python:` needs a command", line_no);
769                    }
770                    ops.push(Op::Python(body));
771                    remaining = "";
772                }
773                "or-shell:" | "else-shell:" => {
774                    let body = remaining[head.len()..].trim_start().to_string();
775                    if body.is_empty() {
776                        bail!("line {}: inline `{}` needs a command", line_no, head);
777                    }
778                    ops.push(Op::OrShell(body));
779                    remaining = "";
780                }
781                "raw" | "passthrough" => {
782                    ops.push(Op::Raw);
783                    remaining = remaining[head.len()..].trim_start();
784                }
785                "keep" | "drop" => {
786                    let rest = remaining[head.len()..].trim_start();
787                    let (re, after) = parse_regex_literal_and_rest(rest, line_no)?;
788                    ops.push(if head == "keep" {
789                        Op::Keep(re)
790                    } else {
791                        Op::Drop(re)
792                    });
793                    remaining = after.trim_start();
794                }
795                "head" | "tail" => {
796                    let rest = remaining[head.len()..].trim_start();
797                    let (arg_word, after) = take_word(rest);
798                    let h = parse_head_arg(arg_word, line_no)?;
799                    ops.push(if head == "head" {
800                        Op::Head(h)
801                    } else {
802                        Op::Tail(h)
803                    });
804                    remaining = after.trim_start();
805                }
806                "or" | "else" => {
807                    let rest = remaining[head.len()..].trim_start();
808                    let (s, after) = parse_string_literal_and_rest(rest, line_no)?;
809                    ops.push(Op::Or(s));
810                    remaining = after.trim_start();
811                }
812                "split" => {
813                    bail!(
814                        "line {}: `split` cannot appear inline (needs pre:/post: blocks)",
815                        line_no
816                    )
817                }
818                name if self.is_macro(name) => {
819                    let rest = remaining[head.len()..].trim_start();
820                    let (args, after) =
821                        parse_macro_args_until_op(rest, &self.macro_names, line_no)?;
822                    ops.push(Op::MacroCall {
823                        name: name.to_string(),
824                        args,
825                    });
826                    remaining = after.trim_start();
827                }
828                _ => bail!("line {}: unknown op `{}` in inline chain", line_no, head),
829            }
830        }
831        Ok(ops)
832    }
833}
834
835// ──────────────────────────────────────────────────────────────────
836// Sub-parsers (free functions, no Parser state)
837// ──────────────────────────────────────────────────────────────────
838
/// Does `text` begin with the keyword `kw` as a whole word? The
/// character after `kw` must be whitespace, `:`, or nothing at all, so
/// longer words that merely share the prefix (`matching`, `iffy`) are
/// rejected.
fn is_body_opener(text: &str, kw: &str) -> bool {
    let Some(tail) = text.strip_prefix(kw) else {
        return false;
    };
    match tail.chars().next() {
        None => true,
        Some(c) => c == ':' || c.is_whitespace(),
    }
}
848
/// Skip leading whitespace, then split at the first whitespace
/// character: returns `(word, remainder)`, where the remainder still
/// starts with that whitespace (or is empty).
fn split_first_word(s: &str) -> (&str, &str) {
    let trimmed = s.trim_start();
    let cut = trimmed
        .char_indices()
        .find(|(_, c)| c.is_whitespace())
        .map_or(trimmed.len(), |(at, _)| at);
    trimmed.split_at(cut)
}
854
/// Take one whitespace-delimited word off the front of `s` (after
/// skipping leading whitespace). Returns the word and whatever follows
/// it, untrimmed.
fn take_word(s: &str) -> (&str, &str) {
    let s = s.trim_start();
    match s.find(char::is_whitespace) {
        Some(at) => s.split_at(at),
        None => s.split_at(s.len()),
    }
}
860
861fn parse_selector(s: &str) -> Result<(SubPattern, LevelPattern)> {
862    let s = s.trim();
863    if s.is_empty() {
864        bail!("empty selector");
865    }
866    let mut parts = s.splitn(2, ',');
867    let sub_str = parts.next().unwrap().trim();
868    let level_str = parts.next().map(|s| s.trim()).unwrap_or("*");
869
870    let sub = if sub_str == "*" {
871        SubPattern::Star
872    } else {
873        let alts: Vec<String> = sub_str
874            .split('|')
875            .map(|s| s.trim().to_string())
876            .collect();
877        if alts.iter().any(|a| a.is_empty()) {
878            bail!("empty alternative in sub pattern `{}`", sub_str);
879        }
880        SubPattern::Alt(alts)
881    };
882
883    let level = if level_str == "*" {
884        LevelPattern::Star
885    } else {
886        let lvl: Level = level_str.parse().map_err(|e: String| anyhow!(e))?;
887        LevelPattern::Specific(lvl)
888    };
889
890    Ok((sub, level))
891}
892
/// Minimal glob matcher for subcommand selectors.
///
/// The only metacharacter is `*`, which stands for any (possibly empty)
/// run of characters. A pattern without `*` must equal `text` exactly,
/// so plain selectors behave as a straight string compare.
fn glob_match(pat: &str, text: &str) -> bool {
    let Some((literal, remainder)) = pat.split_once('*') else {
        return pat == text;
    };
    // Everything before the first `*` must match literally.
    let Some(tail) = text.strip_prefix(literal) else {
        return false;
    };
    if remainder.is_empty() {
        // A trailing `*` swallows whatever is left.
        return true;
    }
    // Let the `*` consume 0..=all remaining chars and retry the rest of
    // the pattern at each character boundary.
    tail.char_indices()
        .map(|(pos, _)| pos)
        .chain(std::iter::once(tail.len()))
        .any(|pos| glob_match(remainder, &tail[pos..]))
}
914
915/// Parse a guard — an AND of atoms joined by ` and `.
916fn parse_guard(s: &str, line_no: usize) -> Result<Guard> {
917    let mut atoms = Vec::new();
918    for part in s.split(" and ") {
919        let part = part.trim();
920        if part.is_empty() {
921            bail!("line {}: empty guard", line_no);
922        }
923        atoms.push(parse_atom(part, line_no)?);
924    }
925    if atoms.is_empty() {
926        bail!("line {}: empty guard", line_no);
927    }
928    Ok(Guard { atoms })
929}
930
931/// Parse one guard atom: `exit ok|failed`, `level ultra|full|lite`, or a
932/// `--flag` / `-x`.
933fn parse_atom(s: &str, line_no: usize) -> Result<Atom> {
934    if s.starts_with('-') {
935        return Ok(Atom::Flag(s.to_string()));
936    }
937    let mut words = s.split_whitespace();
938    let dim = words.next().unwrap_or("");
939    let val = words.next();
940    if words.next().is_some() {
941        bail!("line {}: guard `{}` has too many words", line_no, s);
942    }
943    match (dim, val) {
944        ("exit", Some("ok")) => Ok(Atom::Exit(ExitMatch::Ok)),
945        ("exit", Some("failed")) => Ok(Atom::Exit(ExitMatch::Failed)),
946        ("exit", Some(v)) => {
947            bail!("line {}: unknown exit value `{}` (expected ok|failed)", line_no, v)
948        }
949        ("exit", None) => bail!("line {}: `exit` guard needs a value (ok|failed)", line_no),
950        ("level", Some(v)) => {
951            let lvl: Level = v.parse().map_err(|e: String| anyhow!("line {line_no}: {e}"))?;
952            Ok(Atom::Level(lvl))
953        }
954        ("level", None) => bail!("line {}: `level` guard needs a value", line_no),
955        (other, _) => bail!(
956            "line {}: unknown guard `{}` (expected `exit ...`, `level ...`, or a --flag)",
957            line_no,
958            other
959        ),
960    }
961}
962
/// The dimensions a `match` header is allowed to switch on.
///
/// Flags are deliberately absent: a flag is either present or not, so
/// there are no values to enumerate — use `if --flag: ...` for those.
#[derive(Clone, Copy)]
enum MatchDim {
    /// `match level`
    Level,
    /// `match exit`
    Exit,
}
971
972fn parse_match_dim(s: &str, line_no: usize) -> Result<MatchDim> {
973    match s {
974        "level" => Ok(MatchDim::Level),
975        "exit" => Ok(MatchDim::Exit),
976        "" => bail!("line {}: `match` needs a dimension (level|exit)", line_no),
977        other => bail!(
978            "line {}: unknown match dimension `{}` (expected level|exit; flags must use `if --flag:`)",
979            line_no,
980            other
981        ),
982    }
983}
984
985fn build_match_atom(dim: MatchDim, value: &str, line_no: usize) -> Result<Atom> {
986    match dim {
987        MatchDim::Level => {
988            let lvl: Level = value
989                .parse()
990                .map_err(|e: String| anyhow!("line {line_no}: {e}"))?;
991            Ok(Atom::Level(lvl))
992        }
993        MatchDim::Exit => match value {
994            "ok" => Ok(Atom::Exit(ExitMatch::Ok)),
995            "failed" => Ok(Atom::Exit(ExitMatch::Failed)),
996            other => bail!(
997                "line {}: unknown exit value `{}` (expected ok|failed)",
998                line_no,
999                other
1000            ),
1001        },
1002    }
1003}
1004
1005fn parse_define_header(s: &str) -> Result<(String, Vec<String>, &str)> {
1006    let s = s.trim_start();
1007    let end = s
1008        .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
1009        .unwrap_or(s.len());
1010    let name = s[..end].to_string();
1011    if name.is_empty() {
1012        bail!("define needs a name");
1013    }
1014    let rest = s[end..].trim_start();
1015    if let Some(rest) = rest.strip_prefix('(') {
1016        let close = rest
1017            .find(')')
1018            .ok_or_else(|| anyhow!("missing `)` in define params"))?;
1019        let params: Vec<String> = rest[..close]
1020            .split(',')
1021            .map(|p| p.trim().to_string())
1022            .filter(|p| !p.is_empty())
1023            .collect();
1024        Ok((name, params, rest[close + 1..].trim_start()))
1025    } else {
1026        Ok((name, Vec::new(), rest))
1027    }
1028}
1029
1030fn parse_regex_literal(s: &str, line_no: usize) -> Result<PatternRegex> {
1031    let (re, after) = parse_regex_literal_and_rest(s, line_no)?;
1032    let after = after.trim();
1033    if !after.is_empty() {
1034        bail!(
1035            "line {}: unexpected trailing input after regex: `{}`",
1036            line_no,
1037            after
1038        );
1039    }
1040    Ok(re)
1041}
1042
1043fn parse_regex_literal_and_rest(s: &str, line_no: usize) -> Result<(PatternRegex, &str)> {
1044    let s = s.trim_start();
1045    if !s.starts_with('/') {
1046        bail!(
1047            "line {}: expected `/regex/`, got `{}`",
1048            line_no,
1049            preview(s)
1050        );
1051    }
1052    let body = &s[1..];
1053    let mut src = String::new();
1054    let mut chars = body.char_indices().peekable();
1055    let mut end_byte: Option<usize> = None;
1056    while let Some((i, c)) = chars.next() {
1057        if c == '\\' {
1058            if let Some((_, n)) = chars.next() {
1059                if n == '/' {
1060                    src.push('/');
1061                } else {
1062                    src.push('\\');
1063                    src.push(n);
1064                }
1065            } else {
1066                bail!("line {}: trailing backslash in regex", line_no);
1067            }
1068        } else if c == '/' {
1069            end_byte = Some(i);
1070            break;
1071        } else {
1072            src.push(c);
1073        }
1074    }
1075    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated regex", line_no))?;
1076    let after = &body[end_byte + 1..];
1077    let compiled = Regex::new(&src)
1078        .map_err(|e| anyhow!("line {}: invalid regex `{}`: {}", line_no, src, e))?;
1079    Ok((
1080        PatternRegex {
1081            source: src,
1082            compiled,
1083        },
1084        after,
1085    ))
1086}
1087
1088fn parse_string_literal(s: &str, line_no: usize) -> Result<String> {
1089    let (s, after) = parse_string_literal_and_rest(s, line_no)?;
1090    let after = after.trim();
1091    if !after.is_empty() {
1092        bail!(
1093            "line {}: unexpected trailing input after string: `{}`",
1094            line_no,
1095            after
1096        );
1097    }
1098    Ok(s)
1099}
1100
1101fn parse_string_literal_and_rest(s: &str, line_no: usize) -> Result<(String, &str)> {
1102    let s = s.trim_start();
1103    if !s.starts_with('"') {
1104        bail!(
1105            "line {}: expected `\"...\"`, got `{}`",
1106            line_no,
1107            preview(s)
1108        );
1109    }
1110    let body = &s[1..];
1111    let mut out = String::new();
1112    let mut chars = body.char_indices();
1113    let mut end_byte: Option<usize> = None;
1114    while let Some((i, c)) = chars.next() {
1115        if c == '\\' {
1116            if let Some((_, n)) = chars.next() {
1117                match n {
1118                    'n' => out.push('\n'),
1119                    't' => out.push('\t'),
1120                    'r' => out.push('\r'),
1121                    '\\' => out.push('\\'),
1122                    '"' => out.push('"'),
1123                    other => {
1124                        out.push('\\');
1125                        out.push(other);
1126                    }
1127                }
1128            } else {
1129                bail!("line {}: trailing backslash in string", line_no);
1130            }
1131        } else if c == '"' {
1132            end_byte = Some(i);
1133            break;
1134        } else {
1135            out.push(c);
1136        }
1137    }
1138    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated string", line_no))?;
1139    let after = &body[end_byte + 1..];
1140    Ok((out, after))
1141}
1142
1143fn parse_head_arg(s: &str, line_no: usize) -> Result<HeadArg> {
1144    let s = s.trim();
1145    if s == "auto" {
1146        return Ok(HeadArg::Auto);
1147    }
1148    s.parse::<usize>().map(HeadArg::Number).map_err(|_| {
1149        anyhow!(
1150            "line {}: expected number or `auto`, got `{}`",
1151            line_no,
1152            s
1153        )
1154    })
1155}
1156
1157fn parse_macro_args(s: &str, line_no: usize) -> Result<Vec<MacroArg>> {
1158    let mut out = Vec::new();
1159    let mut rest = s.trim();
1160    while !rest.is_empty() {
1161        if rest.starts_with('"') {
1162            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
1163            out.push(MacroArg::String(sv));
1164            rest = after.trim_start();
1165        } else {
1166            let (word, after) = take_word(rest);
1167            out.push(match word.parse::<usize>() {
1168                Ok(n) => MacroArg::Number(n),
1169                Err(_) => MacroArg::String(word.to_string()),
1170            });
1171            rest = after.trim_start();
1172        }
1173    }
1174    Ok(out)
1175}
1176
1177fn parse_macro_args_until_op<'a>(
1178    s: &'a str,
1179    macro_names: &[String],
1180    line_no: usize,
1181) -> Result<(Vec<MacroArg>, &'a str)> {
1182    let mut out = Vec::new();
1183    let mut rest = s.trim_start();
1184    while !rest.is_empty() {
1185        let (word, _) = take_word(rest);
1186        if OP_KEYWORDS.contains(&word) || macro_names.iter().any(|n| n == word) {
1187            break;
1188        }
1189        if rest.starts_with('"') {
1190            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
1191            out.push(MacroArg::String(sv));
1192            rest = after.trim_start();
1193        } else {
1194            let (w, after) = take_word(rest);
1195            out.push(match w.parse::<usize>() {
1196                Ok(n) => MacroArg::Number(n),
1197                Err(_) => MacroArg::String(w.to_string()),
1198            });
1199            rest = after.trim_start();
1200        }
1201    }
1202    Ok((out, rest))
1203}
1204
/// Truncate `s` to at most its first 40 characters (not bytes) for use
/// in error messages. Shorter input is returned whole.
fn preview(s: &str) -> &str {
    match s.char_indices().nth(40) {
        Some((cut, _)) => &s[..cut],
        None => s,
    }
}
1209
1210// ──────────────────────────────────────────────────────────────────
1211// Execution
1212// ──────────────────────────────────────────────────────────────────
1213
1214use std::io::Write;
1215use std::process::{Command, Stdio};
1216
1217/// Per-invocation context passed to the executor and propagated as env
1218/// vars to `shell:` / `python:` subprocesses.
1219#[derive(Debug, Clone)]
1220pub struct ExecCtx<'a> {
1221    pub sub: &'a str,
1222    pub level: Level,
1223    pub exit_code: i32,
1224    pub args: &'a [String],
1225}
1226
1227/// Run the matching rule against `input` and return the filtered output.
1228/// If no rule matches, the input is returned unchanged (passthrough).
1229///
1230/// Non-empty output always ends in a newline, matching the convention
1231/// of shell tools like `echo` and `grep`.
1232pub fn execute(rs: &RuleSet, ctx: &ExecCtx, input: &str) -> Result<String> {
1233    let Some(rule) = rs.select(ctx.sub, ctx.level) else {
1234        return Ok(input.to_string());
1235    };
1236    let out = run_ops(&rule.ops, ctx, input, rs, &[])?;
1237    Ok(ensure_trailing_newline(out))
1238}
1239
/// Append a `\n` to `s` unless it is empty or already ends with one.
fn ensure_trailing_newline(mut s: String) -> String {
    let needs_newline = !(s.is_empty() || s.ends_with('\n'));
    if needs_newline {
        s.push('\n');
    }
    s
}
1246
/// Input/output statistics for one executed op, as captured by
/// [`execute_explain`].
#[derive(Clone, Debug)]
pub struct StageRecord {
    /// Human-readable description of the op.
    pub op_desc: String,
    /// Number of lines fed into the op.
    pub stdin_lines: usize,
    /// Number of bytes fed into the op.
    pub stdin_bytes: usize,
    /// Number of lines the op produced.
    pub stdout_lines: usize,
    /// Number of bytes the op produced.
    pub stdout_bytes: usize,
    /// Wall-clock time spent in the op, in microseconds.
    pub elapsed_us: u128,
}
1257
1258#[derive(Debug, Default, Clone)]
1259pub struct ExplainTrace {
1260    /// Index into `RuleSet::rules` of the matched rule (None if no match).
1261    pub matched_rule: Option<usize>,
1262    pub stages: Vec<StageRecord>,
1263}
1264
1265/// Like [`execute`] but records per-op stats. Only top-level ops are
1266/// recorded — macros and split sub-chains run silently. Adds ~µs of
1267/// overhead per op for line/byte counting; safe for interactive use,
1268/// avoid in tight loops.
1269pub fn execute_explain(
1270    rs: &RuleSet,
1271    ctx: &ExecCtx,
1272    input: &str,
1273) -> Result<(String, ExplainTrace)> {
1274    let mut trace = ExplainTrace::default();
1275    let Some((idx, rule)) = rs
1276        .rules
1277        .iter()
1278        .enumerate()
1279        .find(|(_, r)| r.matches(ctx.sub, ctx.level))
1280    else {
1281        return Ok((input.to_string(), trace));
1282    };
1283    trace.matched_rule = Some(idx);
1284
1285    let raw = input.to_string();
1286    let mut state = input.to_string();
1287    for op in &rule.ops {
1288        let stdin_lines = state.lines().count();
1289        let stdin_bytes = state.len();
1290        let start = std::time::Instant::now();
1291        let new_state = apply_op(op, &state, &raw, ctx, rs, &[])?;
1292        let elapsed_us = start.elapsed().as_micros();
1293        trace.stages.push(StageRecord {
1294            op_desc: describe_op(op),
1295            stdin_lines,
1296            stdin_bytes,
1297            stdout_lines: new_state.lines().count(),
1298            stdout_bytes: new_state.len(),
1299            elapsed_us,
1300        });
1301        state = new_state;
1302    }
1303    Ok((ensure_trailing_newline(state), trace))
1304}
1305
1306fn describe_op(op: &Op) -> String {
1307    match op {
1308        Op::Keep(p) => format!("keep /{}/", p.source),
1309        Op::Drop(p) => format!("drop /{}/", p.source),
1310        Op::Head(arg) => format!("head {}", describe_head(arg)),
1311        Op::Tail(arg) => format!("tail {}", describe_head(arg)),
1312        Op::Or(s) => format!("or {s:?}"),
1313        Op::OrShell(s) => format!("or-shell: {}", first_line(s)),
1314        Op::Raw => "raw".to_string(),
1315        Op::Cascade(branches) => format!("cascade ({} arms)", branches.len()),
1316        Op::Shell(s) => format!("shell: {}", first_line(s)),
1317        Op::Python(s) => {
1318            if has_pep723_header(s) {
1319                format!("python (uv): {}", first_line(s))
1320            } else {
1321                format!("python: {}", first_line(s))
1322            }
1323        }
1324        Op::MacroCall { name, args } => {
1325            let parts: Vec<String> = args
1326                .iter()
1327                .map(|a| match a {
1328                    MacroArg::Number(n) => n.to_string(),
1329                    MacroArg::String(s) => s.clone(),
1330                })
1331                .collect();
1332            if parts.is_empty() {
1333                name.clone()
1334            } else {
1335                format!("{name} {}", parts.join(" "))
1336            }
1337        }
1338        Op::Split { delimiter, .. } => format!("split /{}/", delimiter.source),
1339    }
1340}
1341
1342fn describe_head(a: &HeadArg) -> String {
1343    match a {
1344        HeadArg::Number(n) => n.to_string(),
1345        HeadArg::Auto => "auto".into(),
1346    }
1347}
1348
/// First line of `s`, cut to at most 60 characters (not bytes). Yields
/// an empty string when `s` has no lines.
fn first_line(s: &str) -> String {
    let line = s.lines().next().unwrap_or("");
    match line.char_indices().nth(60) {
        Some((cut, _)) => line[..cut].to_string(),
        None => line.to_string(),
    }
}
1352
1353fn run_ops(
1354    ops: &[Op],
1355    ctx: &ExecCtx,
1356    input: &str,
1357    rs: &RuleSet,
1358    macro_args: &[MacroArg],
1359) -> Result<String> {
1360    let raw = input.to_string();
1361    let mut state = input.to_string();
1362    for op in ops {
1363        state = apply_op(op, &state, &raw, ctx, rs, macro_args)?;
1364    }
1365    Ok(state)
1366}
1367
1368fn apply_op(
1369    op: &Op,
1370    state: &str,
1371    raw: &str,
1372    ctx: &ExecCtx,
1373    rs: &RuleSet,
1374    macro_args: &[MacroArg],
1375) -> Result<String> {
1376    match op {
1377        Op::Keep(pat) => Ok(filter_lines(state, |l| pat.compiled.is_match(l))),
1378        Op::Drop(pat) => Ok(filter_lines(state, |l| !pat.compiled.is_match(l))),
1379        Op::Head(arg) => Ok(take_head(state, resolve_head(arg, ctx.level))),
1380        Op::Tail(arg) => Ok(take_tail(state, resolve_head(arg, ctx.level))),
1381        Op::Or(s) => Ok(if state.trim().is_empty() {
1382            s.clone()
1383        } else {
1384            state.to_string()
1385        }),
1386        Op::OrShell(cmd) => {
1387            if state.trim().is_empty() {
1388                let expanded = expand_args(cmd, macro_args);
1389                run_shell(&expanded, raw, ctx)
1390            } else {
1391                Ok(state.to_string())
1392            }
1393        }
1394        Op::Raw => Ok(state.to_string()),
1395        Op::Cascade(branches) => {
1396            for br in branches {
1397                let hit = match &br.guard {
1398                    None => true,
1399                    Some(g) => guard_matches(g, ctx),
1400                };
1401                if hit {
1402                    return run_ops(&br.ops, ctx, state, rs, macro_args);
1403                }
1404            }
1405            // No arm matched and no `else` — leave the stream untouched.
1406            Ok(state.to_string())
1407        }
1408        Op::Shell(cmd) => {
1409            let expanded = expand_args(cmd, macro_args);
1410            run_shell(&expanded, state, ctx)
1411        }
1412        Op::Python(body) => {
1413            let expanded = expand_args(body, macro_args);
1414            run_python(&expanded, state, ctx)
1415        }
1416        Op::MacroCall { name, args } => {
1417            let def = rs
1418                .find_define(name)
1419                .ok_or_else(|| anyhow!("undefined macro `{}`", name))?;
1420            if args.len() != def.params.len() {
1421                bail!(
1422                    "macro `{}` expects {} arg(s), got {}",
1423                    name,
1424                    def.params.len(),
1425                    args.len()
1426                );
1427            }
1428            run_ops(&def.ops, ctx, state, rs, args)
1429        }
1430        Op::Split {
1431            delimiter,
1432            pre,
1433            post,
1434        } => {
1435            let (a, b) = split_at_first_match(state, &delimiter.compiled);
1436            let pre_out = if pre.is_empty() {
1437                a
1438            } else {
1439                run_ops(pre, ctx, &a, rs, macro_args)?
1440            };
1441            let post_out = if post.is_empty() {
1442                b
1443            } else {
1444                run_ops(post, ctx, &b, rs, macro_args)?
1445            };
1446            Ok(join_nonempty(&pre_out, &post_out))
1447        }
1448    }
1449}
1450
1451/// A guard holds when every atom holds (AND).
1452fn guard_matches(g: &Guard, ctx: &ExecCtx) -> bool {
1453    g.atoms.iter().all(|a| atom_matches(a, ctx))
1454}
1455
1456fn atom_matches(a: &Atom, ctx: &ExecCtx) -> bool {
1457    match a {
1458        Atom::Exit(ExitMatch::Ok) => ctx.exit_code == 0,
1459        Atom::Exit(ExitMatch::Failed) => ctx.exit_code != 0,
1460        Atom::Level(l) => *l == ctx.level,
1461        Atom::Flag(f) => ctx.args.iter().any(|arg| arg == f),
1462    }
1463}
1464
1465fn resolve_head(arg: &HeadArg, level: Level) -> usize {
1466    match arg {
1467        HeadArg::Number(n) => *n,
1468        HeadArg::Auto => level.head_limit(30),
1469    }
1470}
1471
/// Keep only the lines of `s` for which `keep` returns `true`, joined
/// with `\n`. The result carries no trailing newline.
fn filter_lines(s: &str, mut keep: impl FnMut(&str) -> bool) -> String {
    let mut out = String::new();
    // Track "first kept line" explicitly: a kept line may itself be
    // empty, so `out.is_empty()` cannot be used to decide on separators.
    let mut first = true;
    for line in s.lines() {
        if !keep(line) {
            continue;
        }
        if !first {
            out.push('\n');
        }
        out.push_str(line);
        first = false;
    }
    out
}
1478
/// First `n` lines of `s`, joined with `\n` (no trailing newline).
fn take_head(s: &str, n: usize) -> String {
    let mut out = String::new();
    for (idx, line) in s.lines().take(n).enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out
}
1482
/// Last `n` lines of `s`, joined with `\n` (no trailing newline). When
/// `s` has fewer than `n` lines, all of them are returned.
fn take_tail(s: &str, n: usize) -> String {
    let total = s.lines().count();
    let skipped = total.saturating_sub(n);
    s.lines().skip(skipped).collect::<Vec<_>>().join("\n")
}
1488
1489/// Split input at the first line matching `re`. The matching line goes
1490/// into `post`. If no line matches, everything is `pre` and `post` is
1491/// empty.
1492fn split_at_first_match(s: &str, re: &Regex) -> (String, String) {
1493    let mut pre = String::new();
1494    let mut post = String::new();
1495    let mut in_post = false;
1496    for line in s.lines() {
1497        if !in_post && re.is_match(line) {
1498            in_post = true;
1499        }
1500        let buf = if in_post { &mut post } else { &mut pre };
1501        if !buf.is_empty() {
1502            buf.push('\n');
1503        }
1504        buf.push_str(line);
1505    }
1506    (pre, post)
1507}
1508
/// Concatenate `a` and `b` with a single `\n` between them, unless one
/// side is empty — then the other side is returned alone (and the empty
/// string when both are empty).
fn join_nonempty(a: &str, b: &str) -> String {
    if a.is_empty() {
        return b.to_string();
    }
    if b.is_empty() {
        return a.to_string();
    }
    format!("{a}\n{b}")
}
1517
1518/// Replace `$1`..`$9` with macro positional args. Other `$NAME` tokens
1519/// (e.g. `$level`, `$sub`) are left intact so shell can expand them
1520/// from env vars.
1521fn expand_args(body: &str, args: &[MacroArg]) -> String {
1522    if args.is_empty() {
1523        return body.to_string();
1524    }
1525    let mut out = String::with_capacity(body.len());
1526    let bytes = body.as_bytes();
1527    let mut i = 0;
1528    while i < bytes.len() {
1529        let c = bytes[i];
1530        if c == b'$' && i + 1 < bytes.len() {
1531            let n = bytes[i + 1];
1532            if n.is_ascii_digit() && n != b'0' {
1533                let idx = (n - b'0') as usize;
1534                if idx <= args.len() {
1535                    match &args[idx - 1] {
1536                        MacroArg::Number(v) => out.push_str(&v.to_string()),
1537                        MacroArg::String(v) => out.push_str(v),
1538                    }
1539                    i += 2;
1540                    continue;
1541                }
1542            }
1543        }
1544        out.push(c as char);
1545        i += 1;
1546    }
1547    out
1548}
1549
1550fn run_shell(cmd: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1551    let mut child = Command::new("sh")
1552        .arg("-c")
1553        .arg(cmd)
1554        .env("level", ctx.level.to_string())
1555        .env("sub", ctx.sub)
1556        .env("exit", ctx.exit_code.to_string())
1557        .env("args", ctx.args.join(" "))
1558        .stdin(Stdio::piped())
1559        .stdout(Stdio::piped())
1560        .stderr(Stdio::piped())
1561        .spawn()
1562        .context("spawning sh")?;
1563
1564    if let Some(mut stdin) = child.stdin.take() {
1565        stdin
1566            .write_all(stdin_data.as_bytes())
1567            .context("writing to sh stdin")?;
1568    }
1569
1570    let output = child.wait_with_output().context("waiting for sh")?;
1571    if !output.status.success() {
1572        let stderr = String::from_utf8_lossy(&output.stderr);
1573        bail!(
1574            "shell exited {}: {}",
1575            output.status.code().unwrap_or(-1),
1576            stderr.trim()
1577        );
1578    }
1579    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1580}
1581
1582fn run_python(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1583    if has_pep723_header(body) {
1584        run_python_uv(body, stdin_data, ctx)
1585    } else {
1586        run_python_plain(body, stdin_data, ctx)
1587    }
1588}
1589
/// True when any line of `body`, after skipping leading whitespace,
/// starts with `# /// script` — the opening marker of a PEP 723 inline
/// metadata block.
fn has_pep723_header(body: &str) -> bool {
    for line in body.lines() {
        if line.trim_start().starts_with("# /// script") {
            return true;
        }
    }
    false
}
1594
1595fn run_python_plain(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1596    let mut child = Command::new("python3")
1597        .arg("-c")
1598        .arg(body)
1599        .env("level", ctx.level.to_string())
1600        .env("sub", ctx.sub)
1601        .env("exit", ctx.exit_code.to_string())
1602        .env("args", ctx.args.join(" "))
1603        .stdin(Stdio::piped())
1604        .stdout(Stdio::piped())
1605        .stderr(Stdio::piped())
1606        .spawn()
1607        .context("spawning python3")?;
1608
1609    if let Some(mut stdin) = child.stdin.take() {
1610        stdin
1611            .write_all(stdin_data.as_bytes())
1612            .context("writing to python stdin")?;
1613    }
1614    let output = child.wait_with_output().context("waiting for python")?;
1615    if !output.status.success() {
1616        let stderr = String::from_utf8_lossy(&output.stderr);
1617        bail!(
1618            "python exited {}: {}",
1619            output.status.code().unwrap_or(-1),
1620            stderr.trim()
1621        );
1622    }
1623    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1624}
1625
1626/// PEP 723: write the body to a temp file and let `uv run --script` resolve
1627/// inline dependencies. Data flows via stdin to the script.
1628fn run_python_uv(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1629    let mut script = tempfile::Builder::new()
1630        .prefix("lowfat-lf-")
1631        .suffix(".py")
1632        .tempfile()
1633        .context("creating temp script file")?;
1634    script
1635        .write_all(body.as_bytes())
1636        .context("writing temp script")?;
1637    script.flush().ok();
1638
1639    let path = script
1640        .path()
1641        .to_str()
1642        .ok_or_else(|| anyhow!("non-UTF8 temp path"))?
1643        .to_string();
1644
1645    let mut child = Command::new("uv")
1646        .args(["run", "--script", &path])
1647        .env("level", ctx.level.to_string())
1648        .env("sub", ctx.sub)
1649        .env("exit", ctx.exit_code.to_string())
1650        .env("args", ctx.args.join(" "))
1651        .stdin(Stdio::piped())
1652        .stdout(Stdio::piped())
1653        .stderr(Stdio::piped())
1654        .spawn()
1655        .context("spawning uv (is `uv` installed?)")?;
1656
1657    if let Some(mut stdin) = child.stdin.take() {
1658        stdin
1659            .write_all(stdin_data.as_bytes())
1660            .context("writing to uv stdin")?;
1661    }
1662    let output = child.wait_with_output().context("waiting for uv")?;
1663    if !output.status.success() {
1664        let stderr = String::from_utf8_lossy(&output.stderr);
1665        bail!(
1666            "uv exited {}: {}",
1667            output.status.code().unwrap_or(-1),
1668            stderr.trim()
1669        );
1670    }
1671    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1672}
1673
1674// ──────────────────────────────────────────────────────────────────
1675// Tests
1676// ──────────────────────────────────────────────────────────────────
1677
1678#[cfg(test)]
1679mod tests {
1680    use super::*;
1681
1682    fn parse_ok(src: &str) -> RuleSet {
1683        parse(src).unwrap_or_else(|e| panic!("parse failed: {e}\n--- src ---\n{src}"))
1684    }
1685
1686    #[test]
1687    fn empty_input() {
1688        let rs = parse_ok("");
1689        assert!(rs.rules.is_empty());
1690        assert!(rs.defines.is_empty());
1691    }
1692
1693    #[test]
1694    fn comments_and_blanks_only() {
1695        let rs = parse_ok("# hi\n\n# more\n");
1696        assert!(rs.rules.is_empty());
1697    }
1698
1699    #[test]
1700    fn simple_rule() {
1701        let rs = parse_ok(
1702            r#"
1703status:
1704    keep /foo/
1705    head 10
1706"#,
1707        );
1708        assert_eq!(rs.rules.len(), 1);
1709        let r = &rs.rules[0];
1710        assert!(matches!(&r.sub, SubPattern::Alt(a) if a == &["status".to_string()]));
1711        assert!(matches!(r.level, LevelPattern::Star));
1712        assert_eq!(r.ops.len(), 2);
1713        match &r.ops[0] {
1714            Op::Keep(p) => assert_eq!(p.source, "foo"),
1715            _ => panic!("expected Keep"),
1716        }
1717        assert!(matches!(r.ops[1], Op::Head(HeadArg::Number(10))));
1718    }
1719
1720    #[test]
1721    fn sub_with_alternation_and_level() {
1722        let rs = parse_ok(
1723            r#"
1724build|check, ultra:
1725    head 15
1726"#,
1727        );
1728        let r = &rs.rules[0];
1729        match &r.sub {
1730            SubPattern::Alt(a) => assert_eq!(a, &["build".to_string(), "check".to_string()]),
1731            _ => panic!("expected Alt"),
1732        }
1733        assert!(matches!(r.level, LevelPattern::Specific(Level::Ultra)));
1734    }
1735
1736    #[test]
1737    fn star_wildcards() {
1738        let rs = parse_ok(
1739            r#"
1740*:
1741    head 30
1742"#,
1743        );
1744        assert!(matches!(rs.rules[0].sub, SubPattern::Star));
1745        assert!(matches!(rs.rules[0].level, LevelPattern::Star));
1746    }
1747
1748    #[test]
1749    fn else_string_fallback() {
1750        let rs = parse_ok(
1751            r#"
1752status:
1753    keep /^M /
1754    head 5
1755    else "clean"
1756"#,
1757        );
1758        match &rs.rules[0].ops[2] {
1759            Op::Or(s) => assert_eq!(s, "clean"),
1760            _ => panic!("expected Or"),
1761        }
1762    }
1763
1764    #[test]
1765    fn shell_inline_and_block() {
1766        let rs = parse_ok(
1767            r#"
1768define a:
1769    shell: sed -E 's/x/y/'
1770
1771define b:
1772    shell: |
1773        awk '
1774          BEGIN { n=0 }
1775          { print; n++ }
1776        '
1777"#,
1778        );
1779        match &rs.defines[0].ops[0] {
1780            Op::Shell(s) => assert_eq!(s, "sed -E 's/x/y/'"),
1781            _ => panic!("expected inline Shell"),
1782        }
1783        match &rs.defines[1].ops[0] {
1784            Op::Shell(s) => {
1785                assert!(s.starts_with("awk '"));
1786                assert!(s.contains("BEGIN { n=0 }"));
1787                assert!(s.contains("{ print; n++ }"));
1788            }
1789            _ => panic!("expected block Shell"),
1790        }
1791    }
1792
1793    #[test]
1794    fn python_block_preserves_pep723_and_blanks() {
1795        let rs = parse_ok(
1796            r#"
1797define clean:
1798    python: |
1799        # /// script
1800        # dependencies = ["pyyaml>=6"]
1801        # ///
1802        import sys, yaml
1803
1804        for d in yaml.safe_load_all(sys.stdin):
1805            print(d)
1806"#,
1807        );
1808        match &rs.defines[0].ops[0] {
1809            Op::Python(s) => {
1810                assert!(s.contains("# /// script"));
1811                assert!(s.contains("# dependencies = [\"pyyaml>=6\"]"));
1812                assert!(s.contains("import sys, yaml"));
1813                // Blank line between imports and loop preserved
1814                assert!(s.contains("yaml\n\nfor"));
1815                // Internal indent preserved (4 spaces under `for`)
1816                assert!(s.contains("    print(d)"));
1817            }
1818            _ => panic!("expected Python"),
1819        }
1820    }
1821
1822    #[test]
1823    fn macro_call_with_args() {
1824        let rs = parse_ok(
1825            r#"
1826define compact(n):
1827    head 1
1828
1829diff, ultra:
1830    compact 30
1831"#,
1832        );
1833        match &rs.rules[0].ops[0] {
1834            Op::MacroCall { name, args } => {
1835                assert_eq!(name, "compact");
1836                assert_eq!(args, &[MacroArg::Number(30)]);
1837            }
1838            _ => panic!("expected MacroCall"),
1839        }
1840    }
1841
1842    #[test]
1843    fn inline_ops_after_rule_header() {
1844        let rs = parse_ok(
1845            r#"
1846define compact(n):
1847    head 1
1848
1849diff, ultra:  compact 30  else-shell: awk 'NF' | head -50
1850"#,
1851        );
1852        let ops = &rs.rules[0].ops;
1853        assert_eq!(ops.len(), 2);
1854        assert!(matches!(&ops[0], Op::MacroCall { name, .. } if name == "compact"));
1855        match &ops[1] {
1856            Op::OrShell(s) => assert_eq!(s, "awk 'NF' | head -50"),
1857            _ => panic!("expected OrShell, got {:?}", &ops[1]),
1858        }
1859    }
1860
1861    #[test]
1862    fn split_with_pre_and_post() {
1863        let rs = parse_ok(
1864            r#"
1865define ah:
1866    shell: cat
1867
1868show:
1869    split /^diff /
1870    pre:
1871        keep /^commit /
1872        ah
1873    post:
1874        head 10
1875    head 100
1876"#,
1877        );
1878        let ops = &rs.rules[0].ops;
1879        assert_eq!(ops.len(), 2);
1880        match &ops[0] {
1881            Op::Split {
1882                delimiter,
1883                pre,
1884                post,
1885            } => {
1886                assert_eq!(delimiter.source, "^diff ");
1887                assert_eq!(pre.len(), 2);
1888                assert_eq!(post.len(), 1);
1889                assert!(matches!(&pre[0], Op::Keep(_)));
1890                assert!(matches!(&pre[1], Op::MacroCall { name, .. } if name == "ah"));
1891                assert!(matches!(post[0], Op::Head(HeadArg::Number(10))));
1892            }
1893            _ => panic!("expected Split"),
1894        }
1895        assert!(matches!(ops[1], Op::Head(HeadArg::Number(100))));
1896    }
1897
1898    #[test]
1899    fn first_match_wins_selection() {
1900        let rs = parse_ok(
1901            r#"
1902diff, ultra:
1903    head 5
1904
1905diff:
1906    head 20
1907
1908*:
1909    head 30
1910"#,
1911        );
1912        let r = rs.select("diff", Level::Ultra).unwrap();
1913        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(5))));
1914        let r = rs.select("diff", Level::Full).unwrap();
1915        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(20))));
1916        let r = rs.select("status", Level::Ultra).unwrap();
1917        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(30))));
1918    }
1919
1920    #[test]
1921    fn alternation_in_selector_matches() {
1922        let rs = parse_ok(
1923            r#"
1924build|check, ultra:
1925    head 15
1926"#,
1927        );
1928        assert!(rs.select("build", Level::Ultra).is_some());
1929        assert!(rs.select("check", Level::Ultra).is_some());
1930        assert!(rs.select("test", Level::Ultra).is_none());
1931        assert!(rs.select("build", Level::Full).is_none());
1932    }
1933
1934    #[test]
1935    fn head_auto_keyword() {
1936        let rs = parse_ok(
1937            r#"
1938foo:
1939    head auto
1940"#,
1941        );
1942        assert!(matches!(rs.rules[0].ops[0], Op::Head(HeadArg::Auto)));
1943    }
1944
1945    #[test]
1946    fn regex_with_escaped_slash() {
1947        let rs = parse_ok(
1948            r#"
1949foo:
1950    keep /a\/b/
1951"#,
1952        );
1953        match &rs.rules[0].ops[0] {
1954            Op::Keep(p) => assert_eq!(p.source, "a/b"),
1955            _ => panic!(),
1956        }
1957    }
1958
1959    #[test]
1960    fn errors_on_unterminated_regex() {
1961        let err = parse("foo:\n    keep /abc\n").unwrap_err();
1962        assert!(err.to_string().contains("unterminated regex"), "got: {err}");
1963    }
1964
1965    #[test]
1966    fn errors_on_unknown_op() {
1967        let err = parse("foo:\n    nonsense 1\n").unwrap_err();
1968        assert!(err.to_string().contains("unknown op"), "got: {err}");
1969    }
1970
1971    #[test]
1972    fn errors_on_invalid_level() {
1973        let err = parse("foo, gigamax:\n    head 5\n").unwrap_err();
1974        // anyhow only renders the outermost message via Display; use {:#}
1975        // to walk the cause chain.
1976        let chain = format!("{err:#}");
1977        assert!(chain.contains("unknown level"), "got: {chain}");
1978    }
1979
1980    #[test]
1981    fn errors_on_empty_rule_body() {
1982        let err = parse("foo:\nbar:\n    head 5\n").unwrap_err();
1983        assert!(err.to_string().contains("rule has no ops"), "got: {err}");
1984    }
1985
1986    // ── full plugin files parse cleanly ──────────────────────────
1987
1988    #[test]
1989    fn git_compact_plugin_parses() {
1990        let src = include_str!(
1991            "../../../plugins/git/git-compact/filter.lf"
1992        );
1993        let rs = parse_ok(src);
1994        // Defines: strip-trailers, abbrev-hash, compact-diff
1995        assert_eq!(rs.defines.len(), 3);
1996        let names: Vec<&str> = rs.defines.iter().map(|d| d.name.as_str()).collect();
1997        assert_eq!(names, ["strip-trailers", "abbrev-hash", "compact-diff"]);
1998        assert_eq!(rs.defines[2].params, vec!["limit".to_string()]);
1999
2000        // Selection sanity
2001        assert!(rs.select("status", Level::Full).is_some());
2002        assert!(rs.select("diff", Level::Ultra).is_some());
2003        assert!(rs.select("diff", Level::Lite).is_some());
2004        assert!(rs.select("diff", Level::Full).is_some());
2005        assert!(rs.select("log", Level::Ultra).is_some());
2006        assert!(rs.select("show", Level::Ultra).is_some());
2007        assert!(rs.select("show", Level::Full).is_some());
2008        // Catch-all
2009        assert!(rs.select("nothing", Level::Full).is_some());
2010
2011        // Show rule is now a level cascade.
2012        let show_full = rs.select("show", Level::Full).unwrap();
2013        assert!(matches!(&show_full.ops[0], Op::Cascade(_)));
2014    }
2015
2016    // ── executor ─────────────────────────────────────────────────
2017
2018    fn ctx<'a>(sub: &'a str, level: Level) -> ExecCtx<'a> {
2019        ExecCtx {
2020            sub,
2021            level,
2022            exit_code: 0,
2023            args: &[],
2024        }
2025    }
2026
2027    #[test]
2028    fn exec_keep_drop_head_tail() {
2029        let rs = parse_ok(
2030            r#"
2031foo:
2032    keep /^a/
2033    drop /skip/
2034    head 3
2035"#,
2036        );
2037        let input = "alpha\nbeta\na-skip\namber\naxe\nakira\n";
2038        let out = execute(&rs, &ctx("foo", Level::Full), input).unwrap();
2039        assert_eq!(out, "alpha\namber\naxe\n");
2040    }
2041
2042    #[test]
2043    fn exec_tail() {
2044        let rs = parse_ok(
2045            r#"
2046foo:
2047    tail 2
2048"#,
2049        );
2050        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd").unwrap();
2051        assert_eq!(out, "c\nd\n");
2052    }
2053
2054    #[test]
2055    fn exec_else_string_when_empty() {
2056        let rs = parse_ok(
2057            r#"
2058status:
2059    keep /^M /
2060    else "clean"
2061"#,
2062        );
2063        let out = execute(&rs, &ctx("status", Level::Full), "?? new.txt\n").unwrap();
2064        assert_eq!(out, "clean\n");
2065    }
2066
2067    #[test]
2068    fn exec_else_string_passthrough_when_nonempty() {
2069        let rs = parse_ok(
2070            r#"
2071status:
2072    keep /^M /
2073    else "clean"
2074"#,
2075        );
2076        let out = execute(&rs, &ctx("status", Level::Full), "M file.txt\n").unwrap();
2077        assert_eq!(out, "M file.txt\n");
2078    }
2079
2080    #[test]
2081    fn exec_no_match_passes_through() {
2082        let rs = parse_ok(
2083            r#"
2084foo:
2085    head 1
2086"#,
2087        );
2088        let input = "x\ny\nz";
2089        let out = execute(&rs, &ctx("other", Level::Full), input).unwrap();
2090        assert_eq!(out, input);
2091    }
2092
2093    #[test]
2094    fn exec_first_match_wins() {
2095        let rs = parse_ok(
2096            r#"
2097diff, ultra:
2098    head 1
2099diff:
2100    head 3
2101"#,
2102        );
2103        let input = "a\nb\nc\nd\n";
2104        let u = execute(&rs, &ctx("diff", Level::Ultra), input).unwrap();
2105        let f = execute(&rs, &ctx("diff", Level::Full), input).unwrap();
2106        assert_eq!(u, "a\n");
2107        assert_eq!(f, "a\nb\nc\n");
2108    }
2109
2110    #[test]
2111    fn exec_head_auto_uses_level() {
2112        let rs = parse_ok(
2113            r#"
2114foo:
2115    head auto
2116"#,
2117        );
2118        let input: String = (1..=80).map(|i| format!("{i}\n")).collect();
2119        let u = execute(&rs, &ctx("foo", Level::Ultra), &input).unwrap();
2120        let f = execute(&rs, &ctx("foo", Level::Full), &input).unwrap();
2121        let l = execute(&rs, &ctx("foo", Level::Lite), &input).unwrap();
2122        assert_eq!(u.lines().count(), 15);
2123        assert_eq!(f.lines().count(), 30);
2124        assert_eq!(l.lines().count(), 60);
2125    }
2126
2127    #[test]
2128    fn exec_shell_inline() {
2129        let rs = parse_ok(
2130            r#"
2131foo:
2132    shell: tr a-z A-Z
2133"#,
2134        );
2135        let out = execute(&rs, &ctx("foo", Level::Full), "hello\n").unwrap();
2136        assert_eq!(out.trim_end(), "HELLO");
2137    }
2138
2139    #[test]
2140    fn exec_shell_block() {
2141        let rs = parse_ok(
2142            r#"
2143foo:
2144    shell: |
2145        awk '{ print NR, $0 }'
2146"#,
2147        );
2148        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\n").unwrap();
2149        assert_eq!(out.trim_end(), "1 a\n2 b");
2150    }
2151
2152    #[test]
2153    fn exec_shell_sees_env_vars() {
2154        let rs = parse_ok(
2155            r#"
2156build:
2157    shell: printf '%s:%s' "$sub" "$level"
2158"#,
2159        );
2160        let out = execute(&rs, &ctx("build", Level::Ultra), "").unwrap();
2161        // ensure_trailing_newline normalizes shell output without a final \n
2162        assert_eq!(out, "build:ultra\n");
2163    }
2164
2165    #[test]
2166    fn exec_else_shell_uses_raw_input() {
2167        let rs = parse_ok(
2168            r#"
2169diff:
2170    keep /^IMPOSSIBLE/
2171    else-shell: head -2
2172"#,
2173        );
2174        let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\nz\n").unwrap();
2175        assert_eq!(out, "x\ny\n");
2176    }
2177
2178    #[test]
2179    fn exec_macro_expansion_with_args() {
2180        let rs = parse_ok(
2181            r#"
2182define n-up(count):
2183    shell: head -$1
2184
2185foo:
2186    n-up 2
2187"#,
2188        );
2189        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\n").unwrap();
2190        assert_eq!(out, "a\nb\n");
2191    }
2192
2193    #[test]
2194    fn exec_split_pre_post() {
2195        let rs = parse_ok(
2196            r#"
2197show:
2198    split /^diff /
2199    pre:
2200        head 1
2201    post:
2202        head 2
2203"#,
2204        );
2205        let input = "commit abc\nAuthor: x\nDate: y\ndiff --git a b\n+line1\n+line2\n+line3\n";
2206        let out = execute(&rs, &ctx("show", Level::Full), input).unwrap();
2207        assert_eq!(out, "commit abc\ndiff --git a b\n+line1\n");
2208    }
2209
2210    #[test]
2211    fn exec_split_no_match() {
2212        let rs = parse_ok(
2213            r#"
2214show:
2215    split /^diff /
2216    pre:
2217        head 2
2218    post:
2219        head 10
2220"#,
2221        );
2222        // No `diff ` line — everything goes to pre, post is empty.
2223        let out = execute(&rs, &ctx("show", Level::Full), "a\nb\nc\nd\n").unwrap();
2224        assert_eq!(out, "a\nb\n");
2225    }
2226
2227    #[test]
2228    fn exec_macro_arg_count_mismatch_errors() {
2229        let rs = parse_ok(
2230            r#"
2231define needs-two(a, b):
2232    head 1
2233
2234foo:
2235    needs-two 5
2236"#,
2237        );
2238        let err = execute(&rs, &ctx("foo", Level::Full), "x").unwrap_err();
2239        assert!(err.to_string().contains("expects 2 arg"), "got: {err}");
2240    }
2241
2242    #[test]
2243    fn exec_python_plain_when_no_pep723() {
2244        // Skip if python3 not on PATH.
2245        if Command::new("python3").arg("--version").output().is_err() {
2246            eprintln!("skipping: python3 not available");
2247            return;
2248        }
2249        let rs = parse_ok(
2250            r#"
2251foo:
2252    python: |
2253        import sys
2254        for line in sys.stdin:
2255            print(line.upper(), end="")
2256"#,
2257        );
2258        let out = execute(&rs, &ctx("foo", Level::Full), "hello\nworld\n").unwrap();
2259        assert_eq!(out, "HELLO\nWORLD\n");
2260    }
2261
2262    #[test]
2263    fn exec_macro_arg_substitution_in_shell() {
2264        let rs = parse_ok(
2265            r#"
2266define grab(limit):
2267    shell: |
2268        awk -v lim=$1 '{ if (NR<=lim) print }'
2269
2270foo:
2271    grab 3
2272"#,
2273        );
2274        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\ne\n").unwrap();
2275        assert_eq!(out, "a\nb\nc\n");
2276    }
2277
2278    #[test]
2279    fn pep723_detection() {
2280        assert!(has_pep723_header(
2281            "# /// script\n# dependencies = []\n# ///\nimport sys"
2282        ));
2283        assert!(has_pep723_header(
2284            "    # /// script\n    # ///\nimport sys"
2285        ));
2286        assert!(!has_pep723_header("import sys\nprint('hi')"));
2287        assert!(!has_pep723_header("# not pep 723\nprint('hi')"));
2288    }
2289
2290    #[test]
2291    fn kubectl_compact_plugin_parses() {
2292        let src = include_str!(
2293            "../../../plugins/kubectl/kubectl-compact/filter.lf"
2294        );
2295        let rs = parse_ok(src);
2296        // Define: clean-yaml (with PEP 723 body)
2297        assert_eq!(rs.defines.len(), 1);
2298        assert_eq!(rs.defines[0].name, "clean-yaml");
2299        match &rs.defines[0].ops[0] {
2300            Op::Python(body) => {
2301                assert!(body.contains("# /// script"));
2302                assert!(body.contains("dependencies = [\"pyyaml>=6\"]"));
2303                assert!(body.contains("yaml.safe_load_all"));
2304            }
2305            other => panic!("expected Python op, got {other:?}"),
2306        }
2307        // get/logs/events/* selection
2308        assert!(rs.select("get", Level::Full).is_some());
2309        assert!(rs.select("logs", Level::Ultra).is_some());
2310        assert!(rs.select("logs", Level::Full).is_some());
2311        assert!(rs.select("events", Level::Ultra).is_some());
2312        assert!(rs.select("describe", Level::Full).is_some()); // catch-all
2313    }
2314
2315    // ── v2: cascades, guards, globs ───────────────────────────────
2316
2317    #[test]
2318    fn parse_cascade_arms() {
2319        let rs = parse_ok(
2320            r#"
2321diff:
2322    if exit failed: raw
2323    elif level ultra: head 5
2324    else: head 99
2325"#,
2326        );
2327        match &rs.rules[0].ops[..] {
2328            [Op::Cascade(branches)] => {
2329                assert_eq!(branches.len(), 3);
2330                assert!(branches[0].guard.is_some());
2331                assert!(branches[1].guard.is_some());
2332                assert!(branches[2].guard.is_none());
2333            }
2334            other => panic!("expected one Cascade op, got {other:?}"),
2335        }
2336    }
2337
2338    #[test]
2339    fn exec_cascade_branches_on_exit() {
2340        let rs = parse_ok(
2341            r#"
2342diff:
2343    if exit failed: raw
2344    else: head 1
2345"#,
2346        );
2347        let input = "a\nb\nc\n";
2348        let failed = ExecCtx { sub: "diff", level: Level::Full, exit_code: 1, args: &[] };
2349        let ok = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2350        assert_eq!(execute(&rs, &failed, input).unwrap(), "a\nb\nc\n");
2351        assert_eq!(execute(&rs, &ok, input).unwrap(), "a\n");
2352    }
2353
2354    #[test]
2355    fn exec_cascade_level_and_flag_guards() {
2356        let rs = parse_ok(
2357            r#"
2358diff:
2359    if level ultra and --stat: head 1
2360    elif --stat: head 2
2361    else: head 3
2362"#,
2363        );
2364        let input = "1\n2\n3\n4\n";
2365        let stat = vec!["--stat".to_string()];
2366        let ultra_stat = ExecCtx { sub: "diff", level: Level::Ultra, exit_code: 0, args: &stat };
2367        let full_stat = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &stat };
2368        let plain = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2369        assert_eq!(execute(&rs, &ultra_stat, input).unwrap(), "1\n");
2370        assert_eq!(execute(&rs, &full_stat, input).unwrap(), "1\n2\n");
2371        assert_eq!(execute(&rs, &plain, input).unwrap(), "1\n2\n3\n");
2372    }
2373
2374    #[test]
2375    fn exec_cascade_no_match_no_else_passes_through() {
2376        let rs = parse_ok("diff:\n    if exit failed: head 1\n");
2377        let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\n").unwrap();
2378        assert_eq!(out, "x\ny\n");
2379    }
2380
2381    #[test]
2382    fn exec_raw_is_identity() {
2383        // `raw` is canonical; `passthrough` is a legacy alias for the same op.
2384        for kw in ["raw", "passthrough"] {
2385            let rs = parse_ok(&format!("diff:\n    {kw}\n"));
2386            let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\n").unwrap();
2387            assert_eq!(out, "x\ny\n");
2388        }
2389    }
2390
2391    #[test]
2392    fn glob_selector_matches_prefix() {
2393        let rs = parse_ok("apply*:\n    head 1\n");
2394        assert!(rs.select("apply", Level::Full).is_some());
2395        assert!(rs.select("apply-set", Level::Full).is_some());
2396        assert!(rs.select("delete", Level::Full).is_none());
2397    }
2398
2399    #[test]
2400    fn or_is_alias_of_else() {
2401        let new = parse_ok("s:\n    keep /Z/\n    or \"clean\"\n");
2402        let old = parse_ok("s:\n    keep /Z/\n    else \"clean\"\n");
2403        assert_eq!(execute(&new, &ctx("s", Level::Full), "nope\n").unwrap(), "clean\n");
2404        assert_eq!(execute(&old, &ctx("s", Level::Full), "nope\n").unwrap(), "clean\n");
2405    }
2406
2407    #[test]
2408    fn errors_on_unknown_guard_value() {
2409        let chain = format!("{:#}", parse("diff:\n    if exit boom: head 1\n").unwrap_err());
2410        assert!(chain.contains("unknown exit value"), "got: {chain}");
2411    }
2412
2413    // ── match: single-dimension cascade sugar ─────────────────────
2414
2415    #[test]
2416    fn parse_match_level_desugars_to_cascade() {
2417        let rs = parse_ok(
2418            r#"
2419state:
2420    match level:
2421        ultra: head 1
2422        lite:  head 3
2423        else:  head 2
2424"#,
2425        );
2426        match &rs.rules[0].ops[..] {
2427            [Op::Cascade(branches)] => {
2428                assert_eq!(branches.len(), 3);
2429                assert!(matches!(
2430                    branches[0].guard.as_ref().unwrap().atoms.as_slice(),
2431                    [Atom::Level(Level::Ultra)]
2432                ));
2433                assert!(matches!(
2434                    branches[1].guard.as_ref().unwrap().atoms.as_slice(),
2435                    [Atom::Level(Level::Lite)]
2436                ));
2437                assert!(branches[2].guard.is_none());
2438            }
2439            other => panic!("expected one Cascade op, got {other:?}"),
2440        }
2441    }
2442
2443    #[test]
2444    fn exec_match_level_matches_equivalent_cascade() {
2445        let m = parse_ok(
2446            r#"
2447state:
2448    match level:
2449        ultra: head 1
2450        lite:  head 3
2451        else:  head 2
2452"#,
2453        );
2454        let c = parse_ok(
2455            r#"
2456state:
2457    if level ultra: head 1
2458    elif level lite: head 3
2459    else: head 2
2460"#,
2461        );
2462        let input = "a\nb\nc\nd\n";
2463        for level in [Level::Ultra, Level::Full, Level::Lite] {
2464            let mc = execute(&m, &ctx("state", level), input).unwrap();
2465            let cc = execute(&c, &ctx("state", level), input).unwrap();
2466            assert_eq!(mc, cc, "level {level:?}");
2467        }
2468    }
2469
2470    #[test]
2471    fn exec_match_exit() {
2472        let rs = parse_ok(
2473            r#"
2474diff:
2475    match exit:
2476        failed: raw
2477        ok: head 1
2478"#,
2479        );
2480        let input = "a\nb\nc\n";
2481        let failed = ExecCtx { sub: "diff", level: Level::Full, exit_code: 1, args: &[] };
2482        let okctx = ExecCtx { sub: "diff", level: Level::Full, exit_code: 0, args: &[] };
2483        assert_eq!(execute(&rs, &failed, input).unwrap(), "a\nb\nc\n");
2484        assert_eq!(execute(&rs, &okctx, input).unwrap(), "a\n");
2485    }
2486
2487    #[test]
2488    fn exec_nested_match_inside_else_arm() {
2489        let rs = parse_ok(
2490            r#"
2491plan:
2492    if exit failed:
2493        raw
2494    else:
2495        match level:
2496            ultra: head 1
2497            lite:  head 3
2498            else:  head 2
2499"#,
2500        );
2501        let input = "a\nb\nc\nd\n";
2502        let failed = ExecCtx { sub: "plan", level: Level::Full, exit_code: 1, args: &[] };
2503        let ok_full = ExecCtx { sub: "plan", level: Level::Full, exit_code: 0, args: &[] };
2504        let ok_ultra = ExecCtx { sub: "plan", level: Level::Ultra, exit_code: 0, args: &[] };
2505        let ok_lite = ExecCtx { sub: "plan", level: Level::Lite, exit_code: 0, args: &[] };
2506        assert_eq!(execute(&rs, &failed, input).unwrap(), input);
2507        assert_eq!(execute(&rs, &ok_full, input).unwrap(), "a\nb\n");
2508        assert_eq!(execute(&rs, &ok_ultra, input).unwrap(), "a\n");
2509        assert_eq!(execute(&rs, &ok_lite, input).unwrap(), "a\nb\nc\n");
2510    }
2511
2512    #[test]
2513    fn match_missing_dimension_errors() {
2514        let chain = format!("{:#}", parse("plan:\n    match:\n        ultra: head 1\n").unwrap_err());
2515        assert!(chain.contains("needs a dimension"), "got: {chain}");
2516    }
2517
2518    #[test]
2519    fn match_unknown_dimension_errors() {
2520        let chain = format!(
2521            "{:#}",
2522            parse("plan:\n    match flag:\n        x: head 1\n").unwrap_err()
2523        );
2524        assert!(chain.contains("unknown match dimension"), "got: {chain}");
2525    }
2526
2527    #[test]
2528    fn match_unknown_value_errors() {
2529        let chain = format!(
2530            "{:#}",
2531            parse("plan:\n    match exit:\n        boom: head 1\n").unwrap_err()
2532        );
2533        assert!(chain.contains("unknown exit value"), "got: {chain}");
2534    }
2535
2536    #[test]
2537    fn match_inline_after_header_errors() {
2538        let chain = format!(
2539            "{:#}",
2540            parse("plan:\n    match level: head 1\n").unwrap_err()
2541        );
2542        assert!(
2543            chain.contains("doesn't take inline ops"),
2544            "got: {chain}"
2545        );
2546    }
2547}