Skip to main content

lowfat_core/
lf.rs

1//! lf — the lowfat filter DSL parser.
2//!
3//! Parses `.lf` files into a [`RuleSet`]. Execution lives elsewhere
4//! (Task 2+). The DSL is line-oriented and indentation-sensitive; we
5//! avoid INDENT/DEDENT tokens by working directly on `(indent, text)`
6//! pairs, which keeps the parser short and the error messages tied to
7//! source line numbers.
8
9use crate::level::Level;
10use anyhow::{Context, Result, anyhow, bail};
11use regex::Regex;
12
13// ──────────────────────────────────────────────────────────────────
14// AST
15// ──────────────────────────────────────────────────────────────────
16
17#[derive(Debug, Default)]
18pub struct RuleSet {
19    pub defines: Vec<Define>,
20    pub rules: Vec<Rule>,
21}
22
23#[derive(Debug, Clone)]
24pub struct Define {
25    pub name: String,
26    pub params: Vec<String>,
27    pub ops: Vec<Op>,
28}
29
30#[derive(Debug, Clone)]
31pub struct Rule {
32    pub sub: SubPattern,
33    pub level: LevelPattern,
34    pub ops: Vec<Op>,
35    pub line_no: usize,
36}
37
/// The subcommand half of a rule selector.
#[derive(Clone, Debug)]
pub enum SubPattern {
    /// `*` — accepts any subcommand.
    Star,
    /// `a|b|c` — accepts a subcommand equal to any one of the listed names.
    Alt(Vec<String>),
}
43
44#[derive(Debug, Clone)]
45pub enum LevelPattern {
46    Star,
47    Specific(Level),
48}
49
50#[derive(Debug, Clone)]
51pub enum Op {
52    Keep(PatternRegex),
53    Drop(PatternRegex),
54    Head(HeadArg),
55    Tail(HeadArg),
56    Else(String),
57    ElseShell(String),
58    Shell(String),
59    Python(String),
60    MacroCall {
61        name: String,
62        args: Vec<MacroArg>,
63    },
64    Split {
65        delimiter: PatternRegex,
66        pre: Vec<Op>,
67        post: Vec<Op>,
68    },
69}
70
71#[derive(Debug, Clone)]
72pub struct PatternRegex {
73    pub source: String,
74    pub compiled: Regex,
75}
76
/// Argument accepted by `head` and `tail`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum HeadArg {
    /// An explicit count, e.g. `head 20`.
    Number(usize),
    /// The literal word `auto`.
    Auto,
}
82
/// A positional argument in a macro call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MacroArg {
    /// A bare word that parsed as `usize`.
    Number(usize),
    /// A quoted string, or a bare word that is not a number.
    String(String),
}
88
89// ──────────────────────────────────────────────────────────────────
90// Selection
91// ──────────────────────────────────────────────────────────────────
92
93impl RuleSet {
94    /// First-match-wins. Returns `None` when no rule matches.
95    pub fn select(&self, sub: &str, level: Level) -> Option<&Rule> {
96        self.rules.iter().find(|r| r.matches(sub, level))
97    }
98
99    pub fn find_define(&self, name: &str) -> Option<&Define> {
100        self.defines.iter().find(|d| d.name == name)
101    }
102}
103
104impl Rule {
105    pub fn matches(&self, sub: &str, level: Level) -> bool {
106        let sub_ok = match &self.sub {
107            SubPattern::Star => true,
108            SubPattern::Alt(alts) => alts.iter().any(|a| a == sub),
109        };
110        let lvl_ok = match &self.level {
111            LevelPattern::Star => true,
112            LevelPattern::Specific(l) => *l == level,
113        };
114        sub_ok && lvl_ok
115    }
116}
117
118// ──────────────────────────────────────────────────────────────────
119// Line preprocessing
120// ──────────────────────────────────────────────────────────────────
121
/// One physical source line plus the layout facts the parser needs.
#[derive(Clone, Debug)]
struct Line {
    /// Byte length of the leading whitespace in `raw`.
    indent: usize,
    /// `raw` with leading and trailing whitespace removed; empty for a
    /// blank line.
    text: String,
    /// The line as written, without its line terminator.
    raw: String,
    /// 1-based position in the input.
    line_no: usize,
    /// Set when `text` is empty or starts with `#`. The structural parser
    /// skips such lines; block bodies keep them as written.
    is_meta: bool,
}
132
133fn split_lines(input: &str) -> Vec<Line> {
134    input
135        .split('\n')
136        .enumerate()
137        .map(|(i, raw_line)| {
138            let raw = raw_line.trim_end_matches('\r').to_string();
139            let stripped = raw.trim_start();
140            let indent = raw.len() - stripped.len();
141            let text = stripped.trim_end().to_string();
142            let is_meta = text.is_empty() || text.starts_with('#');
143            Line {
144                indent,
145                text,
146                raw,
147                line_no: i + 1,
148                is_meta,
149            }
150        })
151        .collect()
152}
153
154// ──────────────────────────────────────────────────────────────────
155// Parser
156// ──────────────────────────────────────────────────────────────────
157
/// First words that start a built-in op. Inline macro-argument parsing
/// stops collecting arguments when it reaches one of these.
const OP_KEYWORDS: &[&str] = &[
    "keep", "drop", "head", "tail", "else", "else-shell:", "shell:", "python:", "split",
];
169
170pub fn parse(input: &str) -> Result<RuleSet> {
171    let lines = split_lines(input);
172    let macro_names = collect_macro_names(&lines);
173    let mut p = Parser {
174        lines: &lines,
175        pos: 0,
176        macro_names,
177    };
178    p.parse_ruleset()
179}
180
181fn collect_macro_names(lines: &[Line]) -> Vec<String> {
182    let mut names = Vec::new();
183    for l in lines {
184        if l.is_meta {
185            continue;
186        }
187        if let Some(rest) = l.text.strip_prefix("define ") {
188            let end = rest
189                .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
190                .unwrap_or(rest.len());
191            let name = rest[..end].trim().to_string();
192            if !name.is_empty() {
193                names.push(name);
194            }
195        }
196    }
197    names
198}
199
200struct Parser<'a> {
201    lines: &'a [Line],
202    pos: usize,
203    macro_names: Vec<String>,
204}
205
206impl<'a> Parser<'a> {
207    /// Advance past meta lines and return the next structural line without
208    /// consuming it.
209    fn peek_significant(&mut self) -> Option<&'a Line> {
210        while let Some(l) = self.lines.get(self.pos) {
211            if l.is_meta {
212                self.pos += 1;
213            } else {
214                return Some(l);
215            }
216        }
217        None
218    }
219
220    fn advance(&mut self) -> Option<&'a Line> {
221        let l = self.lines.get(self.pos);
222        if l.is_some() {
223            self.pos += 1;
224        }
225        l
226    }
227
228    fn is_macro(&self, name: &str) -> bool {
229        self.macro_names.iter().any(|n| n == name)
230    }
231
232    // ── top-level ────────────────────────────────────────────────
233
234    fn parse_ruleset(&mut self) -> Result<RuleSet> {
235        let mut rs = RuleSet::default();
236        while let Some(line) = self.peek_significant() {
237            if line.indent != 0 {
238                bail!("line {}: unexpected indent at top level", line.line_no);
239            }
240            if line.text.starts_with("define ") {
241                let d = self.parse_define()?;
242                rs.defines.push(d);
243            } else {
244                let r = self.parse_rule()?;
245                rs.rules.push(r);
246            }
247        }
248        Ok(rs)
249    }
250
251    fn parse_define(&mut self) -> Result<Define> {
252        let header = self.advance().unwrap();
253        let line_no = header.line_no;
254        let rest = header
255            .text
256            .strip_prefix("define ")
257            .ok_or_else(|| anyhow!("line {}: expected `define`", line_no))?;
258        let (name, params, after_paren) =
259            parse_define_header(rest).with_context(|| format!("line {line_no}"))?;
260        if !after_paren.starts_with(':') {
261            bail!(
262                "line {}: expected `:` after define header, got `{}`",
263                line_no,
264                after_paren
265            );
266        }
267        let trailing = after_paren[1..].trim();
268        if !trailing.is_empty() {
269            bail!(
270                "line {}: one-line `define` body not supported (use indented body)",
271                line_no
272            );
273        }
274        let ops = self.parse_indented_ops(header.indent)?;
275        if ops.is_empty() {
276            bail!("line {}: `define {}` has empty body", line_no, name);
277        }
278        Ok(Define { name, params, ops })
279    }
280
281    fn parse_rule(&mut self) -> Result<Rule> {
282        let header = self.advance().unwrap();
283        let line_no = header.line_no;
284        let parent_indent = header.indent;
285        let colon_pos = header
286            .text
287            .find(':')
288            .ok_or_else(|| anyhow!("line {}: missing `:` in rule header", line_no))?;
289        let selector = &header.text[..colon_pos];
290        let after = &header.text[colon_pos + 1..];
291        let (sub, level) =
292            parse_selector(selector).with_context(|| format!("line {line_no}"))?;
293
294        let mut ops = Vec::new();
295        let inline = after.trim();
296        if !inline.is_empty() {
297            let inline_ops = self.parse_inline_ops(inline, line_no)?;
298            ops.extend(inline_ops);
299        }
300        let indented = self.parse_indented_ops(parent_indent)?;
301        ops.extend(indented);
302
303        if ops.is_empty() {
304            bail!("line {}: rule has no ops", line_no);
305        }
306        Ok(Rule {
307            sub,
308            level,
309            ops,
310            line_no,
311        })
312    }
313
314    // ── op chains ────────────────────────────────────────────────
315
316    /// Parse op-lines strictly deeper-indented than `parent_indent`.
317    /// Stops at first significant line whose indent <= parent_indent.
318    fn parse_indented_ops(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
319        let mut ops = Vec::new();
320        loop {
321            let Some(line) = self.peek_significant() else {
322                break;
323            };
324            if line.indent <= parent_indent {
325                break;
326            }
327            let op = self.parse_op_line()?;
328            ops.push(op);
329        }
330        Ok(ops)
331    }
332
333    /// Parse a single op from the current significant line, advancing
334    /// past any block bodies and sub-blocks the op consumes.
335    fn parse_op_line(&mut self) -> Result<Op> {
336        let line = self.advance().unwrap();
337        let line_no = line.line_no;
338        let indent = line.indent;
339        let text = line.text.as_str();
340        let (head, _) = split_first_word(text);
341
342        match head {
343            "keep" => {
344                let rest = text[head.len()..].trim_start();
345                Ok(Op::Keep(parse_regex_literal(rest, line_no)?))
346            }
347            "drop" => {
348                let rest = text[head.len()..].trim_start();
349                Ok(Op::Drop(parse_regex_literal(rest, line_no)?))
350            }
351            "head" => {
352                let rest = text[head.len()..].trim();
353                Ok(Op::Head(parse_head_arg(rest, line_no)?))
354            }
355            "tail" => {
356                let rest = text[head.len()..].trim();
357                Ok(Op::Tail(parse_head_arg(rest, line_no)?))
358            }
359            "else" => {
360                let rest = text[head.len()..].trim_start();
361                Ok(Op::Else(parse_string_literal(rest, line_no)?))
362            }
363            "else-shell:" => {
364                let body = text[head.len()..].trim_start().to_string();
365                if body.is_empty() {
366                    bail!("line {}: `else-shell:` requires a command", line_no);
367                }
368                Ok(Op::ElseShell(body))
369            }
370            "shell:" => Ok(Op::Shell(self.parse_block_body(
371                text,
372                head,
373                indent,
374                line_no,
375            )?)),
376            "python:" => Ok(Op::Python(self.parse_block_body(
377                text,
378                head,
379                indent,
380                line_no,
381            )?)),
382            "split" => {
383                let rest = text[head.len()..].trim_start();
384                let delim = parse_regex_literal(rest, line_no)?;
385                let (pre, post) = self.parse_split_branches(indent)?;
386                if pre.is_empty() && post.is_empty() {
387                    bail!(
388                        "line {}: `split` needs at least one `pre:` or `post:` block",
389                        line_no
390                    );
391                }
392                Ok(Op::Split {
393                    delimiter: delim,
394                    pre,
395                    post,
396                })
397            }
398            name if self.is_macro(name) => {
399                let rest = text[head.len()..].trim();
400                let args = parse_macro_args(rest, line_no)?;
401                Ok(Op::MacroCall {
402                    name: name.to_string(),
403                    args,
404                })
405            }
406            _ => bail!("line {}: unknown op `{}`", line_no, head),
407        }
408    }
409
410    /// Parse a `shell:` or `python:` body. Two forms:
411    ///   inline: `shell: <command on rest of line>`
412    ///   block:  `shell: |` then indented body lines until dedent.
413    /// Body lines preserve internal blank lines and relative indentation.
414    fn parse_block_body(
415        &mut self,
416        line_text: &str,
417        head: &str,
418        parent_indent: usize,
419        line_no: usize,
420    ) -> Result<String> {
421        let after = line_text[head.len()..].trim_start();
422        if after != "|" {
423            if after.is_empty() {
424                bail!(
425                    "line {}: empty `{}` body (use `| <newline>` for block form)",
426                    line_no,
427                    head
428                );
429            }
430            return Ok(after.to_string());
431        }
432
433        // Block form: scan lines until indent drops back to parent_indent.
434        // Include blank lines that fall between body lines.
435        let mut collected: Vec<&'a Line> = Vec::new();
436        let mut base: Option<usize> = None;
437        while let Some(l) = self.lines.get(self.pos) {
438            if l.text.is_empty() {
439                collected.push(l);
440                self.pos += 1;
441                continue;
442            }
443            if l.indent <= parent_indent {
444                break;
445            }
446            if base.is_none() {
447                base = Some(l.indent);
448            }
449            collected.push(l);
450            self.pos += 1;
451        }
452        // Trim trailing blank lines (they belong to the gap, not the body).
453        while collected.last().map_or(false, |l| l.text.is_empty()) {
454            collected.pop();
455        }
456        if collected.is_empty() {
457            bail!("line {}: `{}` block is empty", line_no, head);
458        }
459        let base = base.unwrap_or(parent_indent + 4);
460        let dedented: Vec<String> = collected
461            .iter()
462            .map(|l| {
463                if l.text.is_empty() {
464                    String::new()
465                } else if l.raw.len() >= base {
466                    l.raw[base..].to_string()
467                } else {
468                    l.raw.trim_start().to_string()
469                }
470            })
471            .collect();
472        Ok(dedented.join("\n"))
473    }
474
475    /// After a `split /regex/`, consume any sibling `pre:` / `post:`
476    /// blocks at the same indent.
477    fn parse_split_branches(&mut self, parent_indent: usize) -> Result<(Vec<Op>, Vec<Op>)> {
478        let mut pre = Vec::new();
479        let mut post = Vec::new();
480        loop {
481            let Some(line) = self.peek_significant() else {
482                break;
483            };
484            if line.indent != parent_indent {
485                break;
486            }
487            match line.text.as_str() {
488                "pre:" => {
489                    self.advance();
490                    pre = self.parse_indented_ops(parent_indent)?;
491                }
492                "post:" => {
493                    self.advance();
494                    post = self.parse_indented_ops(parent_indent)?;
495                }
496                _ => break,
497            }
498        }
499        Ok((pre, post))
500    }
501
502    /// Parse multiple ops appearing on the same line (after a rule
503    /// header's `:`). `shell:` / `python:` / `else-shell:` greedily
504    /// consume rest of line; other ops yield to the next op keyword
505    /// or macro name.
506    fn parse_inline_ops(&self, text: &str, line_no: usize) -> Result<Vec<Op>> {
507        let mut ops = Vec::new();
508        let mut remaining = text.trim();
509        while !remaining.is_empty() {
510            let (head, _) = split_first_word(remaining);
511            match head {
512                "shell:" => {
513                    let body = remaining[head.len()..].trim_start().to_string();
514                    if body.is_empty() {
515                        bail!("line {}: inline `shell:` needs a command", line_no);
516                    }
517                    ops.push(Op::Shell(body));
518                    remaining = "";
519                }
520                "python:" => {
521                    let body = remaining[head.len()..].trim_start().to_string();
522                    if body.is_empty() {
523                        bail!("line {}: inline `python:` needs a command", line_no);
524                    }
525                    ops.push(Op::Python(body));
526                    remaining = "";
527                }
528                "else-shell:" => {
529                    let body = remaining[head.len()..].trim_start().to_string();
530                    if body.is_empty() {
531                        bail!("line {}: inline `else-shell:` needs a command", line_no);
532                    }
533                    ops.push(Op::ElseShell(body));
534                    remaining = "";
535                }
536                "keep" | "drop" => {
537                    let rest = remaining[head.len()..].trim_start();
538                    let (re, after) = parse_regex_literal_and_rest(rest, line_no)?;
539                    ops.push(if head == "keep" {
540                        Op::Keep(re)
541                    } else {
542                        Op::Drop(re)
543                    });
544                    remaining = after.trim_start();
545                }
546                "head" | "tail" => {
547                    let rest = remaining[head.len()..].trim_start();
548                    let (arg_word, after) = take_word(rest);
549                    let h = parse_head_arg(arg_word, line_no)?;
550                    ops.push(if head == "head" {
551                        Op::Head(h)
552                    } else {
553                        Op::Tail(h)
554                    });
555                    remaining = after.trim_start();
556                }
557                "else" => {
558                    let rest = remaining[head.len()..].trim_start();
559                    let (s, after) = parse_string_literal_and_rest(rest, line_no)?;
560                    ops.push(Op::Else(s));
561                    remaining = after.trim_start();
562                }
563                "split" => {
564                    bail!(
565                        "line {}: `split` cannot appear inline (needs pre:/post: blocks)",
566                        line_no
567                    )
568                }
569                name if self.is_macro(name) => {
570                    let rest = remaining[head.len()..].trim_start();
571                    let (args, after) =
572                        parse_macro_args_until_op(rest, &self.macro_names, line_no)?;
573                    ops.push(Op::MacroCall {
574                        name: name.to_string(),
575                        args,
576                    });
577                    remaining = after.trim_start();
578                }
579                _ => bail!("line {}: unknown op `{}` in inline chain", line_no, head),
580            }
581        }
582        Ok(ops)
583    }
584}
585
586// ──────────────────────────────────────────────────────────────────
587// Sub-parsers (free functions, no Parser state)
588// ──────────────────────────────────────────────────────────────────
589
/// Skips leading whitespace, then splits at the first whitespace
/// character: returns `(word, remainder)` where the remainder still
/// begins with that whitespace (or is empty).
fn split_first_word(s: &str) -> (&str, &str) {
    let trimmed = s.trim_start();
    match trimmed.find(char::is_whitespace) {
        Some(cut) => trimmed.split_at(cut),
        None => (trimmed, ""),
    }
}
595
/// Skips leading whitespace and returns `(word, remainder)`, where
/// `word` is the run of non-whitespace characters that follows and
/// `remainder` is everything after it.
fn take_word(s: &str) -> (&str, &str) {
    let s = s.trim_start();
    // Sum UTF-8 widths so the split lands on a char boundary.
    let word_len: usize = s
        .chars()
        .take_while(|c| !c.is_whitespace())
        .map(char::len_utf8)
        .sum();
    s.split_at(word_len)
}
601
602fn parse_selector(s: &str) -> Result<(SubPattern, LevelPattern)> {
603    let s = s.trim();
604    if s.is_empty() {
605        bail!("empty selector");
606    }
607    let mut parts = s.splitn(2, ',');
608    let sub_str = parts.next().unwrap().trim();
609    let level_str = parts.next().map(|s| s.trim()).unwrap_or("*");
610
611    let sub = if sub_str == "*" {
612        SubPattern::Star
613    } else {
614        let alts: Vec<String> = sub_str
615            .split('|')
616            .map(|s| s.trim().to_string())
617            .collect();
618        if alts.iter().any(|a| a.is_empty()) {
619            bail!("empty alternative in sub pattern `{}`", sub_str);
620        }
621        SubPattern::Alt(alts)
622    };
623
624    let level = if level_str == "*" {
625        LevelPattern::Star
626    } else {
627        let lvl: Level = level_str.parse().map_err(|e: String| anyhow!(e))?;
628        LevelPattern::Specific(lvl)
629    };
630
631    Ok((sub, level))
632}
633
634fn parse_define_header(s: &str) -> Result<(String, Vec<String>, &str)> {
635    let s = s.trim_start();
636    let end = s
637        .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
638        .unwrap_or(s.len());
639    let name = s[..end].to_string();
640    if name.is_empty() {
641        bail!("define needs a name");
642    }
643    let rest = s[end..].trim_start();
644    if let Some(rest) = rest.strip_prefix('(') {
645        let close = rest
646            .find(')')
647            .ok_or_else(|| anyhow!("missing `)` in define params"))?;
648        let params: Vec<String> = rest[..close]
649            .split(',')
650            .map(|p| p.trim().to_string())
651            .filter(|p| !p.is_empty())
652            .collect();
653        Ok((name, params, rest[close + 1..].trim_start()))
654    } else {
655        Ok((name, Vec::new(), rest))
656    }
657}
658
659fn parse_regex_literal(s: &str, line_no: usize) -> Result<PatternRegex> {
660    let (re, after) = parse_regex_literal_and_rest(s, line_no)?;
661    let after = after.trim();
662    if !after.is_empty() {
663        bail!(
664            "line {}: unexpected trailing input after regex: `{}`",
665            line_no,
666            after
667        );
668    }
669    Ok(re)
670}
671
672fn parse_regex_literal_and_rest(s: &str, line_no: usize) -> Result<(PatternRegex, &str)> {
673    let s = s.trim_start();
674    if !s.starts_with('/') {
675        bail!(
676            "line {}: expected `/regex/`, got `{}`",
677            line_no,
678            preview(s)
679        );
680    }
681    let body = &s[1..];
682    let mut src = String::new();
683    let mut chars = body.char_indices().peekable();
684    let mut end_byte: Option<usize> = None;
685    while let Some((i, c)) = chars.next() {
686        if c == '\\' {
687            if let Some((_, n)) = chars.next() {
688                if n == '/' {
689                    src.push('/');
690                } else {
691                    src.push('\\');
692                    src.push(n);
693                }
694            } else {
695                bail!("line {}: trailing backslash in regex", line_no);
696            }
697        } else if c == '/' {
698            end_byte = Some(i);
699            break;
700        } else {
701            src.push(c);
702        }
703    }
704    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated regex", line_no))?;
705    let after = &body[end_byte + 1..];
706    let compiled = Regex::new(&src)
707        .map_err(|e| anyhow!("line {}: invalid regex `{}`: {}", line_no, src, e))?;
708    Ok((
709        PatternRegex {
710            source: src,
711            compiled,
712        },
713        after,
714    ))
715}
716
717fn parse_string_literal(s: &str, line_no: usize) -> Result<String> {
718    let (s, after) = parse_string_literal_and_rest(s, line_no)?;
719    let after = after.trim();
720    if !after.is_empty() {
721        bail!(
722            "line {}: unexpected trailing input after string: `{}`",
723            line_no,
724            after
725        );
726    }
727    Ok(s)
728}
729
730fn parse_string_literal_and_rest(s: &str, line_no: usize) -> Result<(String, &str)> {
731    let s = s.trim_start();
732    if !s.starts_with('"') {
733        bail!(
734            "line {}: expected `\"...\"`, got `{}`",
735            line_no,
736            preview(s)
737        );
738    }
739    let body = &s[1..];
740    let mut out = String::new();
741    let mut chars = body.char_indices();
742    let mut end_byte: Option<usize> = None;
743    while let Some((i, c)) = chars.next() {
744        if c == '\\' {
745            if let Some((_, n)) = chars.next() {
746                match n {
747                    'n' => out.push('\n'),
748                    't' => out.push('\t'),
749                    'r' => out.push('\r'),
750                    '\\' => out.push('\\'),
751                    '"' => out.push('"'),
752                    other => {
753                        out.push('\\');
754                        out.push(other);
755                    }
756                }
757            } else {
758                bail!("line {}: trailing backslash in string", line_no);
759            }
760        } else if c == '"' {
761            end_byte = Some(i);
762            break;
763        } else {
764            out.push(c);
765        }
766    }
767    let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated string", line_no))?;
768    let after = &body[end_byte + 1..];
769    Ok((out, after))
770}
771
772fn parse_head_arg(s: &str, line_no: usize) -> Result<HeadArg> {
773    let s = s.trim();
774    if s == "auto" {
775        return Ok(HeadArg::Auto);
776    }
777    s.parse::<usize>().map(HeadArg::Number).map_err(|_| {
778        anyhow!(
779            "line {}: expected number or `auto`, got `{}`",
780            line_no,
781            s
782        )
783    })
784}
785
786fn parse_macro_args(s: &str, line_no: usize) -> Result<Vec<MacroArg>> {
787    let mut out = Vec::new();
788    let mut rest = s.trim();
789    while !rest.is_empty() {
790        if rest.starts_with('"') {
791            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
792            out.push(MacroArg::String(sv));
793            rest = after.trim_start();
794        } else {
795            let (word, after) = take_word(rest);
796            out.push(match word.parse::<usize>() {
797                Ok(n) => MacroArg::Number(n),
798                Err(_) => MacroArg::String(word.to_string()),
799            });
800            rest = after.trim_start();
801        }
802    }
803    Ok(out)
804}
805
806fn parse_macro_args_until_op<'a>(
807    s: &'a str,
808    macro_names: &[String],
809    line_no: usize,
810) -> Result<(Vec<MacroArg>, &'a str)> {
811    let mut out = Vec::new();
812    let mut rest = s.trim_start();
813    while !rest.is_empty() {
814        let (word, _) = take_word(rest);
815        if OP_KEYWORDS.contains(&word) || macro_names.iter().any(|n| n == word) {
816            break;
817        }
818        if rest.starts_with('"') {
819            let (sv, after) = parse_string_literal_and_rest(rest, line_no)?;
820            out.push(MacroArg::String(sv));
821            rest = after.trim_start();
822        } else {
823            let (w, after) = take_word(rest);
824            out.push(match w.parse::<usize>() {
825                Ok(n) => MacroArg::Number(n),
826                Err(_) => MacroArg::String(w.to_string()),
827            });
828            rest = after.trim_start();
829        }
830    }
831    Ok((out, rest))
832}
833
/// Returns at most the first 40 characters of `s`, cut on a character
/// boundary. Used to keep error messages short.
fn preview(s: &str) -> &str {
    match s.char_indices().nth(40) {
        Some((cut, _)) => &s[..cut],
        None => s,
    }
}
838
839// ──────────────────────────────────────────────────────────────────
840// Execution
841// ──────────────────────────────────────────────────────────────────
842
843use std::io::Write;
844use std::process::{Command, Stdio};
845
846/// Per-invocation context passed to the executor and propagated as env
847/// vars to `shell:` / `python:` subprocesses.
848#[derive(Debug, Clone)]
849pub struct ExecCtx<'a> {
850    pub sub: &'a str,
851    pub level: Level,
852    pub exit_code: i32,
853    pub args: &'a [String],
854}
855
856/// Run the matching rule against `input` and return the filtered output.
857/// If no rule matches, the input is returned unchanged (passthrough).
858///
859/// Non-empty output always ends in a newline, matching the convention
860/// of shell tools like `echo` and `grep`.
861pub fn execute(rs: &RuleSet, ctx: &ExecCtx, input: &str) -> Result<String> {
862    let Some(rule) = rs.select(ctx.sub, ctx.level) else {
863        return Ok(input.to_string());
864    };
865    let out = run_ops(&rule.ops, ctx, input, rs, &[])?;
866    Ok(ensure_trailing_newline(out))
867}
868
/// Appends `\n` to a non-empty string that does not already end in one;
/// empty strings and newline-terminated strings are returned untouched.
fn ensure_trailing_newline(mut s: String) -> String {
    match s.as_bytes().last() {
        // Empty, or already terminated: nothing to do.
        None | Some(b'\n') => {}
        Some(_) => s.push('\n'),
    }
    s
}
875
/// Input/output measurements for a single op, as recorded by
/// [`execute_explain`].
#[derive(Clone, Debug)]
pub struct StageRecord {
    /// Short human-readable rendering of the op.
    pub op_desc: String,
    /// Number of lines in the text fed to the op.
    pub stdin_lines: usize,
    /// Byte length of the text fed to the op.
    pub stdin_bytes: usize,
    /// Number of lines in the text the op produced.
    pub stdout_lines: usize,
    /// Byte length of the text the op produced.
    pub stdout_bytes: usize,
    /// Wall-clock time spent applying the op, in microseconds.
    pub elapsed_us: u128,
}
886
887#[derive(Debug, Default, Clone)]
888pub struct ExplainTrace {
889    /// Index into `RuleSet::rules` of the matched rule (None if no match).
890    pub matched_rule: Option<usize>,
891    pub stages: Vec<StageRecord>,
892}
893
894/// Like [`execute`] but records per-op stats. Only top-level ops are
895/// recorded — macros and split sub-chains run silently. Adds ~µs of
896/// overhead per op for line/byte counting; safe for interactive use,
897/// avoid in tight loops.
898pub fn execute_explain(
899    rs: &RuleSet,
900    ctx: &ExecCtx,
901    input: &str,
902) -> Result<(String, ExplainTrace)> {
903    let mut trace = ExplainTrace::default();
904    let Some((idx, rule)) = rs
905        .rules
906        .iter()
907        .enumerate()
908        .find(|(_, r)| r.matches(ctx.sub, ctx.level))
909    else {
910        return Ok((input.to_string(), trace));
911    };
912    trace.matched_rule = Some(idx);
913
914    let raw = input.to_string();
915    let mut state = input.to_string();
916    for op in &rule.ops {
917        let stdin_lines = state.lines().count();
918        let stdin_bytes = state.len();
919        let start = std::time::Instant::now();
920        let new_state = apply_op(op, &state, &raw, ctx, rs, &[])?;
921        let elapsed_us = start.elapsed().as_micros();
922        trace.stages.push(StageRecord {
923            op_desc: describe_op(op),
924            stdin_lines,
925            stdin_bytes,
926            stdout_lines: new_state.lines().count(),
927            stdout_bytes: new_state.len(),
928            elapsed_us,
929        });
930        state = new_state;
931    }
932    Ok((ensure_trailing_newline(state), trace))
933}
934
935fn describe_op(op: &Op) -> String {
936    match op {
937        Op::Keep(p) => format!("keep /{}/", p.source),
938        Op::Drop(p) => format!("drop /{}/", p.source),
939        Op::Head(arg) => format!("head {}", describe_head(arg)),
940        Op::Tail(arg) => format!("tail {}", describe_head(arg)),
941        Op::Else(s) => format!("else {s:?}"),
942        Op::ElseShell(s) => format!("else-shell: {}", first_line(s)),
943        Op::Shell(s) => format!("shell: {}", first_line(s)),
944        Op::Python(s) => {
945            if has_pep723_header(s) {
946                format!("python (uv): {}", first_line(s))
947            } else {
948                format!("python: {}", first_line(s))
949            }
950        }
951        Op::MacroCall { name, args } => {
952            let parts: Vec<String> = args
953                .iter()
954                .map(|a| match a {
955                    MacroArg::Number(n) => n.to_string(),
956                    MacroArg::String(s) => s.clone(),
957                })
958                .collect();
959            if parts.is_empty() {
960                name.clone()
961            } else {
962                format!("{name} {}", parts.join(" "))
963            }
964        }
965        Op::Split { delimiter, .. } => format!("split /{}/", delimiter.source),
966    }
967}
968
969fn describe_head(a: &HeadArg) -> String {
970    match a {
971        HeadArg::Number(n) => n.to_string(),
972        HeadArg::Auto => "auto".into(),
973    }
974}
975
/// Return the first line of `s`, truncated to at most 60 characters.
/// An empty input yields an empty string.
fn first_line(s: &str) -> String {
    let Some(head) = s.lines().next() else {
        return String::new();
    };
    // Cut on a char boundary: byte offset of the 61st character, if any.
    match head.char_indices().nth(60) {
        Some((cut, _)) => head[..cut].to_string(),
        None => head.to_string(),
    }
}
979
980fn run_ops(
981    ops: &[Op],
982    ctx: &ExecCtx,
983    input: &str,
984    rs: &RuleSet,
985    macro_args: &[MacroArg],
986) -> Result<String> {
987    let raw = input.to_string();
988    let mut state = input.to_string();
989    for op in ops {
990        state = apply_op(op, &state, &raw, ctx, rs, macro_args)?;
991    }
992    Ok(state)
993}
994
995fn apply_op(
996    op: &Op,
997    state: &str,
998    raw: &str,
999    ctx: &ExecCtx,
1000    rs: &RuleSet,
1001    macro_args: &[MacroArg],
1002) -> Result<String> {
1003    match op {
1004        Op::Keep(pat) => Ok(filter_lines(state, |l| pat.compiled.is_match(l))),
1005        Op::Drop(pat) => Ok(filter_lines(state, |l| !pat.compiled.is_match(l))),
1006        Op::Head(arg) => Ok(take_head(state, resolve_head(arg, ctx.level))),
1007        Op::Tail(arg) => Ok(take_tail(state, resolve_head(arg, ctx.level))),
1008        Op::Else(s) => Ok(if state.trim().is_empty() {
1009            s.clone()
1010        } else {
1011            state.to_string()
1012        }),
1013        Op::ElseShell(cmd) => {
1014            if state.trim().is_empty() {
1015                let expanded = expand_args(cmd, macro_args);
1016                run_shell(&expanded, raw, ctx)
1017            } else {
1018                Ok(state.to_string())
1019            }
1020        }
1021        Op::Shell(cmd) => {
1022            let expanded = expand_args(cmd, macro_args);
1023            run_shell(&expanded, state, ctx)
1024        }
1025        Op::Python(body) => {
1026            let expanded = expand_args(body, macro_args);
1027            run_python(&expanded, state, ctx)
1028        }
1029        Op::MacroCall { name, args } => {
1030            let def = rs
1031                .find_define(name)
1032                .ok_or_else(|| anyhow!("undefined macro `{}`", name))?;
1033            if args.len() != def.params.len() {
1034                bail!(
1035                    "macro `{}` expects {} arg(s), got {}",
1036                    name,
1037                    def.params.len(),
1038                    args.len()
1039                );
1040            }
1041            run_ops(&def.ops, ctx, state, rs, args)
1042        }
1043        Op::Split {
1044            delimiter,
1045            pre,
1046            post,
1047        } => {
1048            let (a, b) = split_at_first_match(state, &delimiter.compiled);
1049            let pre_out = if pre.is_empty() {
1050                a
1051            } else {
1052                run_ops(pre, ctx, &a, rs, macro_args)?
1053            };
1054            let post_out = if post.is_empty() {
1055                b
1056            } else {
1057                run_ops(post, ctx, &b, rs, macro_args)?
1058            };
1059            Ok(join_nonempty(&pre_out, &post_out))
1060        }
1061    }
1062}
1063
1064fn resolve_head(arg: &HeadArg, level: Level) -> usize {
1065    match arg {
1066        HeadArg::Number(n) => *n,
1067        HeadArg::Auto => level.head_limit(30),
1068    }
1069}
1070
/// Keep only the lines of `s` for which `keep` returns true, joined with
/// `\n` and without a trailing newline.
fn filter_lines(s: &str, mut keep: impl FnMut(&str) -> bool) -> String {
    let mut kept: Vec<&str> = Vec::new();
    for line in s.lines() {
        if keep(line) {
            kept.push(line);
        }
    }
    kept.join("\n")
}
1077
/// Return the first `n` lines of `s`, joined with `\n` and without a
/// trailing newline.
fn take_head(s: &str, n: usize) -> String {
    let mut out = String::new();
    for (idx, line) in s.lines().take(n).enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out
}
1081
/// Return the last `n` lines of `s`, joined with `\n` and without a
/// trailing newline.
fn take_tail(s: &str, n: usize) -> String {
    let total = s.lines().count();
    let skipped = total.saturating_sub(n);
    s.lines().skip(skipped).collect::<Vec<_>>().join("\n")
}
1087
1088/// Split input at the first line matching `re`. The matching line goes
1089/// into `post`. If no line matches, everything is `pre` and `post` is
1090/// empty.
1091fn split_at_first_match(s: &str, re: &Regex) -> (String, String) {
1092    let mut pre = String::new();
1093    let mut post = String::new();
1094    let mut in_post = false;
1095    for line in s.lines() {
1096        if !in_post && re.is_match(line) {
1097            in_post = true;
1098        }
1099        let buf = if in_post { &mut post } else { &mut pre };
1100        if !buf.is_empty() {
1101            buf.push('\n');
1102        }
1103        buf.push_str(line);
1104    }
1105    (pre, post)
1106}
1107
/// Join two text fragments with a newline, omitting the separator when
/// either side is empty.
fn join_nonempty(a: &str, b: &str) -> String {
    if a.is_empty() {
        return b.to_string();
    }
    if b.is_empty() {
        return a.to_string();
    }
    format!("{a}\n{b}")
}
1116
1117/// Replace `$1`..`$9` with macro positional args. Other `$NAME` tokens
1118/// (e.g. `$level`, `$sub`) are left intact so shell can expand them
1119/// from env vars.
1120fn expand_args(body: &str, args: &[MacroArg]) -> String {
1121    if args.is_empty() {
1122        return body.to_string();
1123    }
1124    let mut out = String::with_capacity(body.len());
1125    let bytes = body.as_bytes();
1126    let mut i = 0;
1127    while i < bytes.len() {
1128        let c = bytes[i];
1129        if c == b'$' && i + 1 < bytes.len() {
1130            let n = bytes[i + 1];
1131            if n.is_ascii_digit() && n != b'0' {
1132                let idx = (n - b'0') as usize;
1133                if idx <= args.len() {
1134                    match &args[idx - 1] {
1135                        MacroArg::Number(v) => out.push_str(&v.to_string()),
1136                        MacroArg::String(v) => out.push_str(v),
1137                    }
1138                    i += 2;
1139                    continue;
1140                }
1141            }
1142        }
1143        out.push(c as char);
1144        i += 1;
1145    }
1146    out
1147}
1148
1149fn run_shell(cmd: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1150    let mut child = Command::new("sh")
1151        .arg("-c")
1152        .arg(cmd)
1153        .env("level", ctx.level.to_string())
1154        .env("sub", ctx.sub)
1155        .env("exit", ctx.exit_code.to_string())
1156        .env("args", ctx.args.join(" "))
1157        .stdin(Stdio::piped())
1158        .stdout(Stdio::piped())
1159        .stderr(Stdio::piped())
1160        .spawn()
1161        .context("spawning sh")?;
1162
1163    if let Some(mut stdin) = child.stdin.take() {
1164        stdin
1165            .write_all(stdin_data.as_bytes())
1166            .context("writing to sh stdin")?;
1167    }
1168
1169    let output = child.wait_with_output().context("waiting for sh")?;
1170    if !output.status.success() {
1171        let stderr = String::from_utf8_lossy(&output.stderr);
1172        bail!(
1173            "shell exited {}: {}",
1174            output.status.code().unwrap_or(-1),
1175            stderr.trim()
1176        );
1177    }
1178    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1179}
1180
1181fn run_python(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1182    if has_pep723_header(body) {
1183        run_python_uv(body, stdin_data, ctx)
1184    } else {
1185        run_python_plain(body, stdin_data, ctx)
1186    }
1187}
1188
/// Report whether any line of `body`, ignoring leading whitespace,
/// starts with the PEP 723 opening marker `# /// script`.
fn has_pep723_header(body: &str) -> bool {
    for line in body.lines() {
        if line.trim_start().starts_with("# /// script") {
            return true;
        }
    }
    false
}
1193
1194fn run_python_plain(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1195    let mut child = Command::new("python3")
1196        .arg("-c")
1197        .arg(body)
1198        .env("level", ctx.level.to_string())
1199        .env("sub", ctx.sub)
1200        .env("exit", ctx.exit_code.to_string())
1201        .env("args", ctx.args.join(" "))
1202        .stdin(Stdio::piped())
1203        .stdout(Stdio::piped())
1204        .stderr(Stdio::piped())
1205        .spawn()
1206        .context("spawning python3")?;
1207
1208    if let Some(mut stdin) = child.stdin.take() {
1209        stdin
1210            .write_all(stdin_data.as_bytes())
1211            .context("writing to python stdin")?;
1212    }
1213    let output = child.wait_with_output().context("waiting for python")?;
1214    if !output.status.success() {
1215        let stderr = String::from_utf8_lossy(&output.stderr);
1216        bail!(
1217            "python exited {}: {}",
1218            output.status.code().unwrap_or(-1),
1219            stderr.trim()
1220        );
1221    }
1222    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1223}
1224
1225/// PEP 723: write the body to a temp file and let `uv run --script` resolve
1226/// inline dependencies. Data flows via stdin to the script.
1227fn run_python_uv(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
1228    let mut script = tempfile::Builder::new()
1229        .prefix("lowfat-lf-")
1230        .suffix(".py")
1231        .tempfile()
1232        .context("creating temp script file")?;
1233    script
1234        .write_all(body.as_bytes())
1235        .context("writing temp script")?;
1236    script.flush().ok();
1237
1238    let path = script
1239        .path()
1240        .to_str()
1241        .ok_or_else(|| anyhow!("non-UTF8 temp path"))?
1242        .to_string();
1243
1244    let mut child = Command::new("uv")
1245        .args(["run", "--script", &path])
1246        .env("level", ctx.level.to_string())
1247        .env("sub", ctx.sub)
1248        .env("exit", ctx.exit_code.to_string())
1249        .env("args", ctx.args.join(" "))
1250        .stdin(Stdio::piped())
1251        .stdout(Stdio::piped())
1252        .stderr(Stdio::piped())
1253        .spawn()
1254        .context("spawning uv (is `uv` installed?)")?;
1255
1256    if let Some(mut stdin) = child.stdin.take() {
1257        stdin
1258            .write_all(stdin_data.as_bytes())
1259            .context("writing to uv stdin")?;
1260    }
1261    let output = child.wait_with_output().context("waiting for uv")?;
1262    if !output.status.success() {
1263        let stderr = String::from_utf8_lossy(&output.stderr);
1264        bail!(
1265            "uv exited {}: {}",
1266            output.status.code().unwrap_or(-1),
1267            stderr.trim()
1268        );
1269    }
1270    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1271}
1272
1273// ──────────────────────────────────────────────────────────────────
1274// Tests
1275// ──────────────────────────────────────────────────────────────────
1276
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src`, panicking with the parse error and the offending
    /// source text when parsing fails.
    fn parse_ok(src: &str) -> RuleSet {
        parse(src).unwrap_or_else(|e| panic!("parse failed: {e}\n--- src ---\n{src}"))
    }

    // ── parser ───────────────────────────────────────────────────

    #[test]
    fn empty_input() {
        let rs = parse_ok("");
        assert!(rs.rules.is_empty());
        assert!(rs.defines.is_empty());
    }

    #[test]
    fn comments_and_blanks_only() {
        let rs = parse_ok("# hi\n\n# more\n");
        assert!(rs.rules.is_empty());
    }

    /// A bare `sub:` header matches every level and collects its ops in
    /// source order.
    #[test]
    fn simple_rule() {
        let rs = parse_ok(
            r#"
status:
    keep /foo/
    head 10
"#,
        );
        assert_eq!(rs.rules.len(), 1);
        let r = &rs.rules[0];
        assert!(matches!(&r.sub, SubPattern::Alt(a) if a == &["status".to_string()]));
        assert!(matches!(r.level, LevelPattern::Star));
        assert_eq!(r.ops.len(), 2);
        match &r.ops[0] {
            Op::Keep(p) => assert_eq!(p.source, "foo"),
            _ => panic!("expected Keep"),
        }
        assert!(matches!(r.ops[1], Op::Head(HeadArg::Number(10))));
    }

    #[test]
    fn sub_with_alternation_and_level() {
        let rs = parse_ok(
            r#"
build|check, ultra:
    head 15
"#,
        );
        let r = &rs.rules[0];
        match &r.sub {
            SubPattern::Alt(a) => assert_eq!(a, &["build".to_string(), "check".to_string()]),
            _ => panic!("expected Alt"),
        }
        assert!(matches!(r.level, LevelPattern::Specific(Level::Ultra)));
    }

    #[test]
    fn star_wildcards() {
        let rs = parse_ok(
            r#"
*:
    head 30
"#,
        );
        assert!(matches!(rs.rules[0].sub, SubPattern::Star));
        assert!(matches!(rs.rules[0].level, LevelPattern::Star));
    }

    #[test]
    fn else_string_fallback() {
        let rs = parse_ok(
            r#"
status:
    keep /^M /
    head 5
    else "clean"
"#,
        );
        match &rs.rules[0].ops[2] {
            Op::Else(s) => assert_eq!(s, "clean"),
            _ => panic!("expected Else"),
        }
    }

    /// `shell:` accepts both a one-line command and a `|` block body.
    #[test]
    fn shell_inline_and_block() {
        let rs = parse_ok(
            r#"
define a:
    shell: sed -E 's/x/y/'

define b:
    shell: |
        awk '
          BEGIN { n=0 }
          { print; n++ }
        '
"#,
        );
        match &rs.defines[0].ops[0] {
            Op::Shell(s) => assert_eq!(s, "sed -E 's/x/y/'"),
            _ => panic!("expected inline Shell"),
        }
        match &rs.defines[1].ops[0] {
            Op::Shell(s) => {
                assert!(s.starts_with("awk '"));
                assert!(s.contains("BEGIN { n=0 }"));
                assert!(s.contains("{ print; n++ }"));
            }
            _ => panic!("expected block Shell"),
        }
    }

    #[test]
    fn python_block_preserves_pep723_and_blanks() {
        let rs = parse_ok(
            r#"
define clean:
    python: |
        # /// script
        # dependencies = ["pyyaml>=6"]
        # ///
        import sys, yaml

        for d in yaml.safe_load_all(sys.stdin):
            print(d)
"#,
        );
        match &rs.defines[0].ops[0] {
            Op::Python(s) => {
                assert!(s.contains("# /// script"));
                assert!(s.contains("# dependencies = [\"pyyaml>=6\"]"));
                assert!(s.contains("import sys, yaml"));
                // Blank line between imports and loop preserved
                assert!(s.contains("yaml\n\nfor"));
                // Internal indent preserved (4 spaces under `for`)
                assert!(s.contains("    print(d)"));
            }
            _ => panic!("expected Python"),
        }
    }

    #[test]
    fn macro_call_with_args() {
        let rs = parse_ok(
            r#"
define compact(n):
    head 1

diff, ultra:
    compact 30
"#,
        );
        match &rs.rules[0].ops[0] {
            Op::MacroCall { name, args } => {
                assert_eq!(name, "compact");
                assert_eq!(args, &[MacroArg::Number(30)]);
            }
            _ => panic!("expected MacroCall"),
        }
    }

    /// Ops may follow the rule header on the same line.
    #[test]
    fn inline_ops_after_rule_header() {
        let rs = parse_ok(
            r#"
define compact(n):
    head 1

diff, ultra:  compact 30  else-shell: awk 'NF' | head -50
"#,
        );
        let ops = &rs.rules[0].ops;
        assert_eq!(ops.len(), 2);
        assert!(matches!(&ops[0], Op::MacroCall { name, .. } if name == "compact"));
        match &ops[1] {
            Op::ElseShell(s) => assert_eq!(s, "awk 'NF' | head -50"),
            _ => panic!("expected ElseShell, got {:?}", &ops[1]),
        }
    }

    /// `pre:` / `post:` sub-chains attach to the preceding `split`; the
    /// op after them belongs to the rule again.
    #[test]
    fn split_with_pre_and_post() {
        let rs = parse_ok(
            r#"
define ah:
    shell: cat

show:
    split /^diff /
    pre:
        keep /^commit /
        ah
    post:
        head 10
    head 100
"#,
        );
        let ops = &rs.rules[0].ops;
        assert_eq!(ops.len(), 2);
        match &ops[0] {
            Op::Split {
                delimiter,
                pre,
                post,
            } => {
                assert_eq!(delimiter.source, "^diff ");
                assert_eq!(pre.len(), 2);
                assert_eq!(post.len(), 1);
                assert!(matches!(&pre[0], Op::Keep(_)));
                assert!(matches!(&pre[1], Op::MacroCall { name, .. } if name == "ah"));
                assert!(matches!(post[0], Op::Head(HeadArg::Number(10))));
            }
            _ => panic!("expected Split"),
        }
        assert!(matches!(ops[1], Op::Head(HeadArg::Number(100))));
    }

    /// Rules are tried in source order; the first one whose sub and
    /// level both match is selected.
    #[test]
    fn first_match_wins_selection() {
        let rs = parse_ok(
            r#"
diff, ultra:
    head 5

diff:
    head 20

*:
    head 30
"#,
        );
        let r = rs.select("diff", Level::Ultra).unwrap();
        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(5))));
        let r = rs.select("diff", Level::Full).unwrap();
        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(20))));
        let r = rs.select("status", Level::Ultra).unwrap();
        assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(30))));
    }

    #[test]
    fn alternation_in_selector_matches() {
        let rs = parse_ok(
            r#"
build|check, ultra:
    head 15
"#,
        );
        assert!(rs.select("build", Level::Ultra).is_some());
        assert!(rs.select("check", Level::Ultra).is_some());
        assert!(rs.select("test", Level::Ultra).is_none());
        assert!(rs.select("build", Level::Full).is_none());
    }

    #[test]
    fn head_auto_keyword() {
        let rs = parse_ok(
            r#"
foo:
    head auto
"#,
        );
        assert!(matches!(rs.rules[0].ops[0], Op::Head(HeadArg::Auto)));
    }

    /// `\/` inside a regex literal is unescaped to a plain `/` in the
    /// stored source.
    #[test]
    fn regex_with_escaped_slash() {
        let rs = parse_ok(
            r#"
foo:
    keep /a\/b/
"#,
        );
        match &rs.rules[0].ops[0] {
            Op::Keep(p) => assert_eq!(p.source, "a/b"),
            _ => panic!(),
        }
    }

    #[test]
    fn errors_on_unterminated_regex() {
        let err = parse("foo:\n    keep /abc\n").unwrap_err();
        assert!(err.to_string().contains("unterminated regex"), "got: {err}");
    }

    #[test]
    fn errors_on_unknown_op() {
        let err = parse("foo:\n    nonsense 1\n").unwrap_err();
        assert!(err.to_string().contains("unknown op"), "got: {err}");
    }

    #[test]
    fn errors_on_invalid_level() {
        let err = parse("foo, gigamax:\n    head 5\n").unwrap_err();
        // anyhow only renders the outermost message via Display; use {:#}
        // to walk the cause chain.
        let chain = format!("{err:#}");
        assert!(chain.contains("unknown level"), "got: {chain}");
    }

    #[test]
    fn errors_on_empty_rule_body() {
        let err = parse("foo:\nbar:\n    head 5\n").unwrap_err();
        assert!(err.to_string().contains("rule has no ops"), "got: {err}");
    }

    // ── full plugin files parse cleanly ──────────────────────────

    #[test]
    fn git_compact_plugin_parses() {
        let src = include_str!(
            "../../../plugins/git/git-compact/filter.lf"
        );
        let rs = parse_ok(src);
        // Defines: strip-trailers, abbrev-hash, compact-diff
        assert_eq!(rs.defines.len(), 3);
        let names: Vec<&str> = rs.defines.iter().map(|d| d.name.as_str()).collect();
        assert_eq!(names, ["strip-trailers", "abbrev-hash", "compact-diff"]);
        assert_eq!(rs.defines[2].params, vec!["limit".to_string()]);

        // Selection sanity
        assert!(rs.select("status", Level::Full).is_some());
        assert!(rs.select("diff", Level::Ultra).is_some());
        assert!(rs.select("diff", Level::Lite).is_some());
        assert!(rs.select("diff", Level::Full).is_some());
        assert!(rs.select("log", Level::Ultra).is_some());
        assert!(rs.select("show", Level::Ultra).is_some());
        assert!(rs.select("show", Level::Full).is_some());
        // Catch-all
        assert!(rs.select("nothing", Level::Full).is_some());

        // Show rule with split has the structure we expect
        let show_full = rs.select("show", Level::Full).unwrap();
        assert!(matches!(&show_full.ops[0], Op::Split { .. }));
    }

    // ── executor ─────────────────────────────────────────────────

    /// Build an `ExecCtx` for `sub` at `level`, with exit code 0 and no
    /// args.
    fn ctx<'a>(sub: &'a str, level: Level) -> ExecCtx<'a> {
        ExecCtx {
            sub,
            level,
            exit_code: 0,
            args: &[],
        }
    }

    /// Chains keep, drop and head (tail has its own test below).
    #[test]
    fn exec_keep_drop_head_tail() {
        let rs = parse_ok(
            r#"
foo:
    keep /^a/
    drop /skip/
    head 3
"#,
        );
        let input = "alpha\nbeta\na-skip\namber\naxe\nakira\n";
        let out = execute(&rs, &ctx("foo", Level::Full), input).unwrap();
        assert_eq!(out, "alpha\namber\naxe\n");
    }

    #[test]
    fn exec_tail() {
        let rs = parse_ok(
            r#"
foo:
    tail 2
"#,
        );
        // Input has no trailing newline; the output gains one.
        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd").unwrap();
        assert_eq!(out, "c\nd\n");
    }

    #[test]
    fn exec_else_string_when_empty() {
        let rs = parse_ok(
            r#"
status:
    keep /^M /
    else "clean"
"#,
        );
        let out = execute(&rs, &ctx("status", Level::Full), "?? new.txt\n").unwrap();
        assert_eq!(out, "clean\n");
    }

    #[test]
    fn exec_else_string_passthrough_when_nonempty() {
        let rs = parse_ok(
            r#"
status:
    keep /^M /
    else "clean"
"#,
        );
        let out = execute(&rs, &ctx("status", Level::Full), "M file.txt\n").unwrap();
        assert_eq!(out, "M file.txt\n");
    }

    /// With no matching rule the input comes back byte-for-byte (no
    /// trailing newline is added).
    #[test]
    fn exec_no_match_passes_through() {
        let rs = parse_ok(
            r#"
foo:
    head 1
"#,
        );
        let input = "x\ny\nz";
        let out = execute(&rs, &ctx("other", Level::Full), input).unwrap();
        assert_eq!(out, input);
    }

    #[test]
    fn exec_first_match_wins() {
        let rs = parse_ok(
            r#"
diff, ultra:
    head 1
diff:
    head 3
"#,
        );
        let input = "a\nb\nc\nd\n";
        let u = execute(&rs, &ctx("diff", Level::Ultra), input).unwrap();
        let f = execute(&rs, &ctx("diff", Level::Full), input).unwrap();
        assert_eq!(u, "a\n");
        assert_eq!(f, "a\nb\nc\n");
    }

    /// `head auto` resolves to a level-dependent line count: 15 for
    /// ultra, 30 for full, 60 for lite.
    #[test]
    fn exec_head_auto_uses_level() {
        let rs = parse_ok(
            r#"
foo:
    head auto
"#,
        );
        let input: String = (1..=80).map(|i| format!("{i}\n")).collect();
        let u = execute(&rs, &ctx("foo", Level::Ultra), &input).unwrap();
        let f = execute(&rs, &ctx("foo", Level::Full), &input).unwrap();
        let l = execute(&rs, &ctx("foo", Level::Lite), &input).unwrap();
        assert_eq!(u.lines().count(), 15);
        assert_eq!(f.lines().count(), 30);
        assert_eq!(l.lines().count(), 60);
    }

    #[test]
    fn exec_shell_inline() {
        let rs = parse_ok(
            r#"
foo:
    shell: tr a-z A-Z
"#,
        );
        let out = execute(&rs, &ctx("foo", Level::Full), "hello\n").unwrap();
        assert_eq!(out.trim_end(), "HELLO");
    }

    #[test]
    fn exec_shell_block() {
        let rs = parse_ok(
            r#"
foo:
    shell: |
        awk '{ print NR, $0 }'
"#,
        );
        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\n").unwrap();
        assert_eq!(out.trim_end(), "1 a\n2 b");
    }

    #[test]
    fn exec_shell_sees_env_vars() {
        let rs = parse_ok(
            r#"
build:
    shell: printf '%s:%s' "$sub" "$level"
"#,
        );
        let out = execute(&rs, &ctx("build", Level::Ultra), "").unwrap();
        // ensure_trailing_newline normalizes shell output without a final \n
        assert_eq!(out, "build:ultra\n");
    }

    /// When the filtered text is empty, `else-shell` runs on the rule's
    /// original input rather than on the empty state.
    #[test]
    fn exec_else_shell_uses_raw_input() {
        let rs = parse_ok(
            r#"
diff:
    keep /^IMPOSSIBLE/
    else-shell: head -2
"#,
        );
        let out = execute(&rs, &ctx("diff", Level::Full), "x\ny\nz\n").unwrap();
        assert_eq!(out, "x\ny\n");
    }

    #[test]
    fn exec_macro_expansion_with_args() {
        let rs = parse_ok(
            r#"
define n-up(count):
    shell: head -$1

foo:
    n-up 2
"#,
        );
        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\n").unwrap();
        assert_eq!(out, "a\nb\n");
    }

    /// The delimiter line itself lands in the `post` half.
    #[test]
    fn exec_split_pre_post() {
        let rs = parse_ok(
            r#"
show:
    split /^diff /
    pre:
        head 1
    post:
        head 2
"#,
        );
        let input = "commit abc\nAuthor: x\nDate: y\ndiff --git a b\n+line1\n+line2\n+line3\n";
        let out = execute(&rs, &ctx("show", Level::Full), input).unwrap();
        assert_eq!(out, "commit abc\ndiff --git a b\n+line1\n");
    }

    #[test]
    fn exec_split_no_match() {
        let rs = parse_ok(
            r#"
show:
    split /^diff /
    pre:
        head 2
    post:
        head 10
"#,
        );
        // No `diff ` line — everything goes to pre, post is empty.
        let out = execute(&rs, &ctx("show", Level::Full), "a\nb\nc\nd\n").unwrap();
        assert_eq!(out, "a\nb\n");
    }

    /// Arity is checked when the macro is executed, not when the file is
    /// parsed (`parse_ok` succeeds here).
    #[test]
    fn exec_macro_arg_count_mismatch_errors() {
        let rs = parse_ok(
            r#"
define needs-two(a, b):
    head 1

foo:
    needs-two 5
"#,
        );
        let err = execute(&rs, &ctx("foo", Level::Full), "x").unwrap_err();
        assert!(err.to_string().contains("expects 2 arg"), "got: {err}");
    }

    #[test]
    fn exec_python_plain_when_no_pep723() {
        // Skip if python3 not on PATH.
        if Command::new("python3").arg("--version").output().is_err() {
            eprintln!("skipping: python3 not available");
            return;
        }
        let rs = parse_ok(
            r#"
foo:
    python: |
        import sys
        for line in sys.stdin:
            print(line.upper(), end="")
"#,
        );
        let out = execute(&rs, &ctx("foo", Level::Full), "hello\nworld\n").unwrap();
        assert_eq!(out, "HELLO\nWORLD\n");
    }

    /// `$1` is substituted before the body reaches the shell, while
    /// awk's own `$`-free program text is left alone.
    #[test]
    fn exec_macro_arg_substitution_in_shell() {
        let rs = parse_ok(
            r#"
define grab(limit):
    shell: |
        awk -v lim=$1 '{ if (NR<=lim) print }'

foo:
    grab 3
"#,
        );
        let out = execute(&rs, &ctx("foo", Level::Full), "a\nb\nc\nd\ne\n").unwrap();
        assert_eq!(out, "a\nb\nc\n");
    }

    /// The header marker is recognised with or without leading
    /// indentation.
    #[test]
    fn pep723_detection() {
        assert!(has_pep723_header(
            "# /// script\n# dependencies = []\n# ///\nimport sys"
        ));
        assert!(has_pep723_header(
            "    # /// script\n    # ///\nimport sys"
        ));
        assert!(!has_pep723_header("import sys\nprint('hi')"));
        assert!(!has_pep723_header("# not pep 723\nprint('hi')"));
    }

    #[test]
    fn kubectl_compact_plugin_parses() {
        let src = include_str!(
            "../../../plugins/kubectl/kubectl-compact/filter.lf"
        );
        let rs = parse_ok(src);
        // Define: clean-yaml (with PEP 723 body)
        assert_eq!(rs.defines.len(), 1);
        assert_eq!(rs.defines[0].name, "clean-yaml");
        match &rs.defines[0].ops[0] {
            Op::Python(body) => {
                assert!(body.contains("# /// script"));
                assert!(body.contains("dependencies = [\"pyyaml>=6\"]"));
                assert!(body.contains("yaml.safe_load_all"));
            }
            other => panic!("expected Python op, got {other:?}"),
        }
        // get/logs/events/* selection
        assert!(rs.select("get", Level::Full).is_some());
        assert!(rs.select("logs", Level::Ultra).is_some());
        assert!(rs.select("logs", Level::Full).is_some());
        assert!(rs.select("events", Level::Ultra).is_some());
        assert!(rs.select("describe", Level::Full).is_some()); // catch-all
    }
}