use crate::level::Level;
use anyhow::{Context, Result, anyhow, bail};
use regex::Regex;
/// A parsed rule file: the macros it defines and the rules it declares.
#[derive(Debug, Default)]
pub struct RuleSet {
/// Macros introduced by `define` headers, in source order.
pub defines: Vec<Define>,
/// Rules in source order; lookups return the first rule that matches.
pub rules: Vec<Rule>,
}
/// A named macro: a reusable list of ops with positional parameters.
#[derive(Debug, Clone)]
pub struct Define {
/// Name used to invoke the macro from an op list.
pub name: String,
/// Parameter names from the `define name(a, b):` header. At call time the
/// executor in this file only compares their count with the argument count.
pub params: Vec<String>,
/// Ops executed, in order, when the macro is called.
pub ops: Vec<Op>,
}
/// One `selector: ops` rule from the rule file.
#[derive(Debug, Clone)]
pub struct Rule {
/// Which subcommand names the rule applies to.
pub sub: SubPattern,
/// Which level the rule applies to.
pub level: LevelPattern,
/// Ops to run: inline ops from the header line first, then the indented body.
pub ops: Vec<Op>,
/// 1-based line number of the rule header in the source text.
pub line_no: usize,
}
/// Subcommand part of a rule selector.
#[derive(Debug, Clone)]
pub enum SubPattern {
/// `*` — matches every subcommand.
Star,
/// `a|b|c` — matches when the subcommand equals any listed name exactly.
Alt(Vec<String>),
}
/// Level part of a rule selector (the text after the comma).
#[derive(Debug, Clone)]
pub enum LevelPattern {
/// `*`, or no level given — matches every level.
Star,
/// Matches only when the requested level equals this one.
Specific(Level),
}
/// A single pipeline stage. Each op receives the current text ("state") and
/// produces the text handed to the next op.
#[derive(Debug, Clone)]
pub enum Op {
/// Keep only the lines that match the regex.
Keep(PatternRegex),
/// Remove the lines that match the regex.
Drop(PatternRegex),
/// Keep the first N lines.
Head(HeadArg),
/// Keep the last N lines.
Tail(HeadArg),
/// If the state is blank (empty after trimming), replace it with this string.
Else(String),
/// If the state is blank, run this shell command on the text the enclosing
/// op list started with and use its stdout.
ElseShell(String),
/// Pipe the state through `sh -c <command>`.
Shell(String),
/// Pipe the state through a Python script (`python3 -c`, or `uv run --script`
/// when the body contains a `# /// script` header line).
Python(String),
/// Invoke a `define`d macro with positional arguments.
MacroCall {
name: String,
args: Vec<MacroArg>,
},
/// Split the state at the first line matching `delimiter`; run `pre` on the
/// lines before it and `post` on the rest (the matching line included), then
/// join the two results. An empty branch passes its part through unchanged.
Split {
delimiter: PatternRegex,
pre: Vec<Op>,
post: Vec<Op>,
},
}
/// A `/regex/` literal, kept both as text and in compiled form.
#[derive(Debug, Clone)]
pub struct PatternRegex {
/// Pattern text between the slashes, with `\/` already unescaped to `/`.
pub source: String,
/// `source` compiled with the `regex` crate.
pub compiled: Regex,
}
/// Line-count argument of `head` / `tail`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadArg {
/// An explicit line count.
Number(usize),
/// `auto` — the count is derived from the execution level at run time.
Auto,
}
/// One positional argument passed to a macro call.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MacroArg {
/// A bare word that parsed as `usize`.
Number(usize),
/// A quoted string literal, or a bare word that is not a number.
String(String),
}
impl RuleSet {
    /// Returns the first rule, in source order, whose selector accepts
    /// `sub` at `level`, or `None` when no rule matches.
    pub fn select(&self, sub: &str, level: Level) -> Option<&Rule> {
        for rule in &self.rules {
            if rule.matches(sub, level) {
                return Some(rule);
            }
        }
        None
    }

    /// Looks up a macro by exact name; the earliest definition wins.
    pub fn find_define(&self, name: &str) -> Option<&Define> {
        for def in &self.defines {
            if def.name == name {
                return Some(def);
            }
        }
        None
    }
}
impl Rule {
    /// Reports whether this rule applies to subcommand `sub` at `level`.
    ///
    /// Both the subcommand pattern and the level pattern must accept.
    pub fn matches(&self, sub: &str, level: Level) -> bool {
        // Reject early when an explicit alternative list does not name `sub`.
        if let SubPattern::Alt(names) = &self.sub {
            if !names.iter().any(|n| n == sub) {
                return false;
            }
        }
        match &self.level {
            LevelPattern::Specific(wanted) => *wanted == level,
            LevelPattern::Star => true,
        }
    }
}
/// One physical line of the rule source, pre-digested for the parser.
#[derive(Debug, Clone)]
struct Line {
/// Number of bytes of leading whitespace in `raw`.
indent: usize,
/// `text`: the line with surrounding whitespace removed.
/// `raw`: the line as written, minus any trailing `\r`.
/// `line_no`: 1-based position in the input.
text: String, raw: String, line_no: usize,
/// True for blank lines and lines whose text starts with `#`.
is_meta: bool,
}
/// Breaks `input` on `\n` and records, for every piece, its indentation,
/// trimmed text, raw text (without a trailing `\r`), 1-based line number and
/// whether it is a blank/comment ("meta") line.
fn split_lines(input: &str) -> Vec<Line> {
    let mut lines = Vec::new();
    for (idx, piece) in input.split('\n').enumerate() {
        let raw = piece.trim_end_matches('\r');
        let body = raw.trim_start();
        let text = body.trim_end();
        lines.push(Line {
            // Indent is measured in bytes of leading whitespace.
            indent: raw.len() - body.len(),
            text: text.to_string(),
            raw: raw.to_string(),
            line_no: idx + 1,
            is_meta: text.is_empty() || text.starts_with('#'),
        });
    }
    lines
}
/// First words that start a built-in op. When parsing an inline chain, a
/// macro's argument list stops at the first word found in this table.
const OP_KEYWORDS: &[&str] = &[
"keep",
"drop",
"head",
"tail",
"else",
"else-shell:",
"shell:",
"python:",
"split",
];
/// Parses rule-file source text into a [`RuleSet`].
///
/// Macro names are collected in a first pass so that a macro may be called
/// before the line on which it is defined.
///
/// # Errors
/// Returns an error describing the first syntax problem found.
pub fn parse(input: &str) -> Result<RuleSet> {
    let lines = split_lines(input);
    let mut parser = Parser {
        macro_names: collect_macro_names(&lines),
        lines: &lines,
        pos: 0,
    };
    parser.parse_ruleset()
}
/// Pre-scan: gathers the name from every non-meta line that starts with
/// `define `. The name ends at the first `(`, `:` or whitespace character;
/// lines that yield an empty name are ignored.
fn collect_macro_names(lines: &[Line]) -> Vec<String> {
    lines
        .iter()
        .filter(|line| !line.is_meta)
        .filter_map(|line| line.text.strip_prefix("define "))
        .map(|rest| {
            let stop = rest
                .find(|c: char| c == '(' || c == ':' || c.is_whitespace())
                .unwrap_or(rest.len());
            rest[..stop].trim()
        })
        .filter(|name| !name.is_empty())
        .map(str::to_string)
        .collect()
}
/// Cursor-based parser over the pre-split source lines.
struct Parser<'a> {
/// Every line of the input, meta lines included.
lines: &'a [Line],
/// Index into `lines` of the next line to consume.
pos: usize,
/// Names of all macros defined anywhere in the input (collected up front).
macro_names: Vec<String>,
}
impl<'a> Parser<'a> {
/// Skips blank/comment lines and returns the next significant line without
/// consuming it, or `None` at end of input. The cursor is left on the
/// returned line.
fn peek_significant(&mut self) -> Option<&'a Line> {
    loop {
        let line = self.lines.get(self.pos)?;
        if !line.is_meta {
            return Some(line);
        }
        self.pos += 1;
    }
}
/// Consumes and returns the line under the cursor (meta or not); returns
/// `None` without moving when the input is exhausted.
fn advance(&mut self) -> Option<&'a Line> {
    let line = self.lines.get(self.pos)?;
    self.pos += 1;
    Some(line)
}
/// True when `name` was declared by some `define` in the input.
fn is_macro(&self, name: &str) -> bool {
    self.macro_names
        .iter()
        .map(String::as_str)
        .any(|known| known == name)
}
fn parse_ruleset(&mut self) -> Result<RuleSet> {
let mut rs = RuleSet::default();
while let Some(line) = self.peek_significant() {
if line.indent != 0 {
bail!("line {}: unexpected indent at top level", line.line_no);
}
if line.text.starts_with("define ") {
let d = self.parse_define()?;
rs.defines.push(d);
} else {
let r = self.parse_rule()?;
rs.rules.push(r);
}
}
Ok(rs)
}
fn parse_define(&mut self) -> Result<Define> {
let header = self.advance().unwrap();
let line_no = header.line_no;
let rest = header
.text
.strip_prefix("define ")
.ok_or_else(|| anyhow!("line {}: expected `define`", line_no))?;
let (name, params, after_paren) =
parse_define_header(rest).with_context(|| format!("line {line_no}"))?;
if !after_paren.starts_with(':') {
bail!(
"line {}: expected `:` after define header, got `{}`",
line_no,
after_paren
);
}
let trailing = after_paren[1..].trim();
if !trailing.is_empty() {
bail!(
"line {}: one-line `define` body not supported (use indented body)",
line_no
);
}
let ops = self.parse_indented_ops(header.indent)?;
if ops.is_empty() {
bail!("line {}: `define {}` has empty body", line_no, name);
}
Ok(Define { name, params, ops })
}
fn parse_rule(&mut self) -> Result<Rule> {
let header = self.advance().unwrap();
let line_no = header.line_no;
let parent_indent = header.indent;
let colon_pos = header
.text
.find(':')
.ok_or_else(|| anyhow!("line {}: missing `:` in rule header", line_no))?;
let selector = &header.text[..colon_pos];
let after = &header.text[colon_pos + 1..];
let (sub, level) =
parse_selector(selector).with_context(|| format!("line {line_no}"))?;
let mut ops = Vec::new();
let inline = after.trim();
if !inline.is_empty() {
let inline_ops = self.parse_inline_ops(inline, line_no)?;
ops.extend(inline_ops);
}
let indented = self.parse_indented_ops(parent_indent)?;
ops.extend(indented);
if ops.is_empty() {
bail!("line {}: rule has no ops", line_no);
}
Ok(Rule {
sub,
level,
ops,
line_no,
})
}
/// Parses consecutive op lines that are indented deeper than
/// `parent_indent`, stopping at the first significant line that is not
/// (or at end of input).
fn parse_indented_ops(&mut self, parent_indent: usize) -> Result<Vec<Op>> {
    let mut ops = Vec::new();
    while let Some(line) = self.peek_significant() {
        if line.indent <= parent_indent {
            break;
        }
        ops.push(self.parse_op_line()?);
    }
    Ok(ops)
}
/// Parses the op on the line under the cursor, consuming that line and, for
/// block ops (`shell:`/`python:` block form, `split`), the lines that belong
/// to it.
///
/// # Errors
/// Fails on an unknown first word or on a malformed argument.
fn parse_op_line(&mut self) -> Result<Op> {
    let line = self.advance().unwrap();
    let (line_no, indent) = (line.line_no, line.indent);
    let text = line.text.as_str();
    let (head, _) = split_first_word(text);
    // `text` carries no leading whitespace, so `head` is a prefix of it.
    let rest = &text[head.len()..];
    let op = match head {
        "keep" => Op::Keep(parse_regex_literal(rest.trim_start(), line_no)?),
        "drop" => Op::Drop(parse_regex_literal(rest.trim_start(), line_no)?),
        "head" => Op::Head(parse_head_arg(rest.trim(), line_no)?),
        "tail" => Op::Tail(parse_head_arg(rest.trim(), line_no)?),
        "else" => Op::Else(parse_string_literal(rest.trim_start(), line_no)?),
        "else-shell:" => {
            let body = rest.trim_start();
            if body.is_empty() {
                bail!("line {}: `else-shell:` requires a command", line_no);
            }
            Op::ElseShell(body.to_string())
        }
        "shell:" => Op::Shell(self.parse_block_body(text, head, indent, line_no)?),
        "python:" => Op::Python(self.parse_block_body(text, head, indent, line_no)?),
        "split" => {
            let delimiter = parse_regex_literal(rest.trim_start(), line_no)?;
            let (pre, post) = self.parse_split_branches(indent)?;
            if pre.is_empty() && post.is_empty() {
                bail!(
                    "line {}: `split` needs at least one `pre:` or `post:` block",
                    line_no
                );
            }
            Op::Split {
                delimiter,
                pre,
                post,
            }
        }
        name if self.is_macro(name) => Op::MacroCall {
            name: name.to_string(),
            args: parse_macro_args(rest.trim(), line_no)?,
        },
        _ => bail!("line {}: unknown op `{}`", line_no, head),
    };
    Ok(op)
}
/// Parses the body of a `shell:` / `python:` op whose header line is
/// `line_text` (already consumed) and whose keyword is `head`.
///
/// Two forms are accepted:
/// * inline — everything after `head` on the header line is the body;
/// * block — the header ends with `|` and the body is the run of following
///   lines indented deeper than `parent_indent`. Blank lines inside the run
///   are kept; trailing blank lines are dropped.
///
/// Block bodies are dedented by the indent of their first non-blank line.
/// A line indented less than that keeps all of its content and only loses
/// its leading whitespace.
///
/// # Errors
/// Fails when the inline body is empty or the block has no non-blank line.
fn parse_block_body(
    &mut self,
    line_text: &str,
    head: &str,
    parent_indent: usize,
    line_no: usize,
) -> Result<String> {
    let after = line_text[head.len()..].trim_start();
    if after != "|" {
        if after.is_empty() {
            bail!(
                "line {}: empty `{}` body (use `| <newline>` for block form)",
                line_no,
                head
            );
        }
        return Ok(after.to_string());
    }

    // Walk raw lines directly: `#` lines inside a script are content, not
    // rule-file comments, so the meta-skipping cursor must not be used here.
    let mut collected: Vec<&'a Line> = Vec::new();
    let mut base: Option<usize> = None;
    while let Some(l) = self.lines.get(self.pos) {
        if !l.text.is_empty() {
            if l.indent <= parent_indent {
                break;
            }
            base.get_or_insert(l.indent);
        }
        collected.push(l);
        self.pos += 1;
    }
    while collected.last().map_or(false, |l| l.text.is_empty()) {
        collected.pop();
    }
    if collected.is_empty() {
        bail!("line {}: `{}` block is empty", line_no, head);
    }

    let base = base.unwrap_or(parent_indent + 4);
    let dedented: Vec<String> = collected
        .iter()
        .map(|l| {
            if l.text.is_empty() {
                return String::new();
            }
            // Strip `base` bytes only when the line really has that much
            // leading whitespace; comparing the raw length instead would cut
            // content off shallower lines. `get` also keeps a cut that lands
            // inside a multi-byte character from panicking.
            match l.raw.get(base..) {
                Some(rest) if l.indent >= base => rest.to_string(),
                _ => l.raw.trim_start().to_string(),
            }
        })
        .collect();
    Ok(dedented.join("\n"))
}
/// Parses the `pre:` / `post:` branches that follow a `split` line.
///
/// A branch header is recognised only at exactly `parent_indent` (the
/// indent of the `split` line); its ops are the lines indented deeper than
/// that. If a branch header appears more than once, the last one wins.
fn parse_split_branches(&mut self, parent_indent: usize) -> Result<(Vec<Op>, Vec<Op>)> {
    let mut pre: Vec<Op> = Vec::new();
    let mut post: Vec<Op> = Vec::new();
    while let Some(line) = self.peek_significant() {
        if line.indent != parent_indent {
            break;
        }
        let slot = match line.text.as_str() {
            "pre:" => &mut pre,
            "post:" => &mut post,
            _ => break,
        };
        self.advance();
        *slot = self.parse_indented_ops(parent_indent)?;
    }
    Ok((pre, post))
}
/// Parses a chain of ops written on the rule header line after the `:`.
///
/// `shell:`, `python:` and `else-shell:` swallow the rest of the line as
/// their command; the other ops consume only their own argument so that
/// further ops may follow. `split` is rejected here.
///
/// # Errors
/// Fails on an unknown word, a missing command, or a malformed argument.
fn parse_inline_ops(&self, text: &str, line_no: usize) -> Result<Vec<Op>> {
    let mut ops = Vec::new();
    let mut remaining = text.trim();
    while !remaining.is_empty() {
        let (head, _) = split_first_word(remaining);
        // `remaining` never starts with whitespace, so `head` prefixes it.
        let rest = remaining[head.len()..].trim_start();
        remaining = match head {
            "shell:" => {
                if rest.is_empty() {
                    bail!("line {}: inline `shell:` needs a command", line_no);
                }
                ops.push(Op::Shell(rest.to_string()));
                ""
            }
            "python:" => {
                if rest.is_empty() {
                    bail!("line {}: inline `python:` needs a command", line_no);
                }
                ops.push(Op::Python(rest.to_string()));
                ""
            }
            "else-shell:" => {
                if rest.is_empty() {
                    bail!("line {}: inline `else-shell:` needs a command", line_no);
                }
                ops.push(Op::ElseShell(rest.to_string()));
                ""
            }
            "keep" | "drop" => {
                let (re, after) = parse_regex_literal_and_rest(rest, line_no)?;
                ops.push(match head {
                    "keep" => Op::Keep(re),
                    _ => Op::Drop(re),
                });
                after.trim_start()
            }
            "head" | "tail" => {
                let (arg_word, after) = take_word(rest);
                let count = parse_head_arg(arg_word, line_no)?;
                ops.push(match head {
                    "head" => Op::Head(count),
                    _ => Op::Tail(count),
                });
                after.trim_start()
            }
            "else" => {
                let (fallback, after) = parse_string_literal_and_rest(rest, line_no)?;
                ops.push(Op::Else(fallback));
                after.trim_start()
            }
            "split" => bail!(
                "line {}: `split` cannot appear inline (needs pre:/post: blocks)",
                line_no
            ),
            name if self.is_macro(name) => {
                let (args, after) = parse_macro_args_until_op(rest, &self.macro_names, line_no)?;
                ops.push(Op::MacroCall {
                    name: name.to_string(),
                    args,
                });
                after.trim_start()
            }
            _ => bail!("line {}: unknown op `{}` in inline chain", line_no, head),
        };
    }
    Ok(ops)
}
}
/// Skips leading whitespace, then splits at the first whitespace character:
/// returns `(first_word, remainder)`, where the remainder still starts with
/// that whitespace (or is empty).
fn split_first_word(s: &str) -> (&str, &str) {
    let trimmed = s.trim_start();
    match trimmed.find(char::is_whitespace) {
        Some(cut) => trimmed.split_at(cut),
        None => trimmed.split_at(trimmed.len()),
    }
}
/// Returns the first whitespace-delimited word of `s` (after skipping
/// leading whitespace) together with everything that follows it.
fn take_word(s: &str) -> (&str, &str) {
    let s = s.trim_start();
    let word_len: usize = s
        .chars()
        .take_while(|c| !c.is_whitespace())
        .map(char::len_utf8)
        .sum();
    s.split_at(word_len)
}
/// Parses a rule selector of the form `sub[, level]`.
///
/// `sub` is `*` or a `|`-separated list of names; `level` is `*`, a level
/// name, or absent (treated as `*`).
///
/// # Errors
/// Fails on an empty selector, an empty alternative, or a level name the
/// `Level` parser rejects.
fn parse_selector(s: &str) -> Result<(SubPattern, LevelPattern)> {
    let s = s.trim();
    if s.is_empty() {
        bail!("empty selector");
    }
    // Only the first comma separates sub from level.
    let (sub_str, level_str) = match s.split_once(',') {
        Some((sub, lvl)) => (sub.trim(), lvl.trim()),
        None => (s, "*"),
    };
    let sub = match sub_str {
        "*" => SubPattern::Star,
        _ => {
            let alts: Vec<String> = sub_str.split('|').map(|a| a.trim().to_string()).collect();
            if alts.iter().any(String::is_empty) {
                bail!("empty alternative in sub pattern `{}`", sub_str);
            }
            SubPattern::Alt(alts)
        }
    };
    let level = match level_str {
        "*" => LevelPattern::Star,
        other => {
            LevelPattern::Specific(other.parse::<Level>().map_err(|e: String| anyhow!(e))?)
        }
    };
    Ok((sub, level))
}
fn parse_define_header(s: &str) -> Result<(String, Vec<String>, &str)> {
let s = s.trim_start();
let end = s
.find(|c: char| c == '(' || c == ':' || c.is_whitespace())
.unwrap_or(s.len());
let name = s[..end].to_string();
if name.is_empty() {
bail!("define needs a name");
}
let rest = s[end..].trim_start();
if let Some(rest) = rest.strip_prefix('(') {
let close = rest
.find(')')
.ok_or_else(|| anyhow!("missing `)` in define params"))?;
let params: Vec<String> = rest[..close]
.split(',')
.map(|p| p.trim().to_string())
.filter(|p| !p.is_empty())
.collect();
Ok((name, params, rest[close + 1..].trim_start()))
} else {
Ok((name, Vec::new(), rest))
}
}
/// Parses a `/regex/` literal that must be the only thing left in `s`.
///
/// # Errors
/// Fails when the literal is malformed or followed by anything other than
/// whitespace.
fn parse_regex_literal(s: &str, line_no: usize) -> Result<PatternRegex> {
    let (pattern, leftover) = parse_regex_literal_and_rest(s, line_no)?;
    match leftover.trim() {
        "" => Ok(pattern),
        extra => bail!(
            "line {}: unexpected trailing input after regex: `{}`",
            line_no,
            extra
        ),
    }
}
fn parse_regex_literal_and_rest(s: &str, line_no: usize) -> Result<(PatternRegex, &str)> {
let s = s.trim_start();
if !s.starts_with('/') {
bail!(
"line {}: expected `/regex/`, got `{}`",
line_no,
preview(s)
);
}
let body = &s[1..];
let mut src = String::new();
let mut chars = body.char_indices().peekable();
let mut end_byte: Option<usize> = None;
while let Some((i, c)) = chars.next() {
if c == '\\' {
if let Some((_, n)) = chars.next() {
if n == '/' {
src.push('/');
} else {
src.push('\\');
src.push(n);
}
} else {
bail!("line {}: trailing backslash in regex", line_no);
}
} else if c == '/' {
end_byte = Some(i);
break;
} else {
src.push(c);
}
}
let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated regex", line_no))?;
let after = &body[end_byte + 1..];
let compiled = Regex::new(&src)
.map_err(|e| anyhow!("line {}: invalid regex `{}`: {}", line_no, src, e))?;
Ok((
PatternRegex {
source: src,
compiled,
},
after,
))
}
/// Parses a `"..."` literal that must be the only thing left in `s`.
///
/// # Errors
/// Fails when the literal is malformed or followed by anything other than
/// whitespace.
fn parse_string_literal(s: &str, line_no: usize) -> Result<String> {
    let (value, leftover) = parse_string_literal_and_rest(s, line_no)?;
    match leftover.trim() {
        "" => Ok(value),
        extra => bail!(
            "line {}: unexpected trailing input after string: `{}`",
            line_no,
            extra
        ),
    }
}
fn parse_string_literal_and_rest(s: &str, line_no: usize) -> Result<(String, &str)> {
let s = s.trim_start();
if !s.starts_with('"') {
bail!(
"line {}: expected `\"...\"`, got `{}`",
line_no,
preview(s)
);
}
let body = &s[1..];
let mut out = String::new();
let mut chars = body.char_indices();
let mut end_byte: Option<usize> = None;
while let Some((i, c)) = chars.next() {
if c == '\\' {
if let Some((_, n)) = chars.next() {
match n {
'n' => out.push('\n'),
't' => out.push('\t'),
'r' => out.push('\r'),
'\\' => out.push('\\'),
'"' => out.push('"'),
other => {
out.push('\\');
out.push(other);
}
}
} else {
bail!("line {}: trailing backslash in string", line_no);
}
} else if c == '"' {
end_byte = Some(i);
break;
} else {
out.push(c);
}
}
let end_byte = end_byte.ok_or_else(|| anyhow!("line {}: unterminated string", line_no))?;
let after = &body[end_byte + 1..];
Ok((out, after))
}
fn parse_head_arg(s: &str, line_no: usize) -> Result<HeadArg> {
let s = s.trim();
if s == "auto" {
return Ok(HeadArg::Auto);
}
s.parse::<usize>().map(HeadArg::Number).map_err(|_| {
anyhow!(
"line {}: expected number or `auto`, got `{}`",
line_no,
s
)
})
}
/// Parses every remaining token of `s` as a macro argument: quoted strings
/// become `MacroArg::String`, bare words become `Number` when they parse as
/// `usize` and `String` otherwise.
///
/// # Errors
/// Fails on a malformed string literal.
fn parse_macro_args(s: &str, line_no: usize) -> Result<Vec<MacroArg>> {
    let mut args = Vec::new();
    let mut rest = s.trim();
    while !rest.is_empty() {
        let (arg, after) = if rest.starts_with('"') {
            let (lit, after) = parse_string_literal_and_rest(rest, line_no)?;
            (MacroArg::String(lit), after)
        } else {
            let (word, after) = take_word(rest);
            let arg = word
                .parse::<usize>()
                .map(MacroArg::Number)
                .unwrap_or_else(|_| MacroArg::String(word.to_string()));
            (arg, after)
        };
        args.push(arg);
        rest = after.trim_start();
    }
    Ok(args)
}
/// Inline-chain variant of macro-argument parsing: consumes arguments until
/// the next word is a built-in op keyword or a macro name, and returns the
/// arguments together with the unconsumed remainder.
///
/// # Errors
/// Fails on a malformed string literal.
fn parse_macro_args_until_op<'a>(
    s: &'a str,
    macro_names: &[String],
    line_no: usize,
) -> Result<(Vec<MacroArg>, &'a str)> {
    let mut args = Vec::new();
    let mut rest = s.trim_start();
    loop {
        if rest.is_empty() {
            break;
        }
        let (word, after_word) = take_word(rest);
        let starts_next_op =
            OP_KEYWORDS.contains(&word) || macro_names.iter().any(|n| n == word);
        if starts_next_op {
            break;
        }
        let after = if rest.starts_with('"') {
            let (lit, after) = parse_string_literal_and_rest(rest, line_no)?;
            args.push(MacroArg::String(lit));
            after
        } else {
            args.push(match word.parse::<usize>() {
                Ok(n) => MacroArg::Number(n),
                Err(_) => MacroArg::String(word.to_string()),
            });
            after_word
        };
        rest = after.trim_start();
    }
    Ok((args, rest))
}
/// Returns at most the first 40 characters of `s`, for use in error
/// messages. The cut always falls on a character boundary.
fn preview(s: &str) -> &str {
    match s.char_indices().nth(40) {
        Some((cut, _)) => &s[..cut],
        None => s,
    }
}
use std::io::Write;
use std::process::{Command, Stdio};
/// Facts about the invocation being filtered. They drive rule selection and
/// are exported to `shell:` / `python:` child processes as the environment
/// variables `sub`, `level`, `exit` and `args`.
#[derive(Debug, Clone)]
pub struct ExecCtx<'a> {
/// Subcommand name matched against rule selectors.
pub sub: &'a str,
/// Level matched against rule selectors; also resolves `head auto`.
pub level: Level,
/// Exported as `exit`. NOTE(review): presumably the exit code of the
/// command whose output is being filtered — confirm with the caller.
pub exit_code: i32,
/// Exported as `args`, joined with single spaces.
pub args: &'a [String],
}
pub fn execute(rs: &RuleSet, ctx: &ExecCtx, input: &str) -> Result<String> {
let Some(rule) = rs.select(ctx.sub, ctx.level) else {
return Ok(input.to_string());
};
let out = run_ops(&rule.ops, ctx, input, rs, &[])?;
Ok(ensure_trailing_newline(out))
}
/// Appends `\n` to a non-empty string that does not already end with one;
/// empty strings are returned untouched.
fn ensure_trailing_newline(mut s: String) -> String {
    if s.is_empty() || s.ends_with('\n') {
        return s;
    }
    s.push('\n');
    s
}
/// Measurements for one op of a traced run.
#[derive(Debug, Clone)]
pub struct StageRecord {
/// Short human-readable rendering of the op.
pub op_desc: String,
/// Line count of the text fed into the op.
pub stdin_lines: usize,
/// Byte length of the text fed into the op.
pub stdin_bytes: usize,
/// Line count of the text the op produced.
pub stdout_lines: usize,
/// Byte length of the text the op produced.
pub stdout_bytes: usize,
/// Wall-clock time spent applying the op, in microseconds.
pub elapsed_us: u128,
}
/// Result of a traced run: which rule fired and what each op did.
#[derive(Debug, Default, Clone)]
pub struct ExplainTrace {
/// Index into `RuleSet::rules` of the rule that matched, if any.
pub matched_rule: Option<usize>,
/// One record per top-level op of the matched rule, in execution order.
pub stages: Vec<StageRecord>,
}
/// Like [`execute`], but also records per-op sizes and timings.
///
/// Only the matched rule's top-level ops are recorded; ops nested inside a
/// macro call or a split branch are covered by their parent's record.
///
/// # Errors
/// Propagates any error raised while applying an op.
pub fn execute_explain(
    rs: &RuleSet,
    ctx: &ExecCtx,
    input: &str,
) -> Result<(String, ExplainTrace)> {
    let mut trace = ExplainTrace::default();
    let Some(idx) = rs
        .rules
        .iter()
        .position(|r| r.matches(ctx.sub, ctx.level))
    else {
        return Ok((input.to_string(), trace));
    };
    trace.matched_rule = Some(idx);

    let mut state = input.to_string();
    for op in &rs.rules[idx].ops {
        let (lines_in, bytes_in) = (state.lines().count(), state.len());
        let timer = std::time::Instant::now();
        let next = apply_op(op, &state, input, ctx, rs, &[])?;
        let elapsed_us = timer.elapsed().as_micros();
        trace.stages.push(StageRecord {
            op_desc: describe_op(op),
            stdin_lines: lines_in,
            stdin_bytes: bytes_in,
            stdout_lines: next.lines().count(),
            stdout_bytes: next.len(),
            elapsed_us,
        });
        state = next;
    }
    Ok((ensure_trailing_newline(state), trace))
}
/// Renders an op as a short one-line label for trace output. Script bodies
/// are reduced to their first line (at most 60 characters).
fn describe_op(op: &Op) -> String {
    match op {
        Op::Keep(pat) => format!("keep /{}/", pat.source),
        Op::Drop(pat) => format!("drop /{}/", pat.source),
        Op::Head(count) => format!("head {}", describe_head(count)),
        Op::Tail(count) => format!("tail {}", describe_head(count)),
        Op::Else(fallback) => format!("else {fallback:?}"),
        Op::ElseShell(cmd) => format!("else-shell: {}", first_line(cmd)),
        Op::Shell(cmd) => format!("shell: {}", first_line(cmd)),
        Op::Python(body) if has_pep723_header(body) => {
            format!("python (uv): {}", first_line(body))
        }
        Op::Python(body) => format!("python: {}", first_line(body)),
        Op::MacroCall { name, args } => {
            // Name followed by each argument, separated by single spaces.
            let mut label = name.clone();
            for arg in args {
                label.push(' ');
                match arg {
                    MacroArg::Number(n) => label.push_str(&n.to_string()),
                    MacroArg::String(s) => label.push_str(s),
                }
            }
            label
        }
        Op::Split { delimiter, .. } => format!("split /{}/", delimiter.source),
    }
}
/// Renders a `head`/`tail` argument: the number itself, or `auto`.
fn describe_head(a: &HeadArg) -> String {
    match *a {
        HeadArg::Auto => String::from("auto"),
        HeadArg::Number(n) => format!("{n}"),
    }
}
/// Returns the first line of `s`, cut to at most 60 characters; empty when
/// `s` has no lines.
fn first_line(s: &str) -> String {
    let line = s.lines().next().unwrap_or("");
    match line.char_indices().nth(60) {
        Some((cut, _)) => line[..cut].to_string(),
        None => line.to_string(),
    }
}
/// Applies `ops` in order, feeding each op the previous op's output.
///
/// `input` is both the starting state and the "raw" text that `else-shell:`
/// falls back to. `macro_args` are the arguments of the enclosing macro
/// call (empty at rule level), used for `$N` substitution in script bodies.
///
/// # Errors
/// Stops at, and returns, the first op error.
fn run_ops(
    ops: &[Op],
    ctx: &ExecCtx,
    input: &str,
    rs: &RuleSet,
    macro_args: &[MacroArg],
) -> Result<String> {
    ops.iter().try_fold(input.to_string(), |state, op| {
        apply_op(op, &state, input, ctx, rs, macro_args)
    })
}
fn apply_op(
op: &Op,
state: &str,
raw: &str,
ctx: &ExecCtx,
rs: &RuleSet,
macro_args: &[MacroArg],
) -> Result<String> {
match op {
Op::Keep(pat) => Ok(filter_lines(state, |l| pat.compiled.is_match(l))),
Op::Drop(pat) => Ok(filter_lines(state, |l| !pat.compiled.is_match(l))),
Op::Head(arg) => Ok(take_head(state, resolve_head(arg, ctx.level))),
Op::Tail(arg) => Ok(take_tail(state, resolve_head(arg, ctx.level))),
Op::Else(s) => Ok(if state.trim().is_empty() {
s.clone()
} else {
state.to_string()
}),
Op::ElseShell(cmd) => {
if state.trim().is_empty() {
let expanded = expand_args(cmd, macro_args);
run_shell(&expanded, raw, ctx)
} else {
Ok(state.to_string())
}
}
Op::Shell(cmd) => {
let expanded = expand_args(cmd, macro_args);
run_shell(&expanded, state, ctx)
}
Op::Python(body) => {
let expanded = expand_args(body, macro_args);
run_python(&expanded, state, ctx)
}
Op::MacroCall { name, args } => {
let def = rs
.find_define(name)
.ok_or_else(|| anyhow!("undefined macro `{}`", name))?;
if args.len() != def.params.len() {
bail!(
"macro `{}` expects {} arg(s), got {}",
name,
def.params.len(),
args.len()
);
}
run_ops(&def.ops, ctx, state, rs, args)
}
Op::Split {
delimiter,
pre,
post,
} => {
let (a, b) = split_at_first_match(state, &delimiter.compiled);
let pre_out = if pre.is_empty() {
a
} else {
run_ops(pre, ctx, &a, rs, macro_args)?
};
let post_out = if post.is_empty() {
b
} else {
run_ops(post, ctx, &b, rs, macro_args)?
};
Ok(join_nonempty(&pre_out, &post_out))
}
}
}
/// Turns a `head`/`tail` argument into a concrete line count: explicit
/// numbers are used as-is, `auto` asks the level via `head_limit(30)`.
fn resolve_head(arg: &HeadArg, level: Level) -> usize {
    match *arg {
        HeadArg::Auto => level.head_limit(30),
        HeadArg::Number(n) => n,
    }
}
/// Returns the lines of `s` for which `keep` is true, joined with `\n`
/// (no trailing newline).
fn filter_lines(s: &str, mut keep: impl FnMut(&str) -> bool) -> String {
    let mut out = String::new();
    let mut first = true;
    for line in s.lines() {
        if !keep(line) {
            continue;
        }
        if !first {
            out.push('\n');
        }
        first = false;
        out.push_str(line);
    }
    out
}
/// Returns the first `n` lines of `s`, joined with `\n` (no trailing
/// newline).
fn take_head(s: &str, n: usize) -> String {
    let mut out = String::new();
    for (i, line) in s.lines().enumerate() {
        if i == n {
            break;
        }
        if i > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out
}
/// Returns the last `n` lines of `s`, joined with `\n` (no trailing
/// newline). Asking for more lines than exist returns them all.
fn take_tail(s: &str, n: usize) -> String {
    let total = s.lines().count();
    let skipped = total.saturating_sub(n);
    s.lines().skip(skipped).collect::<Vec<_>>().join("\n")
}
/// Splits `s` by lines at the first line matching `re`: returns the lines
/// before it and the lines from it onward, each joined with `\n`. When no
/// line matches, everything lands in the first part.
///
/// A separator is inserted only when the part built so far is non-empty, so
/// empty lines at the very start of a part do not produce one.
fn split_at_first_match(s: &str, re: &Regex) -> (String, String) {
    fn append(buf: &mut String, line: &str) {
        if !buf.is_empty() {
            buf.push('\n');
        }
        buf.push_str(line);
    }

    let mut pre = String::new();
    let mut post = String::new();
    let mut lines = s.lines().peekable();
    // Everything up to (not including) the first matching line.
    while let Some(line) = lines.next_if(|l| !re.is_match(l)) {
        append(&mut pre, line);
    }
    // The matching line and all that follow.
    for line in lines {
        append(&mut post, line);
    }
    (pre, post)
}
/// Joins `a` and `b` with a newline, omitting the separator when either
/// side is empty.
fn join_nonempty(a: &str, b: &str) -> String {
    if a.is_empty() {
        return b.to_string();
    }
    if b.is_empty() {
        return a.to_string();
    }
    format!("{a}\n{b}")
}
/// Substitutes positional macro arguments into a script body.
///
/// `$1` … `$9` are replaced by the corresponding entry of `args` when that
/// entry exists; `$0`, references beyond `args.len()`, and any other `$`
/// are left untouched. With no arguments the body is returned unchanged.
///
/// The body is processed character by character so that non-ASCII text is
/// copied through intact.
fn expand_args(body: &str, args: &[MacroArg]) -> String {
    if args.is_empty() {
        return body.to_string();
    }
    let mut out = String::with_capacity(body.len());
    let mut chars = body.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '$' {
            // `to_digit(10)` accepts ASCII digits only; index 0 is never valid.
            let slot = chars
                .peek()
                .and_then(|d| d.to_digit(10))
                .map(|d| d as usize)
                .filter(|idx| (1..=args.len()).contains(idx));
            if let Some(idx) = slot {
                chars.next();
                match &args[idx - 1] {
                    MacroArg::Number(v) => out.push_str(&v.to_string()),
                    MacroArg::String(v) => out.push_str(v),
                }
                continue;
            }
        }
        out.push(c);
    }
    out
}
/// Runs `cmd` with `sh -c`, feeding it `stdin_data` and returning its stdout
/// (decoded lossily as UTF-8).
///
/// The context is exported to the child as the environment variables
/// `level`, `sub`, `exit` and `args`.
///
/// Stdin is written from a helper thread while this thread drains stdout and
/// stderr; writing it all up front can deadlock once the pipe buffers fill.
/// A child that stops reading early (for example `head -5`) is not treated
/// as an error.
///
/// # Errors
/// Fails when the process cannot be spawned or waited on, when writing stdin
/// fails for a reason other than a closed pipe, or when the command exits
/// unsuccessfully (stderr is included in the message).
fn run_shell(cmd: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
    let mut child = Command::new("sh")
        .arg("-c")
        .arg(cmd)
        .env("level", ctx.level.to_string())
        .env("sub", ctx.sub)
        .env("exit", ctx.exit_code.to_string())
        .env("args", ctx.args.join(" "))
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .context("spawning sh")?;

    let stdin_pipe = child.stdin.take();
    let (write_result, wait_result) = std::thread::scope(|scope| {
        let writer = scope.spawn(move || -> std::io::Result<()> {
            let Some(mut pipe) = stdin_pipe else {
                return Ok(());
            };
            // The pipe is dropped when this closure returns, signalling EOF.
            match pipe.write_all(stdin_data.as_bytes()) {
                Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(()),
                other => other,
            }
        });
        let waited = child.wait_with_output();
        (writer.join(), waited)
    });

    let output = wait_result.context("waiting for sh")?;
    match write_result {
        Ok(written) => written.context("writing to sh stdin")?,
        Err(_) => bail!("stdin writer thread for sh panicked"),
    }
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "shell exited {}: {}",
            output.status.code().unwrap_or(-1),
            stderr.trim()
        );
    }
    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
fn run_python(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
if has_pep723_header(body) {
run_python_uv(body, stdin_data, ctx)
} else {
run_python_plain(body, stdin_data, ctx)
}
}
/// True when some line of `body`, ignoring leading whitespace, starts with
/// `# /// script` (the opening marker of inline script metadata).
fn has_pep723_header(body: &str) -> bool {
    for line in body.lines() {
        if line.trim_start().starts_with("# /// script") {
            return true;
        }
    }
    false
}
/// Runs `body` with `python3 -c`, feeding it `stdin_data` and returning its
/// stdout (decoded lossily as UTF-8).
///
/// The context is exported to the child as the environment variables
/// `level`, `sub`, `exit` and `args`.
///
/// Stdin is written from a helper thread while this thread drains stdout and
/// stderr; writing it all up front can deadlock once the pipe buffers fill.
/// A script that stops reading early is not treated as an error.
///
/// # Errors
/// Fails when the process cannot be spawned or waited on, when writing stdin
/// fails for a reason other than a closed pipe, or when the script exits
/// unsuccessfully (stderr is included in the message).
fn run_python_plain(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
    let mut child = Command::new("python3")
        .arg("-c")
        .arg(body)
        .env("level", ctx.level.to_string())
        .env("sub", ctx.sub)
        .env("exit", ctx.exit_code.to_string())
        .env("args", ctx.args.join(" "))
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .context("spawning python3")?;

    let stdin_pipe = child.stdin.take();
    let (write_result, wait_result) = std::thread::scope(|scope| {
        let writer = scope.spawn(move || -> std::io::Result<()> {
            let Some(mut pipe) = stdin_pipe else {
                return Ok(());
            };
            // The pipe is dropped when this closure returns, signalling EOF.
            match pipe.write_all(stdin_data.as_bytes()) {
                Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(()),
                other => other,
            }
        });
        let waited = child.wait_with_output();
        (writer.join(), waited)
    });

    let output = wait_result.context("waiting for python")?;
    match write_result {
        Ok(written) => written.context("writing to python stdin")?,
        Err(_) => bail!("stdin writer thread for python panicked"),
    }
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "python exited {}: {}",
            output.status.code().unwrap_or(-1),
            stderr.trim()
        );
    }
    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
/// Runs `body` through `uv run --script`, feeding it `stdin_data` and
/// returning its stdout (decoded lossily as UTF-8).
///
/// The script is written to a temporary `lowfat-lf-*.py` file whose handle
/// is held until this function returns. The context is exported to the
/// child as the environment variables `level`, `sub`, `exit` and `args`.
///
/// Stdin is written from a helper thread while this thread drains stdout and
/// stderr; writing it all up front can deadlock once the pipe buffers fill.
/// A script that stops reading early is not treated as an error.
///
/// # Errors
/// Fails when the temp file cannot be created or written, when `uv` cannot
/// be spawned or waited on, when writing stdin fails for a reason other than
/// a closed pipe, or when `uv` exits unsuccessfully (stderr is included in
/// the message).
fn run_python_uv(body: &str, stdin_data: &str, ctx: &ExecCtx) -> Result<String> {
    let mut script = tempfile::Builder::new()
        .prefix("lowfat-lf-")
        .suffix(".py")
        .tempfile()
        .context("creating temp script file")?;
    script
        .write_all(body.as_bytes())
        .context("writing temp script")?;
    // A failed flush means the child could run a truncated script.
    script.flush().context("flushing temp script")?;
    let path = script
        .path()
        .to_str()
        .ok_or_else(|| anyhow!("non-UTF8 temp path"))?
        .to_string();

    let mut child = Command::new("uv")
        .args(["run", "--script", &path])
        .env("level", ctx.level.to_string())
        .env("sub", ctx.sub)
        .env("exit", ctx.exit_code.to_string())
        .env("args", ctx.args.join(" "))
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .context("spawning uv (is `uv` installed?)")?;

    let stdin_pipe = child.stdin.take();
    let (write_result, wait_result) = std::thread::scope(|scope| {
        let writer = scope.spawn(move || -> std::io::Result<()> {
            let Some(mut pipe) = stdin_pipe else {
                return Ok(());
            };
            // The pipe is dropped when this closure returns, signalling EOF.
            match pipe.write_all(stdin_data.as_bytes()) {
                Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(()),
                other => other,
            }
        });
        let waited = child.wait_with_output();
        (writer.join(), waited)
    });

    let output = wait_result.context("waiting for uv")?;
    match write_result {
        Ok(written) => written.context("writing to uv stdin")?,
        Err(_) => bail!("stdin writer thread for uv panicked"),
    }
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "uv exited {}: {}",
            output.status.code().unwrap_or(-1),
            stderr.trim()
        );
    }
    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
#[cfg(test)]
mod tests {
use super::*;
fn parse_ok(src: &str) -> RuleSet {
parse(src).unwrap_or_else(|e| panic!("parse failed: {e}\n--- src ---\n{src}"))
}
#[test]
fn empty_input() {
let rs = parse_ok("");
assert!(rs.rules.is_empty());
assert!(rs.defines.is_empty());
}
#[test]
fn comments_and_blanks_only() {
let rs = parse_ok("# hi\n\n# more\n");
assert!(rs.rules.is_empty());
}
#[test]
fn simple_rule() {
let rs = parse_ok(
r#"
status:
keep /foo/
head 10
"#,
);
assert_eq!(rs.rules.len(), 1);
let r = &rs.rules[0];
assert!(matches!(&r.sub, SubPattern::Alt(a) if a == &["status".to_string()]));
assert!(matches!(r.level, LevelPattern::Star));
assert_eq!(r.ops.len(), 2);
match &r.ops[0] {
Op::Keep(p) => assert_eq!(p.source, "foo"),
_ => panic!("expected Keep"),
}
assert!(matches!(r.ops[1], Op::Head(HeadArg::Number(10))));
}
#[test]
fn sub_with_alternation_and_level() {
let rs = parse_ok(
r#"
build|check, ultra:
head 15
"#,
);
let r = &rs.rules[0];
match &r.sub {
SubPattern::Alt(a) => assert_eq!(a, &["build".to_string(), "check".to_string()]),
_ => panic!("expected Alt"),
}
assert!(matches!(r.level, LevelPattern::Specific(Level::Ultra)));
}
#[test]
fn star_wildcards() {
let rs = parse_ok(
r#"
*:
head 30
"#,
);
assert!(matches!(rs.rules[0].sub, SubPattern::Star));
assert!(matches!(rs.rules[0].level, LevelPattern::Star));
}
#[test]
fn else_string_fallback() {
let rs = parse_ok(
r#"
status:
keep /^M /
head 5
else "clean"
"#,
);
match &rs.rules[0].ops[2] {
Op::Else(s) => assert_eq!(s, "clean"),
_ => panic!("expected Else"),
}
}
#[test]
fn shell_inline_and_block() {
let rs = parse_ok(
r#"
define a:
shell: sed -E 's/x/y/'
define b:
shell: |
awk '
BEGIN { n=0 }
{ print; n++ }
'
"#,
);
match &rs.defines[0].ops[0] {
Op::Shell(s) => assert_eq!(s, "sed -E 's/x/y/'"),
_ => panic!("expected inline Shell"),
}
match &rs.defines[1].ops[0] {
Op::Shell(s) => {
assert!(s.starts_with("awk '"));
assert!(s.contains("BEGIN { n=0 }"));
assert!(s.contains("{ print; n++ }"));
}
_ => panic!("expected block Shell"),
}
}
#[test]
fn python_block_preserves_pep723_and_blanks() {
let rs = parse_ok(
r#"
define clean:
python: |
# /// script
# dependencies = ["pyyaml>=6"]
# ///
import sys, yaml
for d in yaml.safe_load_all(sys.stdin):
print(d)
"#,
);
match &rs.defines[0].ops[0] {
Op::Python(s) => {
assert!(s.contains("# /// script"));
assert!(s.contains("# dependencies = [\"pyyaml>=6\"]"));
assert!(s.contains("import sys, yaml"));
assert!(s.contains("yaml\n\nfor"));
assert!(s.contains(" print(d)"));
}
_ => panic!("expected Python"),
}
}
#[test]
fn macro_call_with_args() {
let rs = parse_ok(
r#"
define compact(n):
head 1
diff, ultra:
compact 30
"#,
);
match &rs.rules[0].ops[0] {
Op::MacroCall { name, args } => {
assert_eq!(name, "compact");
assert_eq!(args, &[MacroArg::Number(30)]);
}
_ => panic!("expected MacroCall"),
}
}
#[test]
fn inline_ops_after_rule_header() {
let rs = parse_ok(
r#"
define compact(n):
head 1
diff, ultra: compact 30 else-shell: awk 'NF' | head -50
"#,
);
let ops = &rs.rules[0].ops;
assert_eq!(ops.len(), 2);
assert!(matches!(&ops[0], Op::MacroCall { name, .. } if name == "compact"));
match &ops[1] {
Op::ElseShell(s) => assert_eq!(s, "awk 'NF' | head -50"),
_ => panic!("expected ElseShell, got {:?}", &ops[1]),
}
}
#[test]
fn split_with_pre_and_post() {
let rs = parse_ok(
r#"
define ah:
shell: cat
show:
split /^diff /
pre:
keep /^commit /
ah
post:
head 10
head 100
"#,
);
let ops = &rs.rules[0].ops;
assert_eq!(ops.len(), 2);
match &ops[0] {
Op::Split {
delimiter,
pre,
post,
} => {
assert_eq!(delimiter.source, "^diff ");
assert_eq!(pre.len(), 2);
assert_eq!(post.len(), 1);
assert!(matches!(&pre[0], Op::Keep(_)));
assert!(matches!(&pre[1], Op::MacroCall { name, .. } if name == "ah"));
assert!(matches!(post[0], Op::Head(HeadArg::Number(10))));
}
_ => panic!("expected Split"),
}
assert!(matches!(ops[1], Op::Head(HeadArg::Number(100))));
}
#[test]
fn first_match_wins_selection() {
let rs = parse_ok(
r#"
diff, ultra:
head 5
diff:
head 20
*:
head 30
"#,
);
let r = rs.select("diff", Level::Ultra).unwrap();
assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(5))));
let r = rs.select("diff", Level::Full).unwrap();
assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(20))));
let r = rs.select("status", Level::Ultra).unwrap();
assert!(matches!(r.ops[0], Op::Head(HeadArg::Number(30))));
}
#[test]
fn alternation_in_selector_matches() {
let rs = parse_ok(
r#"
build|check, ultra:
head 15
"#,
);
assert!(rs.select("build", Level::Ultra).is_some());
assert!(rs.select("check", Level::Ultra).is_some());
assert!(rs.select("test", Level::Ultra).is_none());
assert!(rs.select("build", Level::Full).is_none());
}
#[test]
fn head_auto_keyword() {
let rs = parse_ok(
r#"
foo:
head auto
"#,
);
assert!(matches!(rs.rules[0].ops[0], Op::Head(HeadArg::Auto)));
}
#[test]
fn regex_with_escaped_slash() {
let rs = parse_ok(
r#"
foo:
keep /a\/b/
"#,
);
match &rs.rules[0].ops[0] {
Op::Keep(p) => assert_eq!(p.source, "a/b"),
_ => panic!(),
}
}
#[test]
fn errors_on_unterminated_regex() {
let err = parse("foo:\n keep /abc\n").unwrap_err();
assert!(err.to_string().contains("unterminated regex"), "got: {err}");
}
#[test]
fn errors_on_unknown_op() {
let err = parse("foo:\n nonsense 1\n").unwrap_err();
assert!(err.to_string().contains("unknown op"), "got: {err}");
}
#[test]
fn errors_on_invalid_level() {
let err = parse("foo, gigamax:\n head 5\n").unwrap_err();
let chain = format!("{err:#}");
assert!(chain.contains("unknown level"), "got: {chain}");
}
#[test]
fn errors_on_empty_rule_body() {
let err = parse("foo:\nbar:\n head 5\n").unwrap_err();
assert!(err.to_string().contains("rule has no ops"), "got: {err}");
}
/// The bundled git-compact filter parses, defines its three macros in order, and
/// yields a rule for every subcommand/level pair listed below.
#[test]
fn git_compact_plugin_parses() {
    let rules = parse_ok(include_str!("../../../plugins/git/git-compact/filter.lf"));

    // Macro definitions: count, names in declaration order, and the parameter list
    // of the third one.
    assert_eq!(rules.defines.len(), 3);
    let define_names: Vec<&str> = rules
        .defines
        .iter()
        .map(|def| def.name.as_str())
        .collect();
    assert_eq!(define_names, ["strip-trailers", "abbrev-hash", "compact-diff"]);
    assert_eq!(rules.defines[2].params, vec!["limit".to_string()]);

    // Each of these lookups must resolve to some rule.
    let expected_hits = [
        ("status", Level::Full),
        ("diff", Level::Ultra),
        ("diff", Level::Lite),
        ("diff", Level::Full),
        ("log", Level::Ultra),
        ("show", Level::Ultra),
        ("show", Level::Full),
        ("nothing", Level::Full),
    ];
    for (sub, level) in expected_hits {
        assert!(rules.select(sub, level).is_some());
    }

    // The rule chosen for `show` at full level starts with a split op.
    let show_rule = rules.select("show", Level::Full).unwrap();
    assert!(matches!(&show_rule.ops[0], Op::Split { .. }));
}
/// Test helper: builds an `ExecCtx` for `sub` and `level` with exit code 0 and
/// an empty argument slice.
fn ctx<'a>(sub: &'a str, level: Level) -> ExecCtx<'a> {
    ExecCtx {
        // Fixed defaults shared by every exec test.
        exit_code: 0,
        args: &[],
        // Caller-supplied selection keys.
        sub,
        level,
    }
}
/// `keep`, `drop` and `head` applied in sequence: keep lines starting with `a`,
/// drop those containing `skip`, then take the first three.
#[test]
fn exec_keep_drop_head_tail() {
    let script = r#"
foo:
keep /^a/
drop /skip/
head 3
"#;
    let rules = parse_ok(script);
    let produced = execute(
        &rules,
        &ctx("foo", Level::Full),
        "alpha\nbeta\na-skip\namber\naxe\nakira\n",
    )
    .unwrap();
    assert_eq!(produced, "alpha\namber\naxe\n");
}
/// `tail 2` keeps the last two lines; the expected output ends with a newline
/// even though the input does not.
#[test]
fn exec_tail() {
    let script = r#"
foo:
tail 2
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let produced = execute(&rules, &context, "a\nb\nc\nd").unwrap();
    assert_eq!(produced, "c\nd\n");
}
/// When `keep` matches nothing, the `else "clean"` string becomes the output.
#[test]
fn exec_else_string_when_empty() {
    let script = r#"
status:
keep /^M /
else "clean"
"#;
    let rules = parse_ok(script);
    let context = ctx("status", Level::Full);
    let produced = execute(&rules, &context, "?? new.txt\n").unwrap();
    assert_eq!(produced, "clean\n");
}
/// When `keep` leaves at least one line, the `else` string is not used and the
/// kept line is the output.
#[test]
fn exec_else_string_passthrough_when_nonempty() {
    let script = r#"
status:
keep /^M /
else "clean"
"#;
    let rules = parse_ok(script);
    let context = ctx("status", Level::Full);
    let produced = execute(&rules, &context, "M file.txt\n").unwrap();
    assert_eq!(produced, "M file.txt\n");
}
/// With no rule matching the subcommand, the input comes back unchanged.
#[test]
fn exec_no_match_passes_through() {
    let script = r#"
foo:
head 1
"#;
    let rules = parse_ok(script);
    let untouched = "x\ny\nz";
    // "other" is not covered by the only rule ("foo").
    let produced = execute(&rules, &ctx("other", Level::Full), untouched).unwrap();
    assert_eq!(produced, untouched);
}
/// Rules are tried in order: the `diff, ultra` rule applies at ultra level, and
/// the later plain `diff` rule applies at full level.
#[test]
fn exec_first_match_wins() {
    let script = r#"
diff, ultra:
head 1
diff:
head 3
"#;
    let rules = parse_ok(script);
    let four_lines = "a\nb\nc\nd\n";
    let run_at = |level| execute(&rules, &ctx("diff", level), four_lines).unwrap();

    let at_ultra = run_at(Level::Ultra);
    let at_full = run_at(Level::Full);

    assert_eq!(at_ultra, "a\n");
    assert_eq!(at_full, "a\nb\nc\n");
}
/// `head auto` picks its line budget from the level: 15 lines at ultra, 30 at
/// full and 60 at lite, given an 80-line input.
#[test]
fn exec_head_auto_uses_level() {
    let script = r#"
foo:
head auto
"#;
    let rules = parse_ok(script);

    // Eighty numbered lines, more than any of the expected budgets.
    let mut numbered = String::new();
    for n in 1..=80 {
        numbered.push_str(&format!("{n}\n"));
    }

    let run_at = |level| execute(&rules, &ctx("foo", level), &numbered).unwrap();
    let at_ultra = run_at(Level::Ultra);
    let at_full = run_at(Level::Full);
    let at_lite = run_at(Level::Lite);

    assert_eq!(at_ultra.lines().count(), 15);
    assert_eq!(at_full.lines().count(), 30);
    assert_eq!(at_lite.lines().count(), 60);
}
/// A single-line `shell:` op pipes the input through the given command
/// (here `tr`, upper-casing it).
#[test]
fn exec_shell_inline() {
    let script = r#"
foo:
shell: tr a-z A-Z
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let produced = execute(&rules, &context, "hello\n").unwrap();
    // Trailing whitespace is ignored in the comparison.
    assert_eq!(produced.trim_end(), "HELLO");
}
/// A `shell: |` block op runs its body as the command (here `awk`, numbering
/// each input line).
#[test]
fn exec_shell_block() {
    let script = r#"
foo:
shell: |
awk '{ print NR, $0 }'
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let produced = execute(&rules, &context, "a\nb\n").unwrap();
    // Trailing whitespace is ignored in the comparison.
    assert_eq!(produced.trim_end(), "1 a\n2 b");
}
/// The shell command can read `$sub` and `$level`; for `build` at ultra level
/// they expand to `build` and `ultra`.
#[test]
fn exec_shell_sees_env_vars() {
    let script = r#"
build:
shell: printf '%s:%s' "$sub" "$level"
"#;
    let rules = parse_ok(script);
    let context = ctx("build", Level::Ultra);
    let produced = execute(&rules, &context, "").unwrap();
    assert_eq!(produced, "build:ultra\n");
}
/// When the pipeline output is empty, `else-shell:` runs on the original input:
/// `keep` matches nothing here, yet `head -2` still yields the first two lines.
#[test]
fn exec_else_shell_uses_raw_input() {
    let script = r#"
diff:
keep /^IMPOSSIBLE/
else-shell: head -2
"#;
    let rules = parse_ok(script);
    let context = ctx("diff", Level::Full);
    let produced = execute(&rules, &context, "x\ny\nz\n").unwrap();
    assert_eq!(produced, "x\ny\n");
}
/// Calling a macro with an argument expands its body; `n-up 2` turns
/// `head -$1` into a command that keeps the first two lines.
#[test]
fn exec_macro_expansion_with_args() {
    let script = r#"
define n-up(count):
shell: head -$1
foo:
n-up 2
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let produced = execute(&rules, &context, "a\nb\nc\nd\n").unwrap();
    assert_eq!(produced, "a\nb\n");
}
/// `split` cuts the input at the first delimiter match: `pre` (head 1) handles
/// the lines before it, `post` (head 2) handles the delimiter line and the rest.
#[test]
fn exec_split_pre_post() {
    let script = r#"
show:
split /^diff /
pre:
head 1
post:
head 2
"#;
    let rules = parse_ok(script);
    let commit_with_diff =
        "commit abc\nAuthor: x\nDate: y\ndiff --git a b\n+line1\n+line2\n+line3\n";
    let produced = execute(&rules, &ctx("show", Level::Full), commit_with_diff).unwrap();
    assert_eq!(produced, "commit abc\ndiff --git a b\n+line1\n");
}
/// When the split delimiter never matches, the expected output is what `pre`
/// (head 2) produces from the whole input.
#[test]
fn exec_split_no_match() {
    let script = r#"
show:
split /^diff /
pre:
head 2
post:
head 10
"#;
    let rules = parse_ok(script);
    let context = ctx("show", Level::Full);
    let produced = execute(&rules, &context, "a\nb\nc\nd\n").unwrap();
    assert_eq!(produced, "a\nb\n");
}
/// Calling a two-parameter macro with one argument parses, but fails at
/// execution time with an "expects 2 arg" message.
#[test]
fn exec_macro_arg_count_mismatch_errors() {
    let script = r#"
define needs-two(a, b):
head 1
foo:
needs-two 5
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let failure = execute(&rules, &context, "x").unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("expects 2 arg"), "got: {failure}");
}
/// A `python: |` block without a PEP 723 header runs as a plain script over the
/// input (here upper-casing every line).
///
/// The test is skipped when `python3 --version` cannot be run successfully.
#[test]
fn exec_python_plain_when_no_pep723() {
    // A python3 that spawns but exits non-zero is as unusable as a missing one,
    // so require a successful exit status, not merely a successful spawn.
    let python_available = Command::new("python3")
        .arg("--version")
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false);
    if !python_available {
        eprintln!("skipping: python3 not available");
        return;
    }
    let rs = parse_ok(
        r#"
foo:
python: |
import sys
for line in sys.stdin:
print(line.upper(), end="")
"#,
    );
    let out = execute(&rs, &ctx("foo", Level::Full), "hello\nworld\n").unwrap();
    assert_eq!(out, "HELLO\nWORLD\n");
}
/// A macro argument is substituted for `$1` inside a `shell: |` block body;
/// `grab 3` makes the awk script print only the first three lines.
#[test]
fn exec_macro_arg_substitution_in_shell() {
    let script = r#"
define grab(limit):
shell: |
awk -v lim=$1 '{ if (NR<=lim) print }'
foo:
grab 3
"#;
    let rules = parse_ok(script);
    let context = ctx("foo", Level::Full);
    let produced = execute(&rules, &context, "a\nb\nc\nd\ne\n").unwrap();
    assert_eq!(produced, "a\nb\nc\n");
}
/// `has_pep723_header` accepts scripts opening with a `# /// script` block
/// (including a space-prefixed one) and rejects scripts without one.
#[test]
fn pep723_detection() {
    let with_header = [
        "# /// script\n# dependencies = []\n# ///\nimport sys",
        " # /// script\n # ///\nimport sys",
    ];
    for script in with_header {
        assert!(has_pep723_header(script));
    }

    let without_header = [
        "import sys\nprint('hi')",
        "# not pep 723\nprint('hi')",
    ];
    for script in without_header {
        assert!(!has_pep723_header(script));
    }
}
/// The bundled kubectl-compact filter parses, defines a single `clean-yaml`
/// macro whose first op is a Python script, and yields a rule for every
/// subcommand/level pair listed below.
#[test]
fn kubectl_compact_plugin_parses() {
    let rules = parse_ok(include_str!(
        "../../../plugins/kubectl/kubectl-compact/filter.lf"
    ));

    assert_eq!(rules.defines.len(), 1);
    let clean_yaml = &rules.defines[0];
    assert_eq!(clean_yaml.name, "clean-yaml");

    // The script body must contain the `# /// script` marker, the pyyaml
    // dependency line and the YAML loading call.
    match &clean_yaml.ops[0] {
        Op::Python(script) => {
            let required_fragments = [
                "# /// script",
                "dependencies = [\"pyyaml>=6\"]",
                "yaml.safe_load_all",
            ];
            for fragment in required_fragments {
                assert!(script.contains(fragment));
            }
        }
        other => panic!("expected Python op, got {other:?}"),
    }

    // Each of these lookups must resolve to some rule.
    let expected_hits = [
        ("get", Level::Full),
        ("logs", Level::Ultra),
        ("logs", Level::Full),
        ("events", Level::Ultra),
        ("describe", Level::Full),
    ];
    for (sub, level) in expected_hits {
        assert!(rules.select(sub, level).is_some());
    }
}
}