use crate::ast::*;
use crate::bytecode::*;
use crate::error::{Error, Result};
use std::collections::{HashMap, HashSet};
const SPECIAL_VARS: &[&str] = &[
"NR",
"FNR",
"NF",
"FILENAME",
"FS",
"OFS",
"ORS",
"RS",
"RT",
"SUBSEP",
"OFMT",
"CONVFMT",
"FPAT",
"RSTART",
"RLENGTH",
"ENVIRON",
"ARGC",
"ARGV",
"ARGIND",
"ERRNO",
"PROCINFO",
"SYMTAB",
"FUNCTAB",
"FIELDWIDTHS",
"IGNORECASE",
"BINMODE",
"LINT",
"TEXTDOMAIN",
];
fn flatten_print_args(e: &Expr) -> Vec<&Expr> {
match e {
Expr::Tuple(parts) => parts.iter().flat_map(|p| flatten_print_args(p)).collect(),
_ => vec![e],
}
}
enum StructuralKind {
Loop {
break_patches: Vec<usize>,
continue_patches: Vec<usize>,
},
Switch {
break_patches: Vec<usize>,
},
}
pub struct Compiler {
pub strings: StringPool,
structural_stack: Vec<StructuralKind>,
var_slots: HashMap<String, u16>,
next_slot: u16,
array_names: HashSet<String>,
current_func_params: HashSet<String>,
}
impl Compiler {
pub fn compile_program(prog: &Program) -> Result<CompiledProgram> {
validate_program(prog)?;
let array_names = collect_array_names(prog);
let mut c = Compiler {
strings: StringPool::default(),
structural_stack: Vec::new(),
var_slots: HashMap::new(),
next_slot: 0,
array_names,
current_func_params: HashSet::new(),
};
let mut begin_chunks = Vec::new();
let mut end_chunks = Vec::new();
let mut beginfile_chunks = Vec::new();
let mut endfile_chunks = Vec::new();
let mut record_rules = Vec::new();
for (i, rule) in prog.rules.iter().enumerate() {
match &rule.pattern {
Pattern::Begin => begin_chunks.push(c.compile_chunk(&rule.stmts)),
Pattern::End => end_chunks.push(c.compile_chunk(&rule.stmts)),
Pattern::BeginFile => beginfile_chunks.push(c.compile_chunk(&rule.stmts)),
Pattern::EndFile => endfile_chunks.push(c.compile_chunk(&rule.stmts)),
pat => {
let cpat = c.compile_pattern(pat);
let body = c.compile_chunk(&rule.stmts);
record_rules.push(CompiledRule {
pattern: cpat,
body,
original_index: i,
});
}
}
}
let mut functions = HashMap::new();
for (name, fd) in &prog.funcs {
c.current_func_params = fd.params.iter().cloned().collect();
let body = c.compile_chunk(&fd.body);
c.current_func_params.clear();
functions.insert(
name.clone(),
CompiledFunc {
params: fd.params.clone(),
body,
},
);
}
let slot_count = c.next_slot;
let mut slot_names = vec![String::new(); slot_count as usize];
let mut slot_map = HashMap::new();
for (name, &idx) in &c.var_slots {
slot_names[idx as usize] = name.clone();
slot_map.insert(name.clone(), idx);
}
let mut array_var_names: Vec<String> = c.array_names.iter().cloned().collect();
array_var_names.sort();
Ok(CompiledProgram {
begin_chunks,
end_chunks,
beginfile_chunks,
endfile_chunks,
record_rules,
functions,
strings: c.strings,
slot_count,
slot_names,
slot_map,
array_var_names,
})
}
fn var_slot(&mut self, name: &str) -> Option<u16> {
if SPECIAL_VARS.contains(&name) {
return None;
}
if self.array_names.contains(name) {
return None;
}
if self.current_func_params.contains(name) {
return None;
}
if let Some(&idx) = self.var_slots.get(name) {
return Some(idx);
}
let idx = self.next_slot;
self.next_slot += 1;
self.var_slots.insert(name.to_string(), idx);
Some(idx)
}
fn compile_chunk(&mut self, stmts: &[Stmt]) -> Chunk {
let mut ops = Vec::new();
self.compile_stmts(stmts, &mut ops);
peephole_optimize(&mut ops);
Chunk::from_ops(ops)
}
fn compile_pattern(&mut self, pat: &Pattern) -> CompiledPattern {
match pat {
Pattern::Empty => CompiledPattern::Always,
Pattern::Regexp(re) => {
let idx = self.strings.intern(re);
if is_literal_regex(re) {
CompiledPattern::LiteralRegexp(idx)
} else {
CompiledPattern::Regexp(idx)
}
}
Pattern::Expr(e) => {
let mut ops = Vec::new();
self.compile_expr(e, &mut ops);
peephole_optimize(&mut ops);
CompiledPattern::Expr(Chunk::from_ops(ops))
}
Pattern::Range(p1, p2) => CompiledPattern::Range {
start: self.compile_range_endpoint(p1),
end: self.compile_range_endpoint(p2),
},
Pattern::Begin | Pattern::End | Pattern::BeginFile | Pattern::EndFile => {
CompiledPattern::Always
}
}
}
fn compile_range_endpoint(&mut self, pat: &Pattern) -> CompiledRangeEndpoint {
match pat {
Pattern::Empty => CompiledRangeEndpoint::Always,
Pattern::Regexp(re) => {
let idx = self.strings.intern(re);
if is_literal_regex(re) {
CompiledRangeEndpoint::LiteralRegexp(idx)
} else {
CompiledRangeEndpoint::Regexp(idx)
}
}
Pattern::Expr(e) => {
let mut ops = Vec::new();
self.compile_expr(e, &mut ops);
peephole_optimize(&mut ops);
CompiledRangeEndpoint::Expr(Chunk::from_ops(ops))
}
Pattern::Begin | Pattern::End | Pattern::BeginFile | Pattern::EndFile => {
CompiledRangeEndpoint::Never
}
Pattern::Range(_, _) => CompiledRangeEndpoint::NestedRangeError,
}
}
fn compile_stmts(&mut self, stmts: &[Stmt], ops: &mut Vec<Op>) {
for s in stmts {
self.compile_stmt(s, ops);
}
}
fn compile_stmt(&mut self, stmt: &Stmt, ops: &mut Vec<Op>) {
match stmt {
Stmt::Expr(e) => {
self.compile_expr(e, ops);
ops.push(Op::Pop);
}
Stmt::Print { args, redir } => {
self.compile_print(args, redir, false, ops);
}
Stmt::Printf { args, redir } => {
self.compile_print(args, redir, true, ops);
}
Stmt::If { cond, then_, else_ } => {
self.compile_expr(cond, ops);
let jump_else = ops.len();
ops.push(Op::JumpIfFalsePop(0));
self.compile_stmts(then_, ops);
if else_.is_empty() {
let after = ops.len();
ops[jump_else] = Op::JumpIfFalsePop(after);
} else {
let jump_end = ops.len();
ops.push(Op::Jump(0));
let else_start = ops.len();
ops[jump_else] = Op::JumpIfFalsePop(else_start);
self.compile_stmts(else_, ops);
let after = ops.len();
ops[jump_end] = Op::Jump(after);
}
}
Stmt::While { cond, body } => {
let loop_start = ops.len();
self.structural_stack.push(StructuralKind::Loop {
break_patches: Vec::new(),
continue_patches: Vec::new(),
});
self.compile_expr(cond, ops);
let cond_jump = ops.len();
ops.push(Op::JumpIfFalsePop(0));
self.compile_stmts(body, ops);
ops.push(Op::Jump(loop_start));
let after_loop = ops.len();
ops[cond_jump] = Op::JumpIfFalsePop(after_loop);
let info = match self.structural_stack.pop().unwrap() {
StructuralKind::Loop {
break_patches,
continue_patches,
} => (break_patches, continue_patches),
StructuralKind::Switch { .. } => unreachable!(),
};
for pos in info.0 {
ops[pos] = Op::Jump(after_loop);
}
for pos in info.1 {
ops[pos] = Op::Jump(loop_start);
}
}
Stmt::DoWhile { body, cond } => {
let body_start = ops.len();
self.structural_stack.push(StructuralKind::Loop {
break_patches: Vec::new(),
continue_patches: Vec::new(),
});
self.compile_stmts(body, ops);
let cond_start = ops.len();
self.compile_expr(cond, ops);
ops.push(Op::JumpIfTruePop(body_start));
let after_loop = ops.len();
let info = match self.structural_stack.pop().unwrap() {
StructuralKind::Loop {
break_patches,
continue_patches,
} => (break_patches, continue_patches),
StructuralKind::Switch { .. } => unreachable!(),
};
for pos in info.0 {
ops[pos] = Op::Jump(after_loop);
}
for pos in info.1 {
ops[pos] = Op::Jump(cond_start);
}
}
Stmt::ForC {
init,
cond,
iter,
body,
} => {
if let Some(e) = init {
self.compile_expr(e, ops);
ops.push(Op::Pop);
}
let loop_start = ops.len();
self.structural_stack.push(StructuralKind::Loop {
break_patches: Vec::new(),
continue_patches: Vec::new(),
});
let mut cond_jump = None;
if let Some(c) = cond {
self.compile_expr(c, ops);
cond_jump = Some(ops.len());
ops.push(Op::JumpIfFalsePop(0));
}
self.compile_stmts(body, ops);
let continue_target = ops.len();
if let Some(it) = iter {
self.compile_expr(it, ops);
ops.push(Op::Pop);
}
ops.push(Op::Jump(loop_start));
let after_loop = ops.len();
if let Some(cj) = cond_jump {
ops[cj] = Op::JumpIfFalsePop(after_loop);
}
let info = match self.structural_stack.pop().unwrap() {
StructuralKind::Loop {
break_patches,
continue_patches,
} => (break_patches, continue_patches),
StructuralKind::Switch { .. } => unreachable!(),
};
for pos in info.0 {
ops[pos] = Op::Jump(after_loop);
}
for pos in info.1 {
ops[pos] = Op::Jump(continue_target);
}
}
Stmt::ForIn { var, arr, body } => {
let arr_idx = self.strings.intern(arr);
let var_idx = self.strings.intern(var);
ops.push(Op::ForInStart(arr_idx));
let loop_top = ops.len();
self.structural_stack.push(StructuralKind::Loop {
break_patches: Vec::new(),
continue_patches: Vec::new(),
});
let next_pos = ops.len();
ops.push(Op::ForInNext {
var: var_idx,
end_jump: 0,
});
self.compile_stmts(body, ops);
ops.push(Op::Jump(loop_top));
let after_loop = ops.len();
ops.push(Op::ForInEnd);
let cleanup_done = ops.len();
ops[next_pos] = Op::ForInNext {
var: var_idx,
end_jump: after_loop,
};
let info = match self.structural_stack.pop().unwrap() {
StructuralKind::Loop {
break_patches,
continue_patches,
} => (break_patches, continue_patches),
StructuralKind::Switch { .. } => unreachable!(),
};
for pos in info.0 {
ops[pos] = Op::Jump(after_loop);
}
for pos in info.1 {
ops[pos] = Op::Jump(loop_top);
}
let _ = cleanup_done;
}
Stmt::Block(stmts) => {
self.compile_stmts(stmts, ops);
}
Stmt::Break => {
let pos = ops.len();
ops.push(Op::Jump(0));
match self.structural_stack.last_mut() {
Some(StructuralKind::Loop { break_patches, .. }) => break_patches.push(pos),
Some(StructuralKind::Switch { break_patches }) => break_patches.push(pos),
None => {}
}
}
Stmt::Continue => {
let pos = ops.len();
ops.push(Op::Jump(0));
for ctx in self.structural_stack.iter_mut().rev() {
if let StructuralKind::Loop {
continue_patches, ..
} = ctx
{
continue_patches.push(pos);
break;
}
}
}
Stmt::Next => {
ops.push(Op::Next);
}
Stmt::NextFile => {
ops.push(Op::NextFile);
}
Stmt::Exit(e) => {
if let Some(ex) = e {
self.compile_expr(ex, ops);
ops.push(Op::ExitWithCode);
} else {
ops.push(Op::ExitDefault);
}
}
Stmt::Return(e) => {
if let Some(ex) = e {
self.compile_expr(ex, ops);
ops.push(Op::ReturnVal);
} else {
ops.push(Op::ReturnEmpty);
}
}
Stmt::Delete { name, indices } => {
let arr_idx = self.strings.intern(name);
match indices {
None => ops.push(Op::DeleteArray(arr_idx)),
Some(ixs) => {
self.compile_array_key(ixs, ops);
ops.push(Op::DeleteElem(arr_idx));
}
}
}
Stmt::Switch { expr, arms } => {
self.compile_switch(expr, arms, ops);
}
Stmt::GetLine {
pipe_cmd,
var,
redir,
} => {
let var_idx = var.as_ref().map(|v| self.strings.intern(v));
if let Some(cmd) = pipe_cmd {
self.compile_expr(cmd, ops);
}
match redir {
GetlineRedir::Primary => {
let src = if pipe_cmd.is_some() {
GetlineSource::Pipe
} else {
GetlineSource::Primary
};
ops.push(Op::GetLine {
var: var_idx,
source: src,
push_result: false,
});
}
GetlineRedir::File(e) => {
self.compile_expr(e, ops);
ops.push(Op::GetLine {
var: var_idx,
source: GetlineSource::File,
push_result: false,
});
}
GetlineRedir::Coproc(e) => {
self.compile_expr(e, ops);
ops.push(Op::GetLine {
var: var_idx,
source: GetlineSource::Coproc,
push_result: false,
});
}
}
}
}
}
fn compile_print(
&mut self,
args: &[Expr],
redir: &Option<PrintRedir>,
is_printf: bool,
ops: &mut Vec<Op>,
) {
let argc = if is_printf {
for a in args {
self.compile_expr(a, ops);
}
args.len() as u16
} else {
let mut n = 0usize;
for a in args {
let flat = flatten_print_args(a);
n += flat.len();
for fe in flat {
self.compile_expr(fe, ops);
}
}
u16::try_from(n).unwrap_or(u16::MAX)
};
let rk = match redir {
None => RedirKind::Stdout,
Some(PrintRedir::Overwrite(e)) => {
self.compile_expr(e, ops);
RedirKind::Overwrite
}
Some(PrintRedir::Append(e)) => {
self.compile_expr(e, ops);
RedirKind::Append
}
Some(PrintRedir::Pipe(e)) => {
self.compile_expr(e, ops);
RedirKind::Pipe
}
Some(PrintRedir::Coproc(e)) => {
self.compile_expr(e, ops);
RedirKind::Coproc
}
};
if is_printf {
ops.push(Op::Printf { argc, redir: rk });
} else {
ops.push(Op::Print { argc, redir: rk });
}
}
fn compile_expr(&mut self, expr: &Expr, ops: &mut Vec<Op>) {
match expr {
Expr::Number(n) => ops.push(Op::PushNum(*n)),
Expr::IntegerLiteral(s) => {
let idx = self.strings.intern(s);
ops.push(Op::PushNumDecimalStr(idx));
}
Expr::Str(s) => {
let idx = self.strings.intern(s);
ops.push(Op::PushStr(idx));
}
Expr::RegexpLiteral(s) => {
let idx = self.strings.intern(s);
ops.push(Op::PushRegexp(idx));
}
Expr::Var(name) => {
match name.as_str() {
"NR" => ops.push(Op::GetNR),
"FNR" => ops.push(Op::GetFNR),
"NF" => ops.push(Op::GetNF),
_ => {
if let Some(slot) = self.var_slot(name) {
ops.push(Op::GetSlot(slot));
} else {
let idx = self.strings.intern(name);
ops.push(Op::GetVar(idx));
}
}
}
}
Expr::Field(inner) => {
self.compile_expr(inner, ops);
ops.push(Op::GetField);
}
Expr::Index { name, indices } => {
let arr_idx = self.strings.intern(name);
self.compile_array_key(indices, ops);
ops.push(Op::GetArrayElem(arr_idx));
}
Expr::Binary {
op: BinOp::And,
left,
right,
} => {
self.compile_expr(left, ops);
let false_jump = ops.len();
ops.push(Op::JumpIfFalsePop(0));
self.compile_expr(right, ops);
ops.push(Op::ToBool);
let end_jump = ops.len();
ops.push(Op::Jump(0));
let false_branch = ops.len();
ops.push(Op::PushNum(0.0));
let after = ops.len();
ops[false_jump] = Op::JumpIfFalsePop(false_branch);
ops[end_jump] = Op::Jump(after);
}
Expr::Binary {
op: BinOp::Or,
left,
right,
} => {
self.compile_expr(left, ops);
let true_jump = ops.len();
ops.push(Op::JumpIfTruePop(0));
self.compile_expr(right, ops);
ops.push(Op::ToBool);
let end_jump = ops.len();
ops.push(Op::Jump(0));
let true_branch = ops.len();
ops.push(Op::PushNum(1.0));
let after = ops.len();
ops[true_jump] = Op::JumpIfTruePop(true_branch);
ops[end_jump] = Op::Jump(after);
}
Expr::Binary { op, left, right } => {
self.compile_expr(left, ops);
self.compile_expr(right, ops);
ops.push(match op {
BinOp::Add => Op::Add,
BinOp::Sub => Op::Sub,
BinOp::Mul => Op::Mul,
BinOp::Div => Op::Div,
BinOp::Mod => Op::Mod,
BinOp::Eq => Op::CmpEq,
BinOp::Ne => Op::CmpNe,
BinOp::Lt => Op::CmpLt,
BinOp::Le => Op::CmpLe,
BinOp::Gt => Op::CmpGt,
BinOp::Ge => Op::CmpGe,
BinOp::Concat => Op::Concat,
BinOp::Match => Op::RegexMatch,
BinOp::NotMatch => Op::RegexNotMatch,
BinOp::Pow => Op::Pow,
BinOp::And | BinOp::Or => unreachable!("handled above"),
});
}
Expr::Unary { op, expr: inner } => {
self.compile_expr(inner, ops);
ops.push(match op {
UnaryOp::Neg => Op::Neg,
UnaryOp::Pos => Op::Pos,
UnaryOp::Not => Op::Not,
});
}
Expr::Assign { name, op, rhs } => {
self.compile_expr(rhs, ops);
if let Some(bop) = op {
if let Some(slot) = self.var_slot(name) {
ops.push(Op::CompoundAssignSlot(slot, *bop));
} else {
let var_idx = self.strings.intern(name);
ops.push(Op::CompoundAssignVar(var_idx, *bop));
}
} else if let Some(slot) = self.var_slot(name) {
ops.push(Op::SetSlot(slot));
} else {
let var_idx = self.strings.intern(name);
ops.push(Op::SetVar(var_idx));
}
}
Expr::AssignField { field, op, rhs } => {
self.compile_expr(field, ops);
self.compile_expr(rhs, ops);
if let Some(bop) = op {
ops.push(Op::CompoundAssignField(*bop));
} else {
ops.push(Op::SetField);
}
}
Expr::AssignIndex {
name,
indices,
op,
rhs,
} => {
let arr_idx = self.strings.intern(name);
self.compile_array_key(indices, ops);
self.compile_expr(rhs, ops);
if let Some(bop) = op {
ops.push(Op::CompoundAssignIndex(arr_idx, *bop));
} else {
ops.push(Op::SetArrayElem(arr_idx));
}
}
Expr::Call { name, args } => {
self.compile_call(name, args, ops);
}
Expr::IndirectCall { callee, args } => {
for a in args {
self.compile_expr(a, ops);
}
self.compile_expr(callee, ops);
ops.push(Op::CallIndirect(args.len() as u16));
}
Expr::Ternary { cond, then_, else_ } => {
self.compile_expr(cond, ops);
let jump_else = ops.len();
ops.push(Op::JumpIfFalsePop(0));
self.compile_expr(then_, ops);
let jump_end = ops.len();
ops.push(Op::Jump(0));
let else_start = ops.len();
self.compile_expr(else_, ops);
let after = ops.len();
ops[jump_else] = Op::JumpIfFalsePop(else_start);
ops[jump_end] = Op::Jump(after);
}
Expr::In { key, arr } => {
let arr_idx = self.strings.intern(arr);
match key.as_ref() {
Expr::Tuple(parts) => self.compile_array_key(parts, ops),
_ => self.compile_expr(key, ops),
}
ops.push(Op::InArray(arr_idx));
}
Expr::Tuple(_) => {
unreachable!("Tuple should be rejected by validate_program before compile");
}
Expr::IncDec { op, target } => match target {
IncDecTarget::Var(name) => {
if let Some(slot) = self.var_slot(name) {
ops.push(Op::IncDecSlot(slot, *op));
} else {
let idx = self.strings.intern(name);
ops.push(Op::IncDecVar(idx, *op));
}
}
IncDecTarget::Field(inner) => {
self.compile_expr(inner, ops);
ops.push(Op::IncDecField(*op));
}
IncDecTarget::Index { name, indices } => {
let arr_idx = self.strings.intern(name);
self.compile_array_key(indices, ops);
ops.push(Op::IncDecIndex(arr_idx, *op));
}
},
Expr::GetLine {
pipe_cmd,
var,
redir,
} => {
let var_idx = var.as_ref().map(|v| self.strings.intern(v));
if let Some(cmd) = pipe_cmd {
self.compile_expr(cmd, ops);
}
match redir {
GetlineRedir::Primary => {
let src = if pipe_cmd.is_some() {
GetlineSource::Pipe
} else {
GetlineSource::Primary
};
ops.push(Op::GetLine {
var: var_idx,
source: src,
push_result: true,
});
}
GetlineRedir::File(e) => {
self.compile_expr(e, ops);
ops.push(Op::GetLine {
var: var_idx,
source: GetlineSource::File,
push_result: true,
});
}
GetlineRedir::Coproc(e) => {
self.compile_expr(e, ops);
ops.push(Op::GetLine {
var: var_idx,
source: GetlineSource::Coproc,
push_result: true,
});
}
}
}
}
}
fn compile_call(&mut self, name: &str, args: &[Expr], ops: &mut Vec<Op>) {
match name {
"stat" | "statvfs" | "fts" | "writea" | "reada" | "readdir" => {
if args.len() == 2 {
if let Expr::Var(arr) = &args[1] {
self.compile_expr(&args[0], ops);
let idx = self.strings.intern(arr);
ops.push(Op::PushStr(idx));
ops.push(Op::CallBuiltin(self.strings.intern(name), 2));
return;
}
}
for a in args {
self.compile_expr(a, ops);
}
ops.push(Op::CallBuiltin(
self.strings.intern(name),
args.len() as u16,
));
}
"gettimeofday" | "getlocaltime" => {
if let Some(Expr::Var(arr)) = args.first() {
let idx = self.strings.intern(arr);
ops.push(Op::PushStr(idx));
for a in &args[1..] {
self.compile_expr(a, ops);
}
ops.push(Op::CallBuiltin(
self.strings.intern(name),
args.len() as u16,
));
return;
}
for a in args {
self.compile_expr(a, ops);
}
ops.push(Op::CallBuiltin(
self.strings.intern(name),
args.len() as u16,
));
}
"length" => {
if args.len() == 1 {
if let Expr::Var(arr) = &args[0] {
if arr == "SYMTAB" {
ops.push(Op::SymtabKeyCount);
return;
}
}
}
for a in args {
self.compile_expr(a, ops);
}
let name_idx = self.strings.intern("length");
ops.push(Op::CallBuiltin(name_idx, args.len() as u16));
}
"sub" => self.compile_sub_gsub(args, false, ops),
"gsub" => self.compile_sub_gsub(args, true, ops),
"split" => self.compile_split(args, ops),
"patsplit" => self.compile_patsplit(args, ops),
"match" => self.compile_match(args, ops),
"asort" => self.compile_asort(args, ops),
"asorti" => self.compile_asorti(args, ops),
"typeof" => {
if args.len() != 1 {
for a in args {
self.compile_expr(a, ops);
}
let name_idx = self.strings.intern("typeof");
ops.push(Op::CallBuiltin(name_idx, args.len() as u16));
return;
}
match &args[0] {
Expr::Var(name) => {
if let Some(slot) = self.var_slot(name) {
ops.push(Op::TypeofSlot(slot));
} else {
let idx = self.strings.intern(name);
ops.push(Op::TypeofVar(idx));
}
}
Expr::Index { name, indices } => {
let arr_idx = self.strings.intern(name);
self.compile_array_key(indices, ops);
ops.push(Op::TypeofArrayElem(arr_idx));
}
Expr::Field(inner) => {
self.compile_expr(inner, ops);
ops.push(Op::TypeofField);
}
other => {
self.compile_expr(other, ops);
ops.push(Op::TypeofValue);
}
}
}
_ => {
for a in args {
self.compile_expr(a, ops);
}
let name_idx = self.strings.intern(name);
let argc = args.len() as u16;
ops.push(Op::CallBuiltin(name_idx, argc));
}
}
}
fn compile_sub_gsub(&mut self, args: &[Expr], is_global: bool, ops: &mut Vec<Op>) {
self.compile_expr(&args[0], ops);
self.compile_expr(&args[1], ops);
let target = if args.len() >= 3 {
match &args[2] {
Expr::Var(name) => {
if let Some(slot) = self.var_slot(name) {
SubTarget::SlotVar(slot)
} else {
SubTarget::Var(self.strings.intern(name))
}
}
Expr::Field(inner) => {
self.compile_expr(inner, ops);
SubTarget::Field
}
Expr::Index { name, indices } => {
let arr_idx = self.strings.intern(name);
self.compile_array_key(indices, ops);
SubTarget::Index(arr_idx)
}
_ => SubTarget::Record,
}
} else {
SubTarget::Record
};
if is_global {
ops.push(Op::GsubFn(target));
} else {
ops.push(Op::SubFn(target));
}
}
fn compile_split(&mut self, args: &[Expr], ops: &mut Vec<Op>) {
self.compile_expr(&args[0], ops);
let arr_name = match &args[1] {
Expr::Var(n) => n.as_str(),
_ => "",
};
let arr_idx = self.strings.intern(arr_name);
let has_fs = args.len() >= 3;
if has_fs {
self.compile_expr(&args[2], ops);
}
ops.push(Op::Split {
arr: arr_idx,
has_fs,
});
}
fn compile_patsplit(&mut self, args: &[Expr], ops: &mut Vec<Op>) {
self.compile_expr(&args[0], ops);
let arr_name = match &args[1] {
Expr::Var(n) => n.as_str(),
_ => "",
};
let arr_idx = self.strings.intern(arr_name);
let has_fp = args.len() >= 3;
if has_fp {
self.compile_expr(&args[2], ops);
}
let seps = if args.len() >= 4 {
match &args[3] {
Expr::Var(n) => Some(self.strings.intern(n)),
_ => None,
}
} else {
None
};
ops.push(Op::Patsplit {
arr: arr_idx,
has_fp,
seps,
});
}
fn compile_match(&mut self, args: &[Expr], ops: &mut Vec<Op>) {
self.compile_expr(&args[0], ops);
self.compile_expr(&args[1], ops);
let arr = if args.len() >= 3 {
match &args[2] {
Expr::Var(n) => Some(self.strings.intern(n)),
_ => None,
}
} else {
None
};
ops.push(Op::MatchBuiltin { arr });
}
fn compile_asort(&mut self, args: &[Expr], ops: &mut Vec<Op>) {
let src = match args.first() {
Some(Expr::Var(n)) => self.strings.intern(n),
_ => self.strings.intern(""),
};
let dest = if args.len() >= 2 {
match &args[1] {
Expr::Var(n) => Some(self.strings.intern(n)),
_ => None,
}
} else {
None
};
ops.push(Op::Asort { src, dest });
}
fn compile_asorti(&mut self, args: &[Expr], ops: &mut Vec<Op>) {
let src = match args.first() {
Some(Expr::Var(n)) => self.strings.intern(n),
_ => self.strings.intern(""),
};
let dest = if args.len() >= 2 {
match &args[1] {
Expr::Var(n) => Some(self.strings.intern(n)),
_ => None,
}
} else {
None
};
ops.push(Op::Asorti { src, dest });
}
fn compile_switch(&mut self, expr: &Expr, arms: &[SwitchArm], ops: &mut Vec<Op>) {
self.structural_stack.push(StructuralKind::Switch {
break_patches: Vec::new(),
});
self.compile_expr(expr, ops);
if arms.is_empty() {
ops.push(Op::Pop);
let _ = self.structural_stack.pop();
return;
}
let mut pending_jfail: Option<usize> = None;
let mut end_jumps: Vec<usize> = Vec::new();
for arm in arms {
match arm {
SwitchArm::Case { label, stmts } => {
if let Some(p) = pending_jfail.take() {
ops[p] = Op::JumpIfFalsePop(ops.len());
}
ops.push(Op::Dup);
match label {
SwitchLabel::Expr(e) => {
self.compile_expr(e, ops);
ops.push(Op::CmpEq);
}
SwitchLabel::Regexp(re) => {
let idx = self.strings.intern(re);
ops.push(Op::PushStr(idx));
ops.push(Op::RegexMatch);
}
}
let jfail = ops.len();
ops.push(Op::JumpIfFalsePop(0));
pending_jfail = Some(jfail);
ops.push(Op::Pop);
ops.push(Op::Pop);
self.compile_stmts(stmts, ops);
let jend = ops.len();
ops.push(Op::Jump(0));
end_jumps.push(jend);
}
SwitchArm::Default { stmts } => {
if let Some(p) = pending_jfail.take() {
ops[p] = Op::JumpIfFalsePop(ops.len());
}
ops.push(Op::Pop);
self.compile_stmts(stmts, ops);
}
}
}
if let Some(p) = pending_jfail {
let pop_pos = ops.len();
ops.push(Op::Pop);
ops[p] = Op::JumpIfFalsePop(pop_pos);
}
let end = ops.len();
for j in end_jumps {
ops[j] = Op::Jump(end);
}
if let StructuralKind::Switch { break_patches } = self.structural_stack.pop().unwrap() {
for bp in break_patches {
ops[bp] = Op::Jump(end);
}
}
}
fn compile_array_key(&mut self, indices: &[Expr], ops: &mut Vec<Op>) {
for ix in indices {
self.compile_expr(ix, ops);
}
if indices.len() > 1 {
ops.push(Op::JoinArrayKey(indices.len() as u16));
}
}
}
#[cfg(test)]
fn compiler_isolated() -> Compiler {
Compiler {
strings: StringPool::default(),
structural_stack: Vec::new(),
var_slots: HashMap::new(),
next_slot: 0,
array_names: HashSet::new(),
current_func_params: HashSet::new(),
}
}
#[cfg(test)]
fn lower_pattern(pat: &Pattern) -> (CompiledPattern, StringPool) {
let mut c = compiler_isolated();
let p = c.compile_pattern(pat);
(p, c.strings)
}
#[cfg(test)]
fn lower_range_endpoint(pat: &Pattern) -> (CompiledRangeEndpoint, StringPool) {
let mut c = compiler_isolated();
let p = c.compile_range_endpoint(pat);
(p, c.strings)
}
fn collect_array_names(prog: &Program) -> HashSet<String> {
let mut names = HashSet::new();
for rule in &prog.rules {
for s in &rule.stmts {
collect_array_names_stmt(s, &mut names);
}
}
for fd in prog.funcs.values() {
for s in &fd.body {
collect_array_names_stmt(s, &mut names);
}
}
names
}
fn collect_array_names_stmt(s: &Stmt, names: &mut HashSet<String>) {
match s {
Stmt::If { cond, then_, else_ } => {
collect_array_names_expr(cond, names);
for t in then_ {
collect_array_names_stmt(t, names);
}
for t in else_ {
collect_array_names_stmt(t, names);
}
}
Stmt::While { cond, body } => {
collect_array_names_expr(cond, names);
for t in body {
collect_array_names_stmt(t, names);
}
}
Stmt::DoWhile { cond, body } => {
collect_array_names_expr(cond, names);
for t in body {
collect_array_names_stmt(t, names);
}
}
Stmt::ForC {
init,
cond,
iter,
body,
} => {
if let Some(e) = init {
collect_array_names_expr(e, names);
}
if let Some(e) = cond {
collect_array_names_expr(e, names);
}
if let Some(e) = iter {
collect_array_names_expr(e, names);
}
for t in body {
collect_array_names_stmt(t, names);
}
}
Stmt::ForIn { arr, body, .. } => {
names.insert(arr.clone());
for t in body {
collect_array_names_stmt(t, names);
}
}
Stmt::Block(stmts) => {
for t in stmts {
collect_array_names_stmt(t, names);
}
}
Stmt::Expr(e) => collect_array_names_expr(e, names),
Stmt::Print { args, redir } | Stmt::Printf { args, redir } => {
for a in args {
collect_array_names_expr(a, names);
}
if let Some(r) = redir {
match r {
PrintRedir::Overwrite(e)
| PrintRedir::Append(e)
| PrintRedir::Pipe(e)
| PrintRedir::Coproc(e) => collect_array_names_expr(e, names),
}
}
}
Stmt::Delete { name, indices } => {
names.insert(name.clone());
if let Some(ixs) = indices {
for e in ixs {
collect_array_names_expr(e, names);
}
}
}
Stmt::Exit(Some(e)) | Stmt::Return(Some(e)) => collect_array_names_expr(e, names),
Stmt::GetLine {
pipe_cmd, redir, ..
} => {
if let Some(p) = pipe_cmd {
collect_array_names_expr(p, names);
}
match redir {
GetlineRedir::File(e) | GetlineRedir::Coproc(e) => {
collect_array_names_expr(e, names)
}
GetlineRedir::Primary => {}
}
}
Stmt::Switch { expr, arms } => {
collect_array_names_expr(expr, names);
for arm in arms {
match arm {
SwitchArm::Case { label, stmts } => {
if let SwitchLabel::Expr(e) = label {
collect_array_names_expr(e, names);
}
for t in stmts {
collect_array_names_stmt(t, names);
}
}
SwitchArm::Default { stmts } => {
for t in stmts {
collect_array_names_stmt(t, names);
}
}
}
}
}
Stmt::Break
| Stmt::Continue
| Stmt::Next
| Stmt::NextFile
| Stmt::Exit(None)
| Stmt::Return(None) => {}
}
}
fn collect_array_names_expr(e: &Expr, names: &mut HashSet<String>) {
match e {
Expr::Index { name, indices } => {
names.insert(name.clone());
for ix in indices {
collect_array_names_expr(ix, names);
}
}
Expr::AssignIndex {
name, indices, rhs, ..
} => {
names.insert(name.clone());
for ix in indices {
collect_array_names_expr(ix, names);
}
collect_array_names_expr(rhs, names);
}
Expr::In { arr, key } => {
names.insert(arr.clone());
collect_array_names_expr(key, names);
}
Expr::Binary { left, right, .. } => {
collect_array_names_expr(left, names);
collect_array_names_expr(right, names);
}
Expr::Unary { expr, .. } => collect_array_names_expr(expr, names),
Expr::Assign { rhs, .. } => collect_array_names_expr(rhs, names),
Expr::AssignField { field, rhs, .. } => {
collect_array_names_expr(field, names);
collect_array_names_expr(rhs, names);
}
Expr::Field(inner) => collect_array_names_expr(inner, names),
Expr::Call { args, .. } => {
for a in args {
collect_array_names_expr(a, names);
}
}
Expr::IndirectCall { callee, args } => {
collect_array_names_expr(callee, names);
for a in args {
collect_array_names_expr(a, names);
}
}
Expr::Ternary { cond, then_, else_ } => {
collect_array_names_expr(cond, names);
collect_array_names_expr(then_, names);
collect_array_names_expr(else_, names);
}
Expr::IncDec { target, .. } => match target {
IncDecTarget::Index { name, indices } => {
names.insert(name.clone());
for ix in indices {
collect_array_names_expr(ix, names);
}
}
IncDecTarget::Field(e) => collect_array_names_expr(e, names),
IncDecTarget::Var(_) => {}
},
Expr::GetLine {
pipe_cmd, redir, ..
} => {
if let Some(p) = pipe_cmd {
collect_array_names_expr(p, names);
}
match redir {
GetlineRedir::File(e) | GetlineRedir::Coproc(e) => {
collect_array_names_expr(e, names)
}
GetlineRedir::Primary => {}
}
}
Expr::Tuple(parts) => {
for p in parts {
collect_array_names_expr(p, names);
}
}
Expr::Number(_)
| Expr::IntegerLiteral(_)
| Expr::Str(_)
| Expr::RegexpLiteral(_)
| Expr::Var(_) => {}
}
}
fn peephole_optimize(ops: &mut Vec<Op>) {
let mut fusions: Vec<(usize, Op, usize)> = Vec::new();
let mut i = 0;
while i < ops.len() {
if i + 3 <= ops.len() {
if let (
Op::PushNum(n),
Op::GetField,
Op::Print {
argc: 1,
redir: RedirKind::Stdout,
},
) = (ops[i], ops[i + 1], ops[i + 2])
{
let field = n as u16;
if n >= 0.0 && n == field as f64 {
fusions.push((i, Op::PrintFieldStdout(field), 2));
i += 3;
continue;
}
}
}
if i + 4 <= ops.len() {
if let (
Op::PushNum(n),
Op::GetField,
Op::CompoundAssignSlot(slot, BinOp::Add),
Op::Pop,
) = (ops[i], ops[i + 1], ops[i + 2], ops[i + 3])
{
let field = n as u16;
if n >= 0.0 && n == field as f64 {
fusions.push((i, Op::AddFieldToSlot { field, slot }, 3));
i += 4;
continue;
}
}
}
if i + 2 <= ops.len() {
if let (Op::PushNum(n), Op::GetField) = (ops[i], ops[i + 1]) {
let field = n as u16;
if n >= 0.0 && n == field as f64 {
let next = if i + 2 < ops.len() {
Some(ops[i + 2])
} else {
None
};
let is_numeric_consumer = matches!(
next,
Some(Op::Add)
| Some(Op::Sub)
| Some(Op::Mul)
| Some(Op::Div)
| Some(Op::Mod)
| Some(Op::CmpLt)
| Some(Op::CmpLe)
| Some(Op::CmpGt)
| Some(Op::CmpGe)
| Some(Op::CompoundAssignSlot(_, _))
| Some(Op::CompoundAssignVar(_, _))
);
if is_numeric_consumer {
fusions.push((i, Op::PushFieldNum(field), 1));
i += 2;
continue;
}
}
}
}
if i + 3 <= ops.len() {
if let (Op::GetSlot(src), Op::CompoundAssignSlot(dst, BinOp::Add), Op::Pop) =
(ops[i], ops[i + 1], ops[i + 2])
{
fusions.push((i, Op::AddSlotToSlot { src, dst }, 2));
i += 3;
continue;
}
}
if i + 2 <= ops.len() {
if let (Op::IncDecSlot(slot, kind), Op::Pop) = (ops[i], ops[i + 1]) {
let fused = match kind {
IncDecOp::PreInc | IncDecOp::PostInc => Op::IncrSlot(slot),
IncDecOp::PreDec | IncDecOp::PostDec => Op::DecrSlot(slot),
};
fusions.push((i, fused, 1));
i += 2;
continue;
}
if let (Op::IncDecVar(idx, kind), Op::Pop) = (ops[i], ops[i + 1]) {
let fused = match kind {
IncDecOp::PreInc | IncDecOp::PostInc => Op::IncrVar(idx),
IncDecOp::PreDec | IncDecOp::PostDec => Op::DecrVar(idx),
};
fusions.push((i, fused, 1));
i += 2;
continue;
}
if let (Op::PushStr(idx), Op::Concat) = (ops[i], ops[i + 1]) {
fusions.push((i, Op::ConcatPoolStr(idx), 1));
i += 2;
continue;
}
}
if i + 5 <= ops.len() {
if let (Op::GetSlot(s1), Op::PushNum(n), Op::Add, Op::SetSlot(s2), Op::Pop) =
(ops[i], ops[i + 1], ops[i + 2], ops[i + 3], ops[i + 4])
{
if s1 == s2 && n == 1.0 {
fusions.push((i, Op::IncrSlot(s1), 4));
i += 5;
continue;
}
}
}
if i + 4 <= ops.len() {
if let (Op::GetSlot(slot), Op::PushNum(limit), Op::CmpLt, Op::JumpIfFalsePop(target)) =
(ops[i], ops[i + 1], ops[i + 2], ops[i + 3])
{
fusions.push((
i,
Op::JumpIfSlotGeNum {
slot,
limit,
target,
},
3,
));
i += 4;
continue;
}
}
if i + 7 <= ops.len() {
if let (
Op::PushNum(n1),
Op::GetField,
Op::PushNum(n2),
Op::GetField,
Op::Mul,
Op::CompoundAssignSlot(slot, BinOp::Add),
Op::Pop,
) = (
ops[i],
ops[i + 1],
ops[i + 2],
ops[i + 3],
ops[i + 4],
ops[i + 5],
ops[i + 6],
) {
let f1 = n1 as u16;
let f2 = n2 as u16;
if n1 >= 0.0 && n1 == f1 as f64 && n2 >= 0.0 && n2 == f2 as f64 {
fusions.push((i, Op::AddMulFieldsToSlot { f1, f2, slot }, 6));
i += 7;
continue;
}
}
}
if i + 5 <= ops.len() {
if let (
Op::PushNum(n),
Op::GetField,
Op::PushNum(delta),
Op::CompoundAssignIndex(arr, BinOp::Add),
Op::Pop,
) = (ops[i], ops[i + 1], ops[i + 2], ops[i + 3], ops[i + 4])
{
let field = n as u16;
if n >= 0.0 && n == field as f64 {
fusions.push((i, Op::ArrayFieldAddConst { arr, field, delta }, 4));
i += 5;
continue;
}
}
}
if i + 7 <= ops.len() {
if let (
Op::PushNum(n1),
Op::GetField,
Op::PushNum(n2),
Op::GetField,
Op::PushNum(n3),
Op::GetField,
Op::Print {
argc: 3,
redir: RedirKind::Stdout,
},
) = (
ops[i],
ops[i + 1],
ops[i + 2],
ops[i + 3],
ops[i + 4],
ops[i + 5],
ops[i + 6],
) {
let f1 = n1 as u16;
let f2 = n2 as u16;
let f3 = n3 as u16;
if n1 >= 0.0
&& n1 == f1 as f64
&& n2 >= 0.0
&& n2 == f2 as f64
&& n3 >= 0.0
&& n3 == f3 as f64
{
fusions.push((i, Op::PrintThreeFieldsStdout { f1, f2, f3 }, 6));
i += 7;
continue;
}
}
}
if i + 8 <= ops.len() {
if let (
Op::PushNum(n1),
Op::GetField,
Op::PushStr(sep),
Op::Concat,
Op::PushNum(n2),
Op::GetField,
Op::Concat,
Op::Print {
argc: 1,
redir: RedirKind::Stdout,
},
) = (
ops[i],
ops[i + 1],
ops[i + 2],
ops[i + 3],
ops[i + 4],
ops[i + 5],
ops[i + 6],
ops[i + 7],
) {
let f1 = n1 as u16;
let f2 = n2 as u16;
if n1 >= 0.0 && n1 == f1 as f64 && n2 >= 0.0 && n2 == f2 as f64 {
fusions.push((i, Op::PrintFieldSepField { f1, sep, f2 }, 7));
i += 8;
continue;
}
}
}
if i + 5 <= ops.len() {
if let (
Op::PushFieldNum(f1),
Op::PushFieldNum(f2),
Op::Mul,
Op::CompoundAssignSlot(slot, BinOp::Add),
Op::Pop,
) = (ops[i], ops[i + 1], ops[i + 2], ops[i + 3], ops[i + 4])
{
fusions.push((i, Op::AddMulFieldsToSlot { f1, f2, slot }, 4));
i += 5;
continue;
}
}
if i + 4 <= ops.len() {
if let (
Op::PushFieldNum(field),
Op::PushNum(delta),
Op::CompoundAssignIndex(arr, BinOp::Add),
Op::Pop,
) = (ops[i], ops[i + 1], ops[i + 2], ops[i + 3])
{
fusions.push((i, Op::ArrayFieldAddConst { arr, field, delta }, 3));
i += 4;
continue;
}
}
if i + 4 <= ops.len() {
if let (
Op::PushFieldNum(f1),
Op::PushFieldNum(f2),
Op::PushFieldNum(f3),
Op::Print {
argc: 3,
redir: RedirKind::Stdout,
},
) = (ops[i], ops[i + 1], ops[i + 2], ops[i + 3])
{
fusions.push((i, Op::PrintThreeFieldsStdout { f1, f2, f3 }, 3));
i += 4;
continue;
}
}
if i + 6 <= ops.len() {
if let (
Op::PushFieldNum(f1),
Op::PushStr(sep),
Op::Concat,
Op::PushFieldNum(f2),
Op::Concat,
Op::Print {
argc: 1,
redir: RedirKind::Stdout,
},
) = (
ops[i],
ops[i + 1],
ops[i + 2],
ops[i + 3],
ops[i + 4],
ops[i + 5],
) {
fusions.push((i, Op::PrintFieldSepField { f1, sep, f2 }, 5));
i += 6;
continue;
}
}
i += 1;
}
if fusions.is_empty() {
return;
}
let old_len = ops.len();
let mut offset_map = vec![0usize; old_len + 1]; let mut adjustment: usize = 0;
let mut fi = 0;
#[allow(clippy::needless_range_loop)]
for pos in 0..=old_len {
if fi < fusions.len() {
let (fpos, _, removed) = fusions[fi];
if pos > fpos && pos <= fpos + removed {
offset_map[pos] = fpos - adjustment;
if pos == fpos + removed {
adjustment += removed;
fi += 1;
}
continue;
}
if pos == fpos + removed {
adjustment += removed;
fi += 1;
}
}
offset_map[pos] = pos - adjustment;
}
let total_removed: usize = fusions.iter().map(|(_, _, r)| r).sum();
let mut new_ops = Vec::with_capacity(ops.len().saturating_sub(total_removed));
let mut src = 0usize;
let mut fi = 0usize;
while src < ops.len() {
if fi < fusions.len() && src == fusions[fi].0 {
let (_, new_op, removed) = fusions[fi];
new_ops.push(new_op);
src += 1 + removed;
fi += 1;
} else {
new_ops.push(ops[src]);
src += 1;
}
}
debug_assert_eq!(new_ops.len(), ops.len() - total_removed);
*ops = new_ops;
for op in ops.iter_mut() {
match op {
Op::Jump(ref mut t) => *t = offset_map[*t],
Op::JumpIfFalsePop(ref mut t) => *t = offset_map[*t],
Op::JumpIfTruePop(ref mut t) => *t = offset_map[*t],
Op::ForInNext {
ref mut end_jump, ..
} => *end_jump = offset_map[*end_jump],
Op::JumpIfSlotGeNum { ref mut target, .. } => *target = offset_map[*target],
_ => {}
}
}
}
pub fn validate_program(prog: &Program) -> Result<()> {
for rule in &prog.rules {
validate_pattern(&rule.pattern)?;
for st in &rule.stmts {
validate_stmt(st)?;
}
}
for f in prog.funcs.values() {
for st in &f.body {
validate_stmt(st)?;
}
}
Ok(())
}
fn validate_pattern(pat: &Pattern) -> Result<()> {
match pat {
Pattern::Expr(e) => validate_expr(e, false),
Pattern::Range(a, b) => {
validate_pattern(a.as_ref())?;
validate_pattern(b.as_ref())?;
Ok(())
}
_ => Ok(()),
}
}
fn validate_print_redir(r: &PrintRedir) -> Result<()> {
match r {
PrintRedir::Overwrite(e)
| PrintRedir::Append(e)
| PrintRedir::Pipe(e)
| PrintRedir::Coproc(e) => validate_expr(e, false),
}
}
fn validate_stmt(st: &Stmt) -> Result<()> {
match st {
Stmt::If { cond, then_, else_ } => {
validate_expr(cond, false)?;
for s in then_ {
validate_stmt(s)?;
}
for s in else_ {
validate_stmt(s)?;
}
Ok(())
}
Stmt::While { cond, body } => {
validate_expr(cond, false)?;
for s in body {
validate_stmt(s)?;
}
Ok(())
}
Stmt::DoWhile { body, cond } => {
for s in body {
validate_stmt(s)?;
}
validate_expr(cond, false)?;
Ok(())
}
Stmt::ForC {
init,
cond,
iter,
body,
} => {
if let Some(e) = init {
validate_expr(e, false)?;
}
if let Some(e) = cond {
validate_expr(e, false)?;
}
if let Some(e) = iter {
validate_expr(e, false)?;
}
for s in body {
validate_stmt(s)?;
}
Ok(())
}
Stmt::ForIn { body, .. } => {
for s in body {
validate_stmt(s)?;
}
Ok(())
}
Stmt::Block(ss) => {
for s in ss {
validate_stmt(s)?;
}
Ok(())
}
Stmt::Expr(e) => validate_expr(e, false),
Stmt::Print { args, redir } => {
for a in args {
validate_expr(a, true)?;
}
if let Some(r) = redir {
validate_print_redir(r)?;
}
Ok(())
}
Stmt::Printf { args, redir } => {
for a in args {
validate_expr(a, true)?;
}
if let Some(r) = redir {
validate_print_redir(r)?;
}
Ok(())
}
Stmt::Break | Stmt::Continue | Stmt::Next | Stmt::NextFile => Ok(()),
Stmt::Exit(e) => {
if let Some(ex) = e {
validate_expr(ex, false)?;
}
Ok(())
}
Stmt::Delete { indices, .. } => {
if let Some(ix) = indices {
for e in ix {
validate_expr(e, false)?;
}
}
Ok(())
}
Stmt::Return(e) => {
if let Some(ex) = e {
validate_expr(ex, false)?;
}
Ok(())
}
Stmt::GetLine {
pipe_cmd, redir, ..
} => {
if let Some(cmd) = pipe_cmd {
validate_expr(cmd, false)?;
}
match redir {
GetlineRedir::Primary => Ok(()),
GetlineRedir::File(e) | GetlineRedir::Coproc(e) => validate_expr(e, false),
}
}
Stmt::Switch { expr, arms } => {
validate_expr(expr, false)?;
for arm in arms {
match arm {
SwitchArm::Case { label, stmts } => {
if let SwitchLabel::Expr(le) = label {
validate_expr(le, false)?;
}
for s in stmts {
validate_stmt(s)?;
}
}
SwitchArm::Default { stmts } => {
for s in stmts {
validate_stmt(s)?;
}
}
}
}
Ok(())
}
}
}
fn validate_expr(e: &Expr, allow_tuple: bool) -> Result<()> {
match e {
Expr::Tuple(_) if !allow_tuple => Err(Error::Runtime(
"parenthesized comma list is not allowed in this context".into(),
)),
Expr::Tuple(parts) => {
for p in parts {
validate_expr(p, false)?;
}
Ok(())
}
Expr::Binary { left, right, .. } => {
validate_expr(left, false)?;
validate_expr(right, false)?;
Ok(())
}
Expr::Unary { expr, .. } => validate_expr(expr, false),
Expr::Assign { rhs, .. } => validate_expr(rhs, false),
Expr::AssignField { field, rhs, .. } => {
validate_expr(field, false)?;
validate_expr(rhs, false)?;
Ok(())
}
Expr::AssignIndex { rhs, indices, .. } => {
for x in indices {
validate_expr(x, false)?;
}
validate_expr(rhs, false)?;
Ok(())
}
Expr::Call { name, args } => {
match name.as_str() {
"gsub" | "sub" if args.len() < 2 => {
return Err(Error::Runtime(format!(
"{name}: expected at least 2 arguments"
)));
}
"split" | "match" if args.len() < 2 => {
return Err(Error::Runtime(format!(
"{name}: expected at least 2 arguments"
)));
}
"patsplit" if !(2..=4).contains(&args.len()) => {
return Err(Error::Runtime(
"patsplit: expected 2, 3, or 4 arguments".into(),
));
}
"gensub" if !(3..=4).contains(&args.len()) => {
return Err(Error::Runtime("gensub: expected 3 or 4 arguments".into()));
}
_ => {}
}
for a in args {
validate_expr(a, false)?;
}
Ok(())
}
Expr::IndirectCall { callee, args } => {
validate_expr(callee, false)?;
for a in args {
validate_expr(a, false)?;
}
Ok(())
}
Expr::Ternary { cond, then_, else_ } => {
validate_expr(cond, false)?;
validate_expr(then_, false)?;
validate_expr(else_, false)?;
Ok(())
}
Expr::In { key, .. } => validate_expr(key, true),
Expr::Field(inner) => validate_expr(inner, false),
Expr::Index { indices, .. } => {
for x in indices {
validate_expr(x, false)?;
}
Ok(())
}
Expr::IncDec { target, .. } => match target {
IncDecTarget::Field(inner) => validate_expr(inner, false),
IncDecTarget::Index { indices, .. } => {
for x in indices {
validate_expr(x, false)?;
}
Ok(())
}
IncDecTarget::Var(_) => Ok(()),
},
Expr::GetLine {
pipe_cmd, redir, ..
} => {
if let Some(cmd) = pipe_cmd {
validate_expr(cmd, false)?;
}
match redir {
GetlineRedir::Primary => Ok(()),
GetlineRedir::File(ex) | GetlineRedir::Coproc(ex) => validate_expr(ex, false),
}
}
Expr::Number(_)
| Expr::IntegerLiteral(_)
| Expr::Str(_)
| Expr::RegexpLiteral(_)
| Expr::Var(_) => Ok(()),
}
}
fn is_literal_regex(pat: &str) -> bool {
!pat.bytes().any(|b| {
matches!(
b,
b'.' | b'*'
| b'+'
| b'?'
| b'['
| b']'
| b'('
| b')'
| b'{'
| b'}'
| b'|'
| b'^'
| b'$'
| b'\\'
)
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::{Expr, Pattern};
use crate::bytecode::{
Chunk, CompiledPattern, CompiledProgram, CompiledRangeEndpoint, CompiledRule, Op,
};
use crate::parser::parse_program;
use crate::runtime::Runtime;
use crate::vm::{vm_match_range_endpoint, vm_pattern_matches, vm_range_step, vm_run_begin};
use std::collections::HashMap;
fn compiled_program_with_rule(
pattern: CompiledPattern,
strings: StringPool,
) -> CompiledProgram {
CompiledProgram {
begin_chunks: vec![],
end_chunks: vec![],
beginfile_chunks: vec![],
endfile_chunks: vec![],
record_rules: vec![CompiledRule {
pattern,
body: Chunk::from_ops(vec![]),
original_index: 0,
}],
functions: HashMap::new(),
strings,
slot_count: 0,
slot_names: vec![],
slot_map: HashMap::new(),
array_var_names: vec![],
}
}
#[test]
fn compile_bignum_literal_add_uses_push_num_decimal_str_not_f64() {
let prog =
parse_program(r#"BEGIN { print sprintf("%d", 9223372036854775807 + 1) }"#).unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let ops = &cp.begin_chunks[0].ops;
assert!(
ops.iter().any(|o| matches!(o, Op::PushNumDecimalStr(_))),
"expected PushNumDecimalStr for integer literals, ops={ops:?}"
);
assert!(
!ops.iter()
.any(|o| matches!(o, Op::PushNum(n) if *n == 9223372036854775808.0)),
"must not push i64 max through f64, ops={ops:?}"
);
}
#[test]
fn compile_begin_print_constant() {
let prog = parse_program("BEGIN { print 42 }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks.is_empty());
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn vm_begin_negative_field_errors_with_init_slots_and_jit_enabled() {
use crate::runtime::Runtime;
use crate::vm::vm_run_begin;
let prog = parse_program("BEGIN { print $(-1) }\n").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let mut rt = Runtime::new();
rt.slots = cp.init_slots(&rt.vars);
rt.jit_enabled = true;
let r = vm_run_begin(&cp, &mut rt);
assert!(
r.is_err(),
"expected runtime error, got {:?} print_buf={:?}",
r,
rt.print_buf
);
}
#[test]
fn compile_record_rule_field_access() {
let prog = parse_program("{ print $1 + $2 }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert_eq!(cp.record_rules.len(), 1);
assert!(!cp.record_rules[0].body.ops.is_empty());
}
#[test]
fn compile_user_function_has_body() {
let prog = parse_program("function dbl(n){ return n*2 } { print dbl(3) }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let f = cp.functions.get("dbl").expect("dbl");
assert_eq!(f.params, vec!["n".to_string()]);
assert!(!f.body.ops.is_empty());
}
#[test]
fn compile_empty_record_pattern_still_emits_rule() {
let prog = parse_program("{ }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert_eq!(cp.record_rules.len(), 1);
}
#[test]
fn compile_end_block() {
let prog = parse_program("END { print \"x\" }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.end_chunks.is_empty());
}
#[test]
fn compile_beginfile_endfile() {
let prog = parse_program("BEGINFILE { print \"bf\" } ENDFILE { print \"ef\" }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.beginfile_chunks.is_empty());
assert!(!cp.endfile_chunks.is_empty());
}
#[test]
fn compile_begin_and_end_together() {
let prog = parse_program("BEGIN { a=1 } { print $0 } END { print a }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks.is_empty());
assert!(!cp.end_chunks.is_empty());
assert_eq!(cp.record_rules.len(), 1);
}
#[test]
fn compile_while_loop() {
let prog = parse_program("BEGIN { i=0; while (i<3) i=i+1 }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_if_else() {
let prog = parse_program("BEGIN { if (1) print 1; else print 0 }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_delete_array_elem() {
let prog = parse_program("BEGIN { delete a[\"k\"] }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_two_functions() {
let prog = parse_program(
"function a(){ return 1 } function b(){ return 2 } BEGIN { print a()+b() }",
)
.unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(cp.functions.contains_key("a"));
assert!(cp.functions.contains_key("b"));
}
#[test]
fn compile_do_while_loop() {
let prog = parse_program("BEGIN { do { x = 1 } while (0) }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_for_in_loop() {
let prog = parse_program("BEGIN { for (k in a) print k }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_record_rule_with_next() {
let prog = parse_program("{ next }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert_eq!(cp.record_rules.len(), 1);
assert!(!cp.record_rules[0].body.ops.is_empty());
}
#[test]
fn compile_delete_multidimensional_element() {
let prog = parse_program("BEGIN { delete a[1,2] }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
assert!(!cp.begin_chunks[0].ops.is_empty());
}
#[test]
fn compile_range_pattern_has_compiled_endpoints() {
let prog = parse_program("/start/,/end/ { print }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let rule = &cp.record_rules[0];
assert!(matches!(
&rule.pattern,
crate::bytecode::CompiledPattern::Range { .. }
));
}
#[test]
fn validate_rejects_gsub_without_args() {
let prog = parse_program("BEGIN { gsub() }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("gsub"), "{e}");
}
#[test]
fn validate_rejects_gsub_with_only_pattern_arg() {
let prog = parse_program("BEGIN { gsub(/x/) }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("gsub"), "{e}");
}
#[test]
fn validate_rejects_split_without_args() {
let prog = parse_program("BEGIN { split() }").unwrap();
assert!(validate_program(&prog).is_err());
}
#[test]
fn validate_rejects_split_single_arg() {
let prog = parse_program("BEGIN { split(\"a,b\") }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("split"), "{e}");
}
#[test]
fn validate_accepts_match_three_args_with_array() {
let prog = parse_program(r#"BEGIN { n = match("aba", /a/, arr) }"#).unwrap();
validate_program(&prog).unwrap();
}
#[test]
fn validate_rejects_tuple_in_return() {
let prog = parse_program("function f(){ return (1,2) } BEGIN { f() }").unwrap();
assert!(validate_program(&prog).is_err());
}
#[test]
fn validate_rejects_match_with_few_args() {
let prog = parse_program("BEGIN { match(\"x\") }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("match"), "{e}");
}
#[test]
fn validate_rejects_patsplit_wrong_arity() {
let prog = parse_program("BEGIN { patsplit(\"a\") }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("patsplit"), "{e}");
}
#[test]
fn validate_rejects_gensub_wrong_arity() {
let prog = parse_program("BEGIN { gensub(/a/, \"b\") }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("gensub"), "{e}");
}
#[test]
fn validate_accepts_gensub_three_and_four_args() {
for src in [
"BEGIN { gensub(/a/, \"b\", \"g\") }",
"BEGIN { gensub(/a/, \"b\", \"g\", \"aba\") }",
] {
let prog = parse_program(src).unwrap();
validate_program(&prog).unwrap_or_else(|e| panic!("{src}: {e}"));
}
}
#[test]
fn validate_rejects_gensub_five_args() {
let prog = parse_program("BEGIN { gensub(/a/, \"b\", \"g\", \"x\", \"y\") }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("gensub"), "{e}");
}
#[test]
fn validate_accepts_exit_with_expression() {
let prog = parse_program("BEGIN { exit 1 + 1 }").unwrap();
validate_program(&prog).unwrap();
}
#[test]
fn validate_rejects_sub_with_few_args() {
let prog = parse_program("BEGIN { sub(/a/) }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("sub"), "{e}");
}
#[test]
fn validate_rejects_patsplit_five_args() {
let prog = parse_program("BEGIN { patsplit(\"a\", arr, \"x\", y, z) }").unwrap();
let e = validate_program(&prog).unwrap_err();
assert!(e.to_string().contains("patsplit"), "{e}");
}
#[test]
fn validate_accepts_patsplit_two_three_and_four_args() {
for src in [
"BEGIN { patsplit(\"a b\", parts) }",
"BEGIN { patsplit(\"a b\", parts, \"[a-z]+\") }",
"BEGIN { patsplit(\"a b\", parts, \"[a-z]+\", seps) }",
] {
let prog = parse_program(src).unwrap();
validate_program(&prog).unwrap_or_else(|e| panic!("{src}: {e}"));
}
}
#[test]
fn lowered_pattern_empty_matches_via_vm() {
let (pat, strings) = lower_pattern(&Pattern::Empty);
let cp = compiled_program_with_rule(pat, strings);
let mut rt = Runtime::new();
rt.set_record_from_line("anything");
let rule = &cp.record_rules[0];
assert!(vm_pattern_matches(rule, &cp, &mut rt).unwrap());
}
#[test]
fn lowered_pattern_regexp_respects_record() {
let (pat, strings) = lower_pattern(&Pattern::Regexp("ell".into()));
let cp = compiled_program_with_rule(pat, strings);
let mut rt = Runtime::new();
rt.set_record_from_line("hello");
let rule = &cp.record_rules[0];
assert!(vm_pattern_matches(rule, &cp, &mut rt).unwrap());
let (pat2, strings2) = lower_pattern(&Pattern::Regexp("^z".into()));
let cp2 = compiled_program_with_rule(pat2, strings2);
let rule2 = &cp2.record_rules[0];
assert!(!vm_pattern_matches(rule2, &cp2, &mut rt).unwrap());
}
#[test]
fn lowered_range_endpoint_nested_range_errors_in_vm() {
let p = Pattern::Range(
Box::new(Pattern::Regexp("a".into())),
Box::new(Pattern::Regexp("b".into())),
);
let (ep, strings) = lower_range_endpoint(&p);
let cp = compiled_program_with_rule(CompiledPattern::Always, strings);
let mut rt = Runtime::new();
assert!(matches!(ep, CompiledRangeEndpoint::NestedRangeError));
assert!(vm_match_range_endpoint(&ep, &cp, &mut rt).is_err());
}
#[test]
fn range_step_enters_after_start_pattern() {
let prog = parse_program("/start/,/end/ { print }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let rule = &cp.record_rules[0];
let CompiledPattern::Range { start, end } = &rule.pattern else {
panic!("expected range pattern");
};
let mut rt = Runtime::new();
rt.set_record_from_line("start");
let mut state = false;
assert!(vm_range_step(&mut state, start, end, &cp, &mut rt).unwrap());
assert!(state);
}
#[test]
fn begin_assignment_runs_on_vm_slots() {
let prog = parse_program("BEGIN { answer = 42 }").unwrap();
let cp = Compiler::compile_program(&prog).unwrap();
let mut rt = Runtime::new();
rt.slots = cp.init_slots(&rt.vars);
vm_run_begin(&cp, &mut rt).unwrap();
let slot = *cp.slot_map.get("answer").expect("answer slotted");
assert_eq!(rt.slots[slot as usize].as_number(), 42.0);
}
#[test]
fn lowered_pattern_expr_numeric_truthy_via_vm() {
let (pat, strings) = lower_pattern(&Pattern::Expr(Expr::Number(1.0)));
let cp = compiled_program_with_rule(pat, strings);
let mut rt = Runtime::new();
rt.set_record_from_line("anything");
let rule = &cp.record_rules[0];
assert!(vm_pattern_matches(rule, &cp, &mut rt).unwrap());
}
#[test]
fn lowered_pattern_expr_numeric_falsy_via_vm() {
let (pat, strings) = lower_pattern(&Pattern::Expr(Expr::Number(0.0)));
let cp = compiled_program_with_rule(pat, strings);
let mut rt = Runtime::new();
rt.set_record_from_line("anything");
let rule = &cp.record_rules[0];
assert!(!vm_pattern_matches(rule, &cp, &mut rt).unwrap());
}
#[test]
fn lowered_range_endpoint_expr_truthy_via_vm() {
let (ep, strings) = lower_range_endpoint(&Pattern::Expr(Expr::Number(2.0)));
assert!(matches!(ep, CompiledRangeEndpoint::Expr(_)));
let cp = compiled_program_with_rule(CompiledPattern::Always, strings);
let mut rt = Runtime::new();
rt.set_record_from_line("z");
assert!(vm_match_range_endpoint(&ep, &cp, &mut rt).unwrap());
}
}