mod lexer;
mod parser;
mod runtime;
use super::{CommandContext, CommandMeta, CommandResult, VirtualCommand};
use lexer::Lexer;
use parser::Parser;
use runtime::AwkRuntime;
use std::path::PathBuf;
/// The virtual `awk` command.
///
/// Stateless unit type; its `VirtualCommand` implementation in this file
/// forwards `execute` to `run_awk`.
pub struct AwkCommand;
/// Static metadata for `awk`, handed out by `AwkCommand::meta`.
static AWK_META: CommandMeta = CommandMeta {
name: "awk",
synopsis: "awk [-F FS] [-v VAR=VALUE] [-f FILE] 'PROGRAM' [FILE ...]",
description: "Pattern scanning and text processing language.",
// One (usage, description) pair per option handled by `parse_args`.
options: &[
("-F FS", "set the input field separator"),
("-v VAR=VALUE", "assign a value to a variable"),
("-f FILE", "read the awk program from FILE"),
],
// NOTE(review): presumably lets the surrounding framework answer `--help`
// itself; `parse_args` has no handling for it — confirm against `CommandMeta`.
supports_help_flag: true,
// No entries are declared here; the meaning of this table is defined by
// `CommandMeta`, which is not visible in this file.
flags: &[],
};
impl VirtualCommand for AwkCommand {
    /// Returns the literal command name, `"awk"`.
    fn name(&self) -> &str {
        "awk"
    }

    /// Returns the static metadata table for this command.
    fn meta(&self) -> Option<&'static CommandMeta> {
        Some(&AWK_META)
    }

    /// Runs `awk` with `args` in the given context.
    ///
    /// Any error reported by `run_awk` is turned into a result with empty
    /// stdout, the message on stderr prefixed with `awk: `, and exit code 2.
    fn execute(&self, args: &[String], ctx: &CommandContext) -> CommandResult {
        run_awk(args, ctx).unwrap_or_else(|message| CommandResult {
            stdout: String::new(),
            stderr: format!("awk: {message}\n"),
            exit_code: 2,
            stdout_bytes: None,
        })
    }
}
/// Parsed `awk` command line.
#[derive(Debug, Clone, PartialEq, Eq)]
struct AwkOpts {
    /// Value given to `-F`, if any. When repeated, the last one wins.
    field_separator: Option<String>,
    /// `(name, value)` pairs from `-v NAME=VALUE`, in command-line order.
    assignments: Vec<(String, String)>,
    /// Inline program text. Empty when the program comes from `-f`.
    program: String,
    /// Path given to `-f`, if any. When repeated, the last one wins.
    prog_file: Option<String>,
    /// Input file operands, in command-line order.
    files: Vec<String>,
}

/// Returns the value of option `flag`.
///
/// `attached` is whatever followed the flag inside the same argument
/// (`-F:` gives `":"`). When it is empty the value is the next argument and
/// `*i` is advanced onto it.
///
/// # Errors
///
/// `option <flag> requires an argument` when the flag is the last argument.
fn option_value<'a>(
    args: &'a [String],
    i: &mut usize,
    attached: &'a str,
    flag: &str,
) -> Result<&'a str, String> {
    if !attached.is_empty() {
        return Ok(attached);
    }
    *i += 1;
    args.get(*i)
        .map(String::as_str)
        .ok_or_else(|| format!("option {flag} requires an argument"))
}

/// Parses the arguments of an `awk` invocation (without the command name).
///
/// Options (`-F`, `-v`, `-f`, each with an attached or separate value) are
/// recognised only up to the first operand or an explicit `--`. Everything
/// after that point is an operand, even if it starts with `-`. A lone `-` is
/// an operand, not an option. The first operand is the program text unless
/// `-f` was given, in which case all operands are input files.
///
/// # Errors
///
/// Returns a message (without the `awk: ` prefix) when an option is missing
/// its value, a `-v` value has no `=`, an unknown option is used, or neither
/// a program text nor `-f` is supplied.
fn parse_args(args: &[String]) -> Result<AwkOpts, String> {
    let mut field_separator = None;
    let mut assignments = Vec::new();
    let mut prog_file = None;

    let mut i = 0;
    while i < args.len() {
        let arg = args[i].as_str();
        if arg == "--" {
            // Explicit end of options; the terminator itself is not an operand.
            i += 1;
            break;
        }
        if let Some(attached) = arg.strip_prefix("-F") {
            field_separator = Some(option_value(args, &mut i, attached, "-F")?.to_string());
        } else if let Some(attached) = arg.strip_prefix("-v") {
            let assign = option_value(args, &mut i, attached, "-v")?;
            let (var, val) = assign
                .split_once('=')
                .ok_or_else(|| format!("invalid -v assignment: {assign}"))?;
            assignments.push((var.to_string(), val.to_string()));
        } else if let Some(attached) = arg.strip_prefix("-f") {
            prog_file = Some(option_value(args, &mut i, attached, "-f")?.to_string());
        } else if arg.len() > 1 && arg.starts_with('-') {
            return Err(format!("unknown option: {arg}"));
        } else {
            // First operand: stop option processing so later arguments that
            // merely look like options are kept as file names.
            break;
        }
        i += 1;
    }

    // `i` never exceeds `args.len()`: every increment above is bounds-checked.
    let mut operands = args[i..].iter().cloned();
    let program = if prog_file.is_some() {
        String::new()
    } else {
        operands
            .next()
            .ok_or_else(|| "no program text".to_string())?
    };

    Ok(AwkOpts {
        field_separator,
        assignments,
        program,
        prog_file,
        files: operands.collect(),
    })
}
/// Resolves `path_str` against `cwd`.
///
/// A path starting with `/` is returned unchanged; anything else is appended
/// to `cwd`. No normalisation of `.` or `..` is performed.
fn resolve_path(path_str: &str, cwd: &str) -> PathBuf {
    let mut resolved = PathBuf::new();
    if !path_str.starts_with('/') {
        resolved.push(cwd);
    }
    resolved.push(path_str);
    resolved
}
fn run_awk(args: &[String], ctx: &CommandContext) -> Result<CommandResult, String> {
let mut opts = parse_args(args)?;
if let Some(ref pf) = opts.prog_file {
let path = resolve_path(pf, ctx.cwd);
match ctx.fs.read_file(&path) {
Ok(bytes) => {
opts.program = String::from_utf8_lossy(&bytes).to_string();
}
Err(e) => return Err(format!("can't open source file '{pf}': {e}")),
}
}
if opts.program.is_empty() {
return Err("no program text".to_string());
}
let tokens = Lexer::new(&opts.program)
.tokenize()
.map_err(|e| format!("syntax error: {e}"))?;
let program = Parser::new(tokens)
.parse()
.map_err(|e| format!("syntax error: {e}"))?;
let mut runtime = AwkRuntime::new();
runtime.apply_limits(ctx.limits);
if let Some(ref fs) = opts.field_separator {
runtime.set_var("FS", fs);
}
for (var, val) in &opts.assignments {
runtime.set_var(var, val);
}
let mut argv_args = vec!["awk".to_string()];
argv_args.extend(opts.files.clone());
runtime.set_argc_argv(&argv_args);
let inputs = collect_inputs(&opts.files, ctx)?;
let (exit_code, stdout, stderr) = runtime.execute(&program, &inputs);
Ok(CommandResult {
stdout,
stderr,
exit_code,
stdout_bytes: None,
})
}
/// Loads every input as a `(name, contents)` pair.
///
/// With no file operands the only input is stdin, named `""`; an empty stdin
/// yields no inputs at all. Otherwise each operand is read in order through
/// `ctx.fs` (lossy UTF-8), with `-` standing for stdin under the name
/// `(standard input)`.
///
/// # Errors
///
/// Stops at the first file that cannot be read and returns
/// `can't open file '<name>': <error>`.
fn collect_inputs(files: &[String], ctx: &CommandContext) -> Result<Vec<(String, String)>, String> {
    if files.is_empty() {
        let implicit = if ctx.stdin.is_empty() {
            Vec::new()
        } else {
            vec![(String::new(), ctx.stdin.to_string())]
        };
        return Ok(implicit);
    }

    files
        .iter()
        .map(|name| {
            if name == "-" {
                Ok(("(standard input)".to_string(), ctx.stdin.to_string()))
            } else {
                let path = resolve_path(name, ctx.cwd);
                ctx.fs
                    .read_file(&path)
                    .map(|bytes| (name.clone(), String::from_utf8_lossy(&bytes).into_owned()))
                    .map_err(|e| format!("can't open file '{name}': {e}"))
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::interpreter::ExecutionLimits;
    use crate::network::NetworkPolicy;
    use crate::vfs::{InMemoryFs, VirtualFs};
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Executes `awk` with `args` and `stdin` against `fs`, with cwd `/`,
    /// an empty environment and default limits. Every other helper and test
    /// goes through this single place that builds the `CommandContext`.
    fn exec(fs: &InMemoryFs, args: &[&str], stdin: &str) -> CommandResult {
        let env = HashMap::new();
        let limits = ExecutionLimits::default();
        let ctx = CommandContext {
            fs,
            cwd: "/",
            env: &env,
            variables: None,
            stdin,
            stdin_bytes: None,
            limits: &limits,
            network_policy: &NetworkPolicy::default(),
            exec: None,
            shell_opts: None,
        };
        let args: Vec<String> = args.iter().map(|s| s.to_string()).collect();
        AwkCommand.execute(&args, &ctx)
    }

    /// Runs a single inline program over `stdin` on an empty filesystem.
    fn run(program: &str, stdin: &str) -> CommandResult {
        run_with_args(&[program], stdin)
    }

    /// Runs `awk` with an arbitrary argument list on an empty filesystem.
    fn run_with_args(args: &[&str], stdin: &str) -> CommandResult {
        let fs = Arc::new(InMemoryFs::new());
        exec(&fs, args, stdin)
    }

    /// Writes each `(name, content)` pair to `/name`, then runs `program`
    /// with those names as file operands and an empty stdin.
    fn run_with_files(program: &str, files: &[(&str, &str)]) -> CommandResult {
        let fs = Arc::new(InMemoryFs::new());
        for (name, content) in files {
            fs.write_file(&PathBuf::from(format!("/{name}")), content.as_bytes())
                .unwrap();
        }
        let args: Vec<&str> = std::iter::once(program)
            .chain(files.iter().map(|(name, _)| *name))
            .collect();
        exec(&fs, &args, "")
    }

    #[test]
    fn integration_print_first_field() {
        let r = run("{print $1}", "hello world\nfoo bar\n");
        assert_eq!(r.stdout, "hello\nfoo\n");
        assert_eq!(r.exit_code, 0);
    }

    #[test]
    fn integration_field_separator() {
        let r = run_with_args(&["-F:", "{print $1}"], "root:x:0:0\n");
        assert_eq!(r.stdout, "root\n");
    }

    #[test]
    fn integration_field_assignment() {
        let r = run("{$2 = \"X\"; print $0}", "a b c\n");
        assert_eq!(r.stdout, "a X c\n");
    }

    #[test]
    fn integration_regex_filter() {
        let r = run("/error/ {print}", "info: ok\nerror: fail\ninfo: done\n");
        assert_eq!(r.stdout, "error: fail\n");
    }

    #[test]
    fn integration_begin_end_sum() {
        let r = run("BEGIN{sum=0} {sum+=$1} END{print sum}", "10\n20\n30\n");
        assert_eq!(r.stdout, "60\n");
    }

    #[test]
    fn integration_variable() {
        let r = run_with_args(&["-v", "threshold=10", "$1 > threshold"], "5\n15\n8\n20\n");
        assert_eq!(r.stdout, "15\n20\n");
    }

    #[test]
    fn integration_uninitialized() {
        let r = run("{print x+0, x}", "line\n");
        assert_eq!(r.stdout, "0 \n");
    }

    #[test]
    fn integration_arithmetic() {
        let r = run("{print $1, $1*2}", "5\n10\n");
        assert_eq!(r.stdout, "5 10\n10 20\n");
    }

    #[test]
    fn integration_if_else() {
        let r = run(
            "{if ($1 > 10) print \"big\"; else print \"small\"}",
            "5\n15\n",
        );
        assert_eq!(r.stdout, "small\nbig\n");
    }

    #[test]
    fn integration_printf() {
        let r = run("{printf \"%-10s %5d\\n\", $1, $2}", "hello 42\n");
        // `%-10s` left-justifies in 10 columns and `%5d` right-justifies in 5;
        // the expectation is computed so the padding cannot be mistyped.
        assert_eq!(r.stdout, format!("{:<10} {:>5}\n", "hello", 42));
    }

    #[test]
    fn integration_array_word_count() {
        let r = run(
            "{count[$1]++} END{for(k in count) print k, count[k]}",
            "a\nb\na\nc\nb\na\n",
        );
        // `for (k in count)` order is unspecified, so only check membership.
        assert!(r.stdout.contains("a 3"));
        assert!(r.stdout.contains("b 2"));
        assert!(r.stdout.contains("c 1"));
    }

    #[test]
    fn integration_string_functions() {
        let r = run("{print toupper($0)}", "hello\n");
        assert_eq!(r.stdout, "HELLO\n");
    }

    #[test]
    fn integration_multi_file() {
        let r = run_with_files(
            "{print FILENAME, FNR, NR}",
            &[("file1", "a\nb\n"), ("file2", "c\n")],
        );
        assert_eq!(r.stdout, "file1 1 1\nfile1 2 2\nfile2 1 3\n");
    }

    #[test]
    fn integration_range_pattern() {
        let r = run(
            "/start/,/end/ {print}",
            "before\nstart here\nmiddle\nend here\nafter\n",
        );
        assert_eq!(r.stdout, "start here\nmiddle\nend here\n");
    }

    #[test]
    fn integration_no_action_implicit_print() {
        let r = run("/hello/", "hello world\ngoodbye\nhello again\n");
        assert_eq!(r.stdout, "hello world\nhello again\n");
    }

    #[test]
    fn integration_empty_input() {
        let r = run("{print}", "");
        assert_eq!(r.stdout, "");
    }

    #[test]
    fn integration_empty_fs() {
        let r = run_with_args(&["-F", "", "{print $1, $2, $3}"], "abc\n");
        assert_eq!(r.stdout, "a b c\n");
    }

    #[test]
    fn integration_nr_nf() {
        let r = run("{print NR, NF}", "a b c\nx y\n");
        assert_eq!(r.stdout, "1 3\n2 2\n");
    }

    #[test]
    fn integration_progfile() {
        let fs = Arc::new(InMemoryFs::new());
        fs.write_file(&PathBuf::from("/prog.awk"), b"{print $1}")
            .unwrap();
        let r = exec(&fs, &["-f", "prog.awk"], "hello world\n");
        assert_eq!(r.stdout, "hello\n");
    }

    #[test]
    fn integration_match_function() {
        let r = run(
            "{if (match($0, /[0-9]+/)) print RSTART, RLENGTH}",
            "abc123def\n",
        );
        assert_eq!(r.stdout, "4 3\n");
    }

    #[test]
    fn integration_split_function() {
        let r = run(
            "{n=split($0, a, \":\"); for(i=1;i<=n;i++) print a[i]}",
            "a:b:c\n",
        );
        assert_eq!(r.stdout, "a\nb\nc\n");
    }

    #[test]
    fn integration_sub_gsub() {
        let r = run("{sub(/world/, \"earth\"); print}", "hello world\n");
        assert_eq!(r.stdout, "hello earth\n");
        let r = run("{gsub(/o/, \"0\"); print}", "foobar\n");
        assert_eq!(r.stdout, "f00bar\n");
    }

    #[test]
    fn integration_in_array() {
        let r = run("{a[$1]=1} END{print (\"x\" in a), (\"z\" in a)}", "x\ny\n");
        assert_eq!(r.stdout, "1 0\n");
    }

    #[test]
    fn integration_assignment_operators() {
        let r = run("BEGIN{x=10; x+=5; x-=3; print x}", "");
        assert_eq!(r.stdout, "12\n");
    }

    #[test]
    fn integration_do_while() {
        let r = run(
            "BEGIN{i=1; do { printf \"%d \", i; i++ } while(i<=3); print \"\"}",
            "",
        );
        assert_eq!(r.stdout, "1 2 3 \n");
    }

    #[test]
    fn integration_ternary() {
        let r = run("{print ($1 > 0) ? \"pos\" : \"neg\"}", "5\n-3\n");
        assert_eq!(r.stdout, "pos\nneg\n");
    }

    #[test]
    fn integration_pipe_stdin() {
        let r = run("{print $2}", "hello world\n");
        assert_eq!(r.stdout, "world\n");
    }

    #[test]
    fn integration_substr() {
        let r = run("{print substr($0, 7, 5)}", "hello world\n");
        assert_eq!(r.stdout, "world\n");
    }

    #[test]
    fn integration_index_func() {
        let r = run("{print index($0, \"world\")}", "hello world\n");
        assert_eq!(r.stdout, "7\n");
    }

    #[test]
    fn integration_sprintf() {
        let r = run("{print sprintf(\"%05d\", $1)}", "42\n");
        assert_eq!(r.stdout, "00042\n");
    }

    #[test]
    fn integration_power() {
        let r = run("BEGIN{print 2^10}", "");
        assert_eq!(r.stdout, "1024\n");
    }

    #[test]
    fn integration_int() {
        let r = run("BEGIN{print int(3.9)}", "");
        assert_eq!(r.stdout, "3\n");
    }

    #[test]
    fn integration_error_on_no_program() {
        let r = run_with_args(&[], "");
        assert_ne!(r.exit_code, 0);
    }

    #[test]
    fn integration_expression_pattern() {
        let r = run("NR > 1 {print}", "skip\nkeep1\nkeep2\n");
        assert_eq!(r.stdout, "keep1\nkeep2\n");
    }

    #[test]
    fn integration_regex_match_not_match() {
        let r = run("{if ($0 ~ /^[0-9]/) print}", "123\nabc\n456\n");
        assert_eq!(r.stdout, "123\n456\n");
        let r = run("{if ($0 !~ /^[0-9]/) print}", "123\nabc\n456\n");
        assert_eq!(r.stdout, "abc\n");
    }

    #[test]
    fn integration_delete_array() {
        let r = run("{a[$1]=1} END{delete a; print length(a)}", "x\ny\n");
        assert_eq!(r.stdout, "0\n");
    }

    #[test]
    fn integration_single_field() {
        let r = run("{print $1, NF}", "hello\n");
        assert_eq!(r.stdout, "hello 1\n");
    }

    #[test]
    fn integration_very_long_line() {
        let long = "a ".repeat(1000).trim().to_string();
        let input = format!("{long}\n");
        let r = run("{print NF}", &input);
        assert_eq!(r.stdout, "1000\n");
    }

    #[test]
    fn integration_begin_only() {
        let r = run("BEGIN{print \"hello\"}", "");
        assert_eq!(r.stdout, "hello\n");
    }

    #[test]
    fn integration_end_only() {
        let r = run("END{print \"done\"}", "some input\n");
        assert_eq!(r.stdout, "done\n");
    }

    #[test]
    fn integration_break_continue() {
        let r = run(
            "BEGIN{for(i=1;i<=10;i++){if(i==4) break; printf \"%d \",i}; print \"\"}",
            "",
        );
        assert_eq!(r.stdout, "1 2 3 \n");
        let r = run(
            "BEGIN{for(i=1;i<=5;i++){if(i==3) continue; printf \"%d \",i}; print \"\"}",
            "",
        );
        assert_eq!(r.stdout, "1 2 4 5 \n");
    }

    #[test]
    fn integration_next() {
        let r = run(
            "{if ($1 == \"skip\") next; print}",
            "keep\nskip\nalso keep\n",
        );
        assert_eq!(r.stdout, "keep\nalso keep\n");
    }

    #[test]
    fn integration_exit_code() {
        let r = run("{ if (NR==2) exit 42; print }", "a\nb\nc\n");
        assert_eq!(r.stdout, "a\n");
        assert_eq!(r.exit_code, 42);
    }

    #[test]
    fn integration_logical_ops() {
        let r = run("{print ($1 > 0 && $1 < 10)}", "5\n15\n");
        assert_eq!(r.stdout, "1\n0\n");
        let r = run("{print ($1 > 10 || $1 < 0)}", "5\n-3\n15\n");
        assert_eq!(r.stdout, "0\n1\n1\n");
    }

    #[test]
    fn integration_modulo() {
        let r = run("{print $1 % 3}", "10\n7\n");
        assert_eq!(r.stdout, "1\n1\n");
    }

    #[test]
    fn integration_implicit_concat() {
        let r = run("BEGIN{x = \"hello\" \" \" \"world\"; print x}", "");
        assert_eq!(r.stdout, "hello world\n");
    }

    #[test]
    fn integration_ofs() {
        let r = run_with_args(&["-v", "OFS=-", "{print $1, $2}"], "a b\n");
        assert_eq!(r.stdout, "a-b\n");
    }

    #[test]
    fn integration_length_func() {
        let r = run("{print length($0)}", "hello\n");
        assert_eq!(r.stdout, "5\n");
    }

    #[test]
    fn integration_pre_post_increment() {
        let r = run("BEGIN{x=5; print ++x; print x++; print x}", "");
        assert_eq!(r.stdout, "6\n6\n7\n");
    }
}