use crate::ast::{BinOp, IncDecOp};
use crate::runtime::{AwkMap, Value};
use std::collections::HashMap;
use std::fmt;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
/// Output destination kind carried by [`Op::Print`] and [`Op::Printf`].
///
/// NOTE(review): the variant names suggest the AWK forms "no redirection",
/// `> file`, `>> file`, `| cmd` and `|& cmd` respectively — confirm against the
/// code that executes these instructions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedirKind {
    /// No redirection payload; presumably plain standard output.
    Stdout,
    /// Presumably truncate-and-write to a file.
    Overwrite,
    /// Presumably append to a file.
    Append,
    /// Presumably write to a command's standard input.
    Pipe,
    /// Presumably write to a two-way coprocess.
    Coproc,
}
/// Input source selector carried by [`Op::GetLine`].
///
/// NOTE(review): the variant names suggest the AWK `getline` forms (main input,
/// `getline < file`, `cmd |& getline`, `cmd | getline`) — confirm against the
/// code that executes the instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GetlineSource {
    /// Presumably the main record input stream.
    Primary,
    /// Presumably a named file.
    File,
    /// Presumably a two-way coprocess.
    Coproc,
    /// Presumably the output of a command.
    Pipe,
}
/// Target operand carried by [`Op::SubFn`] and [`Op::GsubFn`].
///
/// NOTE(review): the payload meanings below are inferred from names and from how
/// the same integer widths are used by other instructions (`u32` for named
/// variables/arrays, `u16` for slots) — confirm against the interpreter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SubTarget {
    /// Presumably the whole current record.
    Record,
    /// Presumably a named variable identified by a `u32` id.
    Var(u32),
    /// Presumably a slot-allocated variable identified by its slot number.
    SlotVar(u16),
    /// Presumably a field whose number is supplied at run time.
    Field,
    /// Presumably an element of the array identified by a `u32` id.
    Index(u32),
}
/// One bytecode instruction stored in [`Chunk::ops`].
///
/// This enum only declares instruction shapes. What each instruction does is
/// defined by the code that executes a chunk, which is not part of this file.
/// The `//` headings inside the enum group variants by what their names and
/// payload types suggest; they are reviewer notes, not a specification, and
/// should be confirmed against the interpreter before being relied on.
// NOTE(review): `dead_code` is silenced for the whole enum, presumably because
// some variants are not constructed in every build configuration — confirm and
// narrow the allowance if possible.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
pub enum Op {
    // Constant pushes. The `u32` payloads are presumably `StringPool` indices —
    // TODO confirm.
    PushNum(f64),
    PushNumDecimalStr(u32),
    PushStr(u32),
    PushRegexp(u32),

    // Variable, slot, field and array-element access. `u32` payloads presumably
    // name a variable/array, `u16` payloads a slot — TODO confirm.
    GetVar(u32),
    SetVar(u32),
    GetSlot(u16),
    SetSlot(u16),
    GetField,
    SetField,
    GetArrayElem(u32),
    SetArrayElem(u32),
    SymtabKeyCount,

    // Compound assignment; the `BinOp` payload selects the operator.
    CompoundAssignVar(u32, BinOp),
    CompoundAssignSlot(u16, BinOp),
    CompoundAssignField(BinOp),
    CompoundAssignIndex(u32, BinOp),

    // Increment / decrement; the `IncDecOp` payload selects the form.
    IncDecVar(u32, IncDecOp),
    IncrVar(u32),
    DecrVar(u32),
    IncDecSlot(u16, IncDecOp),
    IncDecField(IncDecOp),
    IncDecIndex(u32, IncDecOp),

    // Arithmetic.
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,

    // Comparison.
    CmpEq,
    CmpNe,
    CmpLt,
    CmpLe,
    CmpGt,
    CmpGe,

    // String / regex operators and unary operators.
    Concat,
    RegexMatch,
    RegexNotMatch,
    Neg,
    Pos,
    Not,
    ToBool,

    // Control flow. The `usize` payloads are presumably indices into
    // `Chunk::ops` — TODO confirm.
    Jump(usize),
    JumpIfFalsePop(usize),
    JumpIfTruePop(usize),

    // Output; `argc` is presumably the number of operands taken from the stack.
    Print {
        argc: u16,
        redir: RedirKind,
    },
    Printf {
        argc: u16,
        redir: RedirKind,
    },

    // Leaving the current record, program or function.
    Next,
    NextFile,
    ExitWithCode,
    ExitDefault,
    ReturnVal,
    ReturnEmpty,

    // Calls; the trailing `u16` is presumably the argument count.
    CallBuiltin(u32, u16),
    CallUser(u32, u16),
    CallIndirect(u16),

    // `typeof` on the various operand kinds.
    TypeofVar(u32),
    TypeofSlot(u16),
    TypeofArrayElem(u32),
    TypeofField,
    TypeofValue,

    // Array membership, deletion and multi-dimensional key construction.
    InArray(u32),
    DeleteArray(u32),
    DeleteElem(u32),
    JoinArrayKey(u16),

    // Input.
    GetLine {
        var: Option<u32>,
        source: GetlineSource,
        push_result: bool,
    },

    // Substitution, splitting and matching builtins.
    SubFn(SubTarget),
    GsubFn(SubTarget),
    Split {
        arr: u32,
        has_fs: bool,
    },
    Patsplit {
        arr: u32,
        has_fp: bool,
        seps: Option<u32>,
    },
    MatchBuiltin {
        arr: Option<u32>,
    },

    // `for (k in arr)` iteration.
    ForInStart(u32),
    ForInNext {
        var: u32,
        end_jump: usize,
    },
    ForInEnd,

    // Stack manipulation.
    Pop,
    Dup,

    // Sorting builtins.
    Asort {
        src: u32,
        dest: Option<u32>,
    },
    Asorti {
        src: u32,
        dest: Option<u32>,
    },

    // The remaining variants look like fused / specialised forms of the simpler
    // instructions above (field numbers and slots baked into the payload) —
    // TODO confirm with the compiler's peephole pass.
    MatchRegexp(u32),
    AddFieldToSlot {
        field: u16,
        slot: u16,
    },
    ConcatPoolStr(u32),
    PrintFieldStdout(u16),
    IncrSlot(u16),
    DecrSlot(u16),
    AddSlotToSlot {
        src: u16,
        dst: u16,
    },
    PushFieldNum(u16),
    GetNR,
    GetFNR,
    GetNF,
    JumpIfSlotGeNum {
        slot: u16,
        limit: f64,
        target: usize,
    },
    AddMulFieldsToSlot {
        f1: u16,
        f2: u16,
        slot: u16,
    },
    ArrayFieldAddConst {
        arr: u32,
        field: u16,
        delta: f64,
    },
    PrintFieldSepField {
        f1: u16,
        sep: u32,
        f2: u16,
    },
    PrintThreeFieldsStdout {
        f1: u16,
        f2: u16,
        f3: u16,
    },
}
/// Mutex-guarded cell holding the JIT compilation outcome for one chunk.
///
/// NOTE(review): presumably `None` means "not attempted yet", `Some(Ok(_))` holds
/// the compiled artifact, and `Some(Err(()))` records a failed attempt so it is
/// not retried — confirm against the JIT driver.
type JitChunkCache = Mutex<Option<Result<Arc<crate::jit::JitChunk>, ()>>>;
/// A sequence of [`Op`]s together with per-chunk JIT bookkeeping.
///
/// `Clone` is derived, so cloning copies `ops` but shares the same `jit_lock`
/// and `jit_invocation_count` allocations through their `Arc`s.
#[derive(Clone)]
pub struct Chunk {
    /// The instruction sequence.
    pub ops: Vec<Op>,
    /// Shared cache cell for this chunk's JIT result; starts out as `None`.
    pub(crate) jit_lock: Arc<JitChunkCache>,
    /// Shared counter that starts at zero.
    ///
    /// NOTE(review): presumably bumped once per execution to decide when the
    /// chunk is hot enough to JIT — confirm against the VM.
    pub(crate) jit_invocation_count: Arc<AtomicU32>,
}
impl Default for Chunk {
fn default() -> Self {
Self {
ops: Vec::new(),
jit_lock: Arc::new(Mutex::new(None)),
jit_invocation_count: Arc::new(AtomicU32::new(0)),
}
}
}
impl fmt::Debug for Chunk {
    /// Formats the chunk as a struct named `Chunk`.
    ///
    /// The JIT cache is rendered as a fixed placeholder string instead of being
    /// locked and inspected; the invocation counter is read with a relaxed load.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("Chunk");
        out.field("ops", &self.ops);
        out.field("jit_lock", &"<cached JIT>");
        let invocations = self.jit_invocation_count.load(Ordering::Relaxed);
        out.field("jit_invocation_count", &invocations);
        out.finish()
    }
}
impl Chunk {
    /// Wraps `ops` in a new chunk with a fresh, empty JIT cache cell and an
    /// invocation counter starting at zero.
    pub fn from_ops(ops: Vec<Op>) -> Self {
        let jit_lock = Arc::new(Mutex::new(None));
        let jit_invocation_count = Arc::new(AtomicU32::new(0));
        Self {
            ops,
            jit_lock,
            jit_invocation_count,
        }
    }
}
/// Append-only string interner: each distinct string gets a stable `u32` index.
///
/// Indices are handed out sequentially starting at 0, in first-seen order.
#[derive(Debug, Clone, Default)]
pub struct StringPool {
    /// Interned strings; a string's position in this vector is its index.
    strings: Vec<String>,
    /// Reverse lookup from string contents to its position in `strings`.
    index: HashMap<String, u32>,
}

impl StringPool {
    /// Returns the index of `s`, inserting it first if it has not been seen.
    ///
    /// Interning the same contents twice yields the same index.
    ///
    /// # Panics
    ///
    /// Panics if the pool already holds more entries than fit in a `u32`. The
    /// index is checked rather than truncated, because a wrapped index would
    /// silently alias an unrelated string.
    pub fn intern(&mut self, s: &str) -> u32 {
        // `TryFrom` only joined the prelude in edition 2021; importing it here
        // keeps this method edition-independent.
        use std::convert::TryFrom;

        if let Some(&idx) = self.index.get(s) {
            return idx;
        }
        let idx = u32::try_from(self.strings.len())
            .expect("string pool cannot hold more than u32::MAX entries");
        // Both containers own their copy; build the string once and clone it.
        let owned = s.to_owned();
        self.strings.push(owned.clone());
        self.index.insert(owned, idx);
        idx
    }

    /// Returns the string stored at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` was not returned by [`StringPool::intern`] on this pool
    /// (i.e. it is out of range).
    pub fn get(&self, idx: u32) -> &str {
        // Widening u32 -> usize; lossless on the 32/64-bit targets this can run on.
        &self.strings[idx as usize]
    }
}
/// The full compiled form of a program: chunks per rule kind, user functions,
/// the string pool, and the slot layout used by [`CompiledProgram::init_slots`].
///
/// NOTE(review): the chunk-list descriptions below are inferred from the field
/// names (AWK `BEGIN`/`END`/`BEGINFILE`/`ENDFILE` blocks) — confirm against the
/// compiler that fills them in.
#[derive(Debug, Clone)]
pub struct CompiledProgram {
    /// Presumably one chunk per `BEGIN` block.
    pub begin_chunks: Vec<Chunk>,
    /// Presumably one chunk per `END` block.
    pub end_chunks: Vec<Chunk>,
    /// Presumably one chunk per `BEGINFILE` block.
    pub beginfile_chunks: Vec<Chunk>,
    /// Presumably one chunk per `ENDFILE` block.
    pub endfile_chunks: Vec<Chunk>,
    /// Pattern/action rules.
    pub record_rules: Vec<CompiledRule>,
    /// Compiled functions keyed by a string, presumably the function name.
    pub functions: HashMap<String, CompiledFunc>,
    /// Interned strings referenced by index from the bytecode.
    pub strings: StringPool,
    /// Number of slots allocated by `init_slots`.
    pub slot_count: u16,
    /// Variable name for each slot; entry `i` names the variable that seeds slot `i`.
    pub slot_names: Vec<String>,
    /// Name-to-slot lookup; presumably the inverse of `slot_names` — confirm.
    pub slot_map: HashMap<String, u16>,
    /// Names of array variables.
    /// NOTE(review): how these are indexed is not visible here — confirm.
    pub array_var_names: Vec<String>,
}
impl CompiledProgram {
    /// Builds the initial slot vector for this program.
    ///
    /// The result has exactly `slot_count` entries, all `Value::Uninit`, except
    /// that slot `i` is set to a clone of `vars[slot_names[i]]` when that
    /// variable is present in `vars`.
    ///
    /// `slot_count` and `slot_names` are independent public fields. Names are
    /// paired with slots positionally, so any names beyond `slot_count` are
    /// ignored instead of triggering an out-of-bounds panic.
    pub fn init_slots(&self, vars: &AwkMap<String, Value>) -> Vec<Value> {
        let mut slots = vec![Value::Uninit; usize::from(self.slot_count)];
        for (slot, name) in slots.iter_mut().zip(&self.slot_names) {
            if let Some(v) = vars.get(name) {
                *slot = v.clone();
            }
        }
        slots
    }
}
/// One pattern/action rule: a pattern paired with the chunk for its body.
#[derive(Debug, Clone)]
pub struct CompiledRule {
    /// The rule's pattern.
    pub pattern: CompiledPattern,
    /// Bytecode for the rule's action.
    pub body: Chunk,
    /// NOTE(review): presumably the rule's position in the source program before
    /// any reordering — confirm against the compiler.
    pub original_index: usize,
}
/// One endpoint (`start` or `end`) of a [`CompiledPattern::Range`].
///
/// NOTE(review): variant meanings are inferred from their names and payloads;
/// the `u32` payloads are presumably `StringPool` indices — confirm.
#[derive(Debug, Clone)]
pub enum CompiledRangeEndpoint {
    /// Presumably an endpoint that matches every record.
    Always,
    /// Presumably an endpoint that matches no record.
    Never,
    /// Marker variant; presumably records that the source nested a range pattern
    /// inside a range endpoint, to be reported as an error later.
    NestedRangeError,
    /// Presumably a regular expression identified by index.
    Regexp(u32),
    /// Presumably a pattern matched literally, identified by index.
    LiteralRegexp(u32),
    /// An endpoint evaluated by running a chunk.
    Expr(Chunk),
}
/// The pattern half of a [`CompiledRule`].
///
/// NOTE(review): variant meanings are inferred from their names and payloads;
/// the `u32` payloads are presumably `StringPool` indices — confirm.
#[derive(Debug, Clone)]
pub enum CompiledPattern {
    /// Presumably a rule with no pattern, which matches every record.
    Always,
    /// Presumably a regular expression identified by index.
    Regexp(u32),
    /// Presumably a pattern matched literally, identified by index.
    LiteralRegexp(u32),
    /// A pattern evaluated by running a chunk.
    Expr(Chunk),
    /// A two-endpoint range pattern.
    Range {
        start: CompiledRangeEndpoint,
        end: CompiledRangeEndpoint,
    },
}
/// A compiled function: its parameter names and the chunk for its body.
#[derive(Debug, Clone)]
pub struct CompiledFunc {
    /// Parameter names, presumably in declaration order.
    pub params: Vec<String>,
    /// Bytecode for the function body.
    pub body: Chunk,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runtime::Value;

    /// Builds a program with no chunks, rules or functions whose slots are named
    /// `names`, in order (slot `i` is `names[i]`).
    fn program_with_slots(names: &[&str]) -> CompiledProgram {
        let slot_count = u16::try_from(names.len()).expect("test slot count fits in u16");
        let slot_names: Vec<String> = names.iter().map(|&n| n.to_owned()).collect();
        let slot_map: HashMap<String, u16> = slot_names.iter().cloned().zip(0u16..).collect();
        CompiledProgram {
            begin_chunks: Vec::new(),
            end_chunks: Vec::new(),
            beginfile_chunks: Vec::new(),
            endfile_chunks: Vec::new(),
            record_rules: Vec::new(),
            functions: HashMap::new(),
            strings: StringPool::default(),
            slot_count,
            slot_names,
            slot_map,
            array_var_names: Vec::new(),
        }
    }

    #[test]
    fn string_pool_intern_dedupes() {
        let mut pool = StringPool::default();
        let hello = pool.intern("hello");
        let hello_again = pool.intern("hello");
        let world = pool.intern("world");
        assert_eq!(hello, hello_again);
        assert_ne!(hello, world);
        assert_eq!(pool.get(hello), "hello");
        assert_eq!(pool.get(world), "world");
    }

    #[test]
    fn init_slots_seeds_from_vars_map() {
        let mut vars = AwkMap::default();
        vars.insert("x".into(), Value::Num(7.0));
        let program = program_with_slots(&["x"]);
        let slots = program.init_slots(&vars);
        assert_eq!(slots.len(), 1);
        assert_eq!(slots[0].as_number(), 7.0);
    }

    #[test]
    fn string_pool_intern_preserves_order() {
        let mut pool = StringPool::default();
        let first = pool.intern("first");
        let second = pool.intern("second");
        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(pool.get(first), "first");
        assert_eq!(pool.get(second), "second");
    }

    #[test]
    fn string_pool_many_distinct_strings() {
        let mut pool = StringPool::default();
        let ids: Vec<u32> = (0..32).map(|i| pool.intern(&format!("k{i}"))).collect();
        for (i, id) in ids.iter().copied().enumerate() {
            assert_eq!(pool.get(id), format!("k{i}"));
        }
    }

    #[test]
    fn init_slots_missing_var_uses_empty_string() {
        let program = program_with_slots(&["x", "y"]);
        let mut vars = AwkMap::default();
        vars.insert("x".into(), Value::Num(1.0));
        let slots = program.init_slots(&vars);
        assert_eq!(slots[0].as_number(), 1.0);
        assert_eq!(slots[1].as_str(), "");
    }

    #[test]
    fn string_pool_intern_empty_string() {
        let mut pool = StringPool::default();
        let first = pool.intern("");
        let second = pool.intern("");
        assert_eq!(first, second);
        assert_eq!(pool.get(first), "");
    }

    #[test]
    fn init_slots_preserves_empty_string_value() {
        let program = program_with_slots(&["z"]);
        let mut vars = AwkMap::default();
        vars.insert("z".into(), Value::Str(String::new()));
        let slots = program.init_slots(&vars);
        assert_eq!(slots[0].as_str(), "");
    }

    #[test]
    fn redir_kind_and_getline_source_variants_distinct() {
        assert_ne!(RedirKind::Stdout, RedirKind::Append);
        assert_ne!(GetlineSource::File, GetlineSource::Pipe);
    }

    #[test]
    fn chunk_from_ops_empty_and_with_push() {
        let empty = Chunk::from_ops(Vec::new());
        assert!(empty.ops.is_empty());
        let two = Chunk::from_ops(vec![Op::PushNum(2.5), Op::PushNum(1.0)]);
        assert_eq!(two.ops.len(), 2);
        assert!(matches!(two.ops[0], Op::PushNum(n) if n == 2.5));
    }

    #[test]
    fn compiled_range_endpoint_nested_range_error_marker() {
        assert!(matches!(
            CompiledRangeEndpoint::NestedRangeError,
            CompiledRangeEndpoint::NestedRangeError
        ));
    }

    #[test]
    fn compiled_pattern_range_shape() {
        let pattern = CompiledPattern::Range {
            start: CompiledRangeEndpoint::Always,
            end: CompiledRangeEndpoint::Regexp(3),
        };
        assert!(matches!(
            pattern,
            CompiledPattern::Range {
                start: CompiledRangeEndpoint::Always,
                end: CompiledRangeEndpoint::Regexp(3),
            }
        ));
    }
}