use std::borrow::Cow;
use std::cell::Cell;
use std::cmp::Ordering;
use std::collections::HashMap;
/// Hash map alias used throughout this file for awk variables and array contents;
/// backed by `rustc_hash::FxHashMap`.
pub type AwkMap<K, V> = rustc_hash::FxHashMap<K, V>;
use socket2::{Domain, Socket, Type};
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, TcpStream, ToSocketAddrs, UdpSocket};
use std::path::Path;
use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use crate::bignum::value_to_mpfr;
use crate::bytecode::CompiledProgram;
use crate::error::{Error, Result};
use gettext::Catalog;
use rug::float::Round;
use rug::ops::Pow as _;
use rug::Float;
thread_local! {
// Per-thread flag (initially `false`). When set, `parse_number` routes its input
// through `parse_number_strtonum`, which recognizes `0x…` and leading-zero octal forms.
static NON_DECIMAL_PARSE: Cell<bool> = const { Cell::new(false) };
}
/// Sets the current thread's non-decimal numeric parsing flag.
///
/// When `enabled` is `true`, string-to-number conversion in this file accepts
/// hexadecimal (`0x…`) and octal (leading `0`) forms.
pub fn set_numeric_parse_mode(enabled: bool) {
    NON_DECIMAL_PARSE.with(|flag| flag.set(enabled));
}
#[inline]
pub fn numeric_parse_mode() -> bool {
NON_DECIMAL_PARSE.with(|c| c.get())
}
use memchr::memmem;
use regex::bytes::Regex as BytesRegex;
use regex::{Regex, RegexBuilder};
// Initial byte capacity of `Runtime::print_buf` as allocated by `Runtime::new` (512 KiB).
const DEFAULT_PRINT_BUF_CAPACITY: usize = 512 * 1024;
/// Buffered reader over a boxed `Read + Send` source, shared via `Arc<Mutex<..>>`;
/// this is the type stored in `Runtime::input_reader`.
pub(crate) type SharedInputReader = Arc<Mutex<BufReader<Box<dyn Read + Send>>>>;
/// Default MPFR precision in bits; `Runtime::mpfr_prec_bits` falls back to this value
/// when no valid `PROCINFO["prec"]` override applies.
pub const MPFR_PREC: u32 = 256;
/// Compares two strings for ordering.
///
/// On Unix both strings are converted to C strings and compared with `strcoll`.
/// If either string cannot be converted (it contains an interior NUL), or on
/// non-Unix targets, the comparison falls back to `str::cmp`.
pub fn awk_locale_str_cmp(a: &str, b: &str) -> Ordering {
    #[cfg(unix)]
    {
        use std::ffi::CString;
        if let (Ok(lhs), Ok(rhs)) = (CString::new(a), CString::new(b)) {
            // SAFETY: both pointers come from live `CString`s, so they are valid,
            // NUL-terminated C strings for the duration of the call.
            let rc = unsafe { libc::strcoll(lhs.as_ptr(), rhs.as_ptr()) };
            return rc.cmp(&0);
        }
        a.cmp(b)
    }
    #[cfg(not(unix))]
    {
        a.cmp(b)
    }
}
/// Traversal order selected through `PROCINFO["sorted_in"]`.
///
/// The named variants correspond one-to-one to the `@…` tokens recognized by
/// `parse_sorted_in_at_token`; `CustomFn` carries an identifier-shaped name that
/// did not start with `@`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum SortedInMode {
// No sorting is applied (`@unsorted`, or any unrecognized / empty setting).
Unsorted,
// Order by index, compared as strings.
IndStrAsc,
IndStrDesc,
// Order by index, compared as numbers.
IndNumAsc,
IndNumDesc,
// Order by element value, compared as strings.
ValStrAsc,
ValStrDesc,
// Order by element value, compared as numbers.
ValNumAsc,
ValNumDesc,
// Order by element type rank first (see `val_type_rank`), then by string value.
ValTypeAsc,
ValTypeDesc,
// Name of a user function; `sort_for_in_keys` leaves the keys untouched for this mode.
CustomFn(String),
}
/// Returns `true` when `s` is shaped like an identifier: a leading ASCII letter
/// or `_`, followed only by ASCII letters, digits, or `_`. The empty string is
/// rejected.
fn is_sorted_in_user_fn_name(s: &str) -> bool {
    // Any non-ASCII character encodes to bytes >= 0x80, which fail the ASCII
    // class checks below, so a byte-level scan gives the same answer as a
    // char-level one.
    match s.as_bytes().split_first() {
        Some((&head, tail)) => {
            (head.is_ascii_alphabetic() || head == b'_')
                && tail
                    .iter()
                    .all(|&b| b.is_ascii_alphanumeric() || b == b'_')
        }
        None => false,
    }
}
fn parse_sorted_in_at_token(t: &str) -> Option<SortedInMode> {
match t {
"@unsorted" => Some(SortedInMode::Unsorted),
"@ind_str_asc" => Some(SortedInMode::IndStrAsc),
"@ind_str_desc" => Some(SortedInMode::IndStrDesc),
"@ind_num_asc" => Some(SortedInMode::IndNumAsc),
"@ind_num_desc" => Some(SortedInMode::IndNumDesc),
"@val_str_asc" => Some(SortedInMode::ValStrAsc),
"@val_str_desc" => Some(SortedInMode::ValStrDesc),
"@val_num_asc" => Some(SortedInMode::ValNumAsc),
"@val_num_desc" => Some(SortedInMode::ValNumDesc),
"@val_type_asc" => Some(SortedInMode::ValTypeAsc),
"@val_type_desc" => Some(SortedInMode::ValTypeDesc),
_ => None,
}
}
/// Resolves the traversal order requested via `PROCINFO["sorted_in"]`.
///
/// Returns [`SortedInMode::Unsorted`] when `rt.posix` is set, when `PROCINFO`
/// is not an array, when the entry is missing or blank, or when the value is
/// neither a known `@…` token nor an identifier-shaped name. An unknown `@…`
/// token prints a warning to stderr once per runtime (tracked by
/// `rt.sorted_in_warned`).
pub(crate) fn sorted_in_mode(rt: &Runtime) -> SortedInMode {
    if rt.posix {
        return SortedInMode::Unsorted;
    }
    let Some(Value::Array(procinfo)) = rt.get_global_var("PROCINFO") else {
        return SortedInMode::Unsorted;
    };
    let Some(setting) = procinfo.get("sorted_in") else {
        return SortedInMode::Unsorted;
    };
    let s = setting.as_str();
    let token = s.trim();
    if token.is_empty() {
        return SortedInMode::Unsorted;
    }
    if !token.starts_with('@') {
        // Anything not starting with '@' is only meaningful as a function name.
        return if is_sorted_in_user_fn_name(token) {
            SortedInMode::CustomFn(token.to_string())
        } else {
            SortedInMode::Unsorted
        };
    }
    if let Some(mode) = parse_sorted_in_at_token(token) {
        return mode;
    }
    // `replace` returns the previous flag value, so the warning fires only once.
    if !rt.sorted_in_warned.replace(true) {
        eprintln!(
            "awkrs: PROCINFO[\"sorted_in\"]={s:?}: unknown @… token (expected @ind_* / @val_* / @unsorted)"
        );
    }
    SortedInMode::Unsorted
}
/// Rank used by the `@val_type_*` orderings: uninitialized values first, then
/// numbers, then strings (including regexp values), then sub-arrays.
#[inline]
fn val_type_rank(v: &Value) -> u8 {
    match v {
        Value::Array(_) => 3,
        Value::Regexp(_) | Value::StrLit(_) | Value::Str(_) => 2,
        Value::Mpfr(_) | Value::Num(_) => 1,
        Value::Uninit => 0,
    }
}
/// Sorts `keys` in place according to `mode`.
///
/// * `keys` – the array indices to order.
/// * `arr` – the array the keys belong to; consulted by the value-based modes.
///   A key missing from `arr` is treated as an empty string / `0.0` / rank 0.
/// * `mode` – requested ordering. `Unsorted` and `CustomFn` leave `keys` untouched
///   here.
///
/// Numeric comparisons place NaN after every other number (and treat two NaNs as
/// equal). Falling back to `Ordering::Equal` whenever `partial_cmp` fails makes NaN
/// "equal" to everything, which is not a total order; `sort_by` is documented to
/// possibly panic or produce an unspecified order in that case.
pub(crate) fn sort_for_in_keys(
    keys: &mut [String],
    arr: &AwkMap<String, Value>,
    mode: SortedInMode,
) {
    use SortedInMode::*;

    /// Total (pre)order over `f64`: the usual order for comparable values, NaN last.
    fn cmp_num(a: f64, b: f64) -> Ordering {
        a.partial_cmp(&b)
            .unwrap_or_else(|| a.is_nan().cmp(&b.is_nan()))
    }

    // String / numeric view of the element stored under a key.
    let val_str = |k: &String| arr.get(k).map(|v| v.as_str()).unwrap_or_default();
    let val_num = |k: &String| arr.get(k).map(|v| v.as_number()).unwrap_or(0.0);
    // Ascending type-then-string comparison; swap the arguments for descending.
    let by_type = |ka: &String, kb: &String| {
        let va = arr.get(ka.as_str());
        let vb = arr.get(kb.as_str());
        let ra = va.map(val_type_rank).unwrap_or(0);
        let rb = vb.map(val_type_rank).unwrap_or(0);
        ra.cmp(&rb).then_with(|| {
            let sa = va.map(|v| v.as_str()).unwrap_or_default();
            let sb = vb.map(|v| v.as_str()).unwrap_or_default();
            awk_locale_str_cmp(&sa, &sb)
        })
    };

    match mode {
        Unsorted | CustomFn(_) => {}
        IndStrAsc => keys.sort(),
        IndStrDesc => keys.sort_by(|a, b| b.cmp(a)),
        IndNumAsc => keys.sort_by(|a, b| cmp_num(parse_number(a), parse_number(b))),
        IndNumDesc => keys.sort_by(|a, b| cmp_num(parse_number(b), parse_number(a))),
        ValStrAsc => keys.sort_by(|ka, kb| awk_locale_str_cmp(&val_str(ka), &val_str(kb))),
        ValStrDesc => keys.sort_by(|ka, kb| awk_locale_str_cmp(&val_str(kb), &val_str(ka))),
        ValNumAsc => keys.sort_by(|ka, kb| cmp_num(val_num(ka), val_num(kb))),
        ValNumDesc => keys.sort_by(|ka, kb| cmp_num(val_num(kb), val_num(ka))),
        ValTypeAsc => keys.sort_by(|ka, kb| by_type(ka, kb)),
        ValTypeDesc => keys.sort_by(|ka, kb| by_type(kb, ka)),
    }
}
#[cfg(unix)]
fn wait_fd_read_timeout(fd: std::os::unix::io::RawFd, timeout_ms: i32) -> crate::error::Result<()> {
if timeout_ms <= 0 {
return Ok(());
}
let mut fds = libc::pollfd {
fd,
events: libc::POLLIN,
revents: 0,
};
let rc = unsafe { libc::poll(&mut fds, 1, timeout_ms) };
if rc < 0 {
return Err(crate::error::Error::Io(std::io::Error::last_os_error()));
}
if rc == 0 {
return Err(crate::error::Error::Io(std::io::Error::new(
std::io::ErrorKind::TimedOut,
"read timeout (PROCINFO[\"READ_TIMEOUT\"])",
)));
}
Ok(())
}
/// Converts `v` to an MPFR `Float` with the given precision (bits) and rounding
/// mode. Thin wrapper that forwards to `crate::bignum::value_to_mpfr`.
#[inline]
pub fn value_to_float(v: &Value, prec: u32, round: Round) -> Float {
value_to_mpfr(v, prec, round)
}
/// Applies arithmetic operator `op` to `old` and `rhs` and returns the result.
///
/// * `use_mpfr == false` – both operands are converted with `as_number` and the
///   result is a `Value::Num`.
/// * `use_mpfr == true` – both operands are converted with `value_to_mpfr` using
///   the runtime's precision and rounding mode; the result is a `Value::Mpfr`.
///
/// # Errors
/// * Either operand is an array.
/// * `Div` with a zero divisor ("division by zero attempted").
/// * Any operator other than `Add`, `Sub`, `Mul`, `Div`, `Mod`, `Pow`.
///
/// NOTE(review): `Mod` with a zero divisor is not rejected here, unlike `Div` —
/// confirm whether that is intended.
pub fn awk_binop_values(
    op: crate::ast::BinOp,
    old: &Value,
    rhs: &Value,
    use_mpfr: bool,
    rt: &Runtime,
) -> crate::error::Result<Value> {
    use crate::ast::BinOp;
    use crate::error::Error;

    old.reject_if_array_scalar()?;
    rhs.reject_if_array_scalar()?;

    let div_by_zero = || Error::Runtime("division by zero attempted".into());
    let bad_op = || Error::Runtime("invalid compound assignment op".into());

    if use_mpfr {
        let prec = rt.mpfr_prec_bits();
        let round = rt.mpfr_round();
        let lhs = value_to_mpfr(old, prec, round);
        let divisor = value_to_mpfr(rhs, prec, round);
        let result = match op {
            BinOp::Add => Float::with_val_round(prec, &lhs + &divisor, round).0,
            BinOp::Sub => Float::with_val_round(prec, &lhs - &divisor, round).0,
            BinOp::Mul => Float::with_val_round(prec, &lhs * &divisor, round).0,
            BinOp::Div if divisor.is_zero() => return Err(div_by_zero()),
            BinOp::Div => Float::with_val_round(prec, &lhs / &divisor, round).0,
            BinOp::Mod => Float::with_val_round(prec, &lhs % &divisor, round).0,
            BinOp::Pow => Float::with_val_round(prec, lhs.pow(&divisor), round).0,
            _ => return Err(bad_op()),
        };
        return Ok(Value::Mpfr(result));
    }

    let a = old.as_number();
    let b = rhs.as_number();
    let result = match op {
        BinOp::Add => a + b,
        BinOp::Sub => a - b,
        BinOp::Mul => a * b,
        BinOp::Div if b == 0.0 => return Err(div_by_zero()),
        BinOp::Div => a / b,
        BinOp::Mod => a % b,
        BinOp::Pow => a.powf(b),
        _ => return Err(bad_op()),
    };
    Ok(Value::Num(result))
}
/// Parses a `/inet/tcp/<lport>/<host>/<rport>` path into `(lport, host, rport)`.
/// Returns `None` when `path` does not have exactly that shape (see `parse_inet_l4`).
pub fn parse_inet_tcp(path: &str) -> Option<(u16, String, u16)> {
parse_inet_l4(path, "/inet/tcp/")
}
/// Parses a `/inet/udp/<lport>/<host>/<rport>` path into `(lport, host, rport)`.
/// Returns `None` when `path` does not have exactly that shape (see `parse_inet_l4`).
pub fn parse_inet_udp(path: &str) -> Option<(u16, String, u16)> {
parse_inet_l4(path, "/inet/udp/")
}
/// Parses `<prefix><lport>/<host>/<rport>` into `(lport, host, rport)`.
///
/// Returns `None` when `path` lacks `prefix`, when the remainder does not consist
/// of exactly three `/`-separated segments, or when either port is not a valid
/// `u16`. The host segment is taken verbatim (it may be empty).
fn parse_inet_l4(path: &str, prefix: &str) -> Option<(u16, String, u16)> {
    let tail = path.strip_prefix(prefix)?;
    let segments: Vec<&str> = tail.split('/').collect();
    let [local, host, remote] = segments.as_slice() else {
        return None;
    };
    let lport = local.parse().ok()?;
    let rport = remote.parse().ok()?;
    Some((lport, host.to_string(), rport))
}
fn tcp_connect_with_local_port(host: &str, lport: u16, rport: u16) -> Result<TcpStream> {
let mut addrs = format!("{host}:{rport}")
.to_socket_addrs()
.map_err(|e| Error::Runtime(format!("inet resolve `{host}`: {e}")))?;
let addr = addrs
.next()
.ok_or_else(|| Error::Runtime(format!("inet: no address for `{host}:{rport}`")))?;
let domain = match addr {
SocketAddr::V4(_) => Domain::IPV4,
SocketAddr::V6(_) => Domain::IPV6,
};
let socket = Socket::new(domain, Type::STREAM, None)
.map_err(|e| Error::Runtime(format!("inet socket: {e}")))?;
let bind_addr = match addr {
SocketAddr::V4(_) => SocketAddr::from((Ipv4Addr::UNSPECIFIED, lport)),
SocketAddr::V6(_) => SocketAddr::from((Ipv6Addr::UNSPECIFIED, lport)),
};
socket
.bind(&bind_addr.into())
.map_err(|e| Error::Runtime(format!("inet bind local port {lport}: {e}")))?;
socket.set_nonblocking(false).ok();
socket
.connect(&addr.into())
.map_err(|e| Error::Runtime(format!("inet connect `{host}:{rport}`: {e}")))?;
Ok(socket.into())
}
/// A spawned child process together with buffered handles to its standard
/// input and standard output. Stored per key in `Runtime::coproc_handles`.
pub struct CoprocHandle {
// The child process itself.
pub child: Child,
// Buffered writer over the child's stdin.
pub stdin: BufWriter<ChildStdin>,
// Buffered reader over the child's stdout.
pub stdout: BufReader<ChildStdout>,
}
/// A runtime value.
#[derive(Debug, Clone)]
pub enum Value {
// No value assigned: converts to the empty string and to `0.0`.
Uninit,
// String that may still be treated as numeric when it looks like a number
// (see `Value::is_numeric_str`).
Str(String),
// String that `Value::is_numeric_str` never treats as numeric.
StrLit(String),
// Regexp value; carries its text and otherwise converts like a string.
Regexp(String),
// Double-precision number.
Num(f64),
// Arbitrary-precision number (`rug::Float`).
Mpfr(Float),
// Associative array keyed by string.
Array(AwkMap<String, Value>),
}
/// Renders an MPFR value as text using the fixed format `%.6g`, at the value's
/// own precision with round-to-nearest.
///
/// If formatting fails, falls back to the float's `to_string()` output passed
/// through `mpfr_string_trim_trailing_zeros`.
#[inline]
fn mpfr_value_default_display(f: &Float) -> String {
    let args = [Value::Mpfr(f.clone())];
    let mpfr_ctx = Some((f.prec(), Round::Nearest));
    match crate::format::awk_sprintf_with_decimal("%.6g", &args, '.', Some(','), mpfr_ctx) {
        Ok(text) => text,
        Err(_) => crate::bignum::mpfr_string_trim_trailing_zeros(f.to_string()),
    }
}
/// Returns the longest prefix of `s` that parses as an `f64`, or `None` when no
/// prefix does (including when `s` is empty).
///
/// Text accepted by `f64`'s `FromStr` is pure ASCII, so the search is capped at
/// the first non-ASCII byte. That also guarantees every candidate slice ends on
/// a character boundary: slicing at arbitrary byte offsets would panic on input
/// such as `"1é"`.
#[inline]
pub(crate) fn longest_f64_prefix(s: &str) -> Option<&str> {
    let ascii_len = s
        .bytes()
        .position(|b| !b.is_ascii())
        .unwrap_or(s.len());
    // Try the longest candidate first so the first hit is the longest match.
    (1..=ascii_len)
        .rev()
        .map(|end| &s[..end])
        .find(|prefix| prefix.parse::<f64>().is_ok())
}
impl Value {
#[inline]
pub fn reject_if_array_scalar(&self) -> Result<()> {
if matches!(self, Value::Array(_)) {
return Err(Error::Runtime(
"attempt to use an array in a scalar context".into(),
));
}
Ok(())
}
pub fn as_str(&self) -> String {
match self {
Value::Uninit => String::new(),
Value::Str(s) | Value::StrLit(s) => s.clone(),
Value::Regexp(s) => s.clone(),
Value::Num(n) => format_number(*n),
Value::Mpfr(f) => mpfr_value_default_display(f),
Value::Array(_) => String::new(),
}
}
#[inline]
pub fn as_str_cow(&self) -> Cow<'_, str> {
match self {
Value::Uninit => Cow::Borrowed(""),
Value::Str(s) | Value::StrLit(s) => Cow::Borrowed(s.as_str()),
Value::Regexp(s) => Cow::Borrowed(s.as_str()),
Value::Num(n) => Cow::Owned(format_number(*n)),
Value::Mpfr(f) => Cow::Owned(mpfr_value_default_display(f)),
Value::Array(_) => Cow::Borrowed(""),
}
}
#[inline]
#[allow(dead_code)]
pub fn str_ref(&self) -> Option<&str> {
match self {
Value::Str(s) | Value::StrLit(s) => Some(s),
Value::Regexp(s) => Some(s),
_ => None,
}
}
pub fn write_to(&self, buf: &mut Vec<u8>) {
match self {
Value::Uninit => {}
Value::Str(s) | Value::StrLit(s) => buf.extend_from_slice(s.as_bytes()),
Value::Regexp(s) => buf.extend_from_slice(s.as_bytes()),
Value::Num(n) => {
use std::io::Write;
let n = *n;
if n.fract() == 0.0 && n.abs() < 1e15 {
let _ = write!(buf, "{}", n as i64);
} else {
let _ = write!(buf, "{n}");
}
}
Value::Mpfr(f) => buf.extend_from_slice(mpfr_value_default_display(f).as_bytes()),
Value::Array(_) => {}
}
}
pub fn as_number(&self) -> f64 {
match self {
Value::Uninit => 0.0,
Value::Num(n) => *n,
Value::Str(s) | Value::StrLit(s) => parse_number(s),
Value::Regexp(s) => parse_number(s),
Value::Mpfr(f) => f.to_f64(),
Value::Array(_) => 0.0,
}
}
pub fn truthy(&self) -> bool {
match self {
Value::Uninit => false,
Value::Num(n) => *n != 0.0,
Value::Str(s) | Value::StrLit(s) => {
!s.is_empty() && s.parse::<f64>().map(|n| n != 0.0).unwrap_or(true)
}
Value::Regexp(s) => !s.is_empty(),
Value::Mpfr(f) => !f.is_zero(),
Value::Array(a) => !a.is_empty(),
}
}
pub fn truthy_cond(&self) -> crate::error::Result<bool> {
self.reject_if_array_scalar()?;
Ok(match self {
Value::Uninit => false,
Value::Num(n) => *n != 0.0,
Value::Str(s) | Value::StrLit(s) => {
!s.is_empty() && s.parse::<f64>().map(|n| n != 0.0).unwrap_or(true)
}
Value::Regexp(s) => !s.is_empty(),
Value::Mpfr(f) => !f.is_zero(),
Value::Array(_) => unreachable!(),
})
}
#[inline]
pub fn into_string(self) -> String {
match self {
Value::Uninit => String::new(),
Value::Str(s) | Value::StrLit(s) => s,
Value::Regexp(s) => s,
Value::Num(n) => format_number(n),
Value::Mpfr(f) => mpfr_value_default_display(&f),
Value::Array(_) => String::new(),
}
}
#[inline]
pub fn append_to_string(&self, buf: &mut String) {
match self {
Value::Uninit => {}
Value::Str(s) | Value::StrLit(s) => buf.push_str(s),
Value::Regexp(s) => buf.push_str(s),
Value::Num(n) => {
use std::fmt::Write;
let n = *n;
if n.fract() == 0.0 && n.abs() < 1e15 {
let _ = write!(buf, "{}", n as i64);
} else {
let _ = write!(buf, "{n}");
}
}
Value::Mpfr(f) => buf.push_str(&mpfr_value_default_display(f)),
Value::Array(_) => {}
}
}
pub fn is_numeric_str(&self) -> bool {
match self {
Value::Uninit => true,
Value::Num(_) => true,
Value::Mpfr(_) => true,
Value::StrLit(_) => false,
Value::Str(s) => {
let t = s.trim();
!t.is_empty() && longest_f64_prefix(t).is_some()
}
Value::Regexp(_) => false,
Value::Array(_) => false,
}
}
}
/// Formats a number as text: values with no fractional part and magnitude
/// below `1e15` print as integers; everything else uses `f64`'s `Display`.
#[inline]
fn format_number(n: f64) -> String {
    let integral = n.fract() == 0.0 && n.abs() < 1e15;
    if integral {
        (n as i64).to_string()
    } else {
        n.to_string()
    }
}
/// Converts text to a number, recognizing non-decimal forms.
///
/// After trimming: empty input is `0.0`; a `0x`/`0X` prefix selects hexadecimal
/// (parsed as `u64`); a leading `0` on a string longer than one character with
/// no `.`, `e`, or `E` selects octal (parsed as `i64`). A hex or octal string
/// that fails to parse yields `0.0`. Anything else uses the longest prefix that
/// parses as `f64`, or `0.0` when there is none.
#[inline]
fn parse_number_strtonum(s: &str) -> f64 {
    let t = s.trim();
    if t.is_empty() {
        return 0.0;
    }
    if let Some(hex_digits) = t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) {
        return u64::from_str_radix(hex_digits, 16).map_or(0.0, |v| v as f64);
    }
    let has_float_marker = t.contains(|c: char| matches!(c, '.' | 'e' | 'E'));
    if t.len() > 1 && t.starts_with('0') && !has_float_marker {
        return i64::from_str_radix(t, 8).map_or(0.0, |v| v as f64);
    }
    match longest_f64_prefix(t) {
        Some(prefix) => prefix.parse::<f64>().unwrap_or(0.0),
        None => 0.0,
    }
}
/// Converts text to a number.
///
/// Leading and trailing whitespace is ignored and blank input is `0.0`. When
/// the thread's non-decimal parse mode is on, conversion is delegated to
/// `parse_number_strtonum`. Otherwise a plain signed decimal integer is taken
/// directly, and anything else uses the longest prefix that parses as `f64`
/// (`0.0` when there is none).
#[inline]
fn parse_number(s: &str) -> f64 {
    let trimmed = s.trim();
    if trimmed.is_empty() {
        return 0.0;
    }
    if numeric_parse_mode() {
        return parse_number_strtonum(trimmed);
    }
    match parse_ascii_integer(trimmed) {
        Some(int) => int as f64,
        None => longest_f64_prefix(trimmed)
            .and_then(|prefix| prefix.parse::<f64>().ok())
            .unwrap_or(0.0),
    }
}
/// Parses an optionally signed run of ASCII digits into an `i64`.
///
/// Returns `None` for empty input, a lone sign, any non-digit byte, or when the
/// magnitude does not fit in `i64` (so `i64::MIN` itself is rejected).
#[inline]
fn parse_ascii_integer(s: &str) -> Option<i64> {
    let (negative, digits) = match s.as_bytes() {
        [b'-', rest @ ..] => (true, rest),
        [b'+', rest @ ..] => (false, rest),
        all => (false, all),
    };
    if digits.is_empty() {
        return None;
    }
    let magnitude = digits.iter().try_fold(0i64, |acc, &d| {
        if !d.is_ascii_digit() {
            return None;
        }
        acc.checked_mul(10)?.checked_add((d - b'0') as i64)
    })?;
    Some(if negative { -magnitude } else { magnitude })
}
/// Splits `record` into fields where each field is one match of the regex
/// `fpat`, storing the byte ranges of the matches in `field_ranges`.
///
/// `field_ranges` is cleared first. Returns `false` (leaving it empty) when
/// `fpat` is not a valid regex, `true` otherwise. `ignore_case` makes the match
/// case-insensitive. The regex is compiled on every call.
fn split_fields_fpat(
    record: &str,
    fpat: &str,
    field_ranges: &mut Vec<(u32, u32)>,
    ignore_case: bool,
) -> bool {
    field_ranges.clear();
    let Ok(re) = RegexBuilder::new(fpat)
        .case_insensitive(ignore_case)
        .build()
    else {
        return false;
    };
    field_ranges.extend(
        re.find_iter(record)
            .map(|m| (m.start() as u32, m.end() as u32)),
    );
    true
}
/// Splits `record` into fixed-width fields, storing byte ranges in `field_ranges`.
///
/// `field_ranges` is cleared first. Each width consumes that many bytes, clamped
/// to the end of the record; the last width always extends to the end of the
/// record. Splitting stops as soon as the record is exhausted, so fewer ranges
/// than widths may be produced. With no widths, no ranges are produced.
///
/// The end offset is computed with a saturating add so that an extremely large
/// width cannot overflow `usize`.
fn split_fields_fieldwidths(record: &str, widths: &[usize], field_ranges: &mut Vec<(u32, u32)>) {
    field_ranges.clear();
    let Some(last_idx) = widths.len().checked_sub(1) else {
        return;
    };
    let total = record.len();
    let mut pos = 0usize;
    for (i, &width) in widths.iter().enumerate() {
        let end = if i == last_idx {
            total
        } else {
            pos.saturating_add(width).min(total)
        };
        field_ranges.push((pos as u32, end as u32));
        pos = end;
        if pos >= total {
            break;
        }
    }
}
/// Splits a CSV `record` into fields, storing byte ranges in `field_ranges`.
///
/// `field_ranges` is cleared first. Fields are separated by `,`. A field that
/// starts with `"` runs to the matching closing quote, with `""` inside treated
/// as an escaped quote; its range excludes the surrounding quotes but still
/// contains any doubled quotes verbatim. A record ending in `,` gets a final
/// empty field. An empty record has no fields.
///
/// Every field, including an empty one that sits directly on a comma, goes
/// through the same "consume separator, then add the trailing empty field"
/// step, so `","` yields two fields and `"a,,"` yields three.
fn split_csv_gawk_fields(record: &str, field_ranges: &mut Vec<(u32, u32)>) {
    field_ranges.clear();
    let bytes = record.as_bytes();
    let n = bytes.len();
    let mut i = 0usize;
    while i < n {
        if bytes[i] == b'"' {
            // Quoted field: skip the opening quote, scan to the closing one.
            i += 1;
            let val_start = i;
            while i < n {
                if bytes[i] == b'"' {
                    if i + 1 < n && bytes[i + 1] == b'"' {
                        // Doubled quote: part of the field value.
                        i += 2;
                        continue;
                    }
                    break;
                }
                i += 1;
            }
            field_ranges.push((val_start as u32, i as u32));
            if i < n && bytes[i] == b'"' {
                i += 1;
            }
        } else {
            // Unquoted field: runs to the next comma (possibly zero-length).
            let val_start = i;
            while i < n && bytes[i] != b',' {
                i += 1;
            }
            field_ranges.push((val_start as u32, i as u32));
        }
        if i < n && bytes[i] == b',' {
            i += 1;
            if i == n {
                // Trailing separator: the record ends with an empty field.
                field_ranges.push((n as u32, n as u32));
            }
        }
    }
}
/// Splits `record` into fields according to the separator `fs`, storing byte
/// ranges in `field_ranges` (which is cleared first).
///
/// * Empty `record` – no fields, whatever `fs` is.
/// * Empty `fs` – one field per character, or per byte when
///   `characters_as_bytes` is set.
/// * `fs == " "` – fields are runs of non-whitespace; leading and trailing ASCII
///   whitespace is ignored.
/// * Single-byte `fs` – split on that literal byte.
/// * Anything else – `fs` is compiled as a regex (case-insensitive when
///   `ignore_case` is set) and the record is split on its matches. If `fs` is not
///   a valid regex it is used as a literal separator string instead.
///
/// The regex is compiled on every call.
fn split_fields_into(
    record: &str,
    fs: &str,
    field_ranges: &mut Vec<(u32, u32)>,
    ignore_case: bool,
    characters_as_bytes: bool,
) {
    field_ranges.clear();
    // An empty record has zero fields. Without this guard the single-byte and
    // regex branches would each push one empty field.
    if record.is_empty() {
        return;
    }
    let want = (record.len() / 16).saturating_add(4).clamp(8, 2048);
    if field_ranges.capacity() < want {
        // The vector is empty here, so `reserve(want)` guarantees capacity >= want.
        field_ranges.reserve(want);
    }
    let bytes = record.as_bytes();
    if fs.is_empty() {
        if characters_as_bytes {
            field_ranges.extend((0..bytes.len()).map(|i| (i as u32, (i + 1) as u32)));
        } else {
            field_ranges.extend(
                record
                    .char_indices()
                    .map(|(i, c)| (i as u32, (i + c.len_utf8()) as u32)),
            );
        }
    } else if fs == " " {
        let len = bytes.len();
        let mut i = 0;
        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
        while i < len {
            let start = i;
            while i < len && !bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            field_ranges.push((start as u32, i as u32));
            while i < len && bytes[i].is_ascii_whitespace() {
                i += 1;
            }
        }
    } else if fs.len() == 1 {
        let sep = fs.as_bytes()[0];
        let mut start = 0;
        for (i, &b) in bytes.iter().enumerate() {
            if b == sep {
                field_ranges.push((start as u32, i as u32));
                start = i + 1;
            }
        }
        field_ranges.push((start as u32, bytes.len() as u32));
    } else {
        match RegexBuilder::new(fs)
            .case_insensitive(ignore_case)
            .build()
        {
            Ok(re) => {
                let mut last = 0;
                for m in re.find_iter(record) {
                    field_ranges.push((last as u32, m.start() as u32));
                    last = m.end();
                }
                field_ranges.push((last as u32, record.len() as u32));
            }
            Err(_) => {
                // Not a valid regex: fall back to splitting on the literal text.
                let mut pos = 0;
                for part in record.split(fs) {
                    let end = pos + part.len();
                    field_ranges.push((pos as u32, end as u32));
                    pos = end + fs.len();
                }
            }
        }
    }
}
/// Mutable interpreter state: variables, the current record and its fields,
/// open I/O handles, caches, and mode flags.
pub struct Runtime {
// Variables owned by this runtime, keyed by name.
pub vars: AwkMap<String, Value>,
// Shared globals; `None` from `Runtime::new`, `Some(..)` from `for_parallel_worker`.
pub global_readonly: Option<Arc<AwkMap<String, Value>>>,
pub fields: Vec<String>,
// Field byte ranges as `(start, end)` pairs, the shape produced by the
// `split_*` helpers in this file.
pub field_ranges: Vec<(u32, u32)>,
pub fields_dirty: bool,
pub fields_pending_split: bool,
pub cached_fs: String,
pub record: String,
pub line_buf: Vec<u8>,
pub nr: f64,
pub fnr: f64,
pub filename: String,
pub exit_pending: bool,
pub exit_code: i32,
pub input_reader: Option<SharedInputReader>,
pub file_handles: HashMap<String, BufReader<File>>,
pub dir_read: HashMap<String, (Vec<String>, usize)>,
pub output_handles: HashMap<String, BufWriter<File>>,
pub pipe_stdin: HashMap<String, BufWriter<ChildStdin>>,
pub pipe_children: HashMap<String, Child>,
pub coproc_handles: HashMap<String, CoprocHandle>,
pub inet_tcp_read: HashMap<String, BufReader<TcpStream>>,
pub inet_tcp_write: HashMap<String, TcpStream>,
pub inet_udp: HashMap<String, UdpSocket>,
pub gettext_dir: String,
// When false, `mpfr_prec_bits` ignores `PROCINFO["prec"]` and returns `MPFR_PREC`.
pub bignum: bool,
pub rand_seed: u64,
pub numeric_decimal: char,
pub numeric_thousands_sep: Option<char>,
pub slots: Vec<Value>,
// Compiled-regex caches filled by `ensure_regex`: case-sensitive (`_cs`) and
// case-insensitive (`_ci`), keyed by pattern text.
pub regex_cache_cs: AwkMap<String, Regex>,
pub regex_cache_ci: AwkMap<String, Regex>,
// Substring finders cached by `literal_substring_finder`, keyed by pattern text.
pub memmem_finder_cache: AwkMap<String, memmem::Finder<'static>>,
pub print_buf: Vec<u8>,
pub ofs_bytes: Vec<u8>,
pub ors_bytes: Vec<u8>,
pub vm_stack: Vec<Value>,
pub jit_slot_buf: Vec<f64>,
pub csv_mode: bool,
// RS text that `rs_regex_bytes` was last derived from (see `ensure_rs_regex_bytes`).
pub rs_pattern_for_regex: String,
// Compiled RS regex; `None` when RS is empty or a single character.
pub rs_regex_bytes: Option<BytesRegex>,
// When true, `require_unsandboxed_io` returns an error.
pub sandbox: bool,
pub characters_as_bytes: bool,
// When true, `sorted_in_mode` always reports `Unsorted`.
pub posix: bool,
pub traditional: bool,
pub jit_enabled: bool,
pub gettext_catalogs: AwkMap<String, Arc<Catalog>>,
// Copy of the compiled program's `slot_map`, taken in `symtab_mirror_refresh`.
pub symtab_slot_map: HashMap<String, u16>,
pub profile_record_hits: Vec<u64>,
// Set once the "unknown @… token" warning has been printed by `sorted_in_mode`.
pub sorted_in_warned: Cell<bool>,
// Raw OS error code recorded by `set_errno_io`; exported as `PROCINFO["errno"]`.
pub errno_code: i32,
#[cfg(unix)]
pub primary_input_poll_fd: Option<std::os::unix::io::RawFd>,
}
impl Runtime {
/// Creates a runtime with default state.
///
/// Pre-populates `vars` with the built-in variables inserted below (separators,
/// output formats, `ENVIRON` copied from the process environment, and empty
/// `PROCINFO` / `SYMTAB` / `FUNCTAB` arrays). All mode flags start `false`
/// except `jit_enabled`.
pub fn new() -> Self {
let mut vars = AwkMap::default();
vars.insert("OFS".into(), Value::Str(" ".into()));
vars.insert("ORS".into(), Value::Str("\n".into()));
vars.insert("OFMT".into(), Value::Str("%.6g".into()));
vars.insert("CONVFMT".into(), Value::Str("%.6g".into()));
vars.insert("RS".into(), Value::Str("\n".into()));
vars.insert("RT".into(), Value::Str(String::new()));
vars.insert("ERRNO".into(), Value::Str(String::new()));
vars.insert("ARGIND".into(), Value::Num(0.0));
// Snapshot of the process environment at construction time.
let mut environ = AwkMap::default();
for (k, v) in std::env::vars() {
environ.insert(k, Value::Str(v));
}
vars.insert("ENVIRON".into(), Value::Array(environ));
// These arrays start empty; `refresh_special_arrays` fills them in later.
vars.insert("PROCINFO".into(), Value::Array(AwkMap::default()));
vars.insert("SYMTAB".into(), Value::Array(AwkMap::default()));
vars.insert("FUNCTAB".into(), Value::Array(AwkMap::default()));
vars.insert("SUBSEP".into(), Value::Str("\x1c".into()));
vars.insert("FPAT".into(), Value::Str(String::new()));
vars.insert("FIELDWIDTHS".into(), Value::Str(String::new()));
vars.insert("IGNORECASE".into(), Value::Num(0.0));
vars.insert("BINMODE".into(), Value::Num(0.0));
vars.insert("LINT".into(), Value::Num(0.0));
vars.insert("TEXTDOMAIN".into(), Value::Str(String::new()));
Self {
vars,
global_readonly: None,
fields: Vec::new(),
field_ranges: Vec::new(),
fields_dirty: false,
fields_pending_split: false,
cached_fs: " ".into(),
record: String::new(),
line_buf: Vec::with_capacity(256),
nr: 0.0,
fnr: 0.0,
filename: String::new(),
exit_pending: false,
exit_code: 0,
input_reader: None,
inet_tcp_read: HashMap::new(),
inet_tcp_write: HashMap::new(),
inet_udp: HashMap::new(),
gettext_dir: String::new(),
bignum: false,
file_handles: HashMap::new(),
dir_read: HashMap::new(),
output_handles: HashMap::new(),
pipe_stdin: HashMap::new(),
pipe_children: HashMap::new(),
coproc_handles: HashMap::new(),
rand_seed: 1,
numeric_decimal: '.',
// Locale-provided thousands separator, falling back to ','.
numeric_thousands_sep: crate::locale_numeric::thousands_sep_from_locale().or(Some(',')),
slots: Vec::new(),
regex_cache_cs: AwkMap::default(),
regex_cache_ci: AwkMap::default(),
memmem_finder_cache: AwkMap::default(),
print_buf: Vec::with_capacity(DEFAULT_PRINT_BUF_CAPACITY),
// Byte forms of the OFS / ORS defaults inserted above.
ofs_bytes: b" ".to_vec(),
ors_bytes: b"\n".to_vec(),
vm_stack: Vec::with_capacity(64),
jit_slot_buf: Vec::new(),
csv_mode: false,
rs_pattern_for_regex: String::new(),
rs_regex_bytes: None,
sandbox: false,
characters_as_bytes: false,
posix: false,
traditional: false,
jit_enabled: true,
gettext_catalogs: AwkMap::default(),
symtab_slot_map: HashMap::new(),
profile_record_hits: Vec::new(),
sorted_in_warned: Cell::new(false),
errno_code: 0,
#[cfg(unix)]
primary_input_poll_fd: None,
}
}
/// Reports whether the `LINT` variable is currently set to a truthy value.
pub fn lint_runtime_active(&self) -> bool {
    matches!(self.get_global_var("LINT"), Some(v) if v.truthy())
}
/// Prints `msg` as a warning on stderr, but only while lint is active.
pub fn lint_warn(&self, msg: &str) {
    if !self.lint_runtime_active() {
        return;
    }
    eprintln!("awkrs: warning: {msg}");
}
/// Prints a "received negative argument" warning for builtin `name` on stderr.
///
/// Nothing is printed when `x` is NaN. The warning does not depend on `LINT`.
pub fn warn_builtin_negative_arg(&self, name: &str, x: f64) {
    if !x.is_nan() {
        eprintln!("awkrs: warning: {name}: received negative argument {x}");
    }
}
/// Returns the MPFR precision, in bits, to use for arbitrary-precision math.
///
/// Yields `MPFR_PREC` unless `bignum` is enabled and `PROCINFO["prec"]` holds a
/// value in `53..=1_000_000`.
pub fn mpfr_prec_bits(&self) -> u32 {
    if !self.bignum {
        return MPFR_PREC;
    }
    let Some(Value::Array(procinfo)) = self.get_global_var("PROCINFO") else {
        return MPFR_PREC;
    };
    match procinfo.get("prec") {
        Some(v) => {
            let bits = v.as_number() as u32;
            if (53..=1_000_000).contains(&bits) {
                bits
            } else {
                MPFR_PREC
            }
        }
        None => MPFR_PREC,
    }
}
/// Returns the MPFR rounding mode selected by `PROCINFO["roundmode"]`.
///
/// Only the first non-blank character matters, case-insensitively:
/// `Z` → toward zero, `U` → up, `D` → down, `A` → away from zero. `N`, a
/// missing or blank entry, and anything unrecognized select round-to-nearest.
pub fn mpfr_round(&self) -> Round {
    let mode = match self.get_global_var("PROCINFO") {
        Some(Value::Array(procinfo)) => procinfo
            .get("roundmode")
            .map(|v| v.as_str())
            .unwrap_or_default(),
        _ => String::new(),
    };
    let initial = mode.trim().chars().next().map(|c| c.to_ascii_uppercase());
    match initial {
        Some('Z') => Round::Zero,
        Some('U') => Round::Up,
        Some('D') => Round::Down,
        Some('A') => Round::AwayZero,
        _ => Round::Nearest,
    }
}
/// Returns the current `SUBSEP` as a string, or `"\x1c"` when it is not set.
pub fn procinfo_subsep_string(&self) -> String {
    match self.get_global_var("SUBSEP") {
        Some(v) => v.as_str(),
        None => "\x1c".into(),
    }
}
/// Returns the global read timeout in milliseconds, never negative.
///
/// Uses `PROCINFO["READ_TIMEOUT"]` when present; otherwise the value reported
/// by `crate::procinfo::gawk_read_timeout_env()`.
pub fn global_read_timeout_ms(&self) -> i32 {
    let configured = match self.get_global_var("PROCINFO") {
        Some(Value::Array(procinfo)) => procinfo.get("READ_TIMEOUT"),
        _ => None,
    };
    let millis = match configured {
        Some(v) => v.as_number() as i32,
        None => crate::procinfo::gawk_read_timeout_env(),
    };
    millis.max(0)
}
/// Returns the read timeout in milliseconds for one input, never negative.
///
/// Looks up `PROCINFO[input_key SUBSEP "READ_TIMEOUT"]` first and falls back to
/// [`Runtime::global_read_timeout_ms`] when that entry is absent.
pub fn procinfo_read_timeout_ms_for(&self, input_key: &str) -> i32 {
    let sep = self.procinfo_subsep_string();
    let composite = format!("{input_key}{sep}READ_TIMEOUT");
    let per_input = match self.get_global_var("PROCINFO") {
        Some(Value::Array(procinfo)) => procinfo.get(&composite),
        _ => None,
    };
    match per_input {
        Some(v) => (v.as_number() as i32).max(0),
        None => self.global_read_timeout_ms(),
    }
}
/// Reports whether `PROCINFO[input_key SUBSEP "RETRY"]` exists and is truthy.
pub fn procinfo_retry_enabled_for(&self, input_key: &str) -> bool {
    let sep = self.procinfo_subsep_string();
    let composite = format!("{input_key}{sep}RETRY");
    match self.get_global_var("PROCINFO") {
        Some(Value::Array(procinfo)) => procinfo
            .get(&composite)
            .map_or(false, |v| v.truthy()),
        _ => false,
    }
}
/// Returns the `PROCINFO` key prefix for the primary input: the trimmed
/// `filename`, or `"-"` when that is blank.
pub fn primary_input_procinfo_key(&self) -> String {
    match self.filename.trim() {
        "" => "-".into(),
        name => name.to_string(),
    }
}
/// Maps an I/O error to a getline return code.
///
/// Returns `-2.0` when retry is enabled for `input_key` and the error kind is
/// `WouldBlock`, `TimedOut`, or `Interrupted`; `-1.0` in every other case.
pub fn getline_io_return_code(&self, e: &std::io::Error, input_key: &str) -> f64 {
    use std::io::ErrorKind;
    let transient = matches!(
        e.kind(),
        ErrorKind::WouldBlock | ErrorKind::TimedOut | ErrorKind::Interrupted
    );
    if transient && self.procinfo_retry_enabled_for(input_key) {
        -2.0
    } else {
        -1.0
    }
}
pub fn getline_error_code_for_key(&mut self, err: &Error, input_key: &str) -> f64 {
match err {
Error::Io(e) => {
self.set_errno_io(e);
self.getline_io_return_code(e, input_key)
}
_ => {
self.set_errno_str(err.to_string());
-1.0
}
}
}
/// Rebuilds the `PROCINFO`, `FUNCTAB`, and `SYMTAB` arrays, in that order, from
/// the compiled program `cp`. `bin_name` becomes `PROCINFO["program"]`.
pub fn refresh_special_arrays(&mut self, cp: &CompiledProgram, bin_name: &str) {
self.procinfo_refresh(cp, bin_name);
self.functab_refresh(cp);
self.symtab_mirror_refresh(cp);
}
/// Rebuilds `PROCINFO` and stores it in `vars`.
///
/// Starts from a copy of the existing `PROCINFO` entries in `vars`, then
/// overwrites the informational keys written with `insert` below and adds
/// defaults (via `contains_key` checks / `entry().or_insert`) only for keys
/// that are not already present.
fn procinfo_refresh(&mut self, cp: &CompiledProgram, bin_name: &str) {
let mut p = AwkMap::default();
// Carry over whatever is already stored so existing settings survive a refresh.
if let Some(Value::Array(old)) = self.vars.get("PROCINFO") {
for (k, v) in old.iter() {
p.insert(k.clone(), v.clone());
}
}
p.insert(
"version".into(),
Value::Str(env!("CARGO_PKG_VERSION").into()),
);
p.insert("api".into(), Value::Str("awkrs".into()));
p.insert("api_major".into(), Value::Num(4.0));
p.insert("api_minor".into(), Value::Num(1.0));
p.insert("program".into(), Value::Str(bin_name.into()));
p.insert(
"platform".into(),
Value::Str(crate::procinfo::gawk_platform_string().into()),
);
if let Some(pma) = crate::procinfo::AWKRS_PMA_VERSION {
p.insert("pma".into(), Value::Str(pma.into()));
}
p.insert("pid".into(), Value::Num(std::process::id() as f64));
p.insert("errno".into(), Value::Num(self.errno_code as f64));
#[cfg(unix)]
{
// Process identity values queried from libc.
unsafe {
p.insert("ppid".into(), Value::Num(libc::getppid() as f64));
p.insert("uid".into(), Value::Num(libc::getuid() as f64));
p.insert("euid".into(), Value::Num(libc::geteuid() as f64));
p.insert("gid".into(), Value::Num(libc::getgid() as f64));
p.insert("egid".into(), Value::Num(libc::getegid() as f64));
p.insert("pgrpid".into(), Value::Num(libc::getpgrp() as f64));
}
for (k, v) in crate::procinfo::supplementary_group_entries() {
p.insert(k, Value::Num(v));
}
}
p.insert(
"FS".into(),
Value::Str(crate::procinfo::field_split_mode(self).into()),
);
p.insert("strftime".into(), Value::Str("%c".into()));
// `PROCINFO["argv"]` mirrors the process's own command line, indexed from "0".
let mut argv_proc = AwkMap::default();
for (i, a) in std::env::args().enumerate() {
argv_proc.insert(i.to_string(), Value::Str(a));
}
p.insert("argv".into(), Value::Array(argv_proc));
p.insert(
"mb_cur_max".into(),
Value::Num(crate::procinfo::mb_cur_max_value()),
);
// Defaults below are added only when the key is not already present.
if self.bignum && !p.contains_key("prec") {
p.insert("prec".into(), Value::Num(MPFR_PREC as f64));
}
if !p.contains_key("roundmode") {
p.insert("roundmode".into(), Value::Str("N".into()));
}
if !p.contains_key("READ_TIMEOUT") {
let env_to = crate::procinfo::gawk_read_timeout_env();
if env_to > 0 {
p.insert("READ_TIMEOUT".into(), Value::Num(env_to as f64));
}
}
if self.bignum {
p.insert(
"gmp_version".into(),
Value::Str(crate::procinfo::gmp_version_string()),
);
p.insert(
"mpfr_version".into(),
Value::Str(crate::procinfo::mpfr_version_string()),
);
p.insert(
"prec_min".into(),
Value::Num(gmp_mpfr_sys::mpfr::PREC_MIN as f64),
);
p.insert(
"prec_max".into(),
Value::Num(gmp_mpfr_sys::mpfr::PREC_MAX as f64),
);
}
let binmode = self
.get_global_var("BINMODE")
.map(|v| v.as_number())
.unwrap_or(0.0);
p.insert("awkrs_binmode".into(), Value::Num(binmode));
p.entry("nproc".into())
.or_insert(Value::Num(std::thread::available_parallelism()
.map(|n| n.get() as f64)
.unwrap_or(1.0)));
p.entry("sorted_in".into())
.or_insert(Value::Str(String::new()));
// Without bignum the default precision entry is 53.
if !self.bignum {
p.entry("prec".into()).or_insert(Value::Num(53.0));
}
crate::procinfo::merge_procinfo_identifiers(&mut p, cp);
let sep = self
.get_global_var("SUBSEP")
.map(|v| v.as_str().to_string())
.unwrap_or_else(|| "\x1c".into());
let global_to = match p.get("READ_TIMEOUT") {
Some(v) => v.as_number(),
None => crate::procinfo::gawk_read_timeout_env() as f64,
};
// Collect ARGV[1..ARGC) plus "-" and give each a per-input READ_TIMEOUT and
// RETRY entry unless one is already present.
let mut paths: Vec<String> = Vec::new();
if let Some(Value::Array(argv)) = self.get_global_var("ARGV") {
let argc = self
.get_global_var("ARGC")
.map(|v| v.as_number() as i64)
.unwrap_or(0);
for i in 1..argc {
if let Some(v) = argv.get(&i.to_string()) {
paths.push(v.as_str().to_string());
}
}
}
paths.push("-".into());
for path in paths {
let k_rt = format!("{path}{sep}READ_TIMEOUT");
p.entry(k_rt).or_insert(Value::Num(global_to));
let k_retry = format!("{path}{sep}RETRY");
p.entry(k_retry).or_insert(Value::Num(0.0));
}
self.vars.insert("PROCINFO".into(), Value::Array(p));
}
/// Rebuilds `FUNCTAB` from the compiled program and the builtin name list.
///
/// Each user function gets a sub-array with `type = "user"` and `arity` set to
/// its parameter count. Each builtin name not shadowed by a user function gets
/// a sub-array with `type = "builtin"`.
fn functab_refresh(&mut self, cp: &CompiledProgram) {
    let mut table = AwkMap::default();
    for (name, func) in &cp.functions {
        let mut entry = AwkMap::default();
        entry.insert("type".into(), Value::Str("user".into()));
        entry.insert("arity".into(), Value::Num(func.params.len() as f64));
        table.insert(name.clone(), Value::Array(entry));
    }
    for &name in crate::namespace::BUILTIN_NAMES {
        if table.contains_key(name) {
            // A user function of the same name takes precedence.
            continue;
        }
        let mut entry = AwkMap::default();
        entry.insert("type".into(), Value::Str("builtin".into()));
        table.insert(name.into(), Value::Array(entry));
    }
    self.vars.insert("FUNCTAB".into(), Value::Array(table));
}
/// Copies the compiled program's slot map into `symtab_slot_map` and resets
/// the `SYMTAB` variable to an empty array.
fn symtab_mirror_refresh(&mut self, cp: &CompiledProgram) {
    self.symtab_slot_map.clone_from(&cp.slot_map);
    let empty = Value::Array(AwkMap::default());
    self.vars.insert("SYMTAB".into(), empty);
}
/// Makes `jit_slot_buf` exactly `n` elements long: new elements are `0.0`,
/// surplus elements are dropped, existing elements keep their values.
#[inline]
pub fn ensure_jit_slot_buf(&mut self, n: usize) {
    // `Vec::resize` both grows (filling with the given value) and truncates.
    if self.jit_slot_buf.len() != n {
        self.jit_slot_buf.resize(n, 0.0);
    }
}
/// Initializes `ARGC` and `ARGV`.
///
/// `ARGV["0"]` is the first process argument (or `"awkrs"` when unavailable),
/// followed by each entry of `files` converted lossily to a string. `ARGC` is
/// the resulting element count.
pub fn init_argv(&mut self, files: &[std::path::PathBuf]) {
    let program = std::env::args()
        .next()
        .unwrap_or_else(|| "awkrs".to_string());
    self.vars
        .insert("ARGC".into(), Value::Num((files.len() + 1) as f64));

    let mut argv = AwkMap::default();
    argv.insert(0.to_string(), Value::Str(program));
    for (idx, file) in files.iter().enumerate() {
        let arg = file.to_string_lossy().into_owned();
        argv.insert((idx + 1).to_string(), Value::Str(arg));
    }
    self.vars.insert("ARGV".into(), Value::Array(argv));
}
/// Creates a runtime for a parallel worker.
///
/// Unlike `Runtime::new`, `vars` starts empty and `global_readonly` holds
/// `shared_globals`. The remaining arguments are stored in the fields of the
/// same name; everything else is initialized empty / zero / `false`.
// Many independent settings are passed straight through, hence the lint allowance.
#[allow(clippy::too_many_arguments)]
pub fn for_parallel_worker(
shared_globals: Arc<AwkMap<String, Value>>,
filename: String,
rand_seed: u64,
numeric_decimal: char,
numeric_thousands_sep: Option<char>,
csv_mode: bool,
bignum: bool,
sandbox: bool,
characters_as_bytes: bool,
posix: bool,
traditional: bool,
jit_enabled: bool,
gettext_catalogs: AwkMap<String, Arc<Catalog>>,
) -> Self {
Self {
// Worker-local variables start empty; the shared map is kept separately.
vars: AwkMap::default(),
global_readonly: Some(shared_globals),
fields: Vec::new(),
field_ranges: Vec::new(),
fields_dirty: false,
fields_pending_split: false,
cached_fs: " ".into(),
record: String::new(),
line_buf: Vec::new(),
nr: 0.0,
fnr: 0.0,
filename,
exit_pending: false,
exit_code: 0,
input_reader: None,
inet_tcp_read: HashMap::new(),
inet_tcp_write: HashMap::new(),
inet_udp: HashMap::new(),
gettext_dir: String::new(),
bignum,
file_handles: HashMap::new(),
dir_read: HashMap::new(),
output_handles: HashMap::new(),
pipe_stdin: HashMap::new(),
pipe_children: HashMap::new(),
coproc_handles: HashMap::new(),
rand_seed,
numeric_decimal,
numeric_thousands_sep,
slots: Vec::new(),
regex_cache_cs: AwkMap::default(),
regex_cache_ci: AwkMap::default(),
memmem_finder_cache: AwkMap::default(),
// No up-front print buffer allocation here, unlike `Runtime::new`.
print_buf: Vec::new(),
ofs_bytes: b" ".to_vec(),
ors_bytes: b"\n".to_vec(),
vm_stack: Vec::with_capacity(64),
jit_slot_buf: Vec::new(),
csv_mode,
rs_pattern_for_regex: String::new(),
rs_regex_bytes: None,
sandbox,
characters_as_bytes,
posix,
traditional,
jit_enabled,
gettext_catalogs,
symtab_slot_map: HashMap::new(),
profile_record_hits: Vec::new(),
sorted_in_warned: Cell::new(false),
errno_code: 0,
#[cfg(unix)]
primary_input_poll_fd: None,
}
}
pub fn require_unsandboxed_io(&self) -> Result<()> {
if self.sandbox {
return Err(Error::Runtime(
"sandbox: file I/O, pipes, coprocesses, inet, and system() are disabled".into(),
));
}
Ok(())
}
/// Compiles `pat` and caches it if it is not cached yet.
///
/// The cache used (and the case sensitivity of the compiled regex) follows the
/// current `IGNORECASE` setting.
///
/// # Errors
/// Returns the regex compiler's message when `pat` is not a valid pattern.
pub fn ensure_regex(&mut self, pat: &str) -> std::result::Result<(), String> {
    let ic = self.ignore_case_flag();
    let cache = if ic {
        &mut self.regex_cache_ci
    } else {
        &mut self.regex_cache_cs
    };
    if !cache.contains_key(pat) {
        let compiled = RegexBuilder::new(pat)
            .case_insensitive(ic)
            .build()
            .map_err(|e| e.to_string())?;
        cache.insert(pat.to_string(), compiled);
    }
    Ok(())
}
/// Returns the cached regex for `pat` under the current `IGNORECASE` setting.
///
/// # Panics
/// Panics if `pat` is not present in the selected cache; call `ensure_regex` first.
pub fn regex_ref(&self, pat: &str) -> &Regex {
    let cache = if self.ignore_case_flag() {
        &self.regex_cache_ci
    } else {
        &self.regex_cache_cs
    };
    &cache[pat]
}
/// Reports whether the `IGNORECASE` variable is set to a truthy value.
///
/// An unset variable counts as `false`.
#[inline]
pub fn ignore_case_flag(&self) -> bool {
    self.get_global_var("IGNORECASE")
        .is_some_and(|value| value.truthy())
}
/// Resets the error state: `ERRNO` becomes the empty string and the numeric code 0.
pub fn clear_errno(&mut self) {
    let empty = Value::Str(String::new());
    self.vars.insert(String::from("ERRNO"), empty);
    self.errno_code = 0;
}
/// Records an I/O error: `ERRNO` gets the error's display text and the numeric code
/// is the raw OS error number, or 0 when the error carries none.
pub fn set_errno_io(&mut self, e: &std::io::Error) {
    let os_code = e.raw_os_error().unwrap_or(0);
    let text = e.to_string();
    self.errno_code = os_code;
    self.vars.insert(String::from("ERRNO"), Value::Str(text));
}
/// Sets `ERRNO` to an arbitrary message; the numeric error code is reset to 0.
pub fn set_errno_str(&mut self, msg: impl Into<String>) {
    let text: String = msg.into();
    self.vars.insert(String::from("ERRNO"), Value::Str(text));
    self.errno_code = 0;
}
pub fn ensure_rs_regex_bytes(&mut self) -> Result<()> {
let rs = self.rs_string();
if self.rs_pattern_for_regex == rs {
return Ok(());
}
self.rs_pattern_for_regex.clear();
self.rs_pattern_for_regex.push_str(&rs);
if rs == "\n" || rs.is_empty() {
self.rs_regex_bytes = None;
return Ok(());
}
if rs.chars().count() <= 1 {
self.rs_regex_bytes = None;
return Ok(());
}
self.rs_regex_bytes = Some(
BytesRegex::new(&rs).map_err(|e| Error::Runtime(format!("invalid RS regex: {e}")))?,
);
Ok(())
}
/// Stores the separator bytes that ended the last record in the `RT` variable.
///
/// Bytes that are not valid UTF-8 are replaced lossily; an empty separator yields an
/// empty string.
pub fn set_rt_from_bytes(&mut self, sep: &[u8]) {
    // Lossy decoding of an empty slice already produces an empty string.
    let text = String::from_utf8_lossy(sep).into_owned();
    self.vars.insert(String::from("RT"), Value::Str(text));
}
/// Returns a cached substring searcher for the literal `pat`, building and caching an
/// owned one on first use.
pub fn literal_substring_finder(&mut self, pat: &str) -> &memmem::Finder<'static> {
    let cached = self.memmem_finder_cache.contains_key(pat);
    if !cached {
        let owned_finder = memmem::Finder::new(pat.as_bytes()).into_owned();
        self.memmem_finder_cache.insert(pat.to_string(), owned_finder);
    }
    &self.memmem_finder_cache[pat]
}
/// Looks up a global variable by name.
///
/// The local table `vars` takes precedence; the shared read-only table, when present,
/// is consulted only if the name is absent locally.
#[inline]
pub fn get_global_var(&self, name: &str) -> Option<&Value> {
    if let Some(local) = self.vars.get(name) {
        return Some(local);
    }
    self.global_readonly
        .as_ref()
        .and_then(|shared| shared.get(name))
}
pub fn write_pipe_line(&mut self, cmd: &str, data: &str) -> Result<()> {
self.require_unsandboxed_io()?;
if self.coproc_handles.contains_key(cmd) {
return Err(Error::Runtime(format!(
"one-way pipe `|` conflicts with two-way `|&` for `{cmd}`"
)));
}
if !self.pipe_stdin.contains_key(cmd) {
let mut child = Command::new("sh")
.arg("-c")
.arg(cmd)
.stdin(Stdio::piped())
.spawn()
.map_err(|e| Error::Runtime(format!("pipe `{cmd}`: {e}")))?;
let stdin = child
.stdin
.take()
.ok_or_else(|| Error::Runtime(format!("pipe `{cmd}`: no stdin")))?;
self.pipe_children.insert(cmd.to_string(), child);
self.pipe_stdin
.insert(cmd.to_string(), BufWriter::new(stdin));
}
let w = self.pipe_stdin.get_mut(cmd).unwrap();
w.write_all(data.as_bytes()).map_err(Error::Io)?;
Ok(())
}
fn ensure_coproc(&mut self, cmd: &str) -> Result<()> {
self.require_unsandboxed_io()?;
if self.coproc_handles.contains_key(cmd) {
return Ok(());
}
if self.pipe_stdin.contains_key(cmd) {
return Err(Error::Runtime(format!(
"two-way pipe `|&` conflicts with one-way `|` for `{cmd}`"
)));
}
let mut child = Command::new("sh")
.arg("-c")
.arg(cmd)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.map_err(|e| Error::Runtime(format!("coprocess `{cmd}`: {e}")))?;
let stdin = child
.stdin
.take()
.ok_or_else(|| Error::Runtime(format!("coprocess `{cmd}`: no stdin")))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| Error::Runtime(format!("coprocess `{cmd}`: no stdout")))?;
self.coproc_handles.insert(
cmd.to_string(),
CoprocHandle {
child,
stdin: BufWriter::new(stdin),
stdout: BufReader::new(stdout),
},
);
Ok(())
}
pub fn write_coproc_line(&mut self, cmd: &str, data: &str) -> Result<()> {
self.ensure_coproc(cmd)?;
let w = self.coproc_handles.get_mut(cmd).unwrap();
w.stdin.write_all(data.as_bytes()).map_err(Error::Io)?;
Ok(())
}
pub fn read_line_coproc(&mut self, cmd: &str) -> Result<Option<String>> {
self.ensure_coproc(cmd)?;
let to = self.procinfo_read_timeout_ms_for(cmd);
#[cfg(unix)]
if to > 0 {
use std::os::unix::io::AsRawFd;
let h = self.coproc_handles.get_mut(cmd).unwrap();
let fd = h.stdout.get_ref().as_raw_fd();
wait_fd_read_timeout(fd, to)?;
}
let h = self.coproc_handles.get_mut(cmd).unwrap();
let mut line = String::new();
let n = h.stdout.read_line(&mut line).map_err(Error::Io)?;
if n == 0 {
return Ok(None);
}
Ok(Some(line))
}
pub fn read_line_pipe(&mut self, cmd: &str) -> Result<Option<String>> {
self.require_unsandboxed_io()?;
let mut child = Command::new("sh")
.arg("-c")
.arg(cmd)
.stdout(Stdio::piped())
.spawn()
.map_err(|e| Error::Runtime(format!("pipe getline `{cmd}`: {e}")))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| Error::Runtime(format!("pipe getline `{cmd}`: no stdout")))?;
let mut reader = BufReader::new(stdout);
let to = self.procinfo_read_timeout_ms_for(cmd);
#[cfg(unix)]
if to > 0 {
use std::os::unix::io::AsRawFd;
let fd = reader.get_ref().as_raw_fd();
wait_fd_read_timeout(fd, to)?;
}
let mut line = String::new();
let n = reader.read_line(&mut line).map_err(Error::Io)?;
let _ = child.wait();
if n == 0 {
Ok(None)
} else {
Ok(Some(line))
}
}
pub fn write_output_line(&mut self, path: &str, data: &str, append: bool) -> Result<()> {
self.require_unsandboxed_io()?;
if path.starts_with("/inet/udp/") {
let _ = append;
self.ensure_inet_udp(path)?;
let s = self.inet_udp.get_mut(path).unwrap();
s.send(data.as_bytes())
.map_err(|e| Error::Runtime(format!("inet udp send `{path}`: {e}")))?;
return Ok(());
}
if path.starts_with("/inet/tcp/") {
let _ = append;
self.ensure_inet_tcp_pair(path)?;
let w = self.inet_tcp_write.get_mut(path).unwrap();
w.write_all(data.as_bytes()).map_err(Error::Io)?;
return Ok(());
}
self.ensure_output_writer(path, append)?;
let w = self.output_handles.get_mut(path).unwrap();
w.write_all(data.as_bytes()).map_err(Error::Io)?;
Ok(())
}
fn ensure_output_writer(&mut self, path: &str, append: bool) -> Result<()> {
if path.starts_with("/inet/udp/") {
return self.ensure_inet_udp(path);
}
if path.starts_with("/inet/tcp/") {
return self.ensure_inet_tcp_pair(path);
}
if self.output_handles.contains_key(path) {
return Ok(());
}
let f = if append {
OpenOptions::new().create(true).append(true).open(path)
} else {
OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(path)
}
.map_err(|e| Error::Runtime(format!("open {path}: {e}")))?;
self.output_handles
.insert(path.to_string(), BufWriter::new(f));
Ok(())
}
pub fn flush_redirect_target(&mut self, key: &str) -> Result<()> {
if let Some(w) = self.output_handles.get_mut(key) {
w.flush().map_err(Error::Io)?;
return Ok(());
}
if let Some(w) = self.inet_tcp_write.get_mut(key) {
w.flush().map_err(Error::Io)?;
return Ok(());
}
if self.inet_udp.contains_key(key) {
return Ok(());
}
if let Some(w) = self.pipe_stdin.get_mut(key) {
w.flush().map_err(Error::Io)?;
return Ok(());
}
if let Some(h) = self.coproc_handles.get_mut(key) {
h.stdin.flush().map_err(Error::Io)?;
return Ok(());
}
Err(Error::Runtime(format!(
"fflush: {key} is not an open output file, pipe, or coprocess"
)))
}
/// Attaches `r` as the primary input reader with no poll descriptor.
#[cfg_attr(unix, allow(dead_code))]
pub fn attach_input_reader(&mut self, r: SharedInputReader) {
    // Same effect as attaching with a poll descriptor of `None`.
    self.input_reader = Some(r);
    #[cfg(unix)]
    {
        self.primary_input_poll_fd = None;
    }
}
/// Attaches `r` as the primary input reader.
///
/// On Unix the optional raw descriptor is stored alongside it for read-timeout
/// polling; on other platforms the second argument is ignored.
pub fn attach_input_reader_with_poll_fd(
    &mut self,
    r: SharedInputReader,
    #[cfg(unix)] poll_fd: Option<std::os::unix::io::RawFd>,
    #[cfg(not(unix))] _poll_fd: Option<()>,
) {
    #[cfg(unix)]
    {
        self.primary_input_poll_fd = poll_fd;
    }
    self.input_reader = Some(r);
}
/// Drops the primary input reader and, on Unix, forgets its poll descriptor.
pub fn detach_input_reader(&mut self) {
    #[cfg(unix)]
    {
        self.primary_input_poll_fd = None;
    }
    self.input_reader = None;
}
/// Waits for the primary input descriptor to become readable when a positive read
/// timeout is configured for the primary input and a poll descriptor is attached.
/// Otherwise this is a no-op.
///
/// # Errors
/// Propagates the error from the timeout wait.
#[cfg(unix)]
pub fn poll_primary_read_timeout_if_needed(&self) -> Result<()> {
    let timeout_ms = self.procinfo_read_timeout_ms_for(&self.primary_input_procinfo_key());
    match self.primary_input_poll_fd {
        Some(fd) if timeout_ms > 0 => {
            wait_fd_read_timeout(fd, timeout_ms)?;
        }
        _ => {}
    }
    Ok(())
}
/// Returns the current record separator `RS` as a string, defaulting to a newline
/// when the variable is unset.
pub fn rs_string(&self) -> String {
    self.get_global_var("RS").map_or_else(
        || "\n".to_string(),
        |value| match value {
            Value::Str(text) => text.clone(),
            other => other.as_str(),
        },
    )
}
/// Formats `n` with the `CONVFMT` format string (`%.6g` when unset), honouring the
/// configured decimal and thousands separators.
///
/// If formatting fails, the result of `format_number(n)` is returned instead.
pub fn num_to_string_convfmt(&self, n: f64) -> String {
    let convfmt = match self.get_global_var("CONVFMT") {
        Some(value) => value.as_str(),
        None => "%.6g".to_string(),
    };
    let formatted = crate::format::awk_sprintf_with_decimal(
        &convfmt,
        &[Value::Num(n)],
        self.numeric_decimal,
        self.numeric_thousands_sep,
        None,
    );
    match formatted {
        Ok(text) => text,
        Err(_) => format_number(n),
    }
}
/// Formats `n` with the `OFMT` format string (`%.6g` when unset), honouring the
/// configured decimal and thousands separators.
///
/// If formatting fails, the result of `format_number(n)` is returned instead.
pub fn num_to_string_ofmt(&self, n: f64) -> String {
    let ofmt = match self.get_global_var("OFMT") {
        Some(value) => value.as_str(),
        None => "%.6g".to_string(),
    };
    let formatted = crate::format::awk_sprintf_with_decimal(
        &ofmt,
        &[Value::Num(n)],
        self.numeric_decimal,
        self.numeric_thousands_sep,
        None,
    );
    match formatted {
        Ok(text) => text,
        Err(_) => format_number(n),
    }
}
/// Formats the arbitrary-precision value `f` with `CONVFMT` (`%.6g` when unset),
/// passing the runtime's MPFR precision and rounding mode to the formatter.
///
/// If formatting fails, `f.to_string()` is returned instead.
pub fn mpfr_to_string_convfmt(&self, f: &Float) -> String {
    let convfmt = match self.get_global_var("CONVFMT") {
        Some(value) => value.as_str(),
        None => "%.6g".to_string(),
    };
    let mpfr_settings = Some((self.mpfr_prec_bits(), self.mpfr_round()));
    let formatted = crate::format::awk_sprintf_with_decimal(
        &convfmt,
        &[Value::Mpfr(f.clone())],
        self.numeric_decimal,
        self.numeric_thousands_sep,
        mpfr_settings,
    );
    match formatted {
        Ok(text) => text,
        Err(_) => f.to_string(),
    }
}
/// Formats the arbitrary-precision value `f` with `OFMT` (`%.6g` when unset),
/// passing the runtime's MPFR precision and rounding mode to the formatter.
///
/// If formatting fails, `f.to_string()` is returned instead.
pub fn mpfr_to_string_ofmt(&self, f: &Float) -> String {
    let ofmt = match self.get_global_var("OFMT") {
        Some(value) => value.as_str(),
        None => "%.6g".to_string(),
    };
    let mpfr_settings = Some((self.mpfr_prec_bits(), self.mpfr_round()));
    let formatted = crate::format::awk_sprintf_with_decimal(
        &ofmt,
        &[Value::Mpfr(f.clone())],
        self.numeric_decimal,
        self.numeric_thousands_sep,
        mpfr_settings,
    );
    match formatted {
        Ok(text) => text,
        Err(_) => f.to_string(),
    }
}
/// Assigns field `i` the `CONVFMT` string form of the arbitrary-precision value `f`.
///
/// # Errors
/// Propagates the error from `set_field`.
pub fn set_field_from_mpfr(&mut self, i: i32, f: &Float) -> crate::error::Result<()> {
    let rendered = self.mpfr_to_string_convfmt(f);
    self.set_field(i, rendered.as_str())
}
/// Reads the next record from the primary input reader.
///
/// On Unix, a positive read timeout configured for the primary input is applied on the
/// attached poll descriptor first. The record is read into `line_buf` according to
/// `RS`, and `RT` is set from the separator that ended it. When `RS` is a newline the
/// record end is trimmed via `trim_end_record_bytes`; otherwise the whole buffer is
/// used. The bytes are decoded lossily as UTF-8.
///
/// Returns `Ok(None)` when no further record is available.
///
/// # Errors
/// Returns `Error::Runtime` when no input reader is attached, and propagates failures
/// from the timeout wait, the `RS` regex compilation and the record read.
pub fn read_line_primary(&mut self) -> Result<Option<String>> {
    let reader = match self.input_reader.clone() {
        Some(shared) => shared,
        None => {
            return Err(Error::Runtime(
                "`getline` with no file is only valid during normal input".into(),
            ));
        }
    };
    let timeout_ms = self.procinfo_read_timeout_ms_for(&self.primary_input_procinfo_key());
    #[cfg(unix)]
    if timeout_ms > 0 {
        if let Some(fd) = self.primary_input_poll_fd {
            wait_fd_read_timeout(fd, timeout_ms)?;
        }
    }
    let rs = self.rs_string();
    self.ensure_rs_regex_bytes()?;
    let mut separator = Vec::new();
    let got_record = crate::record_io::read_next_record(
        &reader,
        &rs,
        &mut self.line_buf,
        &mut separator,
        self.rs_regex_bytes.as_ref(),
    )?;
    if !got_record {
        return Ok(None);
    }
    self.set_rt_from_bytes(&separator);
    let record_end = match rs.as_str() {
        "\n" => crate::record_io::trim_end_record_bytes(&self.line_buf),
        _ => self.line_buf.len(),
    };
    let record = String::from_utf8_lossy(&self.line_buf[..record_end]).into_owned();
    Ok(Some(record))
}
pub fn read_line_file(&mut self, path: &str) -> Result<Option<String>> {
self.require_unsandboxed_io()?;
if path.starts_with("/inet/udp/") {
self.ensure_inet_udp(path)?;
let s = self.inet_udp.get_mut(path).unwrap();
let mut buf = [0u8; 65536];
let n = s
.recv(&mut buf)
.map_err(|e| Error::Runtime(format!("inet udp recv `{path}`: {e}")))?;
if n == 0 {
return Ok(None);
}
return Ok(Some(String::from_utf8_lossy(&buf[..n]).into_owned()));
}
if path.starts_with("/inet/tcp/") {
self.ensure_inet_tcp_pair(path)?;
let reader = self.inet_tcp_read.get_mut(path).unwrap();
let mut line = String::new();
let n = reader.read_line(&mut line).map_err(Error::Io)?;
if n == 0 {
return Ok(None);
}
return Ok(Some(line));
}
if path.starts_with("/inet/") {
return Err(Error::Runtime(format!(
"unsupported inet path `{path}` (use /inet/tcp/... or /inet/udp/...)"
)));
}
let p = Path::new(path);
if p.is_dir() {
self.require_unsandboxed_io()?;
if !self.dir_read.contains_key(path) {
let mut names: Vec<String> = std::fs::read_dir(p)
.map_err(|e| Error::Runtime(format!("read_dir {path}: {e}")))?
.filter_map(|e| e.ok().map(|x| x.file_name().to_string_lossy().into_owned()))
.collect();
names.sort();
self.dir_read.insert(path.to_string(), (names, 0));
}
let (names, i) = self.dir_read.get_mut(path).unwrap();
if *i >= names.len() {
return Ok(None);
}
let name = names[*i].clone();
*i += 1;
return Ok(Some(name));
}
if !self.file_handles.contains_key(path) {
let f = File::open(p).map_err(|e| Error::Runtime(format!("open {path}: {e}")))?;
self.file_handles
.insert(path.to_string(), BufReader::new(f));
}
let to = self.procinfo_read_timeout_ms_for(path);
let reader = self.file_handles.get_mut(path).unwrap();
#[cfg(unix)]
if to > 0 {
use std::os::unix::io::AsRawFd;
let fd = reader.get_ref().as_raw_fd();
wait_fd_read_timeout(fd, to)?;
}
let mut line = String::new();
let n = reader.read_line(&mut line).map_err(Error::Io)?;
if n == 0 {
return Ok(None);
}
Ok(Some(line))
}
fn ensure_inet_tcp_pair(&mut self, path: &str) -> Result<()> {
if self.inet_tcp_read.contains_key(path) {
return Ok(());
}
let (lport, host, rport) = parse_inet_tcp(path)
.ok_or_else(|| Error::Runtime(format!("invalid /inet/tcp/ path `{path}`")))?;
let stream = if lport == 0 {
TcpStream::connect((host.as_str(), rport))
.map_err(|e| Error::Runtime(format!("inet connect `{path}`: {e}")))?
} else {
tcp_connect_with_local_port(&host, lport, rport)?
};
let w = stream
.try_clone()
.map_err(|e| Error::Runtime(format!("inet: {e}")))?;
let to = self.procinfo_read_timeout_ms_for(path);
if to > 0 {
let d = Duration::from_millis(to as u64);
stream
.set_read_timeout(Some(d))
.map_err(|e| Error::Runtime(format!("inet tcp read timeout: {e}")))?;
}
self.inet_tcp_read
.insert(path.to_string(), BufReader::new(stream));
self.inet_tcp_write.insert(path.to_string(), w);
Ok(())
}
fn ensure_inet_udp(&mut self, path: &str) -> Result<()> {
if self.inet_udp.contains_key(path) {
return Ok(());
}
let (lport, host, rport) = parse_inet_udp(path)
.ok_or_else(|| Error::Runtime(format!("invalid /inet/udp/ path `{path}`")))?;
let mut addrs = format!("{host}:{rport}")
.to_socket_addrs()
.map_err(|e| Error::Runtime(format!("inet udp resolve `{host}`: {e}")))?;
let addr = addrs
.next()
.ok_or_else(|| Error::Runtime(format!("inet udp: no address for `{host}:{rport}`")))?;
let socket = match addr {
SocketAddr::V4(_) => UdpSocket::bind((Ipv4Addr::UNSPECIFIED, lport)),
SocketAddr::V6(_) => UdpSocket::bind((Ipv6Addr::UNSPECIFIED, lport)),
}
.map_err(|e| Error::Runtime(format!("inet udp bind `{path}`: {e}")))?;
socket
.connect(addr)
.map_err(|e| Error::Runtime(format!("inet udp connect `{path}`: {e}")))?;
let to = self.procinfo_read_timeout_ms_for(path);
if to > 0 {
socket
.set_read_timeout(Some(Duration::from_millis(to as u64)))
.map_err(|e| Error::Runtime(format!("inet udp read timeout: {e}")))?;
}
self.inet_udp.insert(path.to_string(), socket);
Ok(())
}
/// Closes everything registered under `path` and returns a status number.
///
/// A coprocess is shut down, an output file is flushed, and a one-way pipe's writer is
/// flushed and dropped before its child is waited for. Input files, directory listings
/// and inet sockets are simply dropped. Flush and shutdown errors are ignored.
///
/// The returned value is 0 unless a one-way pipe child was waited for. In that case it
/// is the child's exit code; on Unix, `256 + signal` when it was killed by a signal;
/// on other platforms -1 when no code is available; and -1 when waiting fails.
pub fn close_handle(&mut self, path: &str) -> f64 {
    if let Some(coproc) = self.coproc_handles.remove(path) {
        let _ = shutdown_coproc(coproc);
    }
    if let Some(mut file_out) = self.output_handles.remove(path) {
        let _ = file_out.flush();
    }
    if let Some(mut pipe_in) = self.pipe_stdin.remove(path) {
        // Leaving this block drops the writer so the child can see end of input.
        let _ = pipe_in.flush();
    }

    let mut exit_status = 0.0_f64;
    if let Some(mut child) = self.pipe_children.remove(path) {
        exit_status = match child.wait() {
            Err(_) => -1.0,
            Ok(status) => {
                #[cfg(unix)]
                let resolved = {
                    use std::os::unix::process::ExitStatusExt;
                    match (status.code(), status.signal()) {
                        (Some(code), _) => code as f64,
                        (None, Some(sig)) => (256 + sig) as f64,
                        (None, None) => 0.0,
                    }
                };
                #[cfg(not(unix))]
                let resolved = status.code().unwrap_or(-1) as f64;
                resolved
            }
        };
    }

    self.file_handles.remove(path);
    self.dir_read.remove(path);
    self.inet_tcp_read.remove(path);
    self.inet_tcp_write.remove(path);
    self.inet_udp.remove(path);
    exit_status
}
/// Advances the generator state and returns the next pseudo-random number in `[0, 1)`.
///
/// The state is updated as `seed * 1103515245 + 12345` with wrapping arithmetic; the
/// result is 15 bits taken from above bit 16, divided by 32768.
pub fn rand(&mut self) -> f64 {
    let next_state = self
        .rand_seed
        .wrapping_mul(1103515245)
        .wrapping_add(12345);
    self.rand_seed = next_state;
    let fifteen_bits = ((next_state >> 16) as u32) & 0x7fff;
    f64::from(fifteen_bits) / 32768.0
}
/// Reseeds the generator and returns the low 32 bits of the previous seed.
///
/// With `None`, the new seed is derived from the wall clock (seconds since the Unix
/// epoch XOR the sub-second nanoseconds), or 1 if the clock reads before the epoch.
pub fn srand(&mut self, n: Option<u64>) -> f64 {
    let previous = self.rand_seed;
    self.rand_seed = match n {
        Some(seed) => seed,
        None => {
            let since_epoch =
                std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
            match since_epoch {
                Ok(elapsed) => elapsed.as_secs() ^ (elapsed.subsec_nanos() as u64),
                Err(_) => 1,
            }
        }
    };
    (previous & 0xffff_ffff) as f64
}
/// Installs `line` as the current record and `fs` as the cached field separator.
///
/// Field splitting is deferred: previously split fields are discarded and the record
/// is only marked as pending a split.
pub fn set_field_sep_split(&mut self, fs: &str, line: &str) {
    // Discard whatever was split from the previous record.
    self.fields.clear();
    self.field_ranges.clear();
    self.fields_dirty = false;

    self.cached_fs.clear();
    self.cached_fs.push_str(fs);

    self.record.clear();
    self.record.push_str(line);
    self.fields_pending_split = true;
}
/// Like `set_field_sep_split`, but takes ownership of `line`, which replaces the
/// record string outright instead of being copied into it.
pub fn set_field_sep_split_owned(&mut self, fs: &str, line: String) {
    // Discard whatever was split from the previous record.
    self.fields.clear();
    self.field_ranges.clear();
    self.fields_dirty = false;

    self.cached_fs.clear();
    self.cached_fs.push_str(fs);

    self.record = line;
    self.fields_pending_split = true;
}
/// Performs the deferred field split if one is pending, and clears the pending flag.
#[inline]
pub fn ensure_fields_split(&mut self) {
    // `take` reads the flag and resets it to `false` in one step.
    if std::mem::take(&mut self.fields_pending_split) {
        self.split_record_fields();
    }
}
/// Splits the current record into fields.
///
/// The splitting strategy is chosen in this order:
/// 1. CSV mode, when `csv_mode` is set;
/// 2. fixed widths, when `FIELDWIDTHS` yields at least one positive width;
/// 3. `FPAT`, when it is a non-blank string and `split_fields_fpat` returns `true`;
/// 4. the cached field separator, or the `FS` variable when the cache is empty.
///
/// Only the CSV path fills `fields` and sets `fields_dirty`; the fixed-width path
/// clears `fields` and resets `fields_dirty`; the remaining paths leave both untouched
/// and only pass `field_ranges` to the split helper.
fn split_record_fields(&mut self) {
let record = self.record.as_str();
if self.csv_mode {
split_csv_gawk_fields(record, &mut self.field_ranges);
self.fields.clear();
// Materialise every CSV field as an owned string, collapsing doubled quotes.
for &(s, e) in &self.field_ranges {
let raw = &record[s as usize..e as usize];
self.fields.push(if raw.contains("\"\"") {
raw.replace("\"\"", "\"")
} else {
raw.to_string()
});
}
// From here on field reads go through `fields`, not `field_ranges`.
self.fields_dirty = true;
return;
}
let ic = self.ignore_case_flag();
if let Some(fw) = self.fieldwidths_vec() {
if !fw.is_empty() {
split_fields_fieldwidths(record, &fw, &mut self.field_ranges);
self.fields.clear();
self.fields_dirty = false;
return;
}
}
// FPAT is honoured only when it is a string value that is not blank after trimming.
let fpat_trimmed: Option<String> = self.get_global_var("FPAT").and_then(|fv| {
if !matches!(
fv,
Value::Str(ref s) | Value::StrLit(ref s) if !s.trim().is_empty()
) {
return None;
}
let t = fv.as_str_cow();
let tr = t.as_ref().trim();
if tr.is_empty() {
None
} else {
Some(tr.to_string())
}
});
if let Some(ref fp_trimmed) = fpat_trimmed {
// A `false` result falls through to ordinary FS splitting below.
if split_fields_fpat(record, fp_trimmed, &mut self.field_ranges, ic) {
return;
}
}
let cab = self.characters_as_bytes;
if !self.cached_fs.is_empty() {
split_fields_into(record, &self.cached_fs, &mut self.field_ranges, ic, cab);
} else {
// Empty cached separator: read FS from the variables, defaulting to a single space.
match self.get_global_var("FS") {
None => split_fields_into(record, " ", &mut self.field_ranges, ic, cab),
Some(v) => {
let fs = v.as_str_cow().into_owned();
split_fields_into(record, &fs, &mut self.field_ranges, ic, cab);
}
}
}
}
/// Parses `FIELDWIDTHS` into a list of positive widths.
///
/// Whitespace-separated tokens that are not unsigned integers, and zero widths, are
/// dropped. Returns `None` when the variable is unset or no usable width remains.
fn fieldwidths_vec(&self) -> Option<Vec<usize>> {
    let raw = self.get_global_var("FIELDWIDTHS")?.as_str();
    // `split_whitespace` ignores leading and trailing blanks, so a blank value simply
    // produces no tokens.
    let widths: Vec<usize> = raw
        .split_whitespace()
        .filter_map(|token| token.parse::<usize>().ok().filter(|&width| width > 0))
        .collect();
    (!widths.is_empty()).then_some(widths)
}
pub fn field(&mut self, i: i32) -> crate::error::Result<Value> {
if i < 0 {
return Err(crate::error::Error::Runtime(
"attempt to access field number -1".into(),
));
}
let idx = i as usize;
if idx == 0 {
return Ok(Value::Str(self.record.clone()));
}
self.ensure_fields_split();
if self.fields_dirty {
Ok(self
.fields
.get(idx - 1)
.cloned()
.map(Value::Str)
.unwrap_or_else(|| Value::Str(String::new())))
} else {
Ok(self
.field_ranges
.get(idx - 1)
.map(|&(s, e)| Value::Str(self.record[s as usize..e as usize].to_string()))
.unwrap_or_else(|| Value::Str(String::new())))
}
}
#[inline]
pub fn field_as_number(&mut self, i: i32) -> crate::error::Result<f64> {
if i < 0 {
return Err(crate::error::Error::Runtime(
"attempt to access field number -1".into(),
));
}
let idx = i as usize;
if idx == 0 {
return Ok(parse_number(&self.record));
}
self.ensure_fields_split();
if self.fields_dirty {
Ok(self
.fields
.get(idx - 1)
.map(|s| parse_number(s))
.unwrap_or(0.0))
} else {
Ok(self
.field_ranges
.get(idx - 1)
.map(|&(s, e)| parse_number(&self.record[s as usize..e as usize]))
.unwrap_or(0.0))
}
}
/// Appends the bytes of field `idx` to the print buffer.
///
/// Index 0 appends the whole record. A field beyond the last one appends nothing.
#[inline]
pub fn print_field_to_buf(&mut self, idx: usize) {
    if idx == 0 {
        self.print_buf.extend_from_slice(self.record.as_bytes());
        return;
    }
    self.ensure_fields_split();
    let slot = idx - 1;
    let bytes: Option<&[u8]> = if self.fields_dirty {
        self.fields.get(slot).map(|text| text.as_bytes())
    } else {
        match self.field_ranges.get(slot) {
            Some(&(start, end)) => Some(&self.record.as_bytes()[start as usize..end as usize]),
            None => None,
        }
    };
    if let Some(bytes) = bytes {
        self.print_buf.extend_from_slice(bytes);
    }
}
/// Borrows field `i` as a string slice without triggering a pending split.
///
/// Index 0 is the whole record; a field that is not present yields `""`.
#[allow(dead_code)]
pub fn field_str(&self, i: usize) -> &str {
    if i == 0 {
        return self.record.as_str();
    }
    let slot = i - 1;
    if !self.fields_dirty {
        return match self.field_ranges.get(slot) {
            Some(&(start, end)) => &self.record[start as usize..end as usize],
            None => "",
        };
    }
    match self.fields.get(slot) {
        Some(text) => text.as_str(),
        None => "",
    }
}
/// Returns the number of fields in the current record, performing the deferred split
/// first if one is pending.
#[inline]
#[allow(dead_code)]
pub fn nf(&mut self) -> usize {
    self.ensure_fields_split();
    match self.fields_dirty {
        true => self.fields.len(),
        false => self.field_ranges.len(),
    }
}
/// Reports whether field `i` lies beyond the last field of the current record.
///
/// Indices below 1 are never considered unassigned, and for them the field count is
/// not consulted.
#[inline]
pub fn field_is_unassigned(&mut self, i: i32) -> bool {
    i >= 1 && (i as usize) > self.nf()
}
/// Replaces the whole record with `val`, splits it immediately using the current `FS`
/// (a single space when unset) and stores the resulting field count in `NF`.
pub fn set_record_str(&mut self, val: &str) {
    let separator = match self.get_global_var("FS") {
        Some(value) => value.as_str(),
        None => " ".into(),
    };
    self.set_field_sep_split(&separator, val);
    self.ensure_fields_split();
    let field_count = self.nf() as f64;
    self.vars.insert(String::from("NF"), Value::Num(field_count));
}
/// Sets the number of fields to `n`, truncating or padding with empty fields, then
/// rebuilds the record and updates `NF`.
///
/// # Errors
/// Returns `Error::Runtime` when `n` is negative.
pub fn set_nf(&mut self, n: i32) -> crate::error::Result<()> {
    if n < 0 {
        return Err(crate::error::Error::Runtime(
            "NF set to negative value".into(),
        ));
    }
    let target = n as usize;
    self.ensure_fields_split();
    if !self.fields_dirty {
        // Turn the byte ranges into owned strings before editing the field list.
        let record = self.record.as_str();
        self.fields.clear();
        self.fields.extend(
            self.field_ranges
                .iter()
                .map(|&(start, end)| record[start as usize..end as usize].to_string()),
        );
        self.fields_dirty = true;
    }
    // `resize` both truncates and pads, depending on the current length.
    self.fields.resize(target, String::new());
    self.rebuild_record();
    self.vars
        .insert(String::from("NF"), Value::Num(target as f64));
    Ok(())
}
pub fn set_field(&mut self, i: i32, val: &str) -> crate::error::Result<()> {
if i == 0 {
self.set_record_str(val);
return Ok(());
}
if i < 1 {
return Err(crate::error::Error::Runtime(
"attempt to access field number -1".into(),
));
}
if !self.fields_dirty {
self.fields.clear();
for &(s, e) in &self.field_ranges {
self.fields
.push(self.record[s as usize..e as usize].to_string());
}
self.fields_dirty = true;
}
let idx = (i - 1) as usize;
if self.fields.len() <= idx {
self.fields.resize(idx + 1, String::new());
}
self.fields[idx] = val.to_string();
self.rebuild_record();
let nf = self.fields.len() as f64;
self.vars.insert("NF".into(), Value::Num(nf));
Ok(())
}
pub fn set_field_num(&mut self, i: i32, n: f64) -> crate::error::Result<()> {
if i == 0 {
let s = if n.fract() == 0.0 && n.abs() < 1e15 {
format!("{}", n as i64)
} else {
format!("{n}")
};
self.set_record_str(&s);
return Ok(());
}
if i < 1 {
return Err(crate::error::Error::Runtime(
"attempt to access field number -1".into(),
));
}
if !self.fields_dirty {
self.fields.clear();
for &(s, e) in &self.field_ranges {
self.fields
.push(self.record[s as usize..e as usize].to_string());
}
self.fields_dirty = true;
}
let idx = (i - 1) as usize;
if self.fields.len() <= idx {
self.fields.resize(idx + 1, String::new());
}
self.fields[idx].clear();
if n.fract() == 0.0 && n.abs() < 1e15 {
use std::fmt::Write;
let _ = write!(self.fields[idx], "{}", n as i64);
} else {
use std::fmt::Write;
let _ = write!(self.fields[idx], "{n}");
}
self.rebuild_record();
let nf = self.fields.len() as f64;
self.vars.insert("NF".into(), Value::Num(nf));
Ok(())
}
/// Rebuilds the record by joining the owned fields with `OFS` (a single space when
/// unset).
///
/// `OFS` is looked up through `get_global_var`, so a value that exists only in the
/// shared read-only globals (as in a parallel worker) is honoured as well, not just one
/// stored in the local variable table.
fn rebuild_record(&mut self) {
    let ofs = self
        .get_global_var("OFS")
        .map(|v| v.as_str())
        .unwrap_or_else(|| " ".into());
    self.record = self.fields.join(&ofs);
}
/// Installs `line`, with trailing `\n` and `\r` characters removed, as the current
/// record; field splitting is deferred.
///
/// `FS` is looked up through `get_global_var`, matching `set_record_str`, so a
/// separator that exists only in the shared read-only globals (as in a parallel worker)
/// is honoured as well. A single space is used when `FS` is unset.
pub fn set_record_from_line(&mut self, line: &str) {
    let trimmed = line.trim_end_matches(['\n', '\r']);
    let fs = self
        .get_global_var("FS")
        .map(|v| v.as_str())
        .unwrap_or_else(|| " ".into());
    self.set_field_sep_split(&fs, trimmed);
}
/// Installs the contents of `line_buf` as the current record and splits it at once.
///
/// When `RS` is a newline, trailing `\n` and `\r` bytes are dropped. The bytes are
/// decoded lossily as UTF-8. The cached field separator is refreshed when the local
/// `FS` variable holds a string or regexp value that differs from it. `NF` is updated
/// at the end.
pub fn set_record_from_line_buf(&mut self) {
    let mut end = self.line_buf.len();
    if self.rs_string() == "\n" {
        while end > 0 && matches!(self.line_buf[end - 1], b'\n' | b'\r') {
            end -= 1;
        }
    }

    // Lossy decoding borrows the bytes unchanged when they are already valid UTF-8.
    self.record.clear();
    self.record
        .push_str(&String::from_utf8_lossy(&self.line_buf[..end]));

    if let Some(Value::Str(fs) | Value::StrLit(fs) | Value::Regexp(fs)) = self.vars.get("FS") {
        if fs.as_str() != self.cached_fs.as_str() {
            self.cached_fs.clear();
            self.cached_fs.push_str(fs);
        }
    }

    self.fields.clear();
    self.field_ranges.clear();
    self.fields_dirty = false;
    self.split_record_fields();

    let field_count = self.nf() as f64;
    self.vars.insert(String::from("NF"), Value::Num(field_count));
}
/// Reads the variable `key` as seen through `SYMTAB`.
///
/// Lookup order: a slot mapped for `key` (if the index is in range), then the global
/// variable tables, then the built-in scalars.
pub fn symtab_elem_get(&self, key: &str) -> Value {
    let slot_value = self
        .symtab_slot_map
        .get(key)
        .and_then(|&slot| self.slots.get(slot as usize));
    if let Some(value) = slot_value {
        return value.clone();
    }
    match self.get_global_var(key) {
        Some(value) => value.clone(),
        None => self.builtin_scalar_symtab(key),
    }
}
/// Returns the value of a built-in scalar kept in runtime fields rather than in the
/// variable tables: `NR`, `FNR`, `NF` or `FILENAME`. Any other name yields
/// `Value::Uninit`.
fn builtin_scalar_symtab(&self, name: &str) -> Value {
    match name {
        "NR" => Value::Num(self.nr),
        "FNR" => Value::Num(self.fnr),
        "FILENAME" => Value::Str(self.filename.clone()),
        "NF" => {
            let count = if self.fields_dirty {
                self.fields.len()
            } else {
                self.field_ranges.len()
            };
            Value::Num(count as f64)
        }
        _ => Value::Uninit,
    }
}
/// Lists, in sorted order and without duplicates, every name visible through `SYMTAB`.
///
/// The list combines the local and shared variable tables (omitting `SYMTAB`,
/// `FUNCTAB` and `PROCINFO`), all slot-mapped names and the special global names.
pub fn symtab_keys_reflect(&self) -> Vec<String> {
    use rustc_hash::FxHashSet;
    let is_hidden = |name: &str| matches!(name, "SYMTAB" | "FUNCTAB" | "PROCINFO");

    let mut names: FxHashSet<String> = FxHashSet::default();
    names.extend(
        self.vars
            .keys()
            .filter(|k| !is_hidden(k.as_str()))
            .cloned(),
    );
    if let Some(shared) = &self.global_readonly {
        names.extend(shared.keys().filter(|k| !is_hidden(k.as_str())).cloned());
    }
    names.extend(self.symtab_slot_map.keys().cloned());
    for &special in crate::namespace::SPECIAL_GLOBAL_NAMES {
        names.insert((*special).to_string());
    }

    let mut sorted: Vec<_> = names.into_iter().collect();
    sorted.sort();
    sorted
}
/// Reports whether `key` exists when viewed through `SYMTAB`.
///
/// A name exists if it is slot-mapped, is a local variable other than `SYMTAB`,
/// `FUNCTAB` or `PROCINFO`, is present in the shared globals, or resolves to an
/// initialised value through `symtab_elem_get`.
fn symtab_has_key(&self, key: &str) -> bool {
    let hidden_local = matches!(key, "SYMTAB" | "FUNCTAB" | "PROCINFO");
    self.symtab_slot_map.contains_key(key)
        || (!hidden_local && self.vars.contains_key(key))
        || self
            .global_readonly
            .as_ref()
            .is_some_and(|shared| shared.contains_key(key))
        || !matches!(self.symtab_elem_get(key), Value::Uninit)
}
/// Writes the variable `key` as seen through `SYMTAB`.
///
/// A slot-mapped name with an in-range index is written to its slot and nothing else
/// happens. Otherwise the value goes into the local variable table; for `OFS` and
/// `ORS` the cached byte forms are refreshed first.
pub fn symtab_elem_set(&mut self, key: &str, val: Value) {
    let slot_index = self.symtab_slot_map.get(key).map(|&slot| slot as usize);
    if let Some(cell) = slot_index.and_then(|index| self.slots.get_mut(index)) {
        *cell = val;
        return;
    }
    if key == "OFS" {
        self.ofs_bytes = val.as_str().into_bytes();
    } else if key == "ORS" {
        self.ors_bytes = val.as_str().into_bytes();
    }
    self.vars.insert(key.to_string(), val);
}
/// Reads element `key` of the array `name`.
///
/// `SYMTAB` is routed to `symtab_elem_get`. A missing array, a non-array variable or a
/// missing element all yield an empty string value.
#[inline]
pub fn array_get(&self, name: &str, key: &str) -> Value {
    if name == "SYMTAB" {
        return self.symtab_elem_get(key);
    }
    let Some(Value::Array(elements)) = self.get_global_var(name) else {
        return Value::Str(String::new());
    };
    match elements.get(key) {
        None => Value::Str(String::new()),
        // Numbers are rebuilt from the copied value instead of going through `clone`.
        Some(Value::Num(number)) => Value::Num(*number),
        Some(other) => other.clone(),
    }
}
/// Stores `val` under `key` in the array `name`.
///
/// `SYMTAB` is routed to `symtab_elem_set`. A local non-array variable of that name is
/// replaced by a one-element array. When the array exists only in the shared read-only
/// globals, it is copied into the local table before the element is inserted.
pub fn array_set(&mut self, name: &str, key: String, val: Value) {
    if name == "SYMTAB" {
        self.symtab_elem_set(&key, val);
        return;
    }
    match self.vars.get_mut(name) {
        Some(Value::Array(elements)) => {
            elements.insert(key, val);
        }
        Some(scalar) => {
            let mut fresh = AwkMap::default();
            fresh.insert(key, val);
            *scalar = Value::Array(fresh);
        }
        None => {
            let shared = self
                .global_readonly
                .as_ref()
                .and_then(|globals| globals.get(name));
            let mut elements = match shared {
                Some(Value::Array(shared_elements)) => shared_elements.clone(),
                _ => AwkMap::default(),
            };
            elements.insert(key, val);
            self.vars.insert(name.to_string(), Value::Array(elements));
        }
    }
}
/// Adds `delta` to the element of array `name` whose key is the text of field `field`.
///
/// The deferred split is performed first. A field index below 1, or a field that does
/// not exist, uses the empty string as the key.
pub fn array_field_add_delta(&mut self, name: &str, field: i32, delta: f64) {
    self.ensure_fields_split();
    let key: &str = if field < 1 {
        ""
    } else {
        let slot = (field - 1) as usize;
        if self.fields_dirty {
            self.fields.get(slot).map(|text| text.as_str()).unwrap_or("")
        } else {
            match self.field_ranges.get(slot) {
                Some(&(start, end)) => &self.record[start as usize..end as usize],
                None => "",
            }
        }
    };
    // `key` borrows only `fields`/`record`, so `vars` can be borrowed mutably here.
    Self::apply_array_numeric_delta(&mut self.vars, &self.global_readonly, name, key, delta);
}
/// Adds `delta` to element `key` of array `name` in `vars`.
///
/// * A local array has the element updated in place, or created with value `delta`.
/// * A local non-array variable is replaced by a one-element array.
/// * Otherwise, an array found in `global_readonly` is copied into `vars` with the
///   element updated; failing that, a new one-element array is created.
///
/// An existing element is converted with `as_number` before the addition.
fn apply_array_numeric_delta(
    vars: &mut AwkMap<String, Value>,
    global_readonly: &Option<Arc<AwkMap<String, Value>>>,
    name: &str,
    key: &str,
    delta: f64,
) {
    match vars.get_mut(name) {
        Some(Value::Array(elements)) => match elements.get_mut(key) {
            Some(cell) => {
                let updated = cell.as_number() + delta;
                *cell = Value::Num(updated);
            }
            None => {
                elements.insert(key.to_string(), Value::Num(delta));
            }
        },
        Some(scalar) => {
            let mut fresh = AwkMap::default();
            fresh.insert(key.to_string(), Value::Num(delta));
            *scalar = Value::Array(fresh);
        }
        None => {
            let shared = global_readonly.as_ref().and_then(|globals| globals.get(name));
            let elements = match shared {
                Some(Value::Array(shared_elements)) => {
                    let mut copy = shared_elements.clone();
                    let previous = copy.get(key).map(|v| v.as_number()).unwrap_or(0.0);
                    copy.insert(key.to_string(), Value::Num(previous + delta));
                    copy
                }
                _ => {
                    let mut fresh = AwkMap::default();
                    fresh.insert(key.to_string(), Value::Num(delta));
                    fresh
                }
            };
            vars.insert(name.to_string(), Value::Array(elements));
        }
    }
}
/// Deletes one element (`Some(key)`) or the whole array (`None`).
///
/// Deleting an element of an array that exists only in the shared read-only globals
/// first copies it into the local table. Deleting a whole array removes the local
/// entry; if the shared globals also define the name, an empty local array is installed
/// in its place.
pub fn array_delete(&mut self, name: &str, key: Option<&str>) {
    let Some(element) = key else {
        self.vars.remove(name);
        let defined_in_shared = self
            .global_readonly
            .as_ref()
            .is_some_and(|globals| globals.contains_key(name));
        if defined_in_shared {
            self.vars
                .insert(name.to_string(), Value::Array(AwkMap::default()));
        }
        return;
    };

    if let Some(Value::Array(local)) = self.vars.get_mut(name) {
        local.remove(element);
        return;
    }
    let shared = self
        .global_readonly
        .as_ref()
        .and_then(|globals| globals.get(name));
    if let Some(Value::Array(shared_elements)) = shared {
        let mut copy = shared_elements.clone();
        copy.remove(element);
        self.vars.insert(name.to_string(), Value::Array(copy));
    }
}
/// Returns the keys of array `name` in iteration order.
///
/// For `SYMTAB` the keys come from `symtab_keys_reflect`. In POSIX mode, or when the
/// sort mode is a custom function, the keys are returned as collected. Otherwise they
/// are ordered by `sort_for_in_keys` according to the current sort mode. A name that is
/// not an array yields an empty list.
pub fn array_keys(&self, name: &str) -> Vec<String> {
    if name == "SYMTAB" {
        let mut keys = self.symtab_keys_reflect();
        if !self.posix {
            let mode = sorted_in_mode(self);
            if !matches!(mode, SortedInMode::CustomFn(_)) {
                // Value-based sort modes need the values, so snapshot them.
                let snapshot: AwkMap<String, Value> = keys
                    .iter()
                    .map(|k| (k.clone(), self.symtab_elem_get(k)))
                    .collect();
                sort_for_in_keys(&mut keys, &snapshot, mode);
            }
        }
        return keys;
    }

    let elements = match self.get_global_var(name) {
        Some(Value::Array(elements)) => elements,
        _ => return Vec::new(),
    };
    let mut keys: Vec<String> = elements.keys().cloned().collect();
    if !self.posix {
        let mode = sorted_in_mode(self);
        if !matches!(mode, SortedInMode::CustomFn(_)) {
            sort_for_in_keys(&mut keys, elements, mode);
        }
    }
    keys
}
/// Reports whether array `name` contains `key`.
///
/// `SYMTAB` is routed to `symtab_has_key`; a name that is not an array has no keys.
#[inline]
pub fn array_has(&self, name: &str, key: &str) -> bool {
    if name == "SYMTAB" {
        return self.symtab_has_key(key);
    }
    matches!(
        self.get_global_var(name),
        Some(Value::Array(elements)) if elements.contains_key(key)
    )
}
/// Replaces the contents of array `arr_name` with `parts`, keyed "1", "2", ... in order.
pub fn split_into_array(&mut self, arr_name: &str, parts: &[String]) {
    self.array_delete(arr_name, None);
    for (position, part) in (1usize..).zip(parts) {
        self.array_set(arr_name, position.to_string(), Value::Str(part.clone()));
    }
}
}
/// Splits `s` into owned pieces according to the field separator `fs`.
///
/// * Empty input yields no pieces.
/// * An empty separator splits into individual characters.
/// * A single space splits on runs of whitespace.
/// * Any other one-byte separator is matched literally.
/// * A longer separator is compiled as a regex (case-insensitive when `ignore_case`);
///   if it does not compile, it is matched literally instead.
pub fn split_string_by_field_separator(s: &str, fs: &str, ignore_case: bool) -> Vec<String> {
    if s.is_empty() {
        return Vec::new();
    }
    match fs {
        "" => s.chars().map(String::from).collect(),
        " " => s.split_whitespace().map(str::to_owned).collect(),
        _ if fs.len() == 1 => s.split(fs).map(str::to_owned).collect(),
        _ => {
            let compiled = RegexBuilder::new(fs)
                .case_insensitive(ignore_case)
                .build();
            match compiled {
                Ok(pattern) => pattern.split(s).map(str::to_owned).collect(),
                Err(_) => s.split(fs).map(str::to_owned).collect(),
            }
        }
    }
}
fn shutdown_coproc(mut h: CoprocHandle) -> Result<()> {
h.stdin.flush().map_err(Error::Io)?;
drop(h.stdin);
let mut buf = String::new();
loop {
buf.clear();
let n = h.stdout.read_line(&mut buf).map_err(Error::Io)?;
if n == 0 {
break;
}
}
drop(h.stdout);
let _ = h.child.wait();
Ok(())
}
/// Cloning copies variables, record state, caches and configuration, but not open
/// resources or scratch space. The clone has no input reader and no file, directory,
/// pipe, coprocess or socket handles, and its line buffer, print buffer, VM stack, JIT
/// slot buffer and profile counters start empty.
impl Clone for Runtime {
    fn clone(&self) -> Self {
        Self {
            // --- variable storage (copied) ---
            vars: self.vars.clone(),
            global_readonly: self.global_readonly.clone(),
            slots: self.slots.clone(),
            symtab_slot_map: self.symtab_slot_map.clone(),

            // --- current record and its fields (copied, except the raw line buffer) ---
            record: self.record.clone(),
            line_buf: Vec::new(),
            fields: self.fields.clone(),
            field_ranges: self.field_ranges.clone(),
            fields_dirty: self.fields_dirty,
            fields_pending_split: self.fields_pending_split,
            cached_fs: self.cached_fs.clone(),
            rs_pattern_for_regex: self.rs_pattern_for_regex.clone(),
            rs_regex_bytes: self.rs_regex_bytes.clone(),

            // --- input position and exit state (copied) ---
            nr: self.nr,
            fnr: self.fnr,
            filename: self.filename.clone(),
            exit_pending: self.exit_pending,
            exit_code: self.exit_code,
            errno_code: self.errno_code,

            // --- I/O handles (not shared with the clone) ---
            input_reader: None,
            // NOTE(review): the poll descriptor is copied even though the reader is not;
            // confirm this is intended.
            #[cfg(unix)]
            primary_input_poll_fd: self.primary_input_poll_fd,
            file_handles: HashMap::new(),
            dir_read: HashMap::new(),
            output_handles: HashMap::new(),
            pipe_stdin: HashMap::new(),
            pipe_children: HashMap::new(),
            coproc_handles: HashMap::new(),
            inet_tcp_read: HashMap::new(),
            inet_tcp_write: HashMap::new(),
            inet_udp: HashMap::new(),

            // --- output buffering ---
            print_buf: Vec::new(),
            ofs_bytes: self.ofs_bytes.clone(),
            ors_bytes: self.ors_bytes.clone(),

            // --- caches (copied) and VM scratch space (fresh) ---
            regex_cache_cs: self.regex_cache_cs.clone(),
            regex_cache_ci: self.regex_cache_ci.clone(),
            memmem_finder_cache: self.memmem_finder_cache.clone(),
            vm_stack: Vec::with_capacity(64),
            jit_slot_buf: Vec::new(),
            profile_record_hits: Vec::new(),
            sorted_in_warned: Cell::new(self.sorted_in_warned.get()),

            // --- numeric and locale configuration (copied) ---
            rand_seed: self.rand_seed,
            numeric_decimal: self.numeric_decimal,
            numeric_thousands_sep: self.numeric_thousands_sep,
            bignum: self.bignum,
            gettext_dir: self.gettext_dir.clone(),
            gettext_catalogs: self.gettext_catalogs.clone(),

            // --- mode flags (copied) ---
            csv_mode: self.csv_mode,
            sandbox: self.sandbox,
            characters_as_bytes: self.characters_as_bytes,
            posix: self.posix,
            traditional: self.traditional,
            jit_enabled: self.jit_enabled,
        }
    }
}
/// Best-effort teardown of everything the runtime still has open. Every error
/// is deliberately ignored because `drop` has no way to report it.
impl Drop for Runtime {
    fn drop(&mut self) {
        // Coprocesses go through the full shutdown helper.
        self.coproc_handles.drain().for_each(|(_, handle)| {
            let _ = shutdown_coproc(handle);
        });
        // Flush output handles; each one is dropped right after its flush.
        for mut sink in self.output_handles.drain().map(|(_, w)| w) {
            let _ = sink.flush();
        }
        // Flush (and drop) the writers feeding output pipes before waiting on
        // the processes at the other end.
        for mut pipe_in in self.pipe_stdin.drain().map(|(_, w)| w) {
            let _ = pipe_in.flush();
        }
        for mut child in self.pipe_children.drain().map(|(_, c)| c) {
            let _ = child.wait();
        }
    }
}
// Unit tests for `Value` conversions (number, string, truthiness) and for a few
// `Runtime` behaviours that build on them: field splitting, CSV mode, the
// IGNORECASE flag and CONVFMT/OFMT number formatting.
#[cfg(test)]
mod value_tests {
use super::Value;
#[test]
fn value_as_number_from_int_string() {
assert_eq!(Value::Str("42".into()).as_number(), 42.0);
}
// A `Regexp` value converts to a number through its pattern text; text that
// is not numeric yields 0.
#[test]
fn value_as_number_regexp_uses_pattern_text_as_numeric_string() {
assert_eq!(Value::Regexp("3.5".into()).as_number(), 3.5);
assert_eq!(Value::Regexp("notnum".into()).as_number(), 0.0);
}
#[test]
fn value_as_number_empty_string_zero() {
assert_eq!(Value::Str("".into()).as_number(), 0.0);
}
// `Value::Str("0")` is falsy even though the string itself is non-empty.
#[test]
fn value_truthy_numeric_string_zero() {
assert!(!Value::Str("0".into()).truthy());
}
#[test]
fn value_truthy_non_numeric_string() {
assert!(Value::Str("hello".into()).truthy());
}
// `truthy_cond` returns an error for an array value, while plain `truthy`
// reports a non-empty array as true.
#[test]
fn value_truthy_cond_rejects_whole_array() {
let mut m = super::AwkMap::default();
m.insert("k".into(), Value::Num(1.0));
let v = Value::Array(m);
assert!(v.truthy_cond().is_err());
assert!(v.truthy());
}
#[test]
fn value_is_numeric_str_detects_decimal() {
assert!(Value::Str("3.14".into()).is_numeric_str());
assert!(!Value::Str("x".into()).is_numeric_str());
}
// A `StrLit` is never reported as a numeric string, even when its text looks
// like a number; `Uninit` is.
#[test]
fn str_lit_not_numeric_string_for_relops() {
assert!(!Value::StrLit("10".into()).is_numeric_str());
assert!(Value::Uninit.is_numeric_str());
}
// Numeric conversion uses the leading numeric prefix and ignores the rest.
#[test]
fn as_number_longest_numeric_prefix() {
assert_eq!(Value::StrLit("42trailing".into()).as_number(), 42.0);
}
#[test]
fn split_empty_source_zero_fields() {
let v = super::split_string_by_field_separator("", ",", false);
assert!(v.is_empty());
}
// Lowering NF to 3 drops the trailing fields and rewrites the record text.
#[test]
fn set_nf_truncates_and_rebuilds_record() {
let mut rt = super::Runtime::new();
rt.set_field_sep_split(" ", "a b c d e");
rt.ensure_fields_split();
rt.set_nf(3).unwrap();
assert_eq!(rt.record, "a b c");
assert_eq!(rt.nf(), 3);
}
// Replacing the record text makes NF reflect the new record.
#[test]
fn set_record_str_resplits_nf() {
let mut rt = super::Runtime::new();
rt.vars.insert("FS".into(), Value::Str(" ".into()));
rt.set_field_sep_split(" ", "a b c");
rt.ensure_fields_split();
rt.set_record_str("x y");
assert_eq!(rt.nf(), 2);
}
// `append_to_string` appends to the existing buffer; 7.0 is rendered as "7".
#[test]
fn value_append_to_string_concat() {
let mut buf = String::from("a");
Value::Str("b".into()).append_to_string(&mut buf);
Value::Num(7.0).append_to_string(&mut buf);
assert_eq!(buf, "ab7");
}
#[test]
fn value_into_string_from_num_integer_form() {
assert_eq!(Value::Num(12.0).into_string(), "12");
}
// `write_to` appends the value's bytes to a byte buffer.
#[test]
fn value_write_to_buf_str_and_num() {
let mut v = Vec::new();
Value::Str("ok".into()).write_to(&mut v);
Value::Num(5.0).write_to(&mut v);
assert_eq!(v, b"ok5");
}
#[test]
fn value_truthy_num_zero() {
assert!(!Value::Num(0.0).truthy());
}
#[test]
fn value_truthy_num_nonzero() {
assert!(Value::Num(-3.0).truthy());
}
#[test]
fn value_empty_array_not_truthy() {
let m = super::AwkMap::default();
assert!(!Value::Array(m).truthy());
}
#[test]
fn value_as_number_negative_float_string() {
assert_eq!(Value::Str("-2.5".into()).as_number(), -2.5);
}
#[test]
fn value_as_number_scientific_notation_string() {
assert_eq!(Value::Str("1e2".into()).as_number(), 100.0);
}
// "0x10" converts to 0 with the parse mode off and to 16 with it on. The
// thread-local mode is switched back off before the test returns.
#[test]
fn value_as_number_hex_only_when_non_decimal_parse_mode() {
super::set_numeric_parse_mode(false);
assert_eq!(Value::Str("0x10".into()).as_number(), 0.0);
super::set_numeric_parse_mode(true);
assert_eq!(Value::Str("0x10".into()).as_number(), 16.0);
super::set_numeric_parse_mode(false);
}
// "010" converts to decimal 10 with the parse mode off and to 8 (octal) with
// it on. The thread-local mode is switched back off before the test returns.
#[test]
fn value_as_number_leading_zero_octal_only_in_non_decimal_mode() {
super::set_numeric_parse_mode(false);
assert_eq!(Value::Str("010".into()).as_number(), 10.0);
super::set_numeric_parse_mode(true);
assert_eq!(Value::Str("010".into()).as_number(), 8.0);
super::set_numeric_parse_mode(false);
}
// Only checks that the digits 2 and 5 appear; the exact format is not pinned.
#[test]
fn value_into_string_float_fraction() {
let s = Value::Num(0.25).into_string();
assert!(s.contains('2') && s.contains('5'), "{s}");
}
// In CSV mode a comma inside double quotes does not split the field, and the
// surrounding quotes are not part of the field text.
#[test]
fn csv_mode_quoted_comma_three_fields() {
let mut rt = super::Runtime::new();
rt.csv_mode = true;
rt.set_field_sep_split(",", r#"a,"b,c",d"#);
rt.ensure_fields_split();
assert_eq!(rt.nf(), 3);
assert_eq!(rt.field(1).unwrap().as_str(), "a");
assert_eq!(rt.field(2).unwrap().as_str(), "b,c");
assert_eq!(rt.field(3).unwrap().as_str(), "d");
}
// A doubled double quote inside a quoted CSV field yields one literal quote.
#[test]
fn csv_mode_escape_double_quote_in_field() {
let mut rt = super::Runtime::new();
rt.csv_mode = true;
rt.set_field_sep_split(",", "\"a\"\"b\"");
rt.ensure_fields_split();
assert_eq!(rt.field(1).unwrap().as_str(), "a\"b");
}
// A trailing comma produces a final empty field.
#[test]
fn csv_mode_trailing_comma_empty_field() {
let mut rt = super::Runtime::new();
rt.csv_mode = true;
rt.set_field_sep_split(",", "a,");
rt.ensure_fields_split();
assert_eq!(rt.nf(), 2);
assert_eq!(rt.field(1).unwrap().as_str(), "a");
assert_eq!(rt.field(2).unwrap().as_str(), "");
}
// The flag is off on a fresh runtime and when IGNORECASE is numeric zero.
#[test]
fn ignore_case_false_when_unset_or_zero() {
let rt = super::Runtime::new();
assert!(!rt.ignore_case_flag());
let mut rt0 = super::Runtime::new();
rt0.vars.insert("IGNORECASE".into(), Value::Num(0.0));
assert!(!rt0.ignore_case_flag());
}
#[test]
fn ignore_case_true_for_numeric_one() {
let mut rt = super::Runtime::new();
rt.vars.insert("IGNORECASE".into(), Value::Num(1.0));
assert!(rt.ignore_case_flag());
}
// A non-numeric string value for IGNORECASE turns the flag on.
#[test]
fn ignore_case_true_for_non_numeric_string() {
let mut rt = super::Runtime::new();
rt.vars
.insert("IGNORECASE".into(), Value::Str("yes".into()));
assert!(rt.ignore_case_flag());
}
// With CONVFMT set to "%.0f", 3.2 is rendered as "3".
#[test]
fn num_to_string_convfmt_uses_convfmt_global() {
let mut rt = super::Runtime::new();
rt.vars.insert("CONVFMT".into(), Value::Str("%.0f".into()));
assert_eq!(rt.num_to_string_convfmt(3.2), "3");
}
// With OFMT set to "%.2f", 1.2 is rendered as "1.20".
#[test]
fn num_to_string_ofmt_uses_ofmt_global() {
let mut rt = super::Runtime::new();
rt.vars.insert("OFMT".into(), Value::Str("%.2f".into()));
assert_eq!(rt.num_to_string_ofmt(1.2), "1.20");
}
}
// Unit tests for `longest_f64_prefix`, for reading the sort mode from
// `PROCINFO["sorted_in"]` via `sorted_in_mode`, and for the key orderings
// produced by `sort_for_in_keys`.
#[cfg(test)]
mod longest_prefix_and_sorted_in_tests {
use super::{
longest_f64_prefix, sort_for_in_keys, sorted_in_mode, AwkMap, Runtime, SortedInMode, Value,
};
#[test]
fn longest_f64_prefix_empty_none() {
assert_eq!(longest_f64_prefix(""), None);
}
#[test]
fn longest_f64_prefix_scientific_ok() {
assert_eq!(longest_f64_prefix("1e2"), Some("1e2"));
}
// "1e" alone is not a number, so the prefix falls back to "1".
#[test]
fn longest_f64_prefix_non_monotonic_stops_at_last_valid() {
assert_eq!(longest_f64_prefix("1ex"), Some("1"));
}
#[test]
fn longest_f64_prefix_trailing_non_numeric() {
assert_eq!(longest_f64_prefix("3.5abc"), Some("3.5"));
}
// With `posix` set, the `sorted_in` entry is ignored and the mode is Unsorted.
#[test]
fn sorted_in_posix_forces_unsorted() {
let mut rt = Runtime::new();
rt.posix = true;
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str("@ind_str_desc".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::Unsorted);
}
// Whitespace around the `@...` token is ignored.
#[test]
fn sorted_in_reads_at_tokens_with_trim() {
let mut rt = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str(" @val_num_asc ".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::ValNumAsc);
}
// A plain identifier (no leading `@`) is reported as a custom comparison
// function name.
#[test]
fn sorted_in_user_function_name() {
let mut rt = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str("my_cmp".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::CustomFn("my_cmp".into()));
}
// Three ways of having no usable setting: a fresh runtime, a
// whitespace-only value, and a PROCINFO array with no `sorted_in` key.
#[test]
fn sorted_in_missing_or_empty_is_unsorted() {
let rt = Runtime::new();
assert_eq!(sorted_in_mode(&rt), SortedInMode::Unsorted);
let mut rt2 = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str(" ".into()));
rt2.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt2), SortedInMode::Unsorted);
let mut rt3 = Runtime::new();
let pi = AwkMap::default();
rt3.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt3), SortedInMode::Unsorted);
}
// Index-numeric ordering compares keys as numbers: "10" sorts after "2".
#[test]
fn sort_for_in_ind_num_asc_numeric_not_lexicographic() {
let mut keys = vec!["10".into(), "2".into(), "1".into()];
let arr = AwkMap::default();
sort_for_in_keys(&mut keys, &arr, SortedInMode::IndNumAsc);
assert_eq!(keys, vec!["1", "2", "10"]);
}
#[test]
fn sort_for_in_val_num_desc_by_values() {
let mut keys = vec!["a".into(), "b".into()];
let mut arr = AwkMap::default();
arr.insert("a".into(), Value::Num(1.0));
arr.insert("b".into(), Value::Num(10.0));
sort_for_in_keys(&mut keys, &arr, SortedInMode::ValNumDesc);
assert_eq!(keys, vec!["b", "a"]);
}
#[test]
fn sort_for_in_val_str_asc_by_string_values() {
let mut keys = vec!["a".into(), "b".into()];
let mut arr = AwkMap::default();
arr.insert("a".into(), Value::Str("z".into()));
arr.insert("b".into(), Value::Str("a".into()));
sort_for_in_keys(&mut keys, &arr, SortedInMode::ValStrAsc);
assert_eq!(keys, vec!["b", "a"]);
}
#[test]
fn sorted_in_mode_ind_str_asc_token() {
let mut rt = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str("@ind_str_asc".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::IndStrAsc);
}
#[test]
fn sorted_in_mode_val_type_asc_token() {
let mut rt = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str("@val_type_asc".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::ValTypeAsc);
}
#[test]
fn sorted_in_mode_val_type_desc_token() {
let mut rt = Runtime::new();
let mut pi = AwkMap::default();
pi.insert("sorted_in".into(), Value::Str("@val_type_desc".into()));
rt.vars.insert("PROCINFO".into(), Value::Array(pi));
assert_eq!(sorted_in_mode(&rt), SortedInMode::ValTypeDesc);
}
// The key "absent" has no entry in the array; it sorts before the numeric
// entry, which sorts before the string entry.
#[test]
fn sort_for_in_val_type_asc_orders_type_rank_then_value_string() {
let mut keys = vec!["str".into(), "num".into(), "absent".into()];
let mut arr = AwkMap::default();
arr.insert("num".into(), Value::Num(1.0));
arr.insert("str".into(), Value::Str("z".into()));
sort_for_in_keys(&mut keys, &arr, SortedInMode::ValTypeAsc);
assert_eq!(keys, vec!["absent", "num", "str"]);
}
// Descending type order is the reverse: string, number, then the missing key.
#[test]
fn sort_for_in_val_type_desc_reverses_type_rank() {
let mut keys = vec!["absent".into(), "num".into(), "str".into()];
let mut arr = AwkMap::default();
arr.insert("num".into(), Value::Num(1.0));
arr.insert("str".into(), Value::Str("z".into()));
sort_for_in_keys(&mut keys, &arr, SortedInMode::ValTypeDesc);
assert_eq!(keys, vec!["str", "num", "absent"]);
}
#[test]
fn sort_for_in_ind_str_desc() {
let mut keys = vec!["a".into(), "c".into(), "b".into()];
let arr = AwkMap::default();
sort_for_in_keys(&mut keys, &arr, SortedInMode::IndStrDesc);
assert_eq!(keys, vec!["c", "b", "a"]);
}
// `Unsorted` leaves the incoming key order untouched.
#[test]
fn sort_for_in_unsorted_no_op() {
let mut keys = vec!["z".into(), "a".into()];
let arr = AwkMap::default();
sort_for_in_keys(&mut keys, &arr, SortedInMode::Unsorted);
assert_eq!(keys, vec!["z", "a"]);
}
// `sort_for_in_keys` does not reorder anything for `CustomFn`.
#[test]
fn sort_for_in_custom_fn_no_op_in_runtime_helper() {
let mut keys = vec!["b".into(), "a".into()];
let arr = AwkMap::default();
sort_for_in_keys(&mut keys, &arr, SortedInMode::CustomFn("cmp".into()));
assert_eq!(keys, vec!["b", "a"]);
}
}
// Unit tests for `Runtime::init_argv`: how ARGC and the ARGV array are
// populated from the list of input paths.
#[cfg(test)]
mod init_argv_tests {
use super::{Runtime, Value};
use std::path::PathBuf;
// Two input paths give ARGC == 3: ARGV["0"] is a non-empty entry and the
// paths are stored under the string keys "1" and "2".
#[test]
fn init_argv_sets_argc_and_numeric_string_keys() {
let mut rt = Runtime::new();
rt.init_argv(&[
PathBuf::from("/data/one.txt"),
PathBuf::from("/data/two.txt"),
]);
assert_eq!(rt.vars.get("ARGC").unwrap().as_number(), 3.0);
let Value::Array(argv) = rt.vars.get("ARGV").expect("ARGV") else {
panic!("ARGV not array");
};
assert!(!argv.get("0").unwrap().as_str().is_empty());
assert_eq!(argv.get("1").unwrap().as_str(), "/data/one.txt");
assert_eq!(argv.get("2").unwrap().as_str(), "/data/two.txt");
}
// With no input paths, ARGC is 1 and ARGV holds only the "0" entry.
#[test]
fn init_argv_empty_file_list_leaves_only_program_name() {
let mut rt = Runtime::new();
rt.init_argv(&[]);
assert_eq!(rt.vars.get("ARGC").unwrap().as_number(), 1.0);
let Value::Array(argv) = rt.vars.get("ARGV").expect("ARGV") else {
panic!("ARGV not array");
};
assert_eq!(argv.len(), 1);
assert!(argv.get("0").is_some());
}
}