awk_rs/interpreter/
mod.rs

1mod builtins;
2mod expr;
3pub mod stmt;
4
5use std::collections::HashMap;
6use std::fs::File;
7use std::io::{BufRead, BufReader, Write};
8use std::process::{Child, ChildStdin, ChildStdout};
9
10use crate::ast::*;
11use crate::error::{Error, Result};
12use crate::value::Value;
13
14use regex::Regex;
15
/// Input source for `getline` reading from a command pipe.
///
/// Pairs a spawned child process with a buffered reader over its standard output.
pub struct PipeInput {
    // The handle is never read in this file, hence the lint allowance. Presumably it is
    // stored so the process handle stays alive as long as the reader — TODO confirm.
    #[allow(dead_code)]
    child: Child,
    /// Buffered reader over the child's stdout.
    reader: BufReader<ChildStdout>,
}
22
/// Output destination for `print`/`printf` redirection.
///
/// Both variants are written to through the `Write` implementation for this type.
pub enum OutputFile {
    /// Output goes to an open file.
    File(File),
    /// Output goes to the standard input of a child process.
    Pipe(ChildStdin),
}
28
29impl Write for OutputFile {
30    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
31        match self {
32            OutputFile::File(f) => f.write(buf),
33            OutputFile::Pipe(p) => p.write(buf),
34        }
35    }
36
37    fn flush(&mut self) -> std::io::Result<()> {
38        match self {
39            OutputFile::File(f) => f.flush(),
40            OutputFile::Pipe(p) => p.flush(),
41        }
42    }
43}
44
/// The AWK interpreter runtime.
///
/// Borrows the parsed [`Program`] for `'a` and owns all mutable execution state:
/// variables and arrays, the built-in variables, the current record and its fields,
/// control-flow flags, open redirections, and caches.
pub struct Interpreter<'a> {
    /// The parsed program being executed.
    program: &'a Program,

    /// Global scalar variables, keyed by name.
    pub(crate) variables: HashMap<String, Value>,

    /// Associative arrays: array name -> (subscript -> value).
    pub(crate) arrays: HashMap<String, HashMap<String, Value>>,

    /// User-defined functions, keyed by function name.
    pub(crate) functions: HashMap<String, &'a FunctionDef>,

    // --- Built-in variables ---
    /// Field separator (FS)
    pub(crate) fs: String,
    /// Output field separator (OFS)
    pub(crate) ofs: String,
    /// Record separator (RS)
    pub(crate) rs: String,
    /// Output record separator (ORS)
    pub(crate) ors: String,
    /// Number format for output (OFMT)
    pub(crate) ofmt: String,
    /// Conversion format (CONVFMT)
    pub(crate) convfmt: String,
    /// Subscript separator (SUBSEP)
    pub(crate) subsep: String,
    /// Field pattern (FPAT) - gawk extension; empty means "not in use".
    pub(crate) fpat: String,
    /// Fixed field widths (FIELDWIDTHS) - gawk extension; empty means "not in use".
    pub(crate) fieldwidths: String,

    // --- Mode flags ---
    // The setters keep these mutually exclusive: enabling one clears the other.
    /// POSIX strict mode.
    pub(crate) posix_mode: bool,
    /// Traditional AWK mode (no gawk extensions).
    pub(crate) traditional_mode: bool,

    /// Current record ($0)
    pub(crate) record: String,
    /// Current fields ($1, $2, ...); `$n` is stored at index `n - 1`.
    pub(crate) fields: Vec<String>,
    /// Number of fields (NF)
    pub(crate) nf: usize,
    /// Record number (NR)
    pub(crate) nr: usize,
    /// File record number (FNR); reset to 0 at the start of each input.
    pub(crate) fnr: usize,
    /// Current filename (FILENAME)
    pub(crate) filename: String,

    // --- RSTART and RLENGTH from match() ---
    /// RSTART; initialised to 0.
    pub(crate) rstart: usize,
    /// RLENGTH; signed, initialised to -1.
    pub(crate) rlength: i32,

    // --- Control flow flags ---
    /// Checked by the run loops to stop processing (presumably set by `exit` — the
    /// statement executor is not in this file).
    should_exit: bool,
    /// Value returned by `run`.
    exit_code: i32,
    /// When set, the remaining rules for the current record are skipped.
    should_next: bool,
    /// When set, the rest of the current input is skipped.
    should_nextfile: bool,

    /// Open files for output redirection
    pub(crate) output_files: HashMap<String, OutputFile>,

    /// Open files for input (getline)
    pub(crate) input_files: HashMap<String, BufReader<File>>,

    /// Open pipes for input (getline from command)
    pub(crate) pipes: HashMap<String, PipeInput>,

    /// Compiled regex cache, keyed by the pattern source text.
    pub(crate) regex_cache: HashMap<String, Regex>,

    /// Range pattern state (for /start/,/end/ patterns): rule index -> "range is open".
    range_states: HashMap<usize, bool>,

    // --- Random number generator state ---
    /// Seed; initialised from the system clock in `new`.
    pub(crate) rand_seed: u64,
    /// Current generator state; starts out equal to the seed.
    pub(crate) rand_state: u64,

    // --- Command line arguments (ARGC, ARGV) ---
    /// ARGC
    pub(crate) argc: usize,
    /// ARGV
    pub(crate) argv: Vec<String>,

    /// Environment variables (ENVIRON)
    pub(crate) environ: HashMap<String, String>,

    /// Array aliases for pass-by-reference in functions
    /// Maps parameter name -> actual array name
    pub(crate) array_aliases: HashMap<String, String>,
}
136
137impl<'a> Interpreter<'a> {
138    pub fn new(program: &'a Program) -> Self {
139        let mut functions = HashMap::new();
140        for func in &program.functions {
141            functions.insert(func.name.clone(), func);
142        }
143
144        // Initialize environment variables
145        let environ: HashMap<String, String> = std::env::vars().collect();
146
147        // Initialize random seed from current time
148        use std::time::{SystemTime, UNIX_EPOCH};
149        let rand_seed = SystemTime::now()
150            .duration_since(UNIX_EPOCH)
151            .map(|d| d.as_nanos() as u64)
152            .unwrap_or(12345);
153
154        Self {
155            program,
156            variables: HashMap::new(),
157            arrays: HashMap::new(),
158            functions,
159            fs: " ".to_string(),
160            ofs: " ".to_string(),
161            rs: "\n".to_string(),
162            ors: "\n".to_string(),
163            ofmt: "%.6g".to_string(),
164            convfmt: "%.6g".to_string(),
165            subsep: "\x1c".to_string(),
166            fpat: String::new(),
167            fieldwidths: String::new(),
168            posix_mode: false,
169            traditional_mode: false,
170            record: String::new(),
171            fields: Vec::new(),
172            nf: 0,
173            nr: 0,
174            fnr: 0,
175            filename: String::new(),
176            rstart: 0,
177            rlength: -1,
178            should_exit: false,
179            exit_code: 0,
180            should_next: false,
181            should_nextfile: false,
182            output_files: HashMap::new(),
183            input_files: HashMap::new(),
184            pipes: HashMap::new(),
185            regex_cache: HashMap::new(),
186            range_states: HashMap::new(),
187            rand_seed,
188            rand_state: rand_seed,
189            argc: 0,
190            argv: Vec::new(),
191            environ,
192            array_aliases: HashMap::new(),
193        }
194    }
195
196    /// Set command line arguments (ARGC and ARGV)
197    pub fn set_args(&mut self, args: Vec<String>) {
198        self.argc = args.len();
199        self.argv = args;
200    }
201
202    /// Set the field separator
203    pub fn set_fs(&mut self, fs: &str) {
204        self.fs = fs.to_string();
205        // Clear FPAT and FIELDWIDTHS when FS is set
206        self.fpat.clear();
207        self.fieldwidths.clear();
208    }
209
210    /// Set POSIX strict mode
211    pub fn set_posix_mode(&mut self, enabled: bool) {
212        self.posix_mode = enabled;
213        if enabled {
214            self.traditional_mode = false;
215        }
216    }
217
218    /// Set traditional AWK mode (no gawk extensions)
219    pub fn set_traditional_mode(&mut self, enabled: bool) {
220        self.traditional_mode = enabled;
221        if enabled {
222            self.posix_mode = false;
223        }
224    }
225
226    /// Set a variable before execution
227    pub fn set_variable(&mut self, name: &str, value: &str) {
228        self.variables
229            .insert(name.to_string(), Value::from_string(value.to_string()));
230    }
231
232    /// Set the current filename (FILENAME)
233    pub fn set_filename(&mut self, filename: &str) {
234        self.filename = filename.to_string();
235    }
236
237    /// Run the AWK program with given input
238    pub fn run<R: BufRead, W: Write>(&mut self, inputs: Vec<R>, output: &mut W) -> Result<i32> {
239        // Execute BEGIN rules
240        for rule in &self.program.rules {
241            if matches!(&rule.pattern, Some(Pattern::Begin)) {
242                if let Some(action) = &rule.action {
243                    self.execute_block(action, output)?;
244                }
245                if self.should_exit {
246                    return Ok(self.exit_code);
247                }
248            }
249        }
250
251        // Process input files
252        for input in inputs {
253            self.fnr = 0;
254
255            // Execute BEGINFILE rules (gawk extension)
256            for rule in &self.program.rules {
257                if matches!(&rule.pattern, Some(Pattern::BeginFile)) {
258                    if let Some(action) = &rule.action {
259                        self.execute_block(action, output)?;
260                    }
261                    if self.should_exit {
262                        return Ok(self.exit_code);
263                    }
264                }
265            }
266
267            self.process_input(input, output)?;
268
269            // Execute ENDFILE rules (gawk extension)
270            for rule in &self.program.rules {
271                if matches!(&rule.pattern, Some(Pattern::EndFile)) {
272                    if let Some(action) = &rule.action {
273                        self.execute_block(action, output)?;
274                    }
275                    if self.should_exit {
276                        return Ok(self.exit_code);
277                    }
278                }
279            }
280
281            if self.should_exit {
282                return Ok(self.exit_code);
283            }
284        }
285
286        // Execute END rules
287        for rule in &self.program.rules {
288            if matches!(&rule.pattern, Some(Pattern::End))
289                && let Some(action) = &rule.action
290            {
291                self.execute_block(action, output)?;
292            }
293        }
294
295        Ok(self.exit_code)
296    }
297
298    fn process_input<R: BufRead, W: Write>(&mut self, mut input: R, output: &mut W) -> Result<()> {
299        // Check for paragraph mode (RS = "")
300        if self.rs.is_empty() {
301            return self.process_input_paragraph_mode(input, output);
302        }
303
304        let mut line = String::new();
305
306        loop {
307            line.clear();
308            let bytes_read = input.read_line(&mut line).map_err(Error::Io)?;
309            if bytes_read == 0 {
310                break; // EOF
311            }
312
313            // Remove record separator
314            if line.ends_with('\n') {
315                line.pop();
316                if line.ends_with('\r') {
317                    line.pop();
318                }
319            }
320
321            self.nr += 1;
322            self.fnr += 1;
323            self.set_record(&line);
324
325            self.process_current_record(output)?;
326
327            if self.should_nextfile {
328                self.should_nextfile = false;
329                break;
330            }
331
332            if self.should_exit {
333                break;
334            }
335        }
336
337        Ok(())
338    }
339
340    /// Process input in paragraph mode (RS = "")
341    /// Blank lines separate records; multiple blank lines count as one separator
342    fn process_input_paragraph_mode<R: BufRead, W: Write>(
343        &mut self,
344        mut input: R,
345        output: &mut W,
346    ) -> Result<()> {
347        let mut line = String::new();
348        let mut record = String::new();
349        let mut in_record = false;
350
351        loop {
352            line.clear();
353            let bytes_read = input.read_line(&mut line).map_err(Error::Io)?;
354
355            // Check if line is blank (empty or only whitespace)
356            let is_blank = line.trim().is_empty();
357
358            if bytes_read == 0 {
359                // EOF - process any remaining record
360                if !record.is_empty() {
361                    // Remove trailing newline
362                    while record.ends_with('\n') || record.ends_with('\r') {
363                        record.pop();
364                    }
365                    self.nr += 1;
366                    self.fnr += 1;
367                    self.set_record(&record);
368                    self.process_current_record(output)?;
369                }
370                break;
371            }
372
373            if is_blank {
374                // Blank line - end of record if we're in one
375                if in_record && !record.is_empty() {
376                    // Remove trailing newline
377                    while record.ends_with('\n') || record.ends_with('\r') {
378                        record.pop();
379                    }
380                    self.nr += 1;
381                    self.fnr += 1;
382                    self.set_record(&record);
383                    self.process_current_record(output)?;
384
385                    record.clear();
386                    in_record = false;
387
388                    if self.should_nextfile || self.should_exit {
389                        break;
390                    }
391                }
392            } else {
393                // Non-blank line - add to record
394                if in_record {
395                    record.push('\n');
396                }
397                // Remove trailing newline from line before adding
398                if line.ends_with('\n') {
399                    line.pop();
400                    if line.ends_with('\r') {
401                        line.pop();
402                    }
403                }
404                record.push_str(&line);
405                in_record = true;
406            }
407        }
408
409        if self.should_nextfile {
410            self.should_nextfile = false;
411        }
412
413        Ok(())
414    }
415
416    /// Process the current record through all matching rules
417    fn process_current_record<W: Write>(&mut self, output: &mut W) -> Result<()> {
418        for (idx, rule) in self.program.rules.iter().enumerate() {
419            // Skip special patterns that are handled separately
420            if matches!(
421                &rule.pattern,
422                Some(Pattern::Begin)
423                    | Some(Pattern::End)
424                    | Some(Pattern::BeginFile)
425                    | Some(Pattern::EndFile)
426            ) {
427                continue;
428            }
429
430            let matches = self.pattern_matches(&rule.pattern, idx)?;
431            if matches {
432                if let Some(action) = &rule.action {
433                    self.execute_block(action, output)?;
434                } else {
435                    // Default action is to print $0
436                    writeln!(output, "{}", self.record).map_err(Error::Io)?;
437                }
438            }
439
440            if self.should_next {
441                self.should_next = false;
442                break;
443            }
444
445            if self.should_nextfile || self.should_exit {
446                break;
447            }
448        }
449        Ok(())
450    }
451
452    pub(crate) fn set_record(&mut self, record: &str) {
453        self.record = record.to_string();
454        self.split_fields();
455    }
456
457    fn split_fields(&mut self) {
458        self.fields.clear();
459
460        if self.record.is_empty() {
461            self.nf = 0;
462            return;
463        }
464
465        // Pre-estimate capacity to reduce reallocations
466        let estimated_fields = self.record.len() / 8 + 1;
467        self.fields.reserve(estimated_fields.min(64));
468
469        // Check for FPAT (field pattern) - gawk extension
470        if !self.fpat.is_empty() && !self.posix_mode && !self.traditional_mode {
471            self.split_fields_fpat();
472            return;
473        }
474
475        // Check for FIELDWIDTHS - gawk extension
476        if !self.fieldwidths.is_empty() && !self.posix_mode && !self.traditional_mode {
477            self.split_fields_widths();
478            return;
479        }
480
481        // Standard FS-based splitting
482        if self.fs == " " {
483            // Special case: split on runs of whitespace, trimming leading/trailing
484            // Use byte-based iteration for ASCII optimization
485            self.fields
486                .extend(self.record.split_whitespace().map(String::from));
487        } else if self.fs.len() == 1 {
488            // Single character separator - most common case, optimize for it
489            let sep = self.fs.as_bytes()[0];
490            let bytes = self.record.as_bytes();
491            let mut start = 0;
492
493            for (i, &b) in bytes.iter().enumerate() {
494                if b == sep {
495                    self.fields.push(self.record[start..i].to_string());
496                    start = i + 1;
497                }
498            }
499            // Don't forget the last field
500            self.fields.push(self.record[start..].to_string());
501        } else {
502            // Regex separator - cache the compiled regex
503            let fs = self.fs.clone();
504            let record = self.record.clone();
505            if let Some(regex) = self.regex_cache.get(&fs) {
506                self.fields.extend(regex.split(&record).map(String::from));
507            } else if let Ok(regex) = Regex::new(&fs) {
508                self.fields.extend(regex.split(&record).map(String::from));
509                self.regex_cache.insert(fs, regex);
510            } else {
511                // If regex fails, treat as literal string
512                self.fields.extend(record.split(&fs).map(String::from));
513            }
514        }
515
516        self.nf = self.fields.len();
517    }
518
519    /// Split fields using FPAT (field pattern matching)
520    fn split_fields_fpat(&mut self) {
521        let fpat = self.fpat.clone();
522        let record = self.record.clone();
523
524        if let Some(regex) = self.regex_cache.get(&fpat) {
525            for mat in regex.find_iter(&record) {
526                self.fields.push(mat.as_str().to_string());
527            }
528        } else if let Ok(regex) = Regex::new(&fpat) {
529            for mat in regex.find_iter(&record) {
530                self.fields.push(mat.as_str().to_string());
531            }
532            self.regex_cache.insert(fpat, regex);
533        }
534
535        self.nf = self.fields.len();
536    }
537
538    /// Split fields using FIELDWIDTHS (fixed-width fields)
539    fn split_fields_widths(&mut self) {
540        let widths: Vec<usize> = self
541            .fieldwidths
542            .split_whitespace()
543            .filter_map(|s| s.parse().ok())
544            .collect();
545
546        let mut pos = 0;
547        let chars: Vec<char> = self.record.chars().collect();
548
549        for width in widths {
550            if pos >= chars.len() {
551                break;
552            }
553            let end = (pos + width).min(chars.len());
554            let field: String = chars[pos..end].iter().collect();
555            self.fields.push(field);
556            pos = end;
557        }
558
559        self.nf = self.fields.len();
560    }
561
562    #[inline]
563    pub(crate) fn get_field(&self, index: usize) -> String {
564        if index == 0 {
565            self.record.clone()
566        } else if index <= self.fields.len() {
567            self.fields[index - 1].clone()
568        } else {
569            String::new()
570        }
571    }
572
573    /// Get field reference without cloning (for read-only access)
574    #[inline]
575    #[allow(dead_code)]
576    pub(crate) fn get_field_ref(&self, index: usize) -> &str {
577        if index == 0 {
578            &self.record
579        } else if index <= self.fields.len() {
580            &self.fields[index - 1]
581        } else {
582            ""
583        }
584    }
585
586    pub(crate) fn set_field(&mut self, index: usize, value: String) {
587        if index == 0 {
588            self.record = value;
589            self.split_fields();
590        } else {
591            // Extend fields if necessary
592            while self.fields.len() < index {
593                self.fields.push(String::new());
594            }
595            self.fields[index - 1] = value;
596            self.nf = self.fields.len();
597            // Rebuild $0
598            self.record = self.fields.join(&self.ofs);
599        }
600    }
601
602    fn pattern_matches(&mut self, pattern: &Option<Pattern>, rule_idx: usize) -> Result<bool> {
603        match pattern {
604            None => Ok(true), // No pattern means always match
605            Some(Pattern::Begin)
606            | Some(Pattern::End)
607            | Some(Pattern::BeginFile)
608            | Some(Pattern::EndFile) => Ok(false),
609            Some(Pattern::Expr(expr)) => {
610                let val = self.eval_expr(expr)?;
611                Ok(val.is_truthy())
612            }
613            Some(Pattern::Regex(regex)) => {
614                let record = self.record.clone();
615                let re = self.get_regex(regex)?;
616                Ok(re.is_match(&record))
617            }
618            Some(Pattern::Range { start, end }) => {
619                let active = self.range_states.get(&rule_idx).copied().unwrap_or(false);
620                if !active {
621                    // Check if start pattern matches
622                    if self.pattern_matches(&Some(start.as_ref().clone()), rule_idx)? {
623                        self.range_states.insert(rule_idx, true);
624                        return Ok(true);
625                    }
626                    Ok(false)
627                } else {
628                    // Range is active, check if end pattern matches
629                    if self.pattern_matches(&Some(end.as_ref().clone()), rule_idx)? {
630                        self.range_states.insert(rule_idx, false);
631                    }
632                    Ok(true)
633                }
634            }
635            Some(Pattern::And(left, right)) => Ok(self
636                .pattern_matches(&Some(left.as_ref().clone()), rule_idx)?
637                && self.pattern_matches(&Some(right.as_ref().clone()), rule_idx)?),
638            Some(Pattern::Or(left, right)) => Ok(self
639                .pattern_matches(&Some(left.as_ref().clone()), rule_idx)?
640                || self.pattern_matches(&Some(right.as_ref().clone()), rule_idx)?),
641            Some(Pattern::Not(inner)) => {
642                Ok(!self.pattern_matches(&Some(inner.as_ref().clone()), rule_idx)?)
643            }
644        }
645    }
646
647    pub(crate) fn get_regex(&mut self, pattern: &str) -> Result<&Regex> {
648        if !self.regex_cache.contains_key(pattern) {
649            let regex = Regex::new(pattern).map_err(Error::Regex)?;
650            self.regex_cache.insert(pattern.to_string(), regex);
651        }
652        Ok(self.regex_cache.get(pattern).unwrap())
653    }
654
655    pub(crate) fn get_variable(&self, name: &str) -> Value {
656        // Check special variables first
657        match name {
658            "NF" => Value::Number(self.nf as f64),
659            "NR" => Value::Number(self.nr as f64),
660            "FNR" => Value::Number(self.fnr as f64),
661            "FS" => Value::from_string(self.fs.clone()),
662            "OFS" => Value::from_string(self.ofs.clone()),
663            "RS" => Value::from_string(self.rs.clone()),
664            "ORS" => Value::from_string(self.ors.clone()),
665            "OFMT" => Value::from_string(self.ofmt.clone()),
666            "CONVFMT" => Value::from_string(self.convfmt.clone()),
667            "SUBSEP" => Value::from_string(self.subsep.clone()),
668            "FILENAME" => Value::from_string(self.filename.clone()),
669            "RSTART" => Value::Number(self.rstart as f64),
670            "RLENGTH" => Value::Number(self.rlength as f64),
671            "ARGC" => Value::Number(self.argc as f64),
672            // gawk extensions
673            "FPAT" => Value::from_string(self.fpat.clone()),
674            "FIELDWIDTHS" => Value::from_string(self.fieldwidths.clone()),
675            _ => self
676                .variables
677                .get(name)
678                .cloned()
679                .unwrap_or(Value::Uninitialized),
680        }
681    }
682
683    /// Get an element from ARGV, ENVIRON, or PROCINFO arrays
684    pub(crate) fn get_special_array(&self, array: &str, key: &str) -> Option<Value> {
685        match array {
686            "ARGV" => key
687                .parse::<usize>()
688                .ok()
689                .and_then(|i| self.argv.get(i))
690                .map(|s| Value::from_string(s.clone())),
691            "ENVIRON" => self.environ.get(key).map(|s| Value::from_string(s.clone())),
692            "PROCINFO" => {
693                // gawk PROCINFO array - system information
694                match key {
695                    "version" => Some(Value::from_string(env!("CARGO_PKG_VERSION").to_string())),
696                    "strftime" => Some(Value::from_string("%a %b %e %H:%M:%S %Z %Y".to_string())),
697                    "FS" => {
698                        if !self.fpat.is_empty() {
699                            Some(Value::from_string("FPAT".to_string()))
700                        } else if !self.fieldwidths.is_empty() {
701                            Some(Value::from_string("FIELDWIDTHS".to_string()))
702                        } else {
703                            Some(Value::from_string("FS".to_string()))
704                        }
705                    }
706                    "identifiers" => Some(Value::Number(0.0)), // Not implemented
707                    "pid" => Some(Value::Number(std::process::id() as f64)),
708                    "ppid" => Some(Value::Number(0.0)), // Not easily available in Rust
709                    "uid" => Some(Value::Number(0.0)),  // Platform specific
710                    "gid" => Some(Value::Number(0.0)),  // Platform specific
711                    "euid" => Some(Value::Number(0.0)), // Platform specific
712                    "egid" => Some(Value::Number(0.0)), // Platform specific
713                    "pgrpid" => Some(Value::Number(0.0)), // Platform specific
714                    _ => Some(Value::Uninitialized),
715                }
716            }
717            _ => None,
718        }
719    }
720
721    pub(crate) fn set_variable_value(&mut self, name: &str, value: Value) {
722        // Handle special variables
723        match name {
724            "NF" => {
725                let new_nf = value.to_number() as usize;
726                if new_nf < self.nf {
727                    self.fields.truncate(new_nf);
728                } else {
729                    while self.fields.len() < new_nf {
730                        self.fields.push(String::new());
731                    }
732                }
733                self.nf = new_nf;
734                self.record = self.fields.join(&self.ofs);
735            }
736            "FS" => {
737                self.fs = value.to_string_val();
738                // Clear FPAT and FIELDWIDTHS when FS is set
739                self.fpat.clear();
740                self.fieldwidths.clear();
741            }
742            "OFS" => self.ofs = value.to_string_val(),
743            "RS" => self.rs = value.to_string_val(),
744            "ORS" => self.ors = value.to_string_val(),
745            "OFMT" => self.ofmt = value.to_string_val(),
746            "CONVFMT" => self.convfmt = value.to_string_val(),
747            "SUBSEP" => self.subsep = value.to_string_val(),
748            // gawk extensions
749            "FPAT" => {
750                self.fpat = value.to_string_val();
751                // FPAT takes precedence over FS and FIELDWIDTHS
752                self.fieldwidths.clear();
753            }
754            "FIELDWIDTHS" => {
755                self.fieldwidths = value.to_string_val();
756                // FIELDWIDTHS takes precedence over FS
757                self.fpat.clear();
758            }
759            _ => {
760                self.variables.insert(name.to_string(), value);
761            }
762        }
763    }
764
765    /// Resolve array name through aliases (for pass-by-reference in functions)
766    fn resolve_array_name<'b>(&'b self, array: &'b str) -> &'b str {
767        self.array_aliases
768            .get(array)
769            .map(|s| s.as_str())
770            .unwrap_or(array)
771    }
772
773    pub(crate) fn get_array_element(&self, array: &str, key: &str) -> Value {
774        let array = self.resolve_array_name(array);
775
776        // Check for special arrays first
777        if let Some(val) = self.get_special_array(array, key) {
778            return val;
779        }
780
781        self.arrays
782            .get(array)
783            .and_then(|arr| arr.get(key))
784            .cloned()
785            .unwrap_or(Value::Uninitialized)
786    }
787
788    pub(crate) fn set_array_element(&mut self, array: &str, key: &str, value: Value) {
789        let array = self.resolve_array_name(array).to_string();
790        self.arrays
791            .entry(array)
792            .or_default()
793            .insert(key.to_string(), value);
794    }
795
796    pub(crate) fn array_key_exists(&self, array: &str, key: &str) -> bool {
797        let array = self.resolve_array_name(array);
798
799        // Check special arrays
800        match array {
801            "ARGV" => key
802                .parse::<usize>()
803                .ok()
804                .map(|i| i < self.argv.len())
805                .unwrap_or(false),
806            "ENVIRON" => self.environ.contains_key(key),
807            _ => self
808                .arrays
809                .get(array)
810                .map(|arr| arr.contains_key(key))
811                .unwrap_or(false),
812        }
813    }
814
815    pub(crate) fn delete_array_element(&mut self, array: &str, key: &str) {
816        let array = self.resolve_array_name(array).to_string();
817        if let Some(arr) = self.arrays.get_mut(&array) {
818            arr.remove(key);
819        }
820    }
821
822    pub(crate) fn make_array_key(&self, indices: &[Value]) -> String {
823        indices
824            .iter()
825            .map(|v| v.to_string_val())
826            .collect::<Vec<_>>()
827            .join(&self.subsep)
828    }
829}
830
#[cfg(test)]
mod tests {
    //! End-to-end tests for the interpreter: each test lexes, parses, and runs
    //! a small AWK program and checks the text written to the output sink.

    use super::*;
    use crate::lexer::Lexer;
    use crate::parser::Parser;
    use std::io::Cursor;

    /// Lexes, parses, and runs `program` against `input`, returning everything
    /// the interpreter wrote to its output sink as a `String`.
    ///
    /// An empty `input` is handed to the interpreter as an empty list of input
    /// sources rather than as a single empty reader.
    ///
    /// # Panics
    ///
    /// Panics (failing the calling test) if tokenizing, parsing, or running the
    /// program returns an error, or if the captured output is not valid UTF-8.
    /// Each stage has its own message so a failure points at the stage.
    fn run_awk(program: &str, input: &str) -> String {
        let mut lexer = Lexer::new(program);
        let tokens = lexer
            .tokenize()
            .expect("test program should tokenize");
        let mut parser = Parser::new(tokens);
        let ast = parser.parse().expect("test program should parse");

        let mut interpreter = Interpreter::new(&ast);
        let mut output = Vec::new();
        let inputs: Vec<std::io::BufReader<Cursor<&str>>> = if input.is_empty() {
            Vec::new()
        } else {
            vec![std::io::BufReader::new(Cursor::new(input))]
        };

        interpreter
            .run(inputs, &mut output)
            .expect("test program should run without error");
        String::from_utf8(output).expect("interpreter output should be valid UTF-8")
    }

    /// A BEGIN action runs without any input and `print` ends with a newline.
    #[test]
    fn test_begin_print() {
        let output = run_awk(r#"BEGIN { print "hello" }"#, "");
        assert_eq!(output, "hello\n");
    }

    /// `$1` yields the first whitespace-separated field of the record.
    #[test]
    fn test_print_field() {
        let output = run_awk("{ print $1 }", "one two three");
        assert_eq!(output, "one\n");
    }

    /// Comma-separated `print` arguments are expected to be joined by a space.
    #[test]
    fn test_print_multiple_fields() {
        let output = run_awk("{ print $1, $3 }", "one two three");
        assert_eq!(output, "one three\n");
    }

    /// Multiplication binds tighter than addition.
    #[test]
    fn test_arithmetic() {
        let output = run_awk("BEGIN { print 2 + 3 * 4 }", "");
        assert_eq!(output, "14\n");
    }

    /// A value assigned to a variable can be read back.
    #[test]
    fn test_variable() {
        let output = run_awk("BEGIN { x = 5; print x }", "");
        assert_eq!(output, "5\n");
    }

    /// The body of an `if` runs when its condition is true.
    #[test]
    fn test_if_statement() {
        let output = run_awk("BEGIN { x = 10; if (x > 5) print \"big\" }", "");
        assert_eq!(output, "big\n");
    }

    /// A `while` loop repeats until its condition becomes false.
    #[test]
    fn test_while_loop() {
        let output = run_awk("BEGIN { i = 1; while (i <= 3) { print i; i++ } }", "");
        assert_eq!(output, "1\n2\n3\n");
    }

    /// A regex pattern selects only the records that match it.
    #[test]
    fn test_pattern_match() {
        let output = run_awk("/two/ { print $0 }", "one\ntwo\nthree");
        assert_eq!(output, "two\n");
    }

    /// A C-style `for` loop with an unbraced single-statement body.
    #[test]
    fn test_for_loop() {
        let output = run_awk("BEGIN { for (i = 1; i <= 3; i++) print i }", "");
        assert_eq!(output, "1\n2\n3\n");
    }

    /// `for (k in a)` visits each key once; counting avoids depending on order.
    #[test]
    fn test_for_in_loop() {
        let output = run_awk(
            "BEGIN { a[1]=1; a[2]=2; for (k in a) count++; print count }",
            "",
        );
        assert_eq!(output, "2\n");
    }

    /// A `do ... while` loop keeps iterating while the condition holds.
    #[test]
    fn test_do_while() {
        let output = run_awk("BEGIN { i = 0; do { i++ } while (i < 3); print i }", "");
        assert_eq!(output, "3\n");
    }

    /// `break` leaves the enclosing loop before the remaining iterations run.
    #[test]
    fn test_break() {
        let output = run_awk(
            "BEGIN { for (i=1; i<=10; i++) { if (i==3) break; print i } }",
            "",
        );
        assert_eq!(output, "1\n2\n");
    }

    /// `continue` skips the rest of the current iteration only.
    #[test]
    fn test_continue() {
        let output = run_awk(
            "BEGIN { for (i=1; i<=3; i++) { if (i==2) continue; print i } }",
            "",
        );
        assert_eq!(output, "1\n3\n");
    }

    /// `next` abandons the current record, so later rules do not see it.
    #[test]
    fn test_next() {
        let output = run_awk("/skip/ { next } { print }", "one\nskip\ntwo");
        assert_eq!(output, "one\ntwo\n");
    }

    /// `exit` in a main rule stops record processing.
    #[test]
    fn test_exit() {
        // The exit rule comes before the print rule, so record 2 ends the run
        // before it (or anything after it) is printed.
        let output = run_awk("NR == 2 { exit } { print }", "one\ntwo\nthree");
        assert_eq!(output, "one\n");
    }

    /// The END action runs once, after every input record has been processed.
    ///
    /// NOTE(review): despite the name, the program contains no `exit`
    /// statement; this only covers END running after the main rules.
    #[test]
    fn test_exit_in_end() {
        let output = run_awk("{ print } END { print \"done\" }", "one\ntwo");
        // Exact match: a substring check would also pass if the records were
        // dropped or the END output appeared in the wrong position.
        assert_eq!(output, "one\ntwo\ndone\n");
    }

    /// An array element stored under a string key can be read back.
    #[test]
    fn test_array_access() {
        let output = run_awk("BEGIN { a[\"x\"] = 1; print a[\"x\"] }", "");
        assert_eq!(output, "1\n");
    }

    /// `key in array` yields 1 for a present key and 0 for an absent one.
    #[test]
    fn test_array_in() {
        let output = run_awk("BEGIN { a[1]=1; print (1 in a), (2 in a) }", "");
        assert_eq!(output, "1 0\n");
    }

    /// `delete a[k]` removes just that element from the array.
    #[test]
    fn test_delete() {
        let output = run_awk(
            "BEGIN { a[1]=1; a[2]=2; delete a[1]; for(k in a) print k }",
            "",
        );
        assert_eq!(output, "2\n");
    }

    /// `NR`, `NF`, and `$0` reflect the current record.
    #[test]
    fn test_special_variables() {
        let output = run_awk("{ print NR, NF, $0 }", "a b c");
        assert_eq!(output, "1 3 a b c\n");
    }

    /// Assigning `FS` in BEGIN changes how records are split into fields.
    #[test]
    fn test_fs_change() {
        let output = run_awk("BEGIN { FS = \":\" } { print $1 }", "a:b:c");
        assert_eq!(output, "a\n");
    }

    /// `OFS` is placed between comma-separated `print` arguments.
    #[test]
    fn test_ofs() {
        let output = run_awk("BEGIN { OFS = \"-\" } { print $1, $2 }", "a b c");
        assert_eq!(output, "a-b\n");
    }

    /// `$NF` refers to the last field of the record.
    #[test]
    fn test_nf_access() {
        let output = run_awk("{ print $NF }", "a b c");
        assert_eq!(output, "c\n");
    }

    /// Assigning to a field is reflected in `$0`.
    #[test]
    fn test_field_modify() {
        let output = run_awk("{ $2 = \"X\"; print $0 }", "a b c");
        assert_eq!(output, "a X c\n");
    }

    /// A user-defined function receives its argument and returns a value.
    #[test]
    fn test_user_function() {
        let output = run_awk(
            "function double(x) { return x*2 } BEGIN { print double(5) }",
            "",
        );
        assert_eq!(output, "10\n");
    }

    /// A user-defined function may call itself recursively.
    #[test]
    fn test_recursion() {
        let output = run_awk(
            "function fact(n) { return n<=1 ? 1 : n*fact(n-1) } BEGIN { print fact(5) }",
            "",
        );
        assert_eq!(output, "120\n");
    }

    /// `printf` substitutes `%d` and `%s` and emits only the newline it is given.
    #[test]
    fn test_printf() {
        let output = run_awk("BEGIN { printf \"%d %s\\n\", 42, \"hello\" }", "");
        assert_eq!(output, "42 hello\n");
    }

    /// A range pattern matches from the start line through the end line inclusive.
    #[test]
    fn test_range_pattern() {
        let output = run_awk(
            "/start/,/end/ { print }",
            "before\nstart\nmiddle\nend\nafter",
        );
        assert_eq!(output, "start\nmiddle\nend\n");
    }

    /// A pattern may combine two comparisons with `&&`.
    #[test]
    fn test_compound_pattern_and() {
        let output = run_awk("NR > 1 && NR < 4 { print }", "one\ntwo\nthree\nfour");
        assert_eq!(output, "two\nthree\n");
    }

    /// Regex literals can be used as operands of `||` inside an expression.
    #[test]
    fn test_logical_or_in_expr() {
        let output = run_awk("{ if (/a/ || /c/) print }", "a\nb\nc");
        assert_eq!(output, "a\nc\n");
    }

    /// `!` in front of a regex pattern selects the non-matching records.
    #[test]
    fn test_negated_pattern() {
        let output = run_awk("!/skip/ { print }", "keep\nskip\nkeep");
        assert_eq!(output, "keep\nkeep\n");
    }

    /// `length(s)` returns the length of the string.
    #[test]
    fn test_builtin_length() {
        let output = run_awk("BEGIN { print length(\"hello\") }", "");
        assert_eq!(output, "5\n");
    }

    /// `substr(s, start, len)` uses a 1-based start position.
    #[test]
    fn test_builtin_substr() {
        let output = run_awk("BEGIN { print substr(\"hello\", 2, 3) }", "");
        assert_eq!(output, "ell\n");
    }

    /// `index(s, t)` returns the 1-based position of `t` within `s`.
    #[test]
    fn test_builtin_index() {
        let output = run_awk("BEGIN { print index(\"hello\", \"ll\") }", "");
        assert_eq!(output, "3\n");
    }

    /// `split` fills the array from index 1 and returns the number of pieces.
    #[test]
    fn test_builtin_split() {
        let output = run_awk(
            "BEGIN { n = split(\"a:b:c\", arr, \":\"); print n, arr[1], arr[2] }",
            "",
        );
        assert_eq!(output, "3 a b\n");
    }

    /// `sub` replaces only the first match in the target variable.
    #[test]
    fn test_builtin_sub() {
        let output = run_awk("BEGIN { x = \"hello\"; sub(\"l\", \"L\", x); print x }", "");
        assert_eq!(output, "heLlo\n");
    }

    /// `gsub` replaces every match in the target variable.
    #[test]
    fn test_builtin_gsub() {
        let output = run_awk(
            "BEGIN { x = \"hello\"; gsub(\"l\", \"L\", x); print x }",
            "",
        );
        assert_eq!(output, "heLLo\n");
    }

    /// `match(s, re)` returns the 1-based position where the match starts.
    #[test]
    fn test_builtin_match() {
        let output = run_awk("BEGIN { print match(\"hello\", \"ll\") }", "");
        assert_eq!(output, "3\n");
    }

    /// `sprintf` honours zero-padding and field width in `%05d`.
    #[test]
    fn test_builtin_sprintf() {
        let output = run_awk("BEGIN { print sprintf(\"%05d\", 42) }", "");
        assert_eq!(output, "00042\n");
    }

    /// `tolower` lowercases its argument.
    #[test]
    fn test_builtin_tolower() {
        let output = run_awk("BEGIN { print tolower(\"HELLO\") }", "");
        assert_eq!(output, "hello\n");
    }

    /// `toupper` uppercases its argument.
    #[test]
    fn test_builtin_toupper() {
        let output = run_awk("BEGIN { print toupper(\"hello\") }", "");
        assert_eq!(output, "HELLO\n");
    }

    /// `int` truncates, and integral results print without a decimal part.
    #[test]
    fn test_builtin_math() {
        let output = run_awk("BEGIN { print int(3.7), sqrt(4), sin(0) }", "");
        assert_eq!(output, "3 2 0\n");
    }

    /// The ternary operator picks the first branch for a true condition.
    #[test]
    fn test_ternary() {
        let output = run_awk("BEGIN { print 1 ? \"yes\" : \"no\" }", "");
        assert_eq!(output, "yes\n");
    }

    /// Adjacent string expressions are concatenated.
    #[test]
    fn test_concatenation() {
        let output = run_awk("BEGIN { print \"a\" \"b\" \"c\" }", "");
        assert_eq!(output, "abc\n");
    }

    /// Unary minus, unary plus, and logical not.
    #[test]
    fn test_unary_ops() {
        let output = run_awk("BEGIN { x = 5; print -x, +x, !0 }", "");
        assert_eq!(output, "-5 5 1\n");
    }

    /// `x++` yields the old value; the later read of `x` sees the new one.
    #[test]
    fn test_post_increment() {
        let output = run_awk("BEGIN { x = 5; print x++ \" \" x }", "");
        assert_eq!(output, "5 6\n");
    }

    /// `++x` yields the incremented value.
    #[test]
    fn test_pre_increment() {
        let output = run_awk("BEGIN { x = 5; print ++x }", "");
        assert_eq!(output, "6\n");
    }

    /// `+=`, `-=`, and `*=` applied in sequence: ((10 + 5) - 3) * 2 = 24.
    #[test]
    fn test_compound_assign() {
        let output = run_awk("BEGIN { x = 10; x += 5; x -= 3; x *= 2; print x }", "");
        assert_eq!(output, "24\n");
    }

    /// `getline var` is intended to read the next input record into `var`.
    ///
    /// NOTE(review): this substring check would presumably also pass if
    /// `getline` left `next_line` empty, since both records would still be
    /// printed by the main rule. POSIX awk prints "a b\n" here; tighten to an
    /// exact match once the interpreter's behaviour is confirmed.
    #[test]
    fn test_getline_var() {
        let output = run_awk("{ getline next_line; print $0, next_line }", "a\nb");
        // When we read "a", getline reads "b" into next_line
        assert!(output.contains("a") && output.contains("b"));
    }

    /// `FPAT` describes the fields themselves rather than their separators.
    #[test]
    fn test_fpat() {
        let output = run_awk("BEGIN { FPAT = \"[^,]+\" } { print $1, $2 }", "a,b,c");
        assert_eq!(output, "a b\n");
    }

    /// `FIELDWIDTHS` splits the record into fixed-width columns.
    #[test]
    fn test_fieldwidths() {
        let output = run_awk(
            "BEGIN { FIELDWIDTHS = \"2 3 2\" } { print $1, $2 }",
            "abcdefg",
        );
        assert_eq!(output, "ab cde\n");
    }

    /// An empty `RS` selects paragraph mode (blank-line separated records).
    ///
    /// NOTE(review): this only checks that a "1" appears somewhere in the
    /// output, so it cannot tell paragraph mode from line mode. Per the
    /// comment below the intended output is presumably "1 4\n2 2\n"; tighten
    /// once the interpreter's behaviour is confirmed.
    #[test]
    fn test_paragraph_mode() {
        let output = run_awk("BEGIN { RS = \"\" } { print NR, NF }", "a b\nc d\n\ne f");
        // First paragraph has 4 words across 2 lines, second has 2 words
        assert!(output.contains("1"));
    }
}