1mod builtins;
2mod expr;
3pub mod stmt;
4
5use std::collections::HashMap;
6use std::fs::File;
7use std::io::{BufRead, BufReader, Write};
8use std::process::{Child, ChildStdin, ChildStdout};
9
10use crate::ast::*;
11use crate::error::{Error, Result};
12use crate::value::Value;
13
14use regex::Regex;
15
/// A spawned child process bundled with a buffered reader over its standard output.
pub struct PipeInput {
    // Not read anywhere in this file, hence the lint allowance.
    // NOTE(review): presumably stored so the process handle lives as long as the reader — confirm.
    #[allow(dead_code)]
    child: Child,
    /// Buffered view of the child's stdout.
    reader: BufReader<ChildStdout>,
}
22
/// A writable output target: either an open file or the standard input of a child process.
pub enum OutputFile {
    /// Output goes to an already opened [`File`].
    File(File),
    /// Output goes to a child process through its stdin handle.
    Pipe(ChildStdin),
}

impl OutputFile {
    /// Returns the wrapped handle as a generic writer so `write` and `flush` share one dispatch.
    fn sink(&mut self) -> &mut dyn Write {
        match self {
            OutputFile::File(file) => file,
            OutputFile::Pipe(stdin) => stdin,
        }
    }
}

impl Write for OutputFile {
    /// Forwards the bytes to the wrapped file or pipe.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.sink().write(buf)
    }

    /// Flushes the wrapped file or pipe.
    fn flush(&mut self) -> std::io::Result<()> {
        self.sink().flush()
    }
}
44
45pub struct Interpreter<'a> {
47 program: &'a Program,
49
50 pub(crate) variables: HashMap<String, Value>,
52
53 pub(crate) arrays: HashMap<String, HashMap<String, Value>>,
55
56 pub(crate) functions: HashMap<String, &'a FunctionDef>,
58
59 pub(crate) fs: String,
62 pub(crate) ofs: String,
64 pub(crate) rs: String,
66 pub(crate) ors: String,
68 pub(crate) ofmt: String,
70 pub(crate) convfmt: String,
72 pub(crate) subsep: String,
74 pub(crate) fpat: String,
76 pub(crate) fieldwidths: String,
78
79 pub(crate) posix_mode: bool,
81 pub(crate) traditional_mode: bool,
82
83 pub(crate) record: String,
85 pub(crate) fields: Vec<String>,
87 pub(crate) nf: usize,
89 pub(crate) nr: usize,
91 pub(crate) fnr: usize,
93 pub(crate) filename: String,
95
96 pub(crate) rstart: usize,
98 pub(crate) rlength: i32,
99
100 should_exit: bool,
102 exit_code: i32,
103 should_next: bool,
104 should_nextfile: bool,
105
106 pub(crate) output_files: HashMap<String, OutputFile>,
108
109 pub(crate) input_files: HashMap<String, BufReader<File>>,
111
112 pub(crate) pipes: HashMap<String, PipeInput>,
114
115 pub(crate) regex_cache: HashMap<String, Regex>,
117
118 range_states: HashMap<usize, bool>,
120
121 pub(crate) rand_seed: u64,
123 pub(crate) rand_state: u64,
124
125 pub(crate) argc: usize,
127 pub(crate) argv: Vec<String>,
128
129 pub(crate) environ: HashMap<String, String>,
131
132 pub(crate) array_aliases: HashMap<String, String>,
135}
136
137impl<'a> Interpreter<'a> {
138 pub fn new(program: &'a Program) -> Self {
139 let mut functions = HashMap::new();
140 for func in &program.functions {
141 functions.insert(func.name.clone(), func);
142 }
143
144 let environ: HashMap<String, String> = std::env::vars().collect();
146
147 use std::time::{SystemTime, UNIX_EPOCH};
149 let rand_seed = SystemTime::now()
150 .duration_since(UNIX_EPOCH)
151 .map(|d| d.as_nanos() as u64)
152 .unwrap_or(12345);
153
154 Self {
155 program,
156 variables: HashMap::new(),
157 arrays: HashMap::new(),
158 functions,
159 fs: " ".to_string(),
160 ofs: " ".to_string(),
161 rs: "\n".to_string(),
162 ors: "\n".to_string(),
163 ofmt: "%.6g".to_string(),
164 convfmt: "%.6g".to_string(),
165 subsep: "\x1c".to_string(),
166 fpat: String::new(),
167 fieldwidths: String::new(),
168 posix_mode: false,
169 traditional_mode: false,
170 record: String::new(),
171 fields: Vec::new(),
172 nf: 0,
173 nr: 0,
174 fnr: 0,
175 filename: String::new(),
176 rstart: 0,
177 rlength: -1,
178 should_exit: false,
179 exit_code: 0,
180 should_next: false,
181 should_nextfile: false,
182 output_files: HashMap::new(),
183 input_files: HashMap::new(),
184 pipes: HashMap::new(),
185 regex_cache: HashMap::new(),
186 range_states: HashMap::new(),
187 rand_seed,
188 rand_state: rand_seed,
189 argc: 0,
190 argv: Vec::new(),
191 environ,
192 array_aliases: HashMap::new(),
193 }
194 }
195
196 pub fn set_args(&mut self, args: Vec<String>) {
198 self.argc = args.len();
199 self.argv = args;
200 }
201
202 pub fn set_fs(&mut self, fs: &str) {
204 self.fs = fs.to_string();
205 self.fpat.clear();
207 self.fieldwidths.clear();
208 }
209
210 pub fn set_posix_mode(&mut self, enabled: bool) {
212 self.posix_mode = enabled;
213 if enabled {
214 self.traditional_mode = false;
215 }
216 }
217
218 pub fn set_traditional_mode(&mut self, enabled: bool) {
220 self.traditional_mode = enabled;
221 if enabled {
222 self.posix_mode = false;
223 }
224 }
225
226 pub fn set_variable(&mut self, name: &str, value: &str) {
228 self.variables
229 .insert(name.to_string(), Value::from_string(value.to_string()));
230 }
231
232 pub fn set_filename(&mut self, filename: &str) {
234 self.filename = filename.to_string();
235 }
236
237 pub fn run<R: BufRead, W: Write>(&mut self, inputs: Vec<R>, output: &mut W) -> Result<i32> {
239 for rule in &self.program.rules {
241 if matches!(&rule.pattern, Some(Pattern::Begin)) {
242 if let Some(action) = &rule.action {
243 self.execute_block(action, output)?;
244 }
245 if self.should_exit {
246 return Ok(self.exit_code);
247 }
248 }
249 }
250
251 for input in inputs {
253 self.fnr = 0;
254
255 for rule in &self.program.rules {
257 if matches!(&rule.pattern, Some(Pattern::BeginFile)) {
258 if let Some(action) = &rule.action {
259 self.execute_block(action, output)?;
260 }
261 if self.should_exit {
262 return Ok(self.exit_code);
263 }
264 }
265 }
266
267 self.process_input(input, output)?;
268
269 for rule in &self.program.rules {
271 if matches!(&rule.pattern, Some(Pattern::EndFile)) {
272 if let Some(action) = &rule.action {
273 self.execute_block(action, output)?;
274 }
275 if self.should_exit {
276 return Ok(self.exit_code);
277 }
278 }
279 }
280
281 if self.should_exit {
282 return Ok(self.exit_code);
283 }
284 }
285
286 for rule in &self.program.rules {
288 if matches!(&rule.pattern, Some(Pattern::End))
289 && let Some(action) = &rule.action
290 {
291 self.execute_block(action, output)?;
292 }
293 }
294
295 Ok(self.exit_code)
296 }
297
298 fn process_input<R: BufRead, W: Write>(&mut self, mut input: R, output: &mut W) -> Result<()> {
299 if self.rs.is_empty() {
301 return self.process_input_paragraph_mode(input, output);
302 }
303
304 let mut line = String::new();
305
306 loop {
307 line.clear();
308 let bytes_read = input.read_line(&mut line).map_err(Error::Io)?;
309 if bytes_read == 0 {
310 break; }
312
313 if line.ends_with('\n') {
315 line.pop();
316 if line.ends_with('\r') {
317 line.pop();
318 }
319 }
320
321 self.nr += 1;
322 self.fnr += 1;
323 self.set_record(&line);
324
325 self.process_current_record(output)?;
326
327 if self.should_nextfile {
328 self.should_nextfile = false;
329 break;
330 }
331
332 if self.should_exit {
333 break;
334 }
335 }
336
337 Ok(())
338 }
339
340 fn process_input_paragraph_mode<R: BufRead, W: Write>(
343 &mut self,
344 mut input: R,
345 output: &mut W,
346 ) -> Result<()> {
347 let mut line = String::new();
348 let mut record = String::new();
349 let mut in_record = false;
350
351 loop {
352 line.clear();
353 let bytes_read = input.read_line(&mut line).map_err(Error::Io)?;
354
355 let is_blank = line.trim().is_empty();
357
358 if bytes_read == 0 {
359 if !record.is_empty() {
361 while record.ends_with('\n') || record.ends_with('\r') {
363 record.pop();
364 }
365 self.nr += 1;
366 self.fnr += 1;
367 self.set_record(&record);
368 self.process_current_record(output)?;
369 }
370 break;
371 }
372
373 if is_blank {
374 if in_record && !record.is_empty() {
376 while record.ends_with('\n') || record.ends_with('\r') {
378 record.pop();
379 }
380 self.nr += 1;
381 self.fnr += 1;
382 self.set_record(&record);
383 self.process_current_record(output)?;
384
385 record.clear();
386 in_record = false;
387
388 if self.should_nextfile || self.should_exit {
389 break;
390 }
391 }
392 } else {
393 if in_record {
395 record.push('\n');
396 }
397 if line.ends_with('\n') {
399 line.pop();
400 if line.ends_with('\r') {
401 line.pop();
402 }
403 }
404 record.push_str(&line);
405 in_record = true;
406 }
407 }
408
409 if self.should_nextfile {
410 self.should_nextfile = false;
411 }
412
413 Ok(())
414 }
415
416 fn process_current_record<W: Write>(&mut self, output: &mut W) -> Result<()> {
418 for (idx, rule) in self.program.rules.iter().enumerate() {
419 if matches!(
421 &rule.pattern,
422 Some(Pattern::Begin)
423 | Some(Pattern::End)
424 | Some(Pattern::BeginFile)
425 | Some(Pattern::EndFile)
426 ) {
427 continue;
428 }
429
430 let matches = self.pattern_matches(&rule.pattern, idx)?;
431 if matches {
432 if let Some(action) = &rule.action {
433 self.execute_block(action, output)?;
434 } else {
435 writeln!(output, "{}", self.record).map_err(Error::Io)?;
437 }
438 }
439
440 if self.should_next {
441 self.should_next = false;
442 break;
443 }
444
445 if self.should_nextfile || self.should_exit {
446 break;
447 }
448 }
449 Ok(())
450 }
451
452 pub(crate) fn set_record(&mut self, record: &str) {
453 self.record = record.to_string();
454 self.split_fields();
455 }
456
457 fn split_fields(&mut self) {
458 self.fields.clear();
459
460 if self.record.is_empty() {
461 self.nf = 0;
462 return;
463 }
464
465 let estimated_fields = self.record.len() / 8 + 1;
467 self.fields.reserve(estimated_fields.min(64));
468
469 if !self.fpat.is_empty() && !self.posix_mode && !self.traditional_mode {
471 self.split_fields_fpat();
472 return;
473 }
474
475 if !self.fieldwidths.is_empty() && !self.posix_mode && !self.traditional_mode {
477 self.split_fields_widths();
478 return;
479 }
480
481 if self.fs == " " {
483 self.fields
486 .extend(self.record.split_whitespace().map(String::from));
487 } else if self.fs.len() == 1 {
488 let sep = self.fs.as_bytes()[0];
490 let bytes = self.record.as_bytes();
491 let mut start = 0;
492
493 for (i, &b) in bytes.iter().enumerate() {
494 if b == sep {
495 self.fields.push(self.record[start..i].to_string());
496 start = i + 1;
497 }
498 }
499 self.fields.push(self.record[start..].to_string());
501 } else {
502 let fs = self.fs.clone();
504 let record = self.record.clone();
505 if let Some(regex) = self.regex_cache.get(&fs) {
506 self.fields.extend(regex.split(&record).map(String::from));
507 } else if let Ok(regex) = Regex::new(&fs) {
508 self.fields.extend(regex.split(&record).map(String::from));
509 self.regex_cache.insert(fs, regex);
510 } else {
511 self.fields.extend(record.split(&fs).map(String::from));
513 }
514 }
515
516 self.nf = self.fields.len();
517 }
518
519 fn split_fields_fpat(&mut self) {
521 let fpat = self.fpat.clone();
522 let record = self.record.clone();
523
524 if let Some(regex) = self.regex_cache.get(&fpat) {
525 for mat in regex.find_iter(&record) {
526 self.fields.push(mat.as_str().to_string());
527 }
528 } else if let Ok(regex) = Regex::new(&fpat) {
529 for mat in regex.find_iter(&record) {
530 self.fields.push(mat.as_str().to_string());
531 }
532 self.regex_cache.insert(fpat, regex);
533 }
534
535 self.nf = self.fields.len();
536 }
537
538 fn split_fields_widths(&mut self) {
540 let widths: Vec<usize> = self
541 .fieldwidths
542 .split_whitespace()
543 .filter_map(|s| s.parse().ok())
544 .collect();
545
546 let mut pos = 0;
547 let chars: Vec<char> = self.record.chars().collect();
548
549 for width in widths {
550 if pos >= chars.len() {
551 break;
552 }
553 let end = (pos + width).min(chars.len());
554 let field: String = chars[pos..end].iter().collect();
555 self.fields.push(field);
556 pos = end;
557 }
558
559 self.nf = self.fields.len();
560 }
561
562 #[inline]
563 pub(crate) fn get_field(&self, index: usize) -> String {
564 if index == 0 {
565 self.record.clone()
566 } else if index <= self.fields.len() {
567 self.fields[index - 1].clone()
568 } else {
569 String::new()
570 }
571 }
572
573 #[inline]
575 #[allow(dead_code)]
576 pub(crate) fn get_field_ref(&self, index: usize) -> &str {
577 if index == 0 {
578 &self.record
579 } else if index <= self.fields.len() {
580 &self.fields[index - 1]
581 } else {
582 ""
583 }
584 }
585
586 pub(crate) fn set_field(&mut self, index: usize, value: String) {
587 if index == 0 {
588 self.record = value;
589 self.split_fields();
590 } else {
591 while self.fields.len() < index {
593 self.fields.push(String::new());
594 }
595 self.fields[index - 1] = value;
596 self.nf = self.fields.len();
597 self.record = self.fields.join(&self.ofs);
599 }
600 }
601
602 fn pattern_matches(&mut self, pattern: &Option<Pattern>, rule_idx: usize) -> Result<bool> {
603 match pattern {
604 None => Ok(true), Some(Pattern::Begin)
606 | Some(Pattern::End)
607 | Some(Pattern::BeginFile)
608 | Some(Pattern::EndFile) => Ok(false),
609 Some(Pattern::Expr(expr)) => {
610 let val = self.eval_expr(expr)?;
611 Ok(val.is_truthy())
612 }
613 Some(Pattern::Regex(regex)) => {
614 let record = self.record.clone();
615 let re = self.get_regex(regex)?;
616 Ok(re.is_match(&record))
617 }
618 Some(Pattern::Range { start, end }) => {
619 let active = self.range_states.get(&rule_idx).copied().unwrap_or(false);
620 if !active {
621 if self.pattern_matches(&Some(start.as_ref().clone()), rule_idx)? {
623 self.range_states.insert(rule_idx, true);
624 return Ok(true);
625 }
626 Ok(false)
627 } else {
628 if self.pattern_matches(&Some(end.as_ref().clone()), rule_idx)? {
630 self.range_states.insert(rule_idx, false);
631 }
632 Ok(true)
633 }
634 }
635 Some(Pattern::And(left, right)) => Ok(self
636 .pattern_matches(&Some(left.as_ref().clone()), rule_idx)?
637 && self.pattern_matches(&Some(right.as_ref().clone()), rule_idx)?),
638 Some(Pattern::Or(left, right)) => Ok(self
639 .pattern_matches(&Some(left.as_ref().clone()), rule_idx)?
640 || self.pattern_matches(&Some(right.as_ref().clone()), rule_idx)?),
641 Some(Pattern::Not(inner)) => {
642 Ok(!self.pattern_matches(&Some(inner.as_ref().clone()), rule_idx)?)
643 }
644 }
645 }
646
647 pub(crate) fn get_regex(&mut self, pattern: &str) -> Result<&Regex> {
648 if !self.regex_cache.contains_key(pattern) {
649 let regex = Regex::new(pattern).map_err(Error::Regex)?;
650 self.regex_cache.insert(pattern.to_string(), regex);
651 }
652 Ok(self.regex_cache.get(pattern).unwrap())
653 }
654
655 pub(crate) fn get_variable(&self, name: &str) -> Value {
656 match name {
658 "NF" => Value::Number(self.nf as f64),
659 "NR" => Value::Number(self.nr as f64),
660 "FNR" => Value::Number(self.fnr as f64),
661 "FS" => Value::from_string(self.fs.clone()),
662 "OFS" => Value::from_string(self.ofs.clone()),
663 "RS" => Value::from_string(self.rs.clone()),
664 "ORS" => Value::from_string(self.ors.clone()),
665 "OFMT" => Value::from_string(self.ofmt.clone()),
666 "CONVFMT" => Value::from_string(self.convfmt.clone()),
667 "SUBSEP" => Value::from_string(self.subsep.clone()),
668 "FILENAME" => Value::from_string(self.filename.clone()),
669 "RSTART" => Value::Number(self.rstart as f64),
670 "RLENGTH" => Value::Number(self.rlength as f64),
671 "ARGC" => Value::Number(self.argc as f64),
672 "FPAT" => Value::from_string(self.fpat.clone()),
674 "FIELDWIDTHS" => Value::from_string(self.fieldwidths.clone()),
675 _ => self
676 .variables
677 .get(name)
678 .cloned()
679 .unwrap_or(Value::Uninitialized),
680 }
681 }
682
683 pub(crate) fn get_special_array(&self, array: &str, key: &str) -> Option<Value> {
685 match array {
686 "ARGV" => key
687 .parse::<usize>()
688 .ok()
689 .and_then(|i| self.argv.get(i))
690 .map(|s| Value::from_string(s.clone())),
691 "ENVIRON" => self.environ.get(key).map(|s| Value::from_string(s.clone())),
692 "PROCINFO" => {
693 match key {
695 "version" => Some(Value::from_string(env!("CARGO_PKG_VERSION").to_string())),
696 "strftime" => Some(Value::from_string("%a %b %e %H:%M:%S %Z %Y".to_string())),
697 "FS" => {
698 if !self.fpat.is_empty() {
699 Some(Value::from_string("FPAT".to_string()))
700 } else if !self.fieldwidths.is_empty() {
701 Some(Value::from_string("FIELDWIDTHS".to_string()))
702 } else {
703 Some(Value::from_string("FS".to_string()))
704 }
705 }
706 "identifiers" => Some(Value::Number(0.0)), "pid" => Some(Value::Number(std::process::id() as f64)),
708 "ppid" => Some(Value::Number(0.0)), "uid" => Some(Value::Number(0.0)), "gid" => Some(Value::Number(0.0)), "euid" => Some(Value::Number(0.0)), "egid" => Some(Value::Number(0.0)), "pgrpid" => Some(Value::Number(0.0)), _ => Some(Value::Uninitialized),
715 }
716 }
717 _ => None,
718 }
719 }
720
721 pub(crate) fn set_variable_value(&mut self, name: &str, value: Value) {
722 match name {
724 "NF" => {
725 let new_nf = value.to_number() as usize;
726 if new_nf < self.nf {
727 self.fields.truncate(new_nf);
728 } else {
729 while self.fields.len() < new_nf {
730 self.fields.push(String::new());
731 }
732 }
733 self.nf = new_nf;
734 self.record = self.fields.join(&self.ofs);
735 }
736 "FS" => {
737 self.fs = value.to_string_val();
738 self.fpat.clear();
740 self.fieldwidths.clear();
741 }
742 "OFS" => self.ofs = value.to_string_val(),
743 "RS" => self.rs = value.to_string_val(),
744 "ORS" => self.ors = value.to_string_val(),
745 "OFMT" => self.ofmt = value.to_string_val(),
746 "CONVFMT" => self.convfmt = value.to_string_val(),
747 "SUBSEP" => self.subsep = value.to_string_val(),
748 "FPAT" => {
750 self.fpat = value.to_string_val();
751 self.fieldwidths.clear();
753 }
754 "FIELDWIDTHS" => {
755 self.fieldwidths = value.to_string_val();
756 self.fpat.clear();
758 }
759 _ => {
760 self.variables.insert(name.to_string(), value);
761 }
762 }
763 }
764
765 fn resolve_array_name<'b>(&'b self, array: &'b str) -> &'b str {
767 self.array_aliases
768 .get(array)
769 .map(|s| s.as_str())
770 .unwrap_or(array)
771 }
772
773 pub(crate) fn get_array_element(&self, array: &str, key: &str) -> Value {
774 let array = self.resolve_array_name(array);
775
776 if let Some(val) = self.get_special_array(array, key) {
778 return val;
779 }
780
781 self.arrays
782 .get(array)
783 .and_then(|arr| arr.get(key))
784 .cloned()
785 .unwrap_or(Value::Uninitialized)
786 }
787
788 pub(crate) fn set_array_element(&mut self, array: &str, key: &str, value: Value) {
789 let array = self.resolve_array_name(array).to_string();
790 self.arrays
791 .entry(array)
792 .or_default()
793 .insert(key.to_string(), value);
794 }
795
796 pub(crate) fn array_key_exists(&self, array: &str, key: &str) -> bool {
797 let array = self.resolve_array_name(array);
798
799 match array {
801 "ARGV" => key
802 .parse::<usize>()
803 .ok()
804 .map(|i| i < self.argv.len())
805 .unwrap_or(false),
806 "ENVIRON" => self.environ.contains_key(key),
807 _ => self
808 .arrays
809 .get(array)
810 .map(|arr| arr.contains_key(key))
811 .unwrap_or(false),
812 }
813 }
814
815 pub(crate) fn delete_array_element(&mut self, array: &str, key: &str) {
816 let array = self.resolve_array_name(array).to_string();
817 if let Some(arr) = self.arrays.get_mut(&array) {
818 arr.remove(key);
819 }
820 }
821
822 pub(crate) fn make_array_key(&self, indices: &[Value]) -> String {
823 indices
824 .iter()
825 .map(|v| v.to_string_val())
826 .collect::<Vec<_>>()
827 .join(&self.subsep)
828 }
829}
830
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;
    use crate::parser::Parser;
    use std::io::Cursor;

    /// Lexes, parses and runs `program` over `input`, returning everything it printed.
    /// An empty `input` means the program runs with no input at all.
    fn run_awk(program: &str, input: &str) -> String {
        let tokens = Lexer::new(program).tokenize().unwrap();
        let ast = Parser::new(tokens).parse().unwrap();

        let mut interpreter = Interpreter::new(&ast);
        let mut output = Vec::new();
        let inputs: Vec<std::io::BufReader<Cursor<&str>>> = if input.is_empty() {
            Vec::new()
        } else {
            vec![std::io::BufReader::new(Cursor::new(input))]
        };

        interpreter.run(inputs, &mut output).unwrap();
        String::from_utf8(output).unwrap()
    }

    #[test]
    fn test_begin_print() {
        assert_eq!(run_awk(r#"BEGIN { print "hello" }"#, ""), "hello\n");
    }

    #[test]
    fn test_print_field() {
        assert_eq!(run_awk("{ print $1 }", "one two three"), "one\n");
    }

    #[test]
    fn test_print_multiple_fields() {
        assert_eq!(run_awk("{ print $1, $3 }", "one two three"), "one three\n");
    }

    #[test]
    fn test_arithmetic() {
        assert_eq!(run_awk("BEGIN { print 2 + 3 * 4 }", ""), "14\n");
    }

    #[test]
    fn test_variable() {
        assert_eq!(run_awk("BEGIN { x = 5; print x }", ""), "5\n");
    }

    #[test]
    fn test_if_statement() {
        let output = run_awk("BEGIN { x = 10; if (x > 5) print \"big\" }", "");
        assert_eq!(output, "big\n");
    }

    #[test]
    fn test_while_loop() {
        let output = run_awk("BEGIN { i = 1; while (i <= 3) { print i; i++ } }", "");
        assert_eq!(output, "1\n2\n3\n");
    }

    #[test]
    fn test_pattern_match() {
        assert_eq!(run_awk("/two/ { print $0 }", "one\ntwo\nthree"), "two\n");
    }

    #[test]
    fn test_for_loop() {
        let output = run_awk("BEGIN { for (i = 1; i <= 3; i++) print i }", "");
        assert_eq!(output, "1\n2\n3\n");
    }

    #[test]
    fn test_for_in_loop() {
        let output = run_awk(
            "BEGIN { a[1]=1; a[2]=2; for (k in a) count++; print count }",
            "",
        );
        assert_eq!(output, "2\n");
    }

    #[test]
    fn test_do_while() {
        let output = run_awk("BEGIN { i = 0; do { i++ } while (i < 3); print i }", "");
        assert_eq!(output, "3\n");
    }

    #[test]
    fn test_break() {
        let output = run_awk(
            "BEGIN { for (i=1; i<=10; i++) { if (i==3) break; print i } }",
            "",
        );
        assert_eq!(output, "1\n2\n");
    }

    #[test]
    fn test_continue() {
        let output = run_awk(
            "BEGIN { for (i=1; i<=3; i++) { if (i==2) continue; print i } }",
            "",
        );
        assert_eq!(output, "1\n3\n");
    }

    #[test]
    fn test_next() {
        let output = run_awk("/skip/ { next } { print }", "one\nskip\ntwo");
        assert_eq!(output, "one\ntwo\n");
    }

    #[test]
    fn test_exit() {
        let output = run_awk("NR == 2 { exit } { print }", "one\ntwo\nthree");
        assert_eq!(output, "one\n");
    }

    #[test]
    fn test_exit_in_end() {
        // Exact output: a substring check would also pass if records were lost or duplicated.
        let output = run_awk("{ print } END { print \"done\" }", "one\ntwo");
        assert_eq!(output, "one\ntwo\ndone\n");
    }

    #[test]
    fn test_array_access() {
        let output = run_awk("BEGIN { a[\"x\"] = 1; print a[\"x\"] }", "");
        assert_eq!(output, "1\n");
    }

    #[test]
    fn test_array_in() {
        let output = run_awk("BEGIN { a[1]=1; print (1 in a), (2 in a) }", "");
        assert_eq!(output, "1 0\n");
    }

    #[test]
    fn test_delete() {
        let output = run_awk(
            "BEGIN { a[1]=1; a[2]=2; delete a[1]; for(k in a) print k }",
            "",
        );
        assert_eq!(output, "2\n");
    }

    #[test]
    fn test_special_variables() {
        assert_eq!(run_awk("{ print NR, NF, $0 }", "a b c"), "1 3 a b c\n");
    }

    #[test]
    fn test_fs_change() {
        let output = run_awk("BEGIN { FS = \":\" } { print $1 }", "a:b:c");
        assert_eq!(output, "a\n");
    }

    #[test]
    fn test_ofs() {
        let output = run_awk("BEGIN { OFS = \"-\" } { print $1, $2 }", "a b c");
        assert_eq!(output, "a-b\n");
    }

    #[test]
    fn test_nf_access() {
        assert_eq!(run_awk("{ print $NF }", "a b c"), "c\n");
    }

    #[test]
    fn test_field_modify() {
        let output = run_awk("{ $2 = \"X\"; print $0 }", "a b c");
        assert_eq!(output, "a X c\n");
    }

    #[test]
    fn test_user_function() {
        let output = run_awk(
            "function double(x) { return x*2 } BEGIN { print double(5) }",
            "",
        );
        assert_eq!(output, "10\n");
    }

    #[test]
    fn test_recursion() {
        let output = run_awk(
            "function fact(n) { return n<=1 ? 1 : n*fact(n-1) } BEGIN { print fact(5) }",
            "",
        );
        assert_eq!(output, "120\n");
    }

    #[test]
    fn test_printf() {
        let output = run_awk("BEGIN { printf \"%d %s\\n\", 42, \"hello\" }", "");
        assert_eq!(output, "42 hello\n");
    }

    #[test]
    fn test_range_pattern() {
        let output = run_awk(
            "/start/,/end/ { print }",
            "before\nstart\nmiddle\nend\nafter",
        );
        assert_eq!(output, "start\nmiddle\nend\n");
    }

    #[test]
    fn test_compound_pattern_and() {
        let output = run_awk("NR > 1 && NR < 4 { print }", "one\ntwo\nthree\nfour");
        assert_eq!(output, "two\nthree\n");
    }

    #[test]
    fn test_logical_or_in_expr() {
        let output = run_awk("{ if (/a/ || /c/) print }", "a\nb\nc");
        assert_eq!(output, "a\nc\n");
    }

    #[test]
    fn test_negated_pattern() {
        let output = run_awk("!/skip/ { print }", "keep\nskip\nkeep");
        assert_eq!(output, "keep\nkeep\n");
    }

    #[test]
    fn test_builtin_length() {
        assert_eq!(run_awk("BEGIN { print length(\"hello\") }", ""), "5\n");
    }

    #[test]
    fn test_builtin_substr() {
        let output = run_awk("BEGIN { print substr(\"hello\", 2, 3) }", "");
        assert_eq!(output, "ell\n");
    }

    #[test]
    fn test_builtin_index() {
        let output = run_awk("BEGIN { print index(\"hello\", \"ll\") }", "");
        assert_eq!(output, "3\n");
    }

    #[test]
    fn test_builtin_split() {
        let output = run_awk(
            "BEGIN { n = split(\"a:b:c\", arr, \":\"); print n, arr[1], arr[2] }",
            "",
        );
        assert_eq!(output, "3 a b\n");
    }

    #[test]
    fn test_builtin_sub() {
        let output = run_awk("BEGIN { x = \"hello\"; sub(\"l\", \"L\", x); print x }", "");
        assert_eq!(output, "heLlo\n");
    }

    #[test]
    fn test_builtin_gsub() {
        let output = run_awk(
            "BEGIN { x = \"hello\"; gsub(\"l\", \"L\", x); print x }",
            "",
        );
        assert_eq!(output, "heLLo\n");
    }

    #[test]
    fn test_builtin_match() {
        let output = run_awk("BEGIN { print match(\"hello\", \"ll\") }", "");
        assert_eq!(output, "3\n");
    }

    #[test]
    fn test_builtin_sprintf() {
        let output = run_awk("BEGIN { print sprintf(\"%05d\", 42) }", "");
        assert_eq!(output, "00042\n");
    }

    #[test]
    fn test_builtin_tolower() {
        assert_eq!(run_awk("BEGIN { print tolower(\"HELLO\") }", ""), "hello\n");
    }

    #[test]
    fn test_builtin_toupper() {
        assert_eq!(run_awk("BEGIN { print toupper(\"hello\") }", ""), "HELLO\n");
    }

    #[test]
    fn test_builtin_math() {
        let output = run_awk("BEGIN { print int(3.7), sqrt(4), sin(0) }", "");
        assert_eq!(output, "3 2 0\n");
    }

    #[test]
    fn test_ternary() {
        let output = run_awk("BEGIN { print 1 ? \"yes\" : \"no\" }", "");
        assert_eq!(output, "yes\n");
    }

    #[test]
    fn test_concatenation() {
        let output = run_awk("BEGIN { print \"a\" \"b\" \"c\" }", "");
        assert_eq!(output, "abc\n");
    }

    #[test]
    fn test_unary_ops() {
        let output = run_awk("BEGIN { x = 5; print -x, +x, !0 }", "");
        assert_eq!(output, "-5 5 1\n");
    }

    #[test]
    fn test_post_increment() {
        let output = run_awk("BEGIN { x = 5; print x++ \" \" x }", "");
        assert_eq!(output, "5 6\n");
    }

    #[test]
    fn test_pre_increment() {
        assert_eq!(run_awk("BEGIN { x = 5; print ++x }", ""), "6\n");
    }

    #[test]
    fn test_compound_assign() {
        let output = run_awk("BEGIN { x = 10; x += 5; x -= 3; x *= 2; print x }", "");
        assert_eq!(output, "24\n");
    }

    #[test]
    fn test_getline_var() {
        let output = run_awk("{ getline next_line; print $0, next_line }", "a\nb");
        assert!(output.contains("a") && output.contains("b"));
    }

    #[test]
    fn test_fpat() {
        let output = run_awk("BEGIN { FPAT = \"[^,]+\" } { print $1, $2 }", "a,b,c");
        assert_eq!(output, "a b\n");
    }

    #[test]
    fn test_fieldwidths() {
        let output = run_awk(
            "BEGIN { FIELDWIDTHS = \"2 3 2\" } { print $1, $2 }",
            "abcdefg",
        );
        assert_eq!(output, "ab cde\n");
    }

    #[test]
    fn test_paragraph_mode() {
        // Two paragraphs: "a b\nc d" (four whitespace-separated fields) and "e f" (two).
        let output = run_awk("BEGIN { RS = \"\" } { print NR, NF }", "a b\nc d\n\ne f");
        assert_eq!(output, "1 4\n2 2\n");
    }
}