1mod ast;
4mod builtins;
5mod bytecode;
6mod cli;
7mod compiler;
8mod cyber_help;
9mod error;
10mod format;
11#[allow(dead_code)]
12mod interp;
13mod lexer;
14mod locale_numeric;
15mod parser;
16mod runtime;
17mod vm;
18
19pub use error::{Error, Result};
20
21use crate::ast::parallel;
22use crate::ast::{Pattern, Program};
23use crate::bytecode::{CompiledPattern, CompiledProgram};
24use crate::cli::{Args, MawkWAction};
25use crate::compiler::Compiler;
26use crate::interp::{range_step, Flow};
27use crate::parser::parse_program;
28use crate::runtime::{Runtime, Value};
29use crate::vm::{
30 flush_print_buf, vm_pattern_matches, vm_run_begin, vm_run_beginfile, vm_run_end,
31 vm_run_endfile, vm_run_rule,
32};
33use clap::Parser;
34use rayon::prelude::*;
35use std::fs::File;
36use std::io::{self, BufRead, BufReader, Read, Write};
37use std::path::{Path, PathBuf};
38use std::sync::Arc;
39
40pub fn run(bin_name: &str) -> Result<()> {
42 let mut args = Args::parse();
43 if args.show_help {
44 cyber_help::print_cyberpunk_help(bin_name);
45 return Ok(());
46 }
47 if args.show_version {
48 println!("{} {}", bin_name, env!("CARGO_PKG_VERSION"));
49 return Ok(());
50 }
51 args.normalize();
52 match args.apply_mawk_w() {
53 Ok(()) => {}
54 Err(MawkWAction::Help) => {
55 cyber_help::print_cyberpunk_help(bin_name);
56 return Ok(());
57 }
58 Err(MawkWAction::Version) => {
59 println!("{} {}", bin_name, env!("CARGO_PKG_VERSION"));
60 return Ok(());
61 }
62 }
63 if args.copyright {
64 println!(
65 "{} {} — Copyright (c) MenkeTechnologies; MIT license.",
66 bin_name,
67 env!("CARGO_PKG_VERSION")
68 );
69 return Ok(());
70 }
71 if args.dump_variables.is_some() {
72 eprintln!("{bin_name}: warning: --dump-variables is not fully implemented");
73 }
74 if args.debug.is_some() {
75 eprintln!("{bin_name}: warning: --debug is not fully implemented");
76 }
77 let threads = args.threads.unwrap_or(1).max(1);
78
79 let (program_text, files) = resolve_program_and_files(&args)?;
80 let prog = parse_program(&program_text)?;
81 let parallel_ok = parallel::record_rules_parallel_safe(&prog);
82
83 let cp = Compiler::compile_program(&prog);
85
86 let mut rt = Runtime::new();
87 if args.use_lc_numeric {
88 locale_numeric::set_locale_numeric_from_env();
89 rt.numeric_decimal = locale_numeric::decimal_point_from_locale();
90 }
91 apply_assigns(&args, &mut rt)?;
92 if let Some(fs) = &args.field_sep {
93 rt.vars
94 .insert("FS".into(), Value::Str(String::from(fs.as_str())));
95 }
96
97 rt.slots = cp.init_slots(&rt.vars);
98
99 vm_run_begin(&cp, &mut rt)?;
100 flush_print_buf(&mut rt.print_buf)?;
101 if rt.exit_pending {
102 vm_run_end(&cp, &mut rt)?;
103 flush_print_buf(&mut rt.print_buf)?;
104 std::process::exit(rt.exit_code);
105 }
106
107 let mut range_state: Vec<bool> = vec![false; prog.rules.len()];
108
109 let use_parallel = threads > 1 && parallel_ok && !files.is_empty();
111 if threads > 1 && !parallel_ok {
112 eprintln!("{bin_name}: warning: program is not parallel-safe (range patterns, exit, getline without file, getline coprocess, cross-record assignments, …); running sequentially (use a single thread to silence this warning)");
113 }
114
115 let mut nr_global = 0.0f64;
116
117 if files.is_empty() {
118 rt.filename = "-".into();
119 vm_run_beginfile(&cp, &mut rt)?;
120 if rt.exit_pending {
121 vm_run_endfile(&cp, &mut rt)?;
122 vm_run_end(&cp, &mut rt)?;
123 std::process::exit(rt.exit_code);
124 }
125 process_file(None, &prog, &cp, &mut range_state, &mut rt)?;
126 vm_run_endfile(&cp, &mut rt)?;
127 } else {
128 for p in &files {
129 rt.filename = p.to_string_lossy().into_owned();
130 rt.fnr = 0.0;
131 vm_run_beginfile(&cp, &mut rt)?;
132 if rt.exit_pending {
133 vm_run_endfile(&cp, &mut rt)?;
134 vm_run_end(&cp, &mut rt)?;
135 std::process::exit(rt.exit_code);
136 }
137 let n = if use_parallel {
138 process_file_parallel(Some(p.as_path()), &prog, &cp, &mut rt, threads, nr_global)?
139 } else {
140 process_file(Some(p.as_path()), &prog, &cp, &mut range_state, &mut rt)?
141 };
142 nr_global += n as f64;
143 vm_run_endfile(&cp, &mut rt)?;
144 if rt.exit_pending {
145 break;
146 }
147 }
148 }
149
150 flush_print_buf(&mut rt.print_buf)?;
151 vm_run_end(&cp, &mut rt)?;
152 flush_print_buf(&mut rt.print_buf)?;
153 if rt.exit_pending {
154 std::process::exit(rt.exit_code);
155 }
156 Ok(())
157}
158
/// What a parallel worker hands back after running the record rules on one record.
struct ParallelRecordOut {
    /// Output chunks captured for this record instead of being written directly.
    prints: Vec<String>,
    /// `true` when the rules for this record requested program exit.
    exit_pending: bool,
    /// Exit status recorded by the worker; relevant when `exit_pending` is set.
    exit_code: i32,
}
164
165fn read_all_lines<R: Read>(mut r: R) -> Result<Vec<String>> {
166 let mut buf = BufReader::new(&mut r);
167 let mut lines = Vec::new();
168 let mut s = String::new();
169 loop {
170 s.clear();
171 let n = buf.read_line(&mut s).map_err(Error::Io)?;
172 if n == 0 {
173 break;
174 }
175 lines.push(s.clone());
176 }
177 Ok(lines)
178}
179
180fn process_file_parallel(
181 path: Option<&Path>,
182 _prog: &Program,
183 cp: &CompiledProgram,
184 rt: &mut Runtime,
185 threads: usize,
186 nr_offset: f64,
187) -> Result<usize> {
188 let reader: Box<dyn Read + Send> = if let Some(p) = path {
189 Box::new(File::open(p).map_err(|e| Error::ProgramFile(p.to_path_buf(), e))?)
190 } else {
191 Box::new(std::io::stdin())
192 };
193 let lines = read_all_lines(reader)?;
194 let nlines = lines.len();
195 if nlines == 0 {
196 return Ok(0);
197 }
198
199 let cp_arc = Arc::new(cp.clone());
200 let shared_globals = Arc::new(rt.vars.clone());
201 let shared_slots = Arc::new(rt.slots.clone());
202 let fname = rt.filename.clone();
203 let seed_base = rt.rand_seed;
204 let numeric_dec = rt.numeric_decimal;
205
206 let pool = rayon::ThreadPoolBuilder::new()
207 .num_threads(threads)
208 .build()
209 .map_err(|e| Error::Runtime(format!("rayon pool: {e}")))?;
210
211 let results: Vec<std::result::Result<(usize, ParallelRecordOut), Error>> = pool.install(|| {
212 lines
213 .into_par_iter()
214 .enumerate()
215 .map(|(i, line)| {
216 let cp = Arc::clone(&cp_arc);
217 let mut local = Runtime::for_parallel_worker(
218 Arc::clone(&shared_globals),
219 fname.clone(),
220 seed_base ^ (i as u64).wrapping_mul(0x9e3779b97f4a7c15),
221 numeric_dec,
222 );
223 local.slots = (*shared_slots).clone();
224 local.nr = nr_offset + i as f64 + 1.0;
225 local.fnr = i as f64 + 1.0;
226 local.set_record_from_line(&line);
227
228 let mut buf = Vec::new();
229 for rule in &cp.record_rules {
230 if matches!(rule.pattern, CompiledPattern::Range) {
231 return Err(Error::Runtime(
232 "internal: range pattern in parallel path".into(),
233 ));
234 }
235 let run = vm_pattern_matches(rule, &cp, &mut local)?;
236 if run {
237 match vm_run_rule(rule, &cp, &mut local, Some(&mut buf)) {
238 Ok(Flow::Next) => break,
239 Ok(Flow::ExitPending) => {
240 return Ok((
241 i,
242 ParallelRecordOut {
243 prints: buf,
244 exit_pending: true,
245 exit_code: local.exit_code,
246 },
247 ));
248 }
249 Ok(Flow::Normal) => {}
250 Ok(Flow::Break) | Ok(Flow::Continue) => {}
251 Ok(Flow::Return(_)) => {
252 return Err(Error::Runtime(
253 "`return` used outside function in rule action".into(),
254 ));
255 }
256 Err(Error::Exit(code)) => return Err(Error::Exit(code)),
257 Err(e) => return Err(e),
258 }
259 }
260 }
261 Ok((
262 i,
263 ParallelRecordOut {
264 prints: buf,
265 exit_pending: local.exit_pending,
266 exit_code: local.exit_code,
267 },
268 ))
269 })
270 .collect()
271 });
272
273 let mut outs: Vec<(usize, ParallelRecordOut)> = Vec::with_capacity(results.len());
274 for r in results {
275 outs.push(r?);
276 }
277 outs.sort_by_key(|(i, _)| *i);
278
279 let mut stdout = io::stdout().lock();
280 for (_, out) in &outs {
281 for chunk in &out.prints {
282 stdout.write_all(chunk.as_bytes()).map_err(Error::Io)?;
283 }
284 }
285
286 for (_, out) in &outs {
287 if out.exit_pending {
288 rt.exit_pending = true;
289 rt.exit_code = out.exit_code;
290 break;
291 }
292 }
293
294 Ok(nlines)
295}
296
297fn uses_primary_getline(cp: &CompiledProgram) -> bool {
299 use crate::bytecode::{GetlineSource, Op};
300 let check = |ops: &[Op]| {
301 ops.iter().any(|op| {
302 matches!(
303 op,
304 Op::GetLine {
305 source: GetlineSource::Primary,
306 ..
307 }
308 )
309 })
310 };
311 for c in &cp.begin_chunks {
312 if check(&c.ops) {
313 return true;
314 }
315 }
316 for c in &cp.end_chunks {
317 if check(&c.ops) {
318 return true;
319 }
320 }
321 for r in &cp.record_rules {
322 if check(&r.body.ops) {
323 return true;
324 }
325 }
326 for f in cp.functions.values() {
327 if check(&f.body.ops) {
328 return true;
329 }
330 }
331 false
332}
333
334fn process_file(
335 path: Option<&Path>,
336 prog: &Program,
337 cp: &CompiledProgram,
338 range_state: &mut [bool],
339 rt: &mut Runtime,
340) -> Result<usize> {
341 if let Some(p) = path {
344 if !uses_primary_getline(cp) {
345 return process_file_slurp(p, prog, cp, range_state, rt);
346 }
347 }
348
349 let reader: Box<dyn Read + Send> = if let Some(p) = path {
351 Box::new(File::open(p).map_err(|e| Error::ProgramFile(p.to_path_buf(), e))?)
352 } else {
353 Box::new(std::io::stdin())
354 };
355 let br = Arc::new(std::sync::Mutex::new(BufReader::new(reader)));
356 rt.attach_input_reader(Arc::clone(&br));
357
358 let mut count = 0usize;
359 loop {
360 rt.line_buf.clear();
361 let n = br
362 .lock()
363 .map_err(|_| Error::Runtime("input reader lock poisoned".into()))?
364 .read_until(b'\n', &mut rt.line_buf)
365 .map_err(Error::Io)?;
366 if n == 0 {
367 break;
368 }
369 count += 1;
370 rt.nr += 1.0;
371 rt.fnr += 1.0;
372 rt.set_record_from_line_buf();
373 if dispatch_rules(prog, cp, range_state, rt)? {
374 break;
375 }
376 }
377 rt.detach_input_reader();
378 Ok(count)
379}
380
/// Single-rule actions that the slurp fast path can execute itself.
#[derive(Clone, Copy)]
enum InlineAction {
    /// Print the field with the given index to stdout.
    PrintFieldStdout(u16),
    /// Add the numeric value of field `field` to slot `slot`.
    AddFieldToSlot { field: u16, slot: u16 },
    /// Add the constant `val` to slot `slot`.
    AddConstToSlot { val: u16, slot: u16 },
}
396
/// Rule patterns that the slurp fast path can evaluate itself.
#[derive(Clone)]
enum InlinePattern {
    /// Every record matches.
    Always,
    /// A record matches when it contains this literal text.
    LiteralContains(String),
}
403
404fn detect_inline_program(cp: &CompiledProgram) -> Option<(InlinePattern, InlineAction)> {
406 if cp.record_rules.len() != 1 {
407 return None;
408 }
409 let rule = &cp.record_rules[0];
410 let pattern = match &rule.pattern {
411 CompiledPattern::Always => InlinePattern::Always,
412 CompiledPattern::LiteralRegexp(idx) => {
413 InlinePattern::LiteralContains(cp.strings.get(*idx).to_string())
414 }
415 _ => return None,
416 };
417 let ops = &rule.body.ops;
418 let action = if ops.len() == 1 {
419 match ops[0] {
420 bytecode::Op::PrintFieldStdout(f) => InlineAction::PrintFieldStdout(f),
421 bytecode::Op::AddFieldToSlot { field, slot } => {
422 InlineAction::AddFieldToSlot { field, slot }
423 }
424 _ => return None,
425 }
426 } else if ops.len() == 3 {
427 if let (
429 bytecode::Op::PushNum(n),
430 bytecode::Op::CompoundAssignSlot(slot, crate::ast::BinOp::Add),
431 bytecode::Op::Pop,
432 ) = (ops[0], ops[1], ops[2])
433 {
434 let val = n as u16;
435 if n >= 0.0 && n == val as f64 {
436 InlineAction::AddConstToSlot { val, slot }
437 } else {
438 return None;
439 }
440 } else {
441 return None;
442 }
443 } else {
444 return None;
445 };
446 Some((pattern, action))
447}
448
449fn process_file_slurp(
452 path: &Path,
453 prog: &Program,
454 cp: &CompiledProgram,
455 range_state: &mut [bool],
456 rt: &mut Runtime,
457) -> Result<usize> {
458 let data = std::fs::read(path).map_err(|e| Error::ProgramFile(path.to_path_buf(), e))?;
459 let fs = rt
461 .vars
462 .get("FS")
463 .map(|v| v.as_str())
464 .unwrap_or_else(|| " ".into());
465
466 if let Some((pattern, action)) = detect_inline_program(cp) {
468 return process_file_slurp_inline(data, &fs, pattern, action, rt);
469 }
470
471 let mut count = 0usize;
472 let mut pos = 0;
473 let len = data.len();
474
475 while pos < len {
476 let eol = data[pos..]
478 .iter()
479 .position(|&b| b == b'\n')
480 .map(|i| pos + i)
481 .unwrap_or(len);
482
483 let end = if eol > pos && data[eol - 1] == b'\r' {
485 eol - 1
486 } else {
487 eol
488 };
489
490 count += 1;
491 rt.nr += 1.0;
492 rt.fnr += 1.0;
493
494 let line = unsafe { std::str::from_utf8_unchecked(&data[pos..end]) };
499 rt.set_field_sep_split(&fs, line);
500
501 if dispatch_rules(prog, cp, range_state, rt)? {
502 break;
503 }
504
505 pos = eol + 1;
506 }
507 Ok(count)
508}
509
510fn process_file_slurp_inline(
513 data: Vec<u8>,
514 fs: &str,
515 pattern: InlinePattern,
516 action: InlineAction,
517 rt: &mut Runtime,
518) -> Result<usize> {
519 if matches!(pattern, InlinePattern::Always) {
523 if let InlineAction::PrintFieldStdout(field) = action {
524 if field > 0 && fs == " " {
525 return process_file_print_field_raw(&data, field as usize, rt);
526 }
527 }
528 }
529
530 let mut count = 0usize;
531 let mut pos = 0;
532 let len = data.len();
533
534 let mut ors_local = [0u8; 64];
536 let ors_len = rt.ors_bytes.len().min(64);
537 ors_local[..ors_len].copy_from_slice(&rt.ors_bytes[..ors_len]);
538
539 while pos < len {
540 let eol = data[pos..]
541 .iter()
542 .position(|&b| b == b'\n')
543 .map(|i| pos + i)
544 .unwrap_or(len);
545
546 let end = if eol > pos && data[eol - 1] == b'\r' {
547 eol - 1
548 } else {
549 eol
550 };
551
552 count += 1;
553 rt.nr += 1.0;
554 rt.fnr += 1.0;
555
556 let line_bytes = &data[pos..end];
557
558 if let InlinePattern::LiteralContains(ref needle) = pattern {
562 let line_str = unsafe { std::str::from_utf8_unchecked(line_bytes) };
565 if !line_str.contains(needle.as_str()) {
566 pos = eol + 1;
567 continue;
568 }
569 }
570
571 match action {
573 InlineAction::AddConstToSlot { val, slot } => {
574 let sv = rt.slots[slot as usize].as_number();
575 rt.slots[slot as usize] = Value::Num(sv + val as f64);
576 }
577 _ => {
578 match std::str::from_utf8(line_bytes) {
580 Ok(line) => rt.set_field_sep_split(fs, line),
581 Err(_) => {
582 let lossy = String::from_utf8_lossy(line_bytes);
583 rt.set_field_sep_split(fs, &lossy);
584 }
585 }
586 match action {
587 InlineAction::PrintFieldStdout(field) => {
588 rt.print_field_to_buf(field as usize);
589 rt.print_buf.extend_from_slice(&ors_local[..ors_len]);
590 }
591 InlineAction::AddFieldToSlot { field, slot } => {
592 let fv = rt.field_as_number(field as i32);
593 let sv = rt.slots[slot as usize].as_number();
594 rt.slots[slot as usize] = Value::Num(sv + fv);
595 }
596 InlineAction::AddConstToSlot { .. } => unreachable!(),
597 }
598 }
599 }
600
601 pos = eol + 1;
602 }
603 Ok(count)
604}
605
606fn process_file_print_field_raw(data: &[u8], field_idx: usize, rt: &mut Runtime) -> Result<usize> {
610 let mut count = 0usize;
611 let mut pos = 0;
612 let len = data.len();
613 let ors = b"\n"; while pos < len {
616 let eol = data[pos..]
618 .iter()
619 .position(|&b| b == b'\n')
620 .map(|i| pos + i)
621 .unwrap_or(len);
622
623 let end = if eol > pos && data[eol - 1] == b'\r' {
624 eol - 1
625 } else {
626 eol
627 };
628
629 count += 1;
630 rt.nr += 1.0;
631 rt.fnr += 1.0;
632
633 let line = &data[pos..end];
635 let mut fi = 0usize; let mut i = 0;
637 let llen = line.len();
638
639 while i < llen && line[i].is_ascii_whitespace() {
641 i += 1;
642 }
643
644 let mut field_start = i;
645 let mut field_end = i;
646 let mut found = false;
647
648 while i <= llen {
649 let at_end = i == llen;
650 let is_ws = !at_end && line[i].is_ascii_whitespace();
651
652 if at_end || is_ws {
653 if field_start < i {
654 fi += 1;
655 if fi == field_idx {
656 field_end = i;
657 found = true;
658 break;
659 }
660 }
661 if is_ws {
662 while i < llen && line[i].is_ascii_whitespace() {
664 i += 1;
665 }
666 field_start = i;
667 continue;
668 }
669 }
670 i += 1;
671 }
672
673 if found {
674 rt.print_buf
675 .extend_from_slice(&line[field_start..field_end]);
676 }
677 rt.print_buf.extend_from_slice(ors);
678
679 pos = eol + 1;
680 }
681 Ok(count)
682}
683
684fn dispatch_rules(
686 prog: &Program,
687 cp: &CompiledProgram,
688 range_state: &mut [bool],
689 rt: &mut Runtime,
690) -> Result<bool> {
691 for rule in &cp.record_rules {
692 let run = match &rule.pattern {
693 CompiledPattern::Range => {
694 let orig = &prog.rules[rule.original_index];
695 if let Pattern::Range(p1, p2) = &orig.pattern {
696 range_step(&mut range_state[rule.original_index], p1, p2, rt, prog)?
697 } else {
698 false
699 }
700 }
701 _ => vm_pattern_matches(rule, cp, rt)?,
702 };
703 if run {
704 match vm_run_rule(rule, cp, rt, None) {
705 Ok(Flow::Next) => break,
706 Ok(Flow::ExitPending) => return Ok(true),
707 Ok(Flow::Normal) => {}
708 Ok(Flow::Break) | Ok(Flow::Continue) => {}
709 Ok(Flow::Return(_)) => {
710 return Err(Error::Runtime(
711 "`return` used outside function in rule action".into(),
712 ));
713 }
714 Err(Error::Exit(code)) => return Err(Error::Exit(code)),
715 Err(e) => return Err(e),
716 }
717 }
718 }
719 Ok(rt.exit_pending)
720}
721
722fn resolve_program_and_files(args: &Args) -> Result<(String, Vec<PathBuf>)> {
723 let mut prog = String::new();
724 for p in &args.include {
725 prog.push_str(&std::fs::read_to_string(p).map_err(|e| Error::ProgramFile(p.clone(), e))?);
726 }
727 for p in &args.progfiles {
728 prog.push_str(&std::fs::read_to_string(p).map_err(|e| Error::ProgramFile(p.clone(), e))?);
729 }
730 for e in &args.source {
731 prog.push_str(e);
732 prog.push('\n');
733 }
734 if let Some(exec) = &args.exec_file {
735 prog.push_str(
736 &std::fs::read_to_string(exec).map_err(|e| Error::ProgramFile(exec.clone(), e))?,
737 );
738 }
739 if prog.is_empty() {
740 if args.rest.is_empty() {
741 return Err(Error::Parse {
742 line: 1,
743 msg: "no program given".into(),
744 });
745 }
746 let inline = args.rest[0].clone();
747 let files: Vec<PathBuf> = args.rest[1..].iter().map(PathBuf::from).collect();
748 return Ok((inline, files));
749 }
750 let files: Vec<PathBuf> = args.rest.iter().map(PathBuf::from).collect();
751 Ok((prog, files))
752}
753
754fn apply_assigns(args: &Args, rt: &mut Runtime) -> Result<()> {
755 for a in &args.assigns {
756 let (name, val) = a.split_once('=').ok_or_else(|| Error::Parse {
757 line: 1,
758 msg: format!("invalid -v `{a}`, expected name=value"),
759 })?;
760 rt.vars
761 .insert(name.to_string(), Value::Str(val.to_string()));
762 }
763 Ok(())
764}