use std::fmt::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
use zsh::zsh_h::{wc_code, wc_data, wordcode};
/// Directory holding the lexer corpus: `tests/lexer_corpus` under the crate's
/// manifest directory (resolved at compile time via `CARGO_MANIFEST_DIR`).
fn corpus_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests/lexer_corpus");
    dir
}
/// Directory searched for the `zshrs_dump` module artifact:
/// `src/zsh/Src/Modules` under the crate's manifest directory.
fn module_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("src/zsh/Src/Modules");
    dir
}
/// Returns `true` when a `zshrs_dump` artifact (`.so` or `.bundle`) exists in
/// [`module_dir`].
fn module_built() -> bool {
    let modules = module_dir();
    // Checked in this order; the first artifact found settles the answer.
    ["zshrs_dump.so", "zshrs_dump.bundle"]
        .iter()
        .any(|artifact| modules.join(artifact).exists())
}
/// Returns `true` when `zsh --version` can be spawned and exits successfully.
///
/// A spawn failure (for example, no `zsh` on `PATH`) is reported as `false`.
fn zsh_available() -> bool {
    match Command::new("zsh").arg("--version").output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
/// Lists the corpus files in [`corpus_dir`], sorted by path.
///
/// A file qualifies when its extension is `zsh` or `sh` and its file name
/// starts with an ASCII digit. If the directory cannot be read the result is
/// empty; individual unreadable entries are skipped.
fn collect_corpus() -> Vec<PathBuf> {
    let mut found = Vec::new();
    if let Ok(listing) = std::fs::read_dir(corpus_dir()) {
        for entry in listing.filter_map(Result::ok) {
            let candidate = entry.path();
            let ext_ok = matches!(
                candidate.extension().and_then(|e| e.to_str()),
                Some("zsh" | "sh")
            );
            let starts_with_digit = candidate
                .file_name()
                .and_then(|n| n.to_str())
                .and_then(|n| n.chars().next())
                .is_some_and(|c| c.is_ascii_digit());
            if ext_ok && starts_with_digit {
                found.push(candidate);
            }
        }
    }
    found.sort();
    found
}
/// Wraps `s` in single quotes for use as one shell word.
///
/// Each embedded `'` is rewritten as `'\''` (close the quote, emit an escaped
/// quote, reopen), so the result is always a single quoted word.
fn shell_escape(s: &str) -> String {
    let quoted_body = s.replace('\'', "'\\''");
    format!("'{}'", quoted_body)
}
/// Runs `zsh -fc` with a script that points `module_path` at [`module_dir`],
/// loads `zsh/zshrs_dump`, and calls `dumpwordcode` on `file`.
///
/// Returns the command's stdout (lossily decoded as UTF-8) on success.
///
/// # Errors
///
/// Returns a message when zsh cannot be spawned, or when it exits
/// unsuccessfully (the message then carries the exit code and stderr).
fn dump_via_zsh(file: &Path) -> Result<String, String> {
    let quoted_modules = shell_escape(&module_dir().to_string_lossy());
    let quoted_file = shell_escape(&file.to_string_lossy());
    let script = format!(
        "module_path=({}); zmodload zsh/zshrs_dump; dumpwordcode {}",
        quoted_modules, quoted_file,
    );

    let run = match Command::new("zsh").arg("-fc").arg(&script).output() {
        Ok(run) => run,
        Err(e) => return Err(format!("spawn zsh: {}", e)),
    };

    if run.status.success() {
        Ok(String::from_utf8_lossy(&run.stdout).into_owned())
    } else {
        Err(format!(
            "zsh exit {:?}, stderr:\n{}",
            run.status.code(),
            String::from_utf8_lossy(&run.stderr),
        ))
    }
}
/// Loads the reference dump for `path`.
///
/// If a sibling file named `<file name>.wordcode` exists it is read and
/// returned; otherwise the dump is produced by [`dump_via_zsh`].
///
/// # Errors
///
/// Returns a message when the `.wordcode` file exists but cannot be read, or
/// whatever error [`dump_via_zsh`] reports.
fn load_zsh_stream(path: &Path) -> Result<String, String> {
    // A missing or non-UTF-8 file name falls back to "?".
    let base = match path.file_name().and_then(|n| n.to_str()) {
        Some(name) => name,
        None => "?",
    };
    let snapshot = path.with_file_name(format!("{}.wordcode", base));

    if !snapshot.exists() {
        return dump_via_zsh(path);
    }
    match std::fs::read_to_string(&snapshot) {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("read {}: {}", snapshot.display(), e)),
    }
}
/// Display names for wordcode kinds; `wc_name` indexes this table with the
/// numeric kind value.
const WCNAMES: &[&str] = &[
    "WC_END",     // 0
    "WC_LIST",    // 1
    "WC_SUBLIST", // 2
    "WC_PIPE",    // 3
    "WC_REDIR",   // 4
    "WC_ASSIGN",  // 5
    "WC_SIMPLE",  // 6
    "WC_TYPESET", // 7
    "WC_SUBSH",   // 8
    "WC_CURSH",   // 9
    "WC_TIMED",   // 10
    "WC_FUNCDEF", // 11
    "WC_FOR",     // 12
    "WC_SELECT",  // 13
    "WC_WHILE",   // 14
    "WC_REPEAT",  // 15
    "WC_CASE",    // 16
    "WC_IF",      // 17
    "WC_COND",    // 18
    "WC_ARITH",   // 19
    "WC_AUTOFN",  // 20
    "WC_TRY",     // 21
];
/// Looks up the display name for `kind` in `WCNAMES`, returning `"WC_?"` when
/// the value falls outside the table.
fn wc_name(kind: wordcode) -> &'static str {
    WCNAMES.get(kind as usize).copied().unwrap_or("WC_?")
}
/// Appends `s` to `out`, byte by byte, in escaped form.
///
/// Newline, tab, backslash, double quote and NUL get their two-character
/// escapes; any other byte outside `0x20..0x7f` is written as `\xNN`
/// (lowercase hex); the rest are appended unchanged.
fn esc(out: &mut String, s: &str) {
    for &byte in s.as_bytes() {
        let shorthand = match byte {
            b'\n' => Some("\\n"),
            b'\t' => Some("\\t"),
            b'\\' => Some("\\\\"),
            b'"' => Some("\\\""),
            0 => Some("\\0"),
            _ => None,
        };
        if let Some(seq) = shorthand {
            out.push_str(seq);
        } else if (0x20..0x7f).contains(&byte) {
            out.push(char::from(byte));
        } else {
            // Writing into a String cannot fail; the result is ignored.
            let _ = write!(out, "\\x{:02x}", byte);
        }
    }
}
/// Lexes and parses `src` with the zshrs port and renders the resulting
/// program as text.
///
/// The output is, in order:
/// * an `EPROG flags=… len=… npats=…` header,
/// * `WORDS <n>` followed by one `WC[i]=… KIND=… DATA=…` line per word,
/// * `STRS <n>` followed by one `STR[i]="…"` line per NUL-terminated entry of
///   the string table (escaped with `esc_bytes`).
///
/// If the lexer is not at `ENDINPUT` after parsing, the whole result is the
/// single line `PARSE_ERR`.
fn dump_via_zshrs(src: &str) -> String {
    use zsh::tokens::ENDINPUT;

    // NOTE(review): these calls look like they drive shared lexer/parser
    // state, so the sequence is kept exactly as-is — confirm before reordering.
    zsh::lex::lex_init(src);
    zsh::lex::set_tok(ENDINPUT);
    zsh::parse::init_parse();
    zsh::lex::zshlex();
    zsh::parse::par_list_wordcode();
    if zsh::lex::tok() != ENDINPUT {
        return String::from("PARSE_ERR\n");
    }

    let prog = zsh::parse::bld_eprog(true);
    let mut dump = String::new();

    // Writes into a String cannot fail, so the fmt results are discarded.
    let _ = writeln!(
        dump,
        "EPROG flags=0x{:x} len={} npats={}",
        prog.flags, prog.len, prog.npats
    );

    let _ = writeln!(dump, "WORDS {}", prog.prog.len());
    for (idx, word) in prog.prog.iter().enumerate() {
        let _ = writeln!(
            dump,
            "WC[{}]=0x{:08x} KIND={} DATA=0x{:x}",
            idx,
            word,
            wc_name(wc_code(*word)),
            wc_data(*word)
        );
    }

    let table = prog.strs.unwrap_or_default();
    // Splitting on NUL always yields one final piece after the last
    // terminator; that piece is unterminated (or empty) and is not an entry.
    let mut strings: Vec<&[u8]> = table.as_bytes().split(|&b| b == 0).collect();
    strings.pop();

    let _ = writeln!(dump, "STRS {}", strings.len());
    for (idx, raw) in strings.into_iter().enumerate() {
        let _ = write!(dump, "STR[{}]=\"", idx);
        esc_bytes(&mut dump, raw);
        dump.push_str("\"\n");
    }
    dump
}
/// Appends `bytes` to `out` in escaped form.
///
/// Newline, tab, backslash, double quote and NUL get their two-character
/// escapes; any other byte outside `0x20..0x7f` is written as `\xNN`
/// (lowercase hex); the rest are appended unchanged.
fn esc_bytes(out: &mut String, bytes: &[u8]) {
    for byte in bytes.iter().copied() {
        let shorthand = match byte {
            b'\n' => Some("\\n"),
            b'\t' => Some("\\t"),
            b'\\' => Some("\\\\"),
            b'"' => Some("\\\""),
            0 => Some("\\0"),
            _ => None,
        };
        match shorthand {
            Some(seq) => out.push_str(seq),
            // Space plus the ASCII graphic range is exactly 0x20..=0x7e.
            None if byte == b' ' || byte.is_ascii_graphic() => out.push(byte as char),
            None => {
                // Writing into a String cannot fail; the result is ignored.
                let _ = write!(out, "\\x{:02x}", byte);
            }
        }
    }
}
/// Byte offset of the first position where `a` and `b` differ.
///
/// When one string is a prefix of the other (or they are equal), this is the
/// length of the shorter one.
fn first_divergence(a: &str, b: &str) -> usize {
    // The length of the common prefix is exactly the index of the first
    // mismatch, or the shorter length if no mismatch occurs.
    a.bytes()
        .zip(b.bytes())
        .take_while(|(left, right)| left == right)
        .count()
}
/// Returns `s[lo..hi]` without ever panicking.
///
/// Both bounds are clamped to `s.len()` and then moved down to the nearest
/// UTF-8 character boundary, so the slice never splits a code point. If the
/// adjusted upper bound falls below the adjusted lower bound (for example
/// `hi < lo`), the result is the empty string.
fn safe_slice(s: &str, lo: usize, hi: usize) -> &str {
    // Largest char boundary that is <= idx (after clamping to the length).
    // Offset 0 is always a boundary, so the loop terminates.
    let floor_boundary = |idx: usize| {
        let mut at = idx.min(s.len());
        while !s.is_char_boundary(at) {
            at -= 1;
        }
        at
    };

    let start = floor_boundary(lo);
    // Never let the end precede the start: an inverted range is empty rather
    // than running on to the end of the string.
    let end = floor_boundary(hi).max(start);
    &s[start..end]
}
fn check_file(path: &Path) -> Result<(), String> {
let src = std::fs::read_to_string(path).map_err(|e| format!("read: {}", e))?;
let zsh_stream = load_zsh_stream(path).map_err(|e| format!("zsh dump: {}", e))?;
let zshrs_stream = dump_via_zshrs(&src);
if zsh_stream == zshrs_stream {
return Ok(());
}
let div = first_divergence(&zsh_stream, &zshrs_stream);
let lo = div.saturating_sub(60);
let hi_zsh = (div + 60).min(zsh_stream.len());
let hi_zshrs = (div + 60).min(zshrs_stream.len());
Err(format!(
"\n--- {} ---\n\
=== first divergence at byte {} ===\n\
zsh ...{}...\n\
zshrs ...{}...\n",
path.file_name().and_then(|s| s.to_str()).unwrap_or("?"),
div,
safe_slice(&zsh_stream, lo, hi_zsh).escape_default(),
safe_slice(&zshrs_stream, lo, hi_zshrs).escape_default(),
))
}
/// Runs `check_file` over every corpus file and fails if any dump differs.
///
/// Panics when the corpus is empty. When some `.wordcode` snapshot is missing
/// and either zsh or the `zshrs_dump` module is unavailable, the test prints a
/// notice and returns without checking anything. At most three failure
/// reports are printed before the final panic.
#[test]
fn corpus_wordcode_parity() {
    let corpus = collect_corpus();
    assert!(!corpus.is_empty(), "no corpus files in tests/lexer_corpus/");

    // Live zsh is only needed if at least one snapshot is missing.
    let snapshot_missing = corpus.iter().any(|p| {
        let name = p.file_name().and_then(|s| s.to_str()).unwrap_or("?");
        !p.with_file_name(format!("{}.wordcode", name)).exists()
    });
    if snapshot_missing {
        if !zsh_available() {
            eprintln!("zsh not on PATH and not all .wordcode files present — skipping");
            return;
        }
        if !module_built() {
            eprintln!(
                "zsh/zshrs_dump module not built and not all .wordcode files present — \
                 skipping wordcode parity. Build the C module then run \
                 tests/lexer_corpus/regen.sh"
            );
            return;
        }
    }

    let failures: Vec<String> = corpus
        .iter()
        .filter_map(|path| check_file(path).err())
        .collect();
    let passes = corpus.len() - failures.len();
    eprintln!(
        "wordcode parity: {}/{} passing ({} failing)",
        passes,
        corpus.len(),
        failures.len()
    );
    if failures.is_empty() {
        return;
    }

    let shown = failures.len().min(3);
    for report in &failures[..shown] {
        eprintln!("{}", report);
    }
    let elided = failures.len() - shown;
    if elided > 0 {
        eprintln!("... {} more failures elided ...", elided);
    }
    panic!("wordcode parity FAILURES: {}/{}", failures.len(), corpus.len());
}