use bashkit::{Bash, FileSystem, FsLimits, InMemoryFs};
use std::path::Path;
mod byte_boundary_safety {
    use super::*;

    /// Execute `script` and require only that it completes without
    /// panicking; the exit code is deliberately ignored.
    async fn run_no_panic(script: &str) {
        let mut shell = Bash::new();
        let output = shell.exec(script).await.unwrap();
        let _ = output.exit_code;
    }

    #[tokio::test]
    async fn unicode_awk_multibyte_comment_no_panic() {
        run_no_panic(
            r#"echo "hello" | awk '# ── Pass 1 ──
{print $1}'"#,
        )
        .await;
    }

    #[tokio::test]
    async fn unicode_awk_multibyte_string_no_panic() {
        run_no_panic(r#"echo "café" | awk '{print "→ " $0}'"#).await;
    }

    #[tokio::test]
    async fn unicode_awk_cjk_input_no_panic() {
        run_no_panic(r#"echo "日本語 テスト" | awk '{print $1}'"#).await;
    }

    #[tokio::test]
    async fn unicode_awk_emoji_no_panic() {
        run_no_panic(r#"echo "hello 🌍 world" | awk '{print $2}'"#).await;
    }

    #[tokio::test]
    async fn unicode_awk_multibyte_field_separator_no_panic() {
        run_no_panic(r#"echo "a│b│c" | awk -F'│' '{print $2}'"#).await;
    }

    #[tokio::test]
    async fn unicode_awk_multibyte_pattern_no_panic() {
        run_no_panic(r#"printf "café\ntest\n" | awk '/café/{print "found: " $0}'"#).await;
    }

    #[tokio::test]
    async fn unicode_awk_multibyte_variable_no_panic() {
        run_no_panic(r#"echo "test" | awk 'BEGIN{x="─═─"} {print x, $0}'"#).await;
    }

    #[tokio::test]
    async fn unicode_sed_multibyte_no_panic() {
        run_no_panic(r#"echo "café latte" | sed 's/café/coffee/'"#).await;
    }

    #[tokio::test]
    async fn unicode_sed_cjk_replacement_no_panic() {
        run_no_panic(r#"echo "hello world" | sed 's/world/世界/'"#).await;
    }

    #[tokio::test]
    async fn unicode_sed_box_drawing_no_panic() {
        run_no_panic(r#"echo "──border──" | sed 's/──//g'"#).await;
    }

    #[tokio::test]
    async fn unicode_grep_multibyte_no_panic() {
        run_no_panic(r#"echo "café" | grep "café""#).await;
    }

    #[tokio::test]
    async fn unicode_awk_stress_mixed_multibyte() {
        run_no_panic(
            r#"printf "α β γ δ ε\n日本 中文 한국\n🌍 🌎 🌏\n" | awk '{
for(i=1;i<=NF;i++) print NR, i, $i
}'"#,
        )
        .await;
    }
}
mod zero_width_chars {
    use super::*;

    /// U+200B (zero-width space) in a filename: if the write is accepted,
    /// the ZWSP name must address a file distinct from the visually
    /// identical plain name (TM-UNI-003 gap).
    #[tokio::test]
    async fn unicode_zwsp_in_filename_current_behavior() {
        let fs = InMemoryFs::new();
        let result = fs
            .write_file(Path::new("/tmp/file\u{200B}name.txt"), b"data")
            .await;
        if result.is_ok() {
            let normal = fs
                .write_file(Path::new("/tmp/filename.txt"), b"other")
                .await;
            assert!(normal.is_ok());
            let content1 = fs
                .read_file(Path::new("/tmp/file\u{200B}name.txt"))
                .await
                .unwrap();
            let content2 = fs.read_file(Path::new("/tmp/filename.txt")).await.unwrap();
            assert_ne!(
                content1, content2,
                "ZWSP creates distinct file (TM-UNI-003 gap)"
            );
        }
    }

    /// U+FEFF (BOM) inside a filename — behavior is recorded, not asserted.
    #[tokio::test]
    async fn unicode_bom_in_filename_current_behavior() {
        let fs = InMemoryFs::new();
        let result = fs
            .write_file(Path::new("/tmp/\u{FEFF}file.txt"), b"data")
            .await;
        let _ = result;
    }

    /// U+200D (zero-width joiner) inside a filename — behavior recorded.
    #[tokio::test]
    async fn unicode_zwj_in_filename_current_behavior() {
        let fs = InMemoryFs::new();
        let result = fs
            .write_file(Path::new("/tmp/file\u{200D}name.txt"), b"data")
            .await;
        let _ = result;
    }

    /// A variable whose VALUE contains U+200B must expand cleanly.
    /// Fix: the previous script was `x="normal"` and contained no
    /// zero-width character at all, so the test never exercised what its
    /// name claims. The value now genuinely embeds U+200B; we assert the
    /// visible halves survive expansion.
    #[tokio::test]
    async fn unicode_zwsp_in_variable_passthrough() {
        let mut bash = Bash::new();
        let result = bash
            .exec("x=\"nor\u{200B}mal\"\necho \"$x\"")
            .await
            .unwrap();
        assert_eq!(result.exit_code, 0);
        assert!(result.stdout.contains("nor"));
        assert!(result.stdout.contains("mal"));
    }

    /// U+200B inside a double-quoted literal passes through echo.
    #[tokio::test]
    async fn unicode_zwsp_in_string_passthrough() {
        let mut bash = Bash::new();
        let result = bash.exec("echo \"hello\u{200B}world\"").await.unwrap();
        assert_eq!(result.exit_code, 0);
        assert!(result.stdout.contains("hello"));
        assert!(result.stdout.contains("world"));
    }
}
mod homoglyph_tests {
    use super::*;

    /// Latin "data.txt" and Cyrillic-а "dаta.txt" look identical but are
    /// different byte sequences; the fs must keep them as separate files.
    #[tokio::test]
    async fn unicode_homoglyph_filenames_distinct() {
        let fs = InMemoryFs::new();
        fs.write_file(Path::new("/tmp/data.txt"), b"latin")
            .await
            .unwrap();
        fs.write_file(Path::new("/tmp/d\u{0430}ta.txt"), b"cyrillic")
            .await
            .unwrap();
        let latin = fs.read_file(Path::new("/tmp/data.txt")).await.unwrap();
        let cyrillic = fs
            .read_file(Path::new("/tmp/d\u{0430}ta.txt"))
            .await
            .unwrap();
        assert_eq!(latin, b"latin");
        assert_eq!(cyrillic, b"cyrillic");
    }

    /// Homoglyph VALUES must not compare equal.
    /// Fix: the previous script was `x=latin; echo $x` and contained no
    /// homoglyph at all. (Bash identifiers are ASCII-only, so the
    /// distinctness check is done on string values: Latin "data" vs
    /// "d\u{0430}ta" with Cyrillic а must be unequal under `[ = ]`.)
    #[tokio::test]
    async fn unicode_homoglyph_variables_distinct() {
        let mut bash = Bash::new();
        let result = bash
            .exec(
                "x=\"data\"; y=\"d\u{0430}ta\"; if [ \"$x\" = \"$y\" ]; then echo same; else echo distinct; fi",
            )
            .await
            .unwrap();
        assert_eq!(result.exit_code, 0);
        assert!(result.stdout.contains("distinct"));
    }
}
mod normalization_tests {
    use super::*;

    /// NFC "café" (U+00E9) and NFD "cafe" + U+0301 are different byte
    /// sequences; the filesystem must not normalize them together.
    #[tokio::test]
    async fn unicode_nfc_nfd_distinct_files() {
        let fs = InMemoryFs::new();
        let nfc_path = Path::new("/tmp/caf\u{00E9}.txt");
        let nfd_path = Path::new("/tmp/cafe\u{0301}.txt");
        fs.write_file(nfc_path, b"nfc").await.unwrap();
        fs.write_file(nfd_path, b"nfd").await.unwrap();
        let nfc_bytes = fs.read_file(nfc_path).await.unwrap();
        let nfd_bytes = fs.read_file(nfd_path).await.unwrap();
        assert_eq!(nfc_bytes, b"nfc");
        assert_eq!(nfd_bytes, b"nfd");
    }

    /// Identical NFC byte sequences compare equal inside a script.
    #[tokio::test]
    async fn unicode_normalization_in_scripts() {
        let mut shell = Bash::new();
        let script = "x=\"caf\u{00E9}\"; if [ \"$x\" = \"caf\u{00E9}\" ]; then echo match; fi";
        let output = shell.exec(script).await.unwrap();
        assert_eq!(output.exit_code, 0);
        assert!(output.stdout.contains("match"));
    }
}
mod combining_char_tests {
    use super::*;

    /// 200 combining grave accents on one base char: the filename-length
    /// limit must bound the write without panicking; outcome is recorded.
    #[tokio::test]
    async fn unicode_excessive_combining_marks_bounded() {
        let limits = FsLimits::new().max_filename_length(255);
        let fs = InMemoryFs::with_limits(limits);
        let path = format!("/tmp/a{}.txt", "\u{0300}".repeat(200));
        let outcome = fs.write_file(Path::new(&path), b"data").await;
        let _ = outcome;
    }

    /// Stacked combining marks fed to awk's length() must not hang or
    /// panic; exit code is ignored.
    #[tokio::test]
    async fn unicode_combining_marks_in_awk_no_hang() {
        let mut shell = Bash::new();
        let output = shell
            .exec("echo \"a\u{0300}\u{0301}\u{0302}bc\" | awk '{print length($0), $0}'")
            .await
            .unwrap();
        let _ = output.exit_code;
    }
}
mod invisible_char_tests {
    use super::*;

    /// Attempt a write with an invisible character embedded in the name;
    /// current behavior is recorded, not asserted.
    async fn attempt_write(path: &str) {
        let fs = InMemoryFs::new();
        let outcome = fs.write_file(Path::new(path), b"data").await;
        let _ = outcome;
    }

    /// U+E0001 (deprecated language-tag char) in a filename.
    #[tokio::test]
    async fn unicode_tag_chars_in_filename_current_behavior() {
        attempt_write("/tmp/file\u{E0001}name.txt").await;
    }

    /// U+FFF9 (interlinear annotation anchor) in a filename.
    #[tokio::test]
    async fn unicode_interlinear_annotation_in_filename() {
        attempt_write("/tmp/file\u{FFF9}name.txt").await;
    }

    /// U+206A (deprecated format char, inhibit symmetric swapping).
    #[tokio::test]
    async fn unicode_deprecated_format_chars_in_filename() {
        attempt_write("/tmp/file\u{206A}name.txt").await;
    }
}
mod bidi_script_tests {
    use super::*;

    /// U+202E (RTL override) inside a comment must not break execution.
    #[tokio::test]
    async fn unicode_bidi_in_script_comment_accepted() {
        let mut shell = Bash::new();
        let script = "# \u{202E}this comment has RTL override\necho safe";
        let output = shell.exec(script).await.unwrap();
        assert_eq!(output.exit_code, 0);
        assert!(output.stdout.contains("safe"));
    }

    /// U+202E inside a quoted string passes through echo.
    #[tokio::test]
    async fn unicode_bidi_in_string_passthrough() {
        let mut shell = Bash::new();
        let output = shell.exec("echo \"text\u{202E}reversed\"").await.unwrap();
        assert_eq!(output.exit_code, 0);
        assert!(output.stdout.contains("text"));
    }

    /// U+202E in a FILENAME is a spoofing vector and must be rejected with
    /// an error that names the problem.
    #[tokio::test]
    async fn unicode_bidi_in_filename_blocked() {
        let fs = InMemoryFs::new();
        let outcome = fs
            .write_file(Path::new("/tmp/test\u{202E}exe.txt"), b"data")
            .await;
        assert!(
            outcome.is_err(),
            "Bidi override in filename should be rejected"
        );
        let message = outcome.unwrap_err().to_string();
        assert!(
            message.contains("bidi override"),
            "Error should mention bidi override: {}",
            message
        );
    }
}
mod path_validation_crosscheck {
    use super::*;

    /// C0/C1 control characters (and DEL) stay rejected in filenames.
    #[tokio::test]
    async fn unicode_control_chars_still_blocked() {
        let fs = InMemoryFs::new();
        for &ch in &['\u{0001}', '\u{001F}', '\u{007F}', '\u{0080}', '\u{009F}'] {
            let candidate = format!("/tmp/file{}name.txt", ch);
            let outcome = fs.write_file(Path::new(&candidate), b"data").await;
            assert!(
                outcome.is_err(),
                "Control char U+{:04X} should be rejected in filenames",
                ch as u32
            );
        }
    }

    /// Every bidi embedding/override (U+202A..U+202E) and every bidi
    /// isolate (U+2066..U+2069) is rejected in filenames.
    #[tokio::test]
    async fn unicode_all_bidi_overrides_blocked_in_paths() {
        let fs = InMemoryFs::new();
        for &ch in &['\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}'] {
            let candidate = format!("/tmp/file{}name.txt", ch);
            let outcome = fs.write_file(Path::new(&candidate), b"data").await;
            assert!(
                outcome.is_err(),
                "Bidi char U+{:04X} should be rejected in filenames",
                ch as u32
            );
        }
        for &ch in &['\u{2066}', '\u{2067}', '\u{2068}', '\u{2069}'] {
            let candidate = format!("/tmp/file{}name.txt", ch);
            let outcome = fs.write_file(Path::new(&candidate), b"data").await;
            assert!(
                outcome.is_err(),
                "Bidi isolate U+{:04X} should be rejected in filenames",
                ch as u32
            );
        }
    }

    /// Ordinary multi-byte names (Latin-1, CJK, emoji, Arabic, Devanagari)
    /// must remain writable.
    #[tokio::test]
    async fn unicode_normal_chars_allowed_in_paths() {
        let fs = InMemoryFs::new();
        for name in [
            "/tmp/café.txt",
            "/tmp/文件.txt",
            "/tmp/🌍.txt",
            "/tmp/ملف.txt",
            "/tmp/फ़ाइल.txt",
        ] {
            fs.write_file(Path::new(name), b"ok").await.unwrap();
        }
    }
}
mod e2e_unicode_security {
use super::*;
#[tokio::test]
async fn unicode_e2e_pipeline_no_panic() {
let mut bash = Bash::new();
let result = bash
.exec(
r#"
echo "名前,値" > /tmp/data.csv
echo "日本語,テスト" >> /tmp/data.csv
echo "café,latte" >> /tmp/data.csv
awk -F, '{print NR ": " $1 " → " $2}' /tmp/data.csv
"#,
)
.await
.unwrap();
let _ = result.exit_code;
}
#[tokio::test]
async fn unicode_e2e_grep_multibyte() {
let mut bash = Bash::new();
let result = bash
.exec(
r#"
echo "hello world" > /tmp/test.txt
echo "café latte" >> /tmp/test.txt
echo "日本語" >> /tmp/test.txt
grep "café" /tmp/test.txt
"#,
)
.await
.unwrap();
assert_eq!(result.exit_code, 0);
assert!(result.stdout.contains("café"));
}
#[tokio::test]
async fn unicode_e2e_sed_multibyte() {
let mut bash = Bash::new();
let result = bash
.exec(
r#"
echo "hello world" > /tmp/test.txt
sed 's/world/世界/' /tmp/test.txt
"#,
)
.await
.unwrap();
assert_eq!(result.exit_code, 0);
assert!(result.stdout.contains("世界"));
}
#[tokio::test]
async fn unicode_e2e_variable_ops() {
let mut bash = Bash::new();
let result = bash
.exec(
r#"
x="café"
echo "${#x}"
echo "${x/é/e}"
"#,
)
.await
.unwrap();
assert_eq!(result.exit_code, 0);
}
#[tokio::test]
async fn unicode_issue_395_exact_reproduction() {
let mut bash = Bash::new();
let awk_code = r#"echo "key=value" | awk '
# ── Pass 1: load all overrides into a map ──────────────────────────────────
NR == FNR {
print $0
}'"#;
let result = bash.exec(awk_code).await.unwrap();
let _ = result.exit_code;
}
}
mod expr_byte_boundary {
    use super::*;

    /// Execute `script`; only a panic fails the test.
    async fn run_no_panic(script: &str) {
        let mut shell = Bash::new();
        let output = shell.exec(script).await.unwrap();
        let _ = output.exit_code;
    }

    #[tokio::test]
    async fn unicode_expr_length_multibyte_no_panic() {
        run_no_panic(r#"expr length "café""#).await;
    }

    #[tokio::test]
    async fn unicode_expr_substr_multibyte_no_panic() {
        run_no_panic(r#"expr substr "café" 4 1"#).await;
    }

    #[tokio::test]
    async fn unicode_expr_substr_cjk_no_panic() {
        run_no_panic(r#"expr substr "日本語" 2 1"#).await;
    }

    #[tokio::test]
    async fn unicode_expr_substr_emoji_no_panic() {
        run_no_panic(r#"expr substr "🌍🌎🌏" 2 1"#).await;
    }

    #[tokio::test]
    async fn unicode_expr_index_multibyte_no_panic() {
        run_no_panic(r#"expr index "café" "é""#).await;
    }

    #[tokio::test]
    async fn unicode_expr_length_emoji_no_panic() {
        run_no_panic(r#"expr length "🌍🌎🌏""#).await;
    }
}
mod printf_byte_boundary {
    use super::*;

    /// Execute `script`; only a panic fails the test.
    async fn run_no_panic(script: &str) {
        let mut shell = Bash::new();
        let output = shell.exec(script).await.unwrap();
        let _ = output.exit_code;
    }

    #[tokio::test]
    async fn unicode_printf_precision_multibyte_no_panic() {
        run_no_panic(r#"printf "%.1s\n" "é""#).await;
    }

    #[tokio::test]
    async fn unicode_printf_precision_cjk_no_panic() {
        run_no_panic(r#"printf "%.1s\n" "日本""#).await;
    }

    #[tokio::test]
    async fn unicode_printf_precision_emoji_no_panic() {
        run_no_panic(r#"printf "%.2s\n" "🌍🌎""#).await;
    }

    #[tokio::test]
    async fn unicode_printf_width_multibyte_no_panic() {
        run_no_panic(r#"printf "%10s\n" "café""#).await;
    }

    #[tokio::test]
    async fn unicode_printf_multiple_multibyte_args_no_panic() {
        run_no_panic(r#"printf "%-10s %5s\n" "日本語" "café""#).await;
    }
}
mod cuttr_byte_boundary {
    use super::*;

    /// Execute `script`; only a panic fails the test.
    async fn run_no_panic(script: &str) {
        let mut shell = Bash::new();
        let output = shell.exec(script).await.unwrap();
        let _ = output.exit_code;
    }

    #[tokio::test]
    async fn unicode_tr_multibyte_set1_no_panic() {
        run_no_panic(r#"echo "café" | tr 'é' 'e'"#).await;
    }

    #[tokio::test]
    async fn unicode_tr_cjk_sets_no_panic() {
        run_no_panic(r#"echo "日本語" | tr '日' '月'"#).await;
    }

    #[tokio::test]
    async fn unicode_tr_delete_multibyte_no_panic() {
        run_no_panic(r#"echo "café" | tr -d 'é'"#).await;
    }

    #[tokio::test]
    async fn unicode_cut_multibyte_delimiter_no_panic() {
        run_no_panic(r#"echo "a│b│c" | cut -d'│' -f2"#).await;
    }

    #[tokio::test]
    async fn unicode_cut_chars_multibyte_no_panic() {
        run_no_panic(r#"echo "café" | cut -c4"#).await;
    }

    #[tokio::test]
    async fn unicode_tr_squeeze_multibyte_no_panic() {
        run_no_panic(r#"echo "caféé" | tr -s 'é'"#).await;
    }
}
mod interpreter_byte_boundary {
    use super::*;

    /// Arithmetic evaluated next to multi-byte text must not trip byte
    /// boundaries.
    /// Fix: the previous script was pure ASCII (`x=1; echo $((x + 1))`),
    /// so the test never provided the "multibyte context" its name claims.
    /// A box-drawing/CJK comment now precedes the arithmetic (multi-byte
    /// comments are accepted elsewhere in this suite).
    #[tokio::test]
    async fn unicode_arithmetic_multibyte_context_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec("# ── 計算 ──\nx=1; echo $((x + 1))")
            .await
            .unwrap();
        assert_eq!(result.exit_code, 0);
    }

    /// ${#x} on a multi-byte value must not panic; the exact count
    /// (bytes vs chars) is not asserted here.
    #[tokio::test]
    async fn unicode_variable_multibyte_value_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(
                r#"x="café"
echo "${#x}""#,
            )
            .await
            .unwrap();
        let _ = result.exit_code;
    }
}
mod sed_extended_byte_boundary {
    use super::*;

    /// Multi-byte character used as the s/// delimiter.
    #[tokio::test]
    async fn unicode_sed_multibyte_delimiter_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(r#"echo "hello world" | sed 's│hello│goodbye│'"#)
            .await
            .unwrap();
        let _ = result.exit_code;
    }

    /// Multi-byte text inside an address pattern (/café/d).
    #[tokio::test]
    async fn unicode_sed_address_multibyte_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(r#"printf "café\nlatte\n" | sed '/café/d'"#)
            .await
            .unwrap();
        let _ = result.exit_code;
    }

    /// Multi-byte text in an append (a\) command.
    #[tokio::test]
    async fn unicode_sed_append_multibyte_no_panic() {
        let mut bash = Bash::new();
        let result = bash.exec(r#"echo "test" | sed 'a\日本語'"#).await.unwrap();
        let _ = result.exit_code;
    }

    /// Multi-byte text in an insert (i\) command.
    #[tokio::test]
    async fn unicode_sed_insert_multibyte_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(r#"echo "test" | sed 'i\→ header'"#)
            .await
            .unwrap();
        let _ = result.exit_code;
    }

    /// Emoji in pattern, replacement, and input simultaneously.
    #[tokio::test]
    async fn unicode_sed_emoji_all_positions_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(r#"echo "🌍 hello 🌎" | sed 's/🌍/🌏/g'"#)
            .await
            .unwrap();
        let _ = result.exit_code;
    }

    /// Two -e expressions, both with multi-byte patterns.
    #[tokio::test]
    async fn unicode_sed_multiple_commands_multibyte_no_panic() {
        let mut bash = Bash::new();
        let result = bash
            .exec(r#"echo "café latte" | sed -e 's/café/coffee/' -e 's/latte/milk/'"#)
            .await
            .unwrap();
        let _ = result.exit_code;
    }

    /// y/// transliteration into multi-byte targets.
    /// Fix: the previous version wrapped the future in AssertUnwindSafe
    /// without ever calling catch_unwind, spawned it as a task, and then
    /// discarded the JoinHandle outcome — so a panic inside sed was
    /// silently swallowed and this "no panic" test could never fail.
    /// Awaiting directly (like every sibling test) lets a panic fail the
    /// test as intended.
    #[tokio::test]
    async fn unicode_sed_transliterate_multibyte_no_panic() {
        let mut bash = Bash::new();
        let result = bash.exec(r#"echo "abc" | sed 'y/abc/αβγ/'"#).await.unwrap();
        let _ = result.exit_code;
    }
}
mod cross_component_unicode_e2e {
    use super::*;

    /// Execute `script`; only a panic fails the test.
    async fn run_no_panic(script: &str) {
        let mut shell = Bash::new();
        let output = shell.exec(script).await.unwrap();
        let _ = output.exit_code;
    }

    #[tokio::test]
    async fn unicode_e2e_expr_variable_expansion() {
        run_no_panic(
            r#"
x="café latte"
len=$(expr length "$x")
echo "length: $len"
"#,
        )
        .await;
    }

    #[tokio::test]
    async fn unicode_e2e_printf_formatting() {
        run_no_panic(
            r#"
for item in "café" "日本語" "🌍🌎"; do
printf "%-15s [%s]\n" "$item" "$item"
done
"#,
        )
        .await;
    }

    #[tokio::test]
    async fn unicode_e2e_sed_pipeline_multibyte() {
        run_no_panic(
            r#"
echo "── café ── latte ──" | sed 's/──/==/g' | sed 's/café/coffee/'
"#,
        )
        .await;
    }

    #[tokio::test]
    async fn unicode_e2e_tr_cut_pipeline() {
        run_no_panic(
            r#"
echo "café:latte:espresso" | cut -d: -f2
echo "CAFÉ" | tr '[:upper:]' '[:lower:]'
"#,
        )
        .await;
    }

    #[tokio::test]
    async fn unicode_e2e_all_builtins_stress() {
        run_no_panic(
            r#"
# Awk with multi-byte (TM-UNI-001)
echo "日本語 テスト" | awk '{print $1}'
# Sed with multi-byte (TM-UNI-002)
echo "café" | sed 's/é/e/'
# Expr with multi-byte (TM-UNI-015)
expr length "日本語"
# Printf with multi-byte (TM-UNI-016)
printf "%s\n" "café"
# Grep with multi-byte (safe)
echo "café" | grep "café"
echo "done"
"#,
        )
        .await;
    }
}