use rumdl_lib::lint_context::LintContext;
use rumdl_lib::rule::Rule;
use rumdl_lib::rules::MD051LinkFragments;
/// Lints pairs of headings that read alike but are encoded with different code points.
///
/// Each side of a pair is rendered into its own document (`# <heading>`, a blank
/// line, then `[Link](#test)`) and both documents are checked with the MD051 rule.
/// The test only requires that `check` returns `Ok` for both documents; the
/// warnings themselves are not compared.
#[test]
fn test_unicode_normalization_spoofing_prevention() {
    let rule = MD051LinkFragments::new();
    // Compatibility characters that tooling tends to fold to ASCII are written as
    // `\u{..}` escapes so the two sides of a pair cannot silently become identical.
    let normalization_pairs = [
        // Precomposed letter vs. base letter followed by a combining mark.
        ("café", "cafe\u{0301}"),
        ("naïve", "nai\u{0308}ve"),
        ("résumé", "re\u{0301}sume\u{0301}"),
        // Compatibility forms vs. plain ASCII (U+FB01 is LATIN SMALL LIGATURE FI).
        ("\u{FB01}le", "file"),
        ("①②③", "123"),
        // Case variants.
        ("İstanbul", "istanbul"),
        ("MASS", "mass"),
        // Fullwidth Latin letters and U+3000 IDEOGRAPHIC SPACE vs. ASCII.
        (
            "\u{FF46}\u{FF55}\u{FF4C}\u{FF4C}\u{3000}\u{FF57}\u{FF49}\u{FF44}\u{FF54}\u{FF48}",
            "full width",
        ),
    ];
    for (version1, version2) in normalization_pairs {
        let first_doc = format!("# {version1}\n\n[Link](#test)");
        let second_doc = format!("# {version2}\n\n[Link](#test)");
        let first_ctx = LintContext::new(
            &first_doc,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let second_ctx = LintContext::new(
            &second_doc,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let first_result = rule.check(&first_ctx);
        let second_result = rule.check(&second_ctx);
        assert!(
            first_result.is_ok() && second_result.is_ok(),
            "Unicode normalization test failed for: '{version1}' vs '{version2}'"
        );
        println!("✓ Unicode normalization tested: '{version1}' vs '{version2}'");
    }
}
/// Lints headings that contain zero-width and other invisible formatting characters.
///
/// Every generated document links to `#wordbreak`. For each heading, `check` must
/// return `Ok` and the returned list may hold at most one warning.
#[test]
fn test_zero_width_character_injection() {
    let rule = MD051LinkFragments::new();
    let zero_width_cases = [
        // A single invisible character between "word" and "break".
        ("word\u{200B}break", "Zero Width Space (U+200B)"),
        ("word\u{200C}break", "Zero Width Non-Joiner (U+200C)"),
        ("word\u{200D}break", "Zero Width Joiner (U+200D)"),
        ("word\u{FEFF}break", "Zero Width No-Break Space / BOM (U+FEFF)"),
        ("word\u{2060}break", "Word Joiner (U+2060)"),
        ("word\u{061C}break", "Arabic Letter Mark (U+061C)"),
        ("word\u{034F}break", "Combining Grapheme Joiner (U+034F)"),
        // Several invisible characters at the start, at the end, or scattered.
        ("\u{200B}\u{200C}\u{200D}text", "Multiple zero-width at start"),
        ("text\u{200B}\u{200C}\u{200D}", "Multiple zero-width at end"),
        ("te\u{200B}st\u{200C}ing\u{200D}now", "Zero-width scattered"),
    ];
    for &(heading, description) in &zero_width_cases {
        let markdown = format!("# {heading}\n\n[Link](#wordbreak)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Zero-width character test failed: {description}");
        let warning_count = outcome.unwrap().len();
        assert!(
            warning_count <= 1,
            "Unexpected warnings for {}: {} warnings",
            description,
            warning_count
        );
        println!("✓ Zero-width character test: {description}");
    }
}
/// Lints headings that contain bidirectional control characters or mixed-script text.
///
/// The cases use the explicit overrides and embeddings (U+202A–U+202E), the
/// isolates (U+2066–U+2069), and headings that mix English with Arabic, Hebrew
/// or Cyrillic words. The only requirement is that `check` returns `Ok`.
#[test]
fn test_bidirectional_text_injection() {
    let rule = MD051LinkFragments::new();
    let bidi_cases = [
        // Overrides, embeddings and the pop character.
        ("left\u{202E}right", "Right-to-Left Override (RLO)"),
        ("normal\u{202D}forced", "Left-to-Right Override (LRO)"),
        ("text\u{202B}embedded\u{202C}normal", "Right-to-Left Embedding"),
        ("text\u{202A}embedded\u{202C}normal", "Left-to-Right Embedding"),
        ("text\u{202C}pop", "Pop Directional Formatting"),
        // Isolates, each closed with U+2069.
        ("text\u{2066}isolate\u{2069}end", "Left-to-Right Isolate"),
        ("text\u{2067}isolate\u{2069}end", "Right-to-Left Isolate"),
        ("text\u{2068}isolate\u{2069}end", "First Strong Isolate"),
        // Mixed-script headings without explicit control characters.
        ("English العربية English", "Mixed English/Arabic"),
        ("Hello עברית World", "Mixed English/Hebrew"),
        ("Test Русский Text", "Mixed English/Cyrillic"),
    ];
    for &(heading, description) in &bidi_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Bidirectional text test failed: {description}");
        println!("✓ Bidirectional text test: {description}");
    }
}
/// Lints headings that embed control characters and Unicode line/paragraph separators.
///
/// Each input is interpolated verbatim into `# <input>`, so cases such as
/// `\u{000A}` and `\u{000D}` place a raw line break inside the heading line.
/// The test only requires that `check` returns `Ok` for every document.
#[test]
fn test_control_character_injection() {
    let rule = MD051LinkFragments::new();
    let control_char_cases = [
        // Code points below U+0020.
        ("test\u{0000}null", "Null character (NUL)"),
        ("test\u{0001}start", "Start of Heading (SOH)"),
        ("test\u{0007}bell", "Bell character (BEL)"),
        ("test\u{0008}back", "Backspace (BS)"),
        ("test\u{0009}tab", "Horizontal Tab (HT)"),
        ("test\u{000A}newline", "Line Feed (LF)"),
        ("test\u{000B}vtab", "Vertical Tab (VT)"),
        ("test\u{000C}form", "Form Feed (FF)"),
        ("test\u{000D}return", "Carriage Return (CR)"),
        ("test\u{001B}escape", "Escape (ESC)"),
        ("test\u{001F}unit", "Unit Separator (US)"),
        // U+0080, U+009F and U+007F.
        ("test\u{0080}control", "Padding Character"),
        ("test\u{009F}application", "Application Program Command"),
        ("test\u{007F}delete", "Delete character (DEL)"),
        // Unicode line and paragraph separators.
        ("test\u{2028}line", "Line Separator"),
        ("test\u{2029}para", "Paragraph Separator"),
    ];
    for &(heading, description) in &control_char_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Control character test failed: {description}");
        println!("✓ Control character test: {description}");
    }
}
/// Lints headings that contain emoji, emoji sequences and assorted symbols.
///
/// Sequences that depend on invisible code points (U+200D ZERO WIDTH JOINER,
/// U+FE0F VARIATION SELECTOR-16, U+20E3 COMBINING ENCLOSING KEYCAP) are written
/// with `\u{..}` escapes so the joiners are visible in the source and cannot be
/// dropped by an editor or text filter. The test only requires that `check`
/// returns `Ok` for every document.
#[test]
fn test_emoji_symbol_edge_cases() {
    let rule = MD051LinkFragments::new();
    let emoji_cases = [
        // Single emoji at different positions in the heading.
        ("test 🎉 party", "Basic emoji"),
        ("🚀 rocket start", "Emoji at start"),
        ("end emoji 🎯", "Emoji at end"),
        ("skin tone 👋🏽 wave", "Emoji with skin tone modifier"),
        // Man + ZWJ + woman + ZWJ + girl + ZWJ + boy.
        (
            "family \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466} emoji",
            "Multi-person family emoji",
        ),
        // White flag + VS16 + ZWJ + rainbow.
        ("flag \u{1F3F3}\u{FE0F}\u{200D}\u{1F308} pride", "Flag with modifier"),
        ("🇺🇸🇬🇧🇫🇷 flags", "Country flag sequence"),
        // Man + ZWJ + laptop, woman + ZWJ + laptop.
        (
            "\u{1F468}\u{200D}\u{1F4BB}\u{1F469}\u{200D}\u{1F4BB} developers",
            "Professional emoji sequence",
        ),
        // Non-emoji symbols.
        ("math ∑∆∇ symbols", "Mathematical symbols"),
        ("operators ±×÷ test", "Mathematical operators"),
        ("money $€¥£ symbols", "Currency symbols"),
        ("arrows ←→↑↓ test", "Arrow symbols"),
        ("shapes ●■▲♠ test", "Shape symbols"),
        // Digit + VS16 + combining enclosing keycap, three times.
        (
            "keycap 1\u{FE0F}\u{20E3}2\u{FE0F}\u{20E3}3\u{FE0F}\u{20E3} test",
            "Keycap emoji sequence",
        ),
    ];
    for (heading, description) in emoji_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Emoji test failed: {description}");
        println!("✓ Emoji test: {description}");
    }
}
/// Lints headings that contain private-use code points, U+FFFD and noncharacters.
///
/// The test only requires that `check` returns `Ok` for every document; the
/// warnings are not inspected.
#[test]
fn test_private_use_area_handling() {
    let rule = MD051LinkFragments::new();
    let private_use_cases = [
        // Private Use Area in the range U+E000–U+F8FF.
        ("test\u{E000}private", "Private Use Area start"),
        ("test\u{F8FF}private", "Private Use Area end"),
        ("custom\u{E123}symbol", "Custom private symbol"),
        // Private-use code points in planes 15 and 16.
        ("test\u{F0000}plane15", "Plane 15 Private Use"),
        ("test\u{100000}plane16", "Plane 16 Private Use"),
        // Replacement character and noncharacters.
        ("invalid\u{FFFD}char", "Unicode replacement character"),
        ("test\u{FFFE}nonchar", "Non-character U+FFFE"),
        ("test\u{FFFF}nonchar", "Non-character U+FFFF"),
    ];
    for &(heading, description) in &private_use_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Private use area test failed: {description}");
        println!("✓ Private use area test: {description}");
    }
}
/// Lints headings that contain characters outside the Basic Multilingual Plane.
///
/// The inputs use mathematical alphanumerics, Ugaritic, Linear B and musical
/// notation symbols. The test only requires that `check` returns `Ok`.
#[test]
fn test_surrogate_pair_handling() {
    let rule = MD051LinkFragments::new();
    let high_codepoint_cases = [
        ("math 𝕳𝖊𝖑𝖑𝖔 script", "Mathematical script"),
        ("bold 𝐇𝐞𝐥𝐥𝐨 text", "Mathematical bold"),
        ("ancient 𐎀𐎁𐎂 script", "Ugaritic script"),
        ("linear 𐀀𐀁𐀂 b", "Linear B script"),
        ("music 𝄞𝄢𝅘𝅥𝅮 notes", "Musical notation"),
        ("tags 𝟏𝟐𝟑 test", "Mathematical digits"),
        ("symbols 𝟬𝟭𝟮 test", "Mathematical monospace"),
    ];
    for &(heading, description) in &high_codepoint_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "High codepoint test failed: {description}");
        println!("✓ High codepoint test: {description}");
    }
}
/// Lints headings whose letters have unusual upper/lower-case relationships.
///
/// The ligature inputs are written as `\u{..}` escapes (U+FB00, U+FB03, U+FB06)
/// so that each heading really contains the single ligature code point its
/// description names, not the equivalent ASCII letters. The test only
/// requires that `check` returns `Ok` for every document.
#[test]
fn test_unicode_case_folding_edge_cases() {
    let rule = MD051LinkFragments::new();
    let case_folding_cases = [
        // Turkish dotted and dotless i.
        ("İstanbul", "Turkish capital I with dot"),
        ("ıstanbul", "Turkish lowercase dotless i"),
        // German sharp s and its upper-case spelling.
        ("Straße", "German sharp s (ß)"),
        ("STRASSE", "German SS uppercase"),
        // Greek letters with tonos.
        ("ΏΝΤΆΣ", "Greek with tonos"),
        ("ώντάς", "Greek lowercase with tonos"),
        // Latin presentation-form ligatures, one code point each.
        ("\u{FB00}", "Latin small ligature ff"),
        ("\u{FB03}", "Latin small ligature ffi"),
        ("\u{FB06}", "Latin small ligature st"),
        // Accented letters in different cases.
        ("CAFÉ", "Uppercase with accents"),
        ("café", "Lowercase with accents"),
        ("Café", "Mixed case with accents"),
    ];
    for (heading, description) in case_folding_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Case folding test failed: {description}");
        println!("✓ Case folding test: {description}");
    }
}
/// Lints headings built from base characters followed by combining marks.
///
/// Cases range from a single mark on one letter to stacked marks, a mark placed
/// between two regional-indicator symbols, and a mark that follows a zero-width
/// space. The test only requires that `check` returns `Ok`.
#[test]
fn test_combining_character_sequences() {
    let rule = MD051LinkFragments::new();
    let combining_cases = [
        // One combining mark on one base letter.
        ("e\u{0301}", "e with combining acute"),
        ("a\u{0308}", "a with combining diaeresis"),
        ("o\u{0303}", "o with combining tilde"),
        // Two marks stacked on the same base letter.
        ("e\u{0301}\u{0308}", "e with acute and diaeresis"),
        ("a\u{0300}\u{0323}", "a with grave and dot below"),
        // Whole words ending in a combining accent.
        ("cafe\u{0301}", "café with combining accent"),
        ("resume\u{0301}", "resumé with combining accent"),
        // Unusual placements.
        ("a\u{0363}\u{0364}\u{0365}", "a with multiple combining marks"),
        ("🇺\u{0301}🇸", "Flag with combining mark (unusual)"),
        ("a\u{200B}\u{0301}", "a with zero-width space and accent"),
    ];
    for &(heading, description) in &combining_cases {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        let outcome = rule.check(&context);
        assert!(outcome.is_ok(), "Combining character test failed: {description}");
        println!("✓ Combining character test: {description}");
    }
}
/// Lints headings that combine several Unicode tricks and bounds the time of each check.
///
/// For every scenario `check` must return `Ok` and the call itself must take
/// less than one second. Building the document and the `LintContext` happens
/// before the timer starts, so only `rule.check` is measured.
#[test]
fn test_complex_unicode_security_scenarios() {
    let rule = MD051LinkFragments::new();
    // The letter "a" followed by 100 combining acute accents.
    let mut combining_bomb = String::from("a");
    combining_bomb.push_str(&"\u{0301}".repeat(100));
    let complex_scenarios = [
        ("safe\u{202E}\u{200B}🎉attack", "Mixed RTL override attack"),
        ("cafe\u{0301}\u{200B}́", "Normalization confusion attack"),
        ("user\u{202E}moc.evil\u{202C}@bank.com", "Domain spoofing attempt"),
        ("Click here:\u{202E}gro.buhtig\u{202C}/malicious", "URL spoofing"),
        (combining_bomb.as_str(), "Combining character bomb"),
        ("раураl.com", "Cyrillic/Latin script mixing"),
        ("ad\u{200B}min@ex\u{200C}ample.com", "Split legitimate text"),
    ];
    let time_limit = std::time::Duration::from_secs(1);
    for &(heading, description) in &complex_scenarios {
        let markdown = format!("# {heading}\n\n[Link](#test)");
        let context = LintContext::new(
            &markdown,
            rumdl_lib::config::MarkdownFlavor::Standard,
            None,
        );
        // Time only the rule invocation.
        let timer = std::time::Instant::now();
        let outcome = rule.check(&context);
        let duration = timer.elapsed();
        assert!(outcome.is_ok(), "Unicode security scenario failed: {description}");
        assert!(
            duration < time_limit,
            "Security scenario took too long: {description} - {duration:?}"
        );
        println!("✓ Security scenario handled: {description} in {duration:?}");
    }
}