use rumdl_lib::lint_context::LintContext;
use rumdl_lib::rule::Rule;
use rumdl_lib::rules::*;
use rumdl_lib::utils::fix_utils::apply_warning_fixes;
/// Verifies that, for Markdown containing emoji, Arabic, Japanese and Chinese
/// text, each listed rule yields the same output from its own `fix` (the CLI
/// path) as from `apply_warning_fixes` applied to its warnings (the LSP path).
///
/// Every fix attached to a warning is additionally required to lie inside the
/// content and to start and end on UTF-8 character boundaries.
#[test]
fn test_unicode_cli_lsp_consistency() {
    // The raw string body is kept flush-left: its leading whitespace would
    // otherwise become part of the Markdown under test.
    let unicode_content = r#"# 🚀 This is a header with emoji
مرحبا بكم في هذا النص العربي
(https://例え.テスト)[Japanese domain link]
Here is some `中文代码` in inline code
"#;
    // Byte length (not char count); fix ranges are compared against it.
    let content_len = unicode_content.len();

    let rules: Vec<Box<dyn Rule>> = vec![
        Box::new(MD009TrailingSpaces::default()),
        Box::new(MD011NoReversedLinks),
        Box::new(MD022BlanksAroundHeadings::new()),
        Box::new(MD034NoBareUrls),
        Box::new(MD047SingleTrailingNewline),
    ];

    for rule in rules.iter() {
        let ctx = LintContext::new(unicode_content, rumdl_lib::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).expect("Rule check should succeed");

        // Produce the fixed text through both code paths and compare them.
        let cli_fixed = rule.fix(&ctx).expect("CLI fix should succeed");
        let lsp_fixed = apply_warning_fixes(unicode_content, &warnings).expect("LSP fix should succeed");
        assert_eq!(
            cli_fixed,
            lsp_fixed,
            "Rule {} produced different CLI vs LSP results for Unicode content:\nCLI: '{}'\nLSP: '{}'",
            rule.name(),
            cli_fixed,
            lsp_fixed
        );

        // Only warnings that actually carry a fix have a range to validate.
        for fix in warnings.iter().filter_map(|warning| warning.fix.as_ref()) {
            let (start, end) = (fix.range.start, fix.range.end);

            assert!(
                start <= content_len,
                "Rule {} fix range start {} exceeds content length {} for Unicode content",
                rule.name(),
                start,
                content_len
            );
            assert!(
                end <= content_len,
                "Rule {} fix range end {} exceeds content length {} for Unicode content",
                rule.name(),
                end,
                content_len
            );
            assert!(
                unicode_content.is_char_boundary(start),
                "Rule {} fix range start {} is not a valid UTF-8 char boundary",
                rule.name(),
                start
            );
            assert!(
                unicode_content.is_char_boundary(end),
                "Rule {} fix range end {} is not a valid UTF-8 char boundary",
                rule.name(),
                end
            );
        }
    }
}
/// Runs `MD047SingleTrailingNewline` over short strings that exercise awkward
/// Unicode shapes (combining marks, right-to-left text, CJK, ZWJ emoji
/// sequences, mathematical sub/superscripts).
///
/// For each case both `check` and `fix` must succeed. When `check` reports
/// warnings, the result of `fix` (CLI path) must equal the result of
/// `apply_warning_fixes` on those warnings (LSP path).
#[test]
fn test_complex_unicode_scenarios() {
    // Invisible code points are written as escapes so the cases provably
    // contain what their descriptions claim: a raw combining mark or
    // zero-width joiner cannot be told apart from its absence when reading
    // the source, and is easily lost to editor or VCS normalization.
    let test_cases = [
        // Trailing "é" is `e` followed by U+0301 COMBINING ACUTE ACCENT.
        ("Café with combining e\u{0301}", "combining acute accent"),
        ("مرحبا بكم في هذا النص العربي", "Arabic text"),
        ("Hello世界こんにちは", "Mixed Japanese/English"),
        // Family emoji joined with U+200D ZERO WIDTH JOINER.
        ("👨\u{200D}👩\u{200D}👧\u{200D}👦 Family emoji", "family with ZWJ"),
        ("这是中文。", "Chinese with CJK period"),
        ("∑ᵢ₌₁ⁿ xᵢ = total", "Math symbols with subscripts/superscripts"),
    ];

    let rule = MD047SingleTrailingNewline;

    for (content, description) in test_cases {
        let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);

        // Unwrap with the underlying error included, so a failure reports
        // why the rule failed and not merely that it did.
        let warnings = rule.check(&ctx).unwrap_or_else(|err| {
            panic!(
                "Rule {} failed to check content with {}: '{}': {:?}",
                rule.name(),
                description,
                content,
                err
            )
        });
        let cli_fixed = rule.fix(&ctx).unwrap_or_else(|err| {
            panic!(
                "Rule {} failed to fix content with {}: '{}': {:?}",
                rule.name(),
                description,
                content,
                err
            )
        });

        // With no warnings there is nothing for the LSP path to apply.
        if warnings.is_empty() {
            continue;
        }

        let lsp_fixed = apply_warning_fixes(content, &warnings).unwrap_or_else(|err| {
            panic!("LSP fix failed for content with {description}: '{content}': {err:?}")
        });
        assert_eq!(
            cli_fixed, lsp_fixed,
            "CLI/LSP inconsistency for content with {description}: '{content}'"
        );
    }
}
/// Checks that every fix range reported by the listed rules for a
/// multi-byte-heavy document (emoji, Chinese, Japanese, Korean) is safe to
/// apply as a byte range.
///
/// For each fix the test asserts that:
/// * both endpoints are within the content's byte length,
/// * both endpoints fall on UTF-8 character boundaries,
/// * `String::replace_range` with that range does not panic, and
/// * splicing the replacement into the raw bytes yields valid UTF-8 that
///   matches the string-level result.
#[test]
fn test_unicode_byte_boundary_validation() {
    let unicode_content = "# 🚀🎉🔥 Unicode Header\n中文内容 with 日本語\n```\n코드 블록\n```\n";
    // Byte length (not char count); fix ranges are compared against it.
    let content_len = unicode_content.len();

    let rules: Vec<Box<dyn Rule>> = vec![
        Box::new(MD022BlanksAroundHeadings::new()),
        Box::new(MD031BlanksAroundFences::default()),
        Box::new(MD047SingleTrailingNewline),
    ];

    for rule in &rules {
        let ctx = LintContext::new(unicode_content, rumdl_lib::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).expect("Rule check should succeed");

        // Only warnings that actually carry a fix have a range to validate.
        for fix in warnings.iter().filter_map(|warning| warning.fix.as_ref()) {
            assert!(
                fix.range.start <= content_len,
                "Fix range start exceeds content length"
            );
            assert!(
                fix.range.end <= content_len,
                "Fix range end exceeds content length"
            );
            assert!(
                unicode_content.is_char_boundary(fix.range.start),
                "Fix range start {} is not on UTF-8 char boundary for rule {}",
                fix.range.start,
                rule.name()
            );
            assert!(
                unicode_content.is_char_boundary(fix.range.end),
                "Fix range end {} is not on UTF-8 char boundary for rule {}",
                fix.range.end,
                rule.name()
            );

            // `replace_range` panics on an out-of-bounds, inverted or
            // non-boundary range; catch that so the failure names the rule.
            let mut test_content = unicode_content.to_string();
            let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                test_content.replace_range(fix.range.clone(), &fix.replacement);
                test_content
            }));
            assert!(
                result.is_ok(),
                "Fix replacement panicked for rule {} with range {:?}",
                rule.name(),
                fix.range
            );
            let fixed_content = result.unwrap();

            // A `String` is valid UTF-8 by construction, so inspecting
            // `fixed_content` itself could never fail. Instead, repeat the
            // splice on the raw bytes, where nothing enforces validity, and
            // decode the result explicitly.
            let mut spliced_bytes = unicode_content.as_bytes().to_vec();
            drop(spliced_bytes.splice(fix.range.clone(), fix.replacement.bytes()));
            let decoded = String::from_utf8(spliced_bytes);
            assert!(
                decoded.is_ok(),
                "Fix produced invalid UTF-8 for rule {}",
                rule.name()
            );
            assert_eq!(
                decoded.unwrap(),
                fixed_content,
                "Byte-level and string-level fix application disagree for rule {}",
                rule.name()
            );
        }
    }
}