use std::collections::{BTreeMap, HashSet};
use crate::checker::Checker;
use crate::diagnostic::{Diagnostic, Severity};
use crate::po::entry::Entry;
use crate::po::message::Message;
use crate::rules::rule::RuleChecker;
/// Rule `unicode-ctrl`: reports Unicode control or format characters that
/// occur in a translation (`msgstr`) but not in its source string (`msgid`),
/// unless the character is accepted for the language being checked.
pub struct UnicodeCtrlRule;
impl RuleChecker for UnicodeCtrlRule {
    /// Short identifier of the rule.
    fn name(&self) -> &'static str {
        "unicode-ctrl"
    }

    /// One-sentence, human-readable summary of what the rule looks for.
    fn description(&self) -> &'static str {
        "Check for stray Unicode control or format characters in translation."
    }

    /// Always `true` for this rule.
    fn is_default(&self) -> bool {
        true
    }

    /// Always `true` for this rule.
    fn is_check(&self) -> bool {
        true
    }

    /// Looks for control/format characters that are present in `msgstr` but
    /// absent from `msgid`.
    ///
    /// A character is skipped when it also appears somewhere in `msgid`, or
    /// when [`is_legitimate_for_locale`] accepts it for the checker's language
    /// code. Every remaining distinct character yields at most one diagnostic
    /// (emitted in ascending code point order) that highlights each byte range
    /// where the character occurs in `msgstr`.
    fn check_msg(
        &self,
        checker: &Checker,
        _entry: &Entry,
        msgid: &Message,
        msgstr: &Message,
    ) -> Vec<Diagnostic> {
        let language = checker.language_code();

        // Control characters the source string itself uses are never "extra".
        let source_ctrls: HashSet<char> = msgid
            .value
            .chars()
            .filter(|&ch| is_ctrl_char(ch))
            .collect();

        // Byte spans of each offending character; the ordered map keeps the
        // resulting diagnostics sorted by code point.
        let mut spans_by_char: BTreeMap<char, Vec<(usize, usize)>> = BTreeMap::new();
        for (start, ch) in msgstr.value.char_indices() {
            if !is_ctrl_char(ch) {
                continue;
            }
            if source_ctrls.contains(&ch) || is_legitimate_for_locale(ch, language) {
                continue;
            }
            let end = start + ch.len_utf8();
            spans_by_char.entry(ch).or_default().push((start, end));
        }

        let mut diagnostics = Vec::new();
        for (ch, spans) in spans_by_char {
            let text = format!(
                "extra control character U+{:04X} ({})",
                ch as u32,
                ctrl_char_name(ch),
            );
            // `new_diag` may decline to produce a diagnostic; in that case
            // nothing is reported for this character.
            if let Some(diag) = self.new_diag(checker, ctrl_char_severity(ch), text) {
                diagnostics.push(diag.with_msgs_hl(msgid, [], msgstr, spans));
            }
        }
        diagnostics
    }
}
fn ctrl_char_severity(c: char) -> Severity {
match c as u32 {
0x00 => Severity::Error,
_ => Severity::Warning,
}
}
/// Returns `true` when the control/format character `c` is accepted in
/// translations for `language_code`.
///
/// The comparison is an exact, case-sensitive match against fixed language
/// lists, so the caller is expected to pass a bare language code (for example
/// `"fa"`, not `"fa_IR"`). Only the zero-width space/joiners, the directional
/// marks, the embeddings with their pop, and the isolates can ever be
/// accepted; any other character yields `false` for every language.
fn is_legitimate_for_locale(c: char, language_code: &str) -> bool {
    // Languages accepted for all three of U+200B, U+200C and U+200D.
    const ZERO_WIDTH_COMMON: &[&str] = &[
        "hi", "mr", "ne", "sa", "kok", "bn", "as", "ta", "te", "kn", "ml", "si", "gu", "or", "pa",
    ];
    // Additional languages accepted for ZERO WIDTH SPACE only.
    const ZWSP_ONLY: &[&str] = &["km", "my", "lo", "th", "bo", "jv", "shn"];
    // Additional languages accepted for both joiner controls (U+200C, U+200D).
    const JOINER_EXTRA: &[&str] = &["ar", "fa", "ur", "ps", "ckb"];
    // Languages accepted for directional marks, embeddings and isolates.
    const BIDI: &[&str] = &["ar", "he", "fa", "ur", "yi", "ckb", "ps", "ug", "dv"];

    match c {
        '\u{200B}' => {
            ZWSP_ONLY.contains(&language_code) || ZERO_WIDTH_COMMON.contains(&language_code)
        }
        // "yi" is accepted for the non-joiner but not for the joiner.
        '\u{200C}' => {
            language_code == "yi"
                || JOINER_EXTRA.contains(&language_code)
                || ZERO_WIDTH_COMMON.contains(&language_code)
        }
        '\u{200D}' => {
            JOINER_EXTRA.contains(&language_code) || ZERO_WIDTH_COMMON.contains(&language_code)
        }
        // The overrides U+202D / U+202E are intentionally not part of this arm.
        '\u{200E}' | '\u{200F}' | '\u{202A}'..='\u{202C}' | '\u{2066}'..='\u{2069}' => {
            BIDI.contains(&language_code)
        }
        _ => false,
    }
}
/// Returns `true` when `c` belongs to the set of control or format characters
/// this rule looks for.
///
/// TAB, LINE FEED and CARRIAGE RETURN are deliberately not part of the set.
fn is_ctrl_char(c: char) -> bool {
    match c {
        // The three C0 controls that are tolerated.
        '\t' | '\n' | '\r' => false,
        // Every other C0 control, then DELETE together with the C1 controls.
        '\u{0000}'..='\u{001F}' | '\u{007F}'..='\u{009F}' => true,
        // SOFT HYPHEN.
        '\u{00AD}' => true,
        // Zero-width space/joiners and the LRM/RLM marks.
        '\u{200B}'..='\u{200F}' => true,
        // LINE/PARAGRAPH SEPARATOR followed by the bidi embeddings and overrides.
        '\u{2028}'..='\u{202E}' => true,
        // WORD JOINER and invisible operators; U+2065 is skipped; then the isolates.
        '\u{2060}'..='\u{2064}' | '\u{2066}'..='\u{2069}' => true,
        // ZERO WIDTH NO-BREAK SPACE, interlinear annotations, REPLACEMENT CHARACTER.
        '\u{FEFF}' | '\u{FFF9}'..='\u{FFFB}' | '\u{FFFD}' => true,
        _ => false,
    }
}
/// Returns the label used in diagnostics for the control/format character `c`.
///
/// Characters with a dedicated entry get their specific name. Remaining code
/// points in U+0001..=U+001F are labelled `"C0 CONTROL"`, those in
/// U+0080..=U+009F `"C1 CONTROL"`, and anything else `"FORMAT CHARACTER"`.
fn ctrl_char_name(c: char) -> &'static str {
    match c {
        // Individually named C0 controls and DELETE.
        '\0' => "NULL",
        '\u{07}' => "BELL",
        '\u{08}' => "BACKSPACE",
        '\u{0B}' => "VERTICAL TAB",
        '\u{0C}' => "FORM FEED",
        '\u{1B}' => "ESCAPE",
        '\u{7F}' => "DELETE",

        '\u{AD}' => "SOFT HYPHEN",

        // Zero-width characters and directional marks.
        '\u{200B}' => "ZERO WIDTH SPACE",
        '\u{200C}' => "ZERO WIDTH NON-JOINER",
        '\u{200D}' => "ZERO WIDTH JOINER",
        '\u{200E}' => "LEFT-TO-RIGHT MARK",
        '\u{200F}' => "RIGHT-TO-LEFT MARK",

        // Line and paragraph separators.
        '\u{2028}' => "LINE SEPARATOR",
        '\u{2029}' => "PARAGRAPH SEPARATOR",

        // Bidirectional embeddings and overrides.
        '\u{202A}' => "LEFT-TO-RIGHT EMBEDDING",
        '\u{202B}' => "RIGHT-TO-LEFT EMBEDDING",
        '\u{202C}' => "POP DIRECTIONAL FORMATTING",
        '\u{202D}' => "LEFT-TO-RIGHT OVERRIDE",
        '\u{202E}' => "RIGHT-TO-LEFT OVERRIDE",

        // Word joiner and invisible operators.
        '\u{2060}' => "WORD JOINER",
        '\u{2061}' => "FUNCTION APPLICATION",
        '\u{2062}' => "INVISIBLE TIMES",
        '\u{2063}' => "INVISIBLE SEPARATOR",
        '\u{2064}' => "INVISIBLE PLUS",

        // Bidirectional isolates.
        '\u{2066}' => "LEFT-TO-RIGHT ISOLATE",
        '\u{2067}' => "RIGHT-TO-LEFT ISOLATE",
        '\u{2068}' => "FIRST STRONG ISOLATE",
        '\u{2069}' => "POP DIRECTIONAL ISOLATE",

        // Specials.
        '\u{FEFF}' => "ZERO WIDTH NO-BREAK SPACE",
        '\u{FFF9}' => "INTERLINEAR ANNOTATION ANCHOR",
        '\u{FFFA}' => "INTERLINEAR ANNOTATION SEPARATOR",
        '\u{FFFB}' => "INTERLINEAR ANNOTATION TERMINATOR",
        '\u{FFFD}' => "REPLACEMENT CHARACTER",

        // Generic labels; these must stay below the named entries above so
        // that the specific names win.
        '\u{01}'..='\u{1F}' => "C0 CONTROL",
        '\u{80}'..='\u{9F}' => "C1 CONTROL",
        _ => "FORMAT CHARACTER",
    }
}
// Tests for the `unicode-ctrl` rule: end-to-end checks that run the rule over
// small PO snippets, plus direct unit tests of the helper functions defined in
// the parent module.
#[cfg(test)]
mod tests {
use super::*;
use crate::{diagnostic::Diagnostic, rules::rule::Rules};
// Runs only `UnicodeCtrlRule` over the given PO content and returns the
// diagnostics collected by the checker.
fn check(content: &str) -> Vec<Diagnostic> {
let mut checker = Checker::new(content.as_bytes());
let rules = Rules::new(vec![Box::new(UnicodeCtrlRule {})]);
checker.do_all_checks(&rules);
checker.diagnostics
}
// Prepends a PO header entry declaring `Language: {lang}` and a UTF-8
// content type to `body`, then runs `check` on the result.
fn check_with_lang(lang: &str, body: &str) -> Vec<Diagnostic> {
let content = format!(
"msgid \"\"\n\
msgstr \"\"\n\
\"Language: {lang}\\n\"\n\
\"Content-Type: text/plain; charset=UTF-8\\n\"\n\
\n\
{body}"
);
check(&content)
}
// Translations without any control character produce no diagnostics.
#[test]
fn test_clean_translation_is_silent() {
let diags = check(
r#"
msgid "Save"
msgstr "Save"
msgid "Hello, world"
msgstr "Bonjour, le monde"
"#,
);
assert!(diags.is_empty());
}
// U+00A0 and U+202F are not in the control-character set and must not be
// reported.
#[test]
fn test_visible_spaces_are_not_flagged() {
let diags = check(
"msgid \"price: 5\"\n\
msgstr \"prix\u{00A0}: 5\"\n\
\n\
msgid \"width: 5\"\n\
msgstr \"largeur\u{202F}: 5\"\n",
);
assert!(diags.is_empty(), "got unexpected diagnostics: {diags:?}");
}
// A U+200B present only in msgstr yields one warning with the exact message.
#[test]
fn test_zero_width_space_in_translation_only_is_flagged() {
let diags = check("msgid \"Save\"\nmsgstr \"Sa\u{200B}ve\"\n");
assert_eq!(diags.len(), 1);
let d = &diags[0];
assert_eq!(d.severity, Severity::Warning);
assert_eq!(
d.message,
"extra control character U+200B (ZERO WIDTH SPACE)"
);
}
// U+200D appears in both msgid and msgstr here, so nothing is reported.
#[test]
fn test_zero_width_joiner_in_both_is_not_flagged() {
let diags = check(
"msgid \"Family: \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\"\n\
msgstr \"Famille : \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\"\n",
);
assert!(
diags.is_empty(),
"ZWJ present in both sides should not fire"
);
}
// U+200D present only in msgstr is reported.
#[test]
fn test_zwj_only_in_translation_is_flagged() {
let diags = check("msgid \"hello world\"\nmsgstr \"bonjour\u{200D}monde\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(
diags[0].message,
"extra control character U+200D (ZERO WIDTH JOINER)"
);
}
// U+202E (right-to-left override) in msgstr is reported.
#[test]
fn test_bidi_override_is_flagged() {
let diags = check("msgid \"Click Save\"\nmsgstr \"Click \u{202E}evaS\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(
diags[0].message,
"extra control character U+202E (RIGHT-TO-LEFT OVERRIDE)"
);
}
// U+00AD (soft hyphen) in msgstr is reported.
#[test]
fn test_soft_hyphen_is_flagged() {
let diags = check("msgid \"installation\"\nmsgstr \"instal\u{00AD}lation\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(
diags[0].message,
"extra control character U+00AD (SOFT HYPHEN)"
);
}
// U+FEFF at the start of msgstr is reported.
#[test]
fn test_bom_in_translation_is_flagged() {
let diags = check("msgid \"x\"\nmsgstr \"\u{FEFF}x\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(
diags[0].message,
"extra control character U+FEFF (ZERO WIDTH NO-BREAK SPACE)"
);
}
// U+FFFD in msgstr is reported.
#[test]
fn test_replacement_character_is_flagged() {
let diags = check("msgid \"hello\"\nmsgstr \"hell\u{FFFD}\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(
diags[0].message,
"extra control character U+FFFD (REPLACEMENT CHARACTER)"
);
}
// Two U+001B occurrences are grouped into a single diagnostic.
#[test]
fn test_c0_escape_is_flagged() {
let diags = check("msgid \"GET\"\nmsgstr \"\u{001B}[0;32mGET\u{001B}[0m\"\n");
assert_eq!(diags.len(), 1);
assert_eq!(diags[0].message, "extra control character U+001B (ESCAPE)");
}
// The PO text carries the escape sequences `\t`, `\n` and `\r`; none of
// them may be reported.
#[test]
fn test_tab_lf_cr_are_not_flagged() {
let diags = check("msgid \"a\"\nmsgstr \"a\\t\\n\\r\"\n");
assert!(diags.is_empty());
}
// U+FE0F (a variation selector) is not in the control-character set.
#[test]
fn test_variation_selectors_are_not_flagged() {
let diags = check("msgid \"\u{2764}\"\nmsgstr \"\u{2764}\u{FE0F}\"\n");
assert!(diags.is_empty());
}
// One diagnostic per distinct character, expected in ascending code point
// order regardless of the order of appearance in msgstr.
#[test]
fn test_multiple_distinct_strays_yield_one_diagnostic_each() {
let diags = check("msgid \"a\"\nmsgstr \"a\u{200B}\u{202E}\u{00AD}\"\n");
assert_eq!(diags.len(), 3);
assert!(diags[0].message.contains("U+00AD"));
assert!(diags[1].message.contains("U+200B"));
assert!(diags[2].message.contains("U+202E"));
}
// Repeated occurrences of one character share a diagnostic. The diagnostic
// is expected to have three lines: the first without highlights and the
// third with both highlights (presumably the msgstr line - confirm against
// `with_msgs_hl`).
#[test]
fn test_repeated_same_stray_is_one_diagnostic_with_multiple_highlights() {
let diags = check("msgid \"abc\"\nmsgstr \"a\u{200B}b\u{200B}c\"\n");
assert_eq!(diags.len(), 1);
let d = &diags[0];
assert_eq!(
d.message,
"extra control character U+200B (ZERO WIDTH SPACE)"
);
assert_eq!(d.lines.len(), 3);
assert!(d.lines[0].highlights.is_empty());
assert_eq!(d.lines[2].highlights.len(), 2);
}
// A `noqa:unicode-ctrl` flag on the entry silences the rule.
#[test]
fn test_noqa_suppresses_diagnostic() {
let diags = check("#, noqa:unicode-ctrl\nmsgid \"Save\"\nmsgstr \"Sa\u{200B}ve\"\n");
assert!(diags.is_empty());
}
// U+200B is accepted when the header language is `km`.
#[test]
fn test_zwsp_ignored_for_khmer() {
let diags = check_with_lang("km", "msgid \"hello\"\nmsgstr \"hel\u{200B}lo\"\n");
assert!(diags.is_empty(), "ZWSP should be exempt for km");
}
// U+200B is reported when the header language is `de`.
#[test]
fn test_zwsp_flagged_for_german() {
let diags = check_with_lang("de", "msgid \"hello\"\nmsgstr \"hel\u{200B}lo\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200B"));
}
// Without any header entry, no language exemption applies.
#[test]
fn test_zwsp_flagged_without_language_header() {
let diags = check("msgid \"hi\"\nmsgstr \"h\u{200B}i\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200B"));
}
// U+200B is accepted for `my`, `lo`, `th` and `bo`.
#[test]
fn test_zwsp_ignored_for_burmese_and_lao() {
for lang in &["my", "lo", "th", "bo"] {
let diags = check_with_lang(lang, "msgid \"hi\"\nmsgstr \"h\u{200B}i\"\n");
assert!(diags.is_empty(), "ZWSP should be exempt for {lang}");
}
}
// U+200C is accepted for the listed language codes.
#[test]
fn test_zwnj_ignored_for_persian_and_indic() {
for lang in &["fa", "ur", "hi", "bn", "ta", "te", "kn", "ml", "si"] {
let diags = check_with_lang(lang, "msgid \"hi\"\nmsgstr \"h\u{200C}i\"\n");
assert!(diags.is_empty(), "ZWNJ should be exempt for {lang}");
}
}
// U+200C is reported when the header language is `de`.
#[test]
fn test_zwnj_flagged_for_german() {
let diags = check_with_lang("de", "msgid \"hi\"\nmsgstr \"h\u{200C}i\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200C"));
}
// U+200D is accepted when the header language is `hi`.
#[test]
fn test_zwj_ignored_for_indic() {
let diags = check_with_lang("hi", "msgid \"a\"\nmsgstr \"a\u{200D}b\"\n");
assert!(diags.is_empty());
}
// U+200D is reported when the header language is `de`.
#[test]
fn test_zwj_flagged_for_german() {
let diags = check_with_lang("de", "msgid \"a\"\nmsgstr \"a\u{200D}b\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200D"));
}
// U+200E and U+200F are accepted for the listed language codes.
#[test]
fn test_lrm_rlm_ignored_for_rtl_languages() {
for lang in &["ar", "he", "fa", "ur", "ckb"] {
let diags = check_with_lang(lang, "msgid \"x\"\nmsgstr \"x\u{200E}\u{200F}\"\n");
assert!(diags.is_empty(), "LRM/RLM should be exempt for {lang}");
}
}
// U+200E is reported when the header language is `de`.
#[test]
fn test_lrm_flagged_for_german() {
let diags = check_with_lang("de", "msgid \"x\"\nmsgstr \"x\u{200E}\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200E"));
}
// U+202A, U+202B and U+202C are accepted when the header language is `ar`.
#[test]
fn test_bidi_embeddings_ignored_for_rtl() {
let diags = check_with_lang("ar", "msgid \"x\"\nmsgstr \"x\u{202A}\u{202B}\u{202C}\"\n");
assert!(diags.is_empty());
}
// The overrides U+202E and U+202D have no language exemption at all.
#[test]
fn test_rlo_lro_always_flagged_even_in_rtl() {
let diags = check_with_lang("ar", "msgid \"x\"\nmsgstr \"x\u{202E}\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+202E"));
let diags = check_with_lang("he", "msgid \"x\"\nmsgstr \"x\u{202D}\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+202D"));
}
// A `fa_IR` header is expected to get the same exemption as `fa`.
#[test]
fn test_country_variant_is_stripped_for_lookup() {
let diags = check_with_lang("fa_IR", "msgid \"hi\"\nmsgstr \"h\u{200C}i\"\n");
assert!(diags.is_empty());
}
// U+00AD is reported when the header language is `de`.
#[test]
fn test_soft_hyphen_not_exempt_anywhere() {
let diags = check_with_lang(
"de",
"msgid \"installation\"\nmsgstr \"instal\u{00AD}lation\"\n",
);
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+00AD"));
}
// Direct unit test of the exemption table: accepted pairs first, then
// rejected pairs (including characters that are never exempt).
#[test]
fn test_is_legitimate_for_locale() {
assert!(is_legitimate_for_locale('\u{200B}', "km"));
assert!(is_legitimate_for_locale('\u{200C}', "fa"));
assert!(is_legitimate_for_locale('\u{200C}', "hi"));
assert!(is_legitimate_for_locale('\u{200D}', "ta"));
assert!(is_legitimate_for_locale('\u{200E}', "ar"));
assert!(is_legitimate_for_locale('\u{200F}', "he"));
assert!(is_legitimate_for_locale('\u{202B}', "fa"));
assert!(!is_legitimate_for_locale('\u{200B}', "de"));
assert!(!is_legitimate_for_locale('\u{200C}', "fr"));
assert!(!is_legitimate_for_locale('\u{200E}', "en"));
assert!(!is_legitimate_for_locale('\u{202D}', "ar"));
assert!(!is_legitimate_for_locale('\u{202E}', "he"));
assert!(!is_legitimate_for_locale('\u{0000}', "km"));
assert!(!is_legitimate_for_locale('\u{FEFF}', "ar"));
}
// Every individually named character maps to its expected label.
#[test]
fn test_ctrl_char_name_returns_documented_names() {
let cases: &[(char, &str)] = &[
('\u{0000}', "NULL"),
('\u{0007}', "BELL"),
('\u{0008}', "BACKSPACE"),
('\u{000B}', "VERTICAL TAB"),
('\u{000C}', "FORM FEED"),
('\u{001B}', "ESCAPE"),
('\u{007F}', "DELETE"),
('\u{00AD}', "SOFT HYPHEN"),
('\u{200B}', "ZERO WIDTH SPACE"),
('\u{200C}', "ZERO WIDTH NON-JOINER"),
('\u{200D}', "ZERO WIDTH JOINER"),
('\u{200E}', "LEFT-TO-RIGHT MARK"),
('\u{200F}', "RIGHT-TO-LEFT MARK"),
('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"),
('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"),
('\u{202C}', "POP DIRECTIONAL FORMATTING"),
('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"),
('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"),
('\u{2028}', "LINE SEPARATOR"),
('\u{2029}', "PARAGRAPH SEPARATOR"),
('\u{2060}', "WORD JOINER"),
('\u{2061}', "FUNCTION APPLICATION"),
('\u{2062}', "INVISIBLE TIMES"),
('\u{2063}', "INVISIBLE SEPARATOR"),
('\u{2064}', "INVISIBLE PLUS"),
('\u{2066}', "LEFT-TO-RIGHT ISOLATE"),
('\u{2067}', "RIGHT-TO-LEFT ISOLATE"),
('\u{2068}', "FIRST STRONG ISOLATE"),
('\u{2069}', "POP DIRECTIONAL ISOLATE"),
('\u{FEFF}', "ZERO WIDTH NO-BREAK SPACE"),
('\u{FFF9}', "INTERLINEAR ANNOTATION ANCHOR"),
('\u{FFFA}', "INTERLINEAR ANNOTATION SEPARATOR"),
('\u{FFFB}', "INTERLINEAR ANNOTATION TERMINATOR"),
('\u{FFFD}', "REPLACEMENT CHARACTER"),
];
for (c, expected) in cases {
assert_eq!(
ctrl_char_name(*c),
*expected,
"wrong name for U+{:04X}",
*c as u32,
);
}
}
// C0 code points without a dedicated name fall back to "C0 CONTROL".
#[test]
fn test_ctrl_char_name_c0_fallback_for_unnamed_controls() {
for cp in [0x01_u32, 0x05, 0x10, 0x14, 0x1F] {
let c = char::from_u32(cp).expect("valid char");
assert_eq!(ctrl_char_name(c), "C0 CONTROL", "U+{cp:04X}");
}
}
// Code points in U+0080..=U+009F are labelled "C1 CONTROL".
#[test]
fn test_ctrl_char_name_c1_fallback() {
for cp in [0x80_u32, 0x90, 0x9F] {
let c = char::from_u32(cp).expect("valid char");
assert_eq!(ctrl_char_name(c), "C1 CONTROL", "U+{cp:04X}");
}
}
// Anything without a specific or range entry gets "FORMAT CHARACTER".
#[test]
fn test_ctrl_char_name_default_fallback_for_out_of_range_input() {
assert_eq!(ctrl_char_name('a'), "FORMAT CHARACTER");
assert_eq!(ctrl_char_name('\u{2065}'), "FORMAT CHARACTER");
}
// With language `de`, U+202A, U+202B and U+202C are each reported, in code
// point order.
#[test]
fn test_lre_rle_pdf_flagged_in_ltr_locale() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{202A}y\u{202B}z\u{202C}\"\n",
);
assert_eq!(diags.len(), 3);
assert!(
diags[0]
.message
.contains("U+202A (LEFT-TO-RIGHT EMBEDDING)")
);
assert!(
diags[1]
.message
.contains("U+202B (RIGHT-TO-LEFT EMBEDDING)")
);
assert!(
diags[2]
.message
.contains("U+202C (POP DIRECTIONAL FORMATTING)")
);
}
// U+202D is reported when the header language is `en`.
#[test]
fn test_lro_flagged_in_ltr_locale() {
let diags = check_with_lang("en", "msgid \"x\"\nmsgstr \"x\u{202D}y\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+202D (LEFT-TO-RIGHT OVERRIDE)"));
}
// U+200F is reported when the header language is `en`.
#[test]
fn test_rlm_flagged_in_ltr_locale() {
let diags = check_with_lang("en", "msgid \"x\"\nmsgstr \"x\u{200F}y\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+200F (RIGHT-TO-LEFT MARK)"));
}
// U+2028 and U+2029 are each reported.
#[test]
fn test_line_and_paragraph_separators_flagged() {
let diags = check_with_lang("de", "msgid \"x\"\nmsgstr \"x\u{2028}y\u{2029}z\"\n");
assert_eq!(diags.len(), 2);
assert!(diags[0].message.contains("U+2028 (LINE SEPARATOR)"));
assert!(diags[1].message.contains("U+2029 (PARAGRAPH SEPARATOR)"));
}
// U+2060 through U+2064 are each reported with their own name.
#[test]
fn test_word_joiner_and_invisible_operators_flagged() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{2060}\u{2061}\u{2062}\u{2063}\u{2064}\"\n",
);
assert_eq!(diags.len(), 5);
let messages: Vec<&str> = diags.iter().map(|d| d.message.as_ref()).collect();
assert!(messages.iter().any(|m| m.contains("U+2060 (WORD JOINER)")));
assert!(
messages
.iter()
.any(|m| m.contains("U+2061 (FUNCTION APPLICATION)"))
);
assert!(
messages
.iter()
.any(|m| m.contains("U+2062 (INVISIBLE TIMES)"))
);
assert!(
messages
.iter()
.any(|m| m.contains("U+2063 (INVISIBLE SEPARATOR)"))
);
assert!(
messages
.iter()
.any(|m| m.contains("U+2064 (INVISIBLE PLUS)"))
);
}
// With language `de`, the four isolates U+2066..=U+2069 are each reported.
#[test]
fn test_bidi_isolates_flagged_in_ltr_locale() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{2066}\u{2067}\u{2068}\u{2069}\"\n",
);
assert_eq!(diags.len(), 4);
assert!(diags[0].message.contains("U+2066 (LEFT-TO-RIGHT ISOLATE)"));
assert!(diags[1].message.contains("U+2067 (RIGHT-TO-LEFT ISOLATE)"));
assert!(diags[2].message.contains("U+2068 (FIRST STRONG ISOLATE)"));
assert!(
diags[3]
.message
.contains("U+2069 (POP DIRECTIONAL ISOLATE)")
);
}
// With language `ar`, the same isolates are accepted.
#[test]
fn test_bidi_isolates_ignored_for_rtl() {
let diags = check_with_lang(
"ar",
"msgid \"x\"\nmsgstr \"x\u{2066}\u{2067}\u{2068}\u{2069}\"\n",
);
assert!(diags.is_empty());
}
// U+FFF9, U+FFFA and U+FFFB are each reported.
#[test]
fn test_interlinear_annotations_flagged() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{FFF9}a\u{FFFA}b\u{FFFB}\"\n",
);
assert_eq!(diags.len(), 3);
assert!(
diags[0]
.message
.contains("U+FFF9 (INTERLINEAR ANNOTATION ANCHOR)")
);
assert!(
diags[1]
.message
.contains("U+FFFA (INTERLINEAR ANNOTATION SEPARATOR)")
);
assert!(
diags[2]
.message
.contains("U+FFFB (INTERLINEAR ANNOTATION TERMINATOR)")
);
}
// U+0000 is the only character reported with `Severity::Error`.
#[test]
fn test_null_byte_in_translation_is_flagged() {
let diags = check_with_lang("de", "msgid \"x\"\nmsgstr \"a\u{0000}b\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+0000 (NULL)"));
assert_eq!(diags[0].severity, Severity::Error);
}
// Direct unit test of the severity mapping.
#[test]
fn test_ctrl_char_severity() {
assert_eq!(ctrl_char_severity('\u{0000}'), Severity::Error);
assert_eq!(ctrl_char_severity('\u{0007}'), Severity::Warning);
assert_eq!(ctrl_char_severity('\u{200B}'), Severity::Warning);
assert_eq!(ctrl_char_severity('\u{202E}'), Severity::Warning);
assert_eq!(ctrl_char_severity('\u{FEFF}'), Severity::Warning);
assert_eq!(ctrl_char_severity('\u{FFFD}'), Severity::Warning);
}
// BELL, BACKSPACE, VERTICAL TAB and FORM FEED are each reported by name.
#[test]
fn test_named_c0_controls_flagged() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{0007}y\u{0008}z\u{000B}w\u{000C}\"\n",
);
assert_eq!(diags.len(), 4);
let messages: Vec<&str> = diags.iter().map(|d| d.message.as_ref()).collect();
assert!(messages.iter().any(|m| m.contains("U+0007 (BELL)")));
assert!(messages.iter().any(|m| m.contains("U+0008 (BACKSPACE)")));
assert!(messages.iter().any(|m| m.contains("U+000B (VERTICAL TAB)")));
assert!(messages.iter().any(|m| m.contains("U+000C (FORM FEED)")));
}
// A C0 control without a dedicated name is reported with the generic label.
#[test]
fn test_unnamed_c0_control_falls_through_to_generic_label() {
let diags = check_with_lang("de", "msgid \"x\"\nmsgstr \"x\u{0001}y\"\n");
assert_eq!(diags.len(), 1);
assert!(diags[0].message.contains("U+0001 (C0 CONTROL)"));
}
// DELETE and the C1 controls U+0080 / U+009F are each reported.
#[test]
fn test_del_and_c1_controls_flagged() {
let diags = check_with_lang(
"de",
"msgid \"x\"\nmsgstr \"x\u{007F}y\u{0080}z\u{009F}\"\n",
);
assert_eq!(diags.len(), 3);
let messages: Vec<&str> = diags.iter().map(|d| d.message.as_ref()).collect();
assert!(messages.iter().any(|m| m.contains("U+007F (DELETE)")));
assert!(messages.iter().any(|m| m.contains("U+0080 (C1 CONTROL)")));
assert!(messages.iter().any(|m| m.contains("U+009F (C1 CONTROL)")));
}
// Direct unit test of the predicate: members of the set first, then
// characters that must stay outside it (whitespace, visible spaces, a
// variation selector, and ordinary letters).
#[test]
fn test_is_ctrl_char_predicate() {
assert!(is_ctrl_char('\u{0000}'));
assert!(is_ctrl_char('\u{0007}'));
assert!(is_ctrl_char('\u{001B}'));
assert!(is_ctrl_char('\u{007F}'));
assert!(is_ctrl_char('\u{00AD}'));
assert!(is_ctrl_char('\u{200B}'));
assert!(is_ctrl_char('\u{200E}'));
assert!(is_ctrl_char('\u{202E}'));
assert!(is_ctrl_char('\u{2028}'));
assert!(is_ctrl_char('\u{2060}'));
assert!(is_ctrl_char('\u{FEFF}'));
assert!(is_ctrl_char('\u{FFFD}'));
assert!(!is_ctrl_char('\t'));
assert!(!is_ctrl_char('\n'));
assert!(!is_ctrl_char('\r'));
assert!(!is_ctrl_char(' '));
assert!(!is_ctrl_char('\u{00A0}')); assert!(!is_ctrl_char('\u{202F}')); assert!(!is_ctrl_char('\u{2009}')); assert!(!is_ctrl_char('\u{FE0F}'));
assert!(!is_ctrl_char('a'));
assert!(!is_ctrl_char('é'));
assert!(!is_ctrl_char('\u{D55C}')); }
}