use std::borrow::Cow;
use crate::checker::Checker;
use crate::diagnostic::{Diagnostic, Severity};
use crate::po::entry::Entry;
use crate::po::message::Message;
use crate::rules::rule::RuleChecker;
/// Rule `punc-start`: compares the punctuation found at the very beginning of
/// a source string (`msgid`) with the one found at the beginning of its
/// translation (`msgstr`).
pub struct PuncStartRule;

impl RuleChecker for PuncStartRule {
    /// Identifier of the rule.
    fn name(&self) -> &'static str {
        "punc-start"
    }

    /// One-line, human-readable summary of what the rule checks.
    fn description(&self) -> &'static str {
        "Check for inconsistent leading punctuation between source and translation."
    }

    /// This rule reports itself as a default rule.
    fn is_default(&self) -> bool {
        true
    }

    /// This rule reports itself as a check.
    fn is_check(&self) -> bool {
        true
    }

    /// Compares the normalized leading punctuation of `msgid` and `msgstr`.
    ///
    /// Returns an empty vector when both sides agree, or when either side
    /// starts with a `.` after normalization. Otherwise returns whatever
    /// `new_diag` yields for an `Info` diagnostic, with the raw (untrimmed,
    /// non-normalized) leading spans highlighted in both messages.
    fn check_msg(
        &self,
        checker: &Checker,
        _entry: &Entry,
        msgid: &Message,
        msgstr: &Message,
    ) -> Vec<Diagnostic> {
        let lang = checker.language_code();
        let fold_ellipsis = checker.config.check.punc_ignore_ellipsis;

        // Raw prefixes are kept around because their byte lengths drive the highlights.
        let id_raw = get_punc_start(&msgid.value);
        let str_raw = get_punc_start(&msgstr.value);

        // Normalized forms are what actually gets compared and reported.
        let id_punc2 = punc_normalize(id_raw.trim(), lang, fold_ellipsis);
        let str_punc2 = punc_normalize(str_raw.trim(), lang, fold_ellipsis);

        // A leading dot on either side is never reported.
        // NOTE(review): presumably to avoid noise on strings such as ".ext" - confirm.
        let leading_dot = |punc: &str| punc.starts_with('.');
        if leading_dot(&id_punc2) || leading_dot(&str_punc2) || id_punc2 == str_punc2 {
            return Vec::new();
        }

        let message =
            format!("inconsistent leading punctuation ('{id_punc2}' / '{str_punc2}')");
        self.new_diag(checker, Severity::Info, message)
            .map(|diag| {
                diag.with_msgs_hl(msgid, [(0, id_raw.len())], msgstr, [(0, str_raw.len())])
            })
            .into_iter()
            .collect()
    }
}
/// Rule `punc-end`: compares the punctuation found at the very end of a source
/// string (`msgid`) with the one found at the end of its translation
/// (`msgstr`).
pub struct PuncEndRule;

impl RuleChecker for PuncEndRule {
    /// Identifier of the rule.
    fn name(&self) -> &'static str {
        "punc-end"
    }

    /// One-line, human-readable summary of what the rule checks.
    fn description(&self) -> &'static str {
        "Check for inconsistent trailing punctuation between source and translation."
    }

    /// This rule reports itself as a default rule.
    fn is_default(&self) -> bool {
        true
    }

    /// This rule reports itself as a check.
    fn is_check(&self) -> bool {
        true
    }

    /// Compares the normalized trailing punctuation of `msgid` and `msgstr`.
    ///
    /// Returns an empty vector when both sides agree. Otherwise returns
    /// whatever `new_diag` yields for an `Info` diagnostic, with the raw
    /// (untrimmed, non-normalized) trailing spans highlighted in both messages.
    fn check_msg(
        &self,
        checker: &Checker,
        _entry: &Entry,
        msgid: &Message,
        msgstr: &Message,
    ) -> Vec<Diagnostic> {
        let lang = checker.language_code();
        let fold_ellipsis = checker.config.check.punc_ignore_ellipsis;

        // Raw suffixes are kept around because their byte lengths drive the highlights.
        let id_raw = get_punc_end(&msgid.value);
        let str_raw = get_punc_end(&msgstr.value);

        // Normalized forms are what actually gets compared and reported.
        let id_punc2 = punc_normalize(id_raw.trim(), lang, fold_ellipsis);
        let str_punc2 = punc_normalize(str_raw.trim(), lang, fold_ellipsis);

        if id_punc2 == str_punc2 {
            return Vec::new();
        }

        // Each suffix is a tail slice of its message, so the subtraction cannot underflow.
        let id_len = msgid.value.len();
        let str_len = msgstr.value.len();

        let message =
            format!("inconsistent trailing punctuation ('{id_punc2}' / '{str_punc2}')");
        self.new_diag(checker, Severity::Info, message)
            .map(|diag| {
                diag.with_msgs_hl(
                    msgid,
                    [(id_len - id_raw.len(), id_len)],
                    msgstr,
                    [(str_len - str_raw.len(), str_len)],
                )
            })
            .into_iter()
            .collect()
    }
}
/// Returns `true` when `c` is one of the punctuation characters compared by
/// the `punc-start` / `punc-end` rules.
///
/// Recognized characters, grouped by the ASCII mark they correspond to:
///
/// - colon: `:` and fullwidth U+FF1A
/// - semicolon: `;`, fullwidth U+FF1B and Arabic U+061B
/// - full stop: `.`, ideographic U+3002 and horizontal ellipsis U+2026
/// - comma: `,`, fullwidth U+FF0C and Arabic U+060C
/// - exclamation mark: `!` and fullwidth U+FF01
/// - question mark: `?`, fullwidth U+FF1F and Arabic U+061F
///
/// Non-ASCII characters are written as escapes so that the fullwidth forms
/// cannot be silently folded into their ASCII look-alikes by an editor or a
/// text normalization step (which would leave duplicated, dead comparisons).
const fn is_punc(c: char) -> bool {
    matches!(
        c,
        // colon
        ':' | '\u{FF1A}'
        // semicolon
        | ';' | '\u{FF1B}' | '\u{061B}'
        // full stop / ellipsis
        | '.' | '\u{3002}' | '\u{2026}'
        // comma
        | ',' | '\u{FF0C}' | '\u{060C}'
        // exclamation mark
        | '!' | '\u{FF01}'
        // question mark
        | '?' | '\u{FF1F}' | '\u{061F}'
    )
}
/// Returns the leading slice of `s` made of optional whitespace followed by a
/// run of punctuation characters (as defined by `is_punc`).
///
/// Whitespace is only accepted before the first punctuation character; a
/// newline is never accepted. The returned slice may therefore consist of
/// whitespace only when `s` starts with whitespace and no punctuation follows.
fn get_punc_start(s: &str) -> &str {
    let mut seen_punc = false;
    let mut end = 0;

    for (idx, ch) in s.char_indices() {
        let keep = if is_punc(ch) {
            seen_punc = true;
            true
        } else {
            // Whitespace after the punctuation run terminates the prefix.
            ch.is_whitespace() && ch != '\n' && !seen_punc
        };
        if !keep {
            break;
        }
        // Advance past the accepted character (byte offset, UTF-8 aware).
        end = idx + ch.len_utf8();
    }

    &s[..end]
}
/// Returns the trailing slice of `s` made of a run of punctuation characters
/// (as defined by `is_punc`) optionally followed by whitespace.
///
/// The string is scanned from its end: whitespace is only accepted until the
/// first punctuation character is met, and a newline is never accepted. The
/// returned slice may consist of whitespace only when `s` ends with whitespace
/// that is not preceded by punctuation.
fn get_punc_end(s: &str) -> &str {
    let mut seen_punc = false;
    let mut start = s.len();

    for (idx, ch) in s.char_indices().rev() {
        let keep = if is_punc(ch) {
            seen_punc = true;
            true
        } else {
            // Whitespace located before the punctuation run terminates the suffix.
            ch.is_whitespace() && ch != '\n' && !seen_punc
        };
        if !keep {
            break;
        }
        // `idx` is the byte offset at which the accepted character begins.
        start = idx;
    }

    &s[start..]
}
/// Normalizes punctuation in `s` so that equivalent marks from different
/// scripts compare equal.
///
/// Substitutions, applied character by character:
///
/// - ASCII `?` becomes `;` when `language` is `"el"`
/// - fullwidth colon U+FF1A becomes `:`
/// - fullwidth semicolon U+FF1B and Arabic semicolon U+061B become `;`
/// - ideographic full stop U+3002 becomes `.`
/// - fullwidth comma U+FF0C and Arabic comma U+060C become `,`
/// - fullwidth exclamation mark U+FF01 becomes `!`
/// - fullwidth question mark U+FF1F and Arabic question mark U+061F become `?`
///
/// When `ignore_ellipsis` is `true`, every `...` remaining after substitution
/// is then replaced by the single character `…` (U+2026).
///
/// Returns `Cow::Borrowed(s)` when nothing has to change, so already
/// normalized input does not allocate; otherwise returns an owned string.
fn punc_normalize<'a>(s: &'a str, language: &str, ignore_ellipsis: bool) -> Cow<'a, str> {
    let greek = language == "el";

    // Single source of truth for the mapping: used both to detect whether any
    // work is needed and to perform the substitution. Non-ASCII characters are
    // written as escapes so the fullwidth forms cannot be confused with (or
    // folded into) their ASCII look-alikes.
    let substitute = |c: char| -> char {
        match c {
            '?' if greek => ';',
            '\u{FF1A}' => ':',
            '\u{FF1B}' | '\u{061B}' => ';',
            '\u{3002}' => '.',
            '\u{FF0C}' | '\u{060C}' => ',',
            '\u{FF01}' => '!',
            '\u{FF1F}' | '\u{061F}' => '?',
            other => other,
        }
    };

    let needs_substitution = s.chars().any(|c| substitute(c) != c);
    let needs_ellipsis = ignore_ellipsis && s.contains("...");
    if !needs_substitution && !needs_ellipsis {
        return Cow::Borrowed(s);
    }

    let value: String = s.chars().map(substitute).collect();
    if ignore_ellipsis {
        // Done after substitution so that e.g. three ideographic full stops
        // also collapse into a single ellipsis character.
        Cow::Owned(value.replace("...", "…"))
    } else {
        Cow::Owned(value)
    }
}
// Unit tests for the punctuation helpers and for the `punc-start` / `punc-end` rules.
#[cfg(test)]
mod tests {
use super::*;
use crate::{config::Config, diagnostic::Diagnostic, rules::rule::Rules};
// Runs only `PuncStartRule` on the PO `content` and returns the checker's diagnostics.
fn check_punc_start(content: &str) -> Vec<Diagnostic> {
let mut checker = Checker::new(content.as_bytes());
let rules = Rules::new(vec![Box::new(PuncStartRule {})]);
checker.do_all_checks(&rules);
checker.diagnostics
}
// Runs only `PuncEndRule` on the PO `content` and returns the checker's diagnostics.
fn check_punc_end(content: &str) -> Vec<Diagnostic> {
let mut checker = Checker::new(content.as_bytes());
let rules = Rules::new(vec![Box::new(PuncEndRule {})]);
checker.do_all_checks(&rules);
checker.diagnostics
}
// Same as `check_punc_end`, but with `check.punc_ignore_ellipsis` enabled in the config.
fn check_punc_end_ignore_ellipsis(content: &str) -> Vec<Diagnostic> {
let mut config = Config::default();
config.check.punc_ignore_ellipsis = true;
let mut checker = Checker::new(content.as_bytes()).with_config(config);
let rules = Rules::new(vec![Box::new(PuncEndRule {})]);
checker.do_all_checks(&rules);
checker.diagnostics
}
// `is_punc` accepts the ASCII punctuation marks and rejects letters, digits,
// quotes, brackets, the hyphen and whitespace.
#[test]
fn test_is_punc() {
let punc_chars = [':', ';', '.', ',', '!', '?'];
for &c in &punc_chars {
assert!(is_punc(c), "{c} should be punctuation");
}
let non_punc_chars = [
'a', 'Z', ' ', '-', '\'', '"', '0', 'é', '(', ')', '\r', '\n',
];
for &c in &non_punc_chars {
assert!(!is_punc(c), "{c} should not be punctuation");
}
}
// `get_punc_start` returns the leading punctuation run; whitespace that follows
// the run is not part of it.
#[test]
fn test_get_punc_start() {
assert_eq!(get_punc_start(""), "");
assert_eq!(get_punc_start("test"), "");
assert_eq!(get_punc_start(", test"), ",");
assert_eq!(get_punc_start("...test"), "...");
assert_eq!(get_punc_start("…test"), "…");
assert_eq!(get_punc_start("テスト済み"), "");
assert_eq!(get_punc_start("。テスト済み"), "。");
assert_eq!(get_punc_start("。。。テスト済み"), "。。。");
}
// `get_punc_end` returns the trailing punctuation run, including whitespace
// that comes after it (see the `"test, "` case).
#[test]
fn test_get_punc_end() {
assert_eq!(get_punc_end(""), "");
assert_eq!(get_punc_end("test"), "");
assert_eq!(get_punc_end("test, "), ", ");
assert_eq!(get_punc_end("test..."), "...");
assert_eq!(get_punc_end("test…"), "…");
assert_eq!(get_punc_end("テスト済み"), "");
assert_eq!(get_punc_end("テスト済み。"), "。");
assert_eq!(get_punc_end("テスト済み。。。"), "。。。");
}
// `punc_normalize` folds script-specific marks to ASCII, maps `?` to `;` for
// language "el", and only collapses `...` into `…` when the flag is set.
#[test]
fn test_punc_normalize() {
assert_eq!(punc_normalize("", "fr", false), "");
assert_eq!(punc_normalize("test", "fr", false), "test");
assert_eq!(
punc_normalize("。,!?\u{061F}:;\u{061B}。。。", "zh", false),
".,!??:;;..."
);
assert_eq!(punc_normalize("?", "fr", false), "?");
assert_eq!(punc_normalize("?", "el", false), ";");
assert_eq!(punc_normalize("...test...", "fr", false), "...test...");
assert_eq!(punc_normalize("...test...", "fr", true), "…test…");
}
// Entries without any leading or trailing punctuation produce no diagnostic.
#[test]
fn test_no_punc() {
let diags = check_punc_start(
r#"
msgid "tested"
msgstr "testé"
"#,
);
assert!(diags.is_empty());
let diags = check_punc_end(
r#"
msgid "tested"
msgstr "testé"
"#,
);
assert!(diags.is_empty());
}
// Pairs whose punctuation is equivalent after normalization produce no diagnostic.
#[test]
fn test_punc_ok() {
// Only the final `...` of the msgid is compared: the space before it stops the backward scan.
let diags = check_punc_end(
r#"
msgid "tested, ..."
msgstr "testé..."
"#,
);
assert!(diags.is_empty());
// With the ellipsis option enabled, `...` and `…` are treated as the same ending.
let diags = check_punc_end_ignore_ellipsis(
r#"
msgid "tested, ..."
msgstr "testé…"
"#,
);
assert!(diags.is_empty());
// Neither string has leading punctuation.
let diags = check_punc_start(
r#"
msgid "tested."
msgstr "テスト済み。"
"#,
);
assert!(diags.is_empty());
// `.` and `。` normalize to the same ending.
let diags = check_punc_end(
r#"
msgid "tested."
msgstr "テスト済み。"
"#,
);
assert!(diags.is_empty());
let diags = check_punc_end(
r#"
msgid "tested,"
msgstr "テスト済み,"
"#,
);
assert!(diags.is_empty());
}
// An entry flagged with `noqa:<rule name>` yields no diagnostic for that rule,
// even though its punctuation differs.
#[test]
fn test_punc_error_noqa() {
let diags = check_punc_start(
r#"
#, noqa:punc-start
msgid ":tested!"
msgstr ",testé !!!"
"#,
);
assert!(diags.is_empty());
let diags = check_punc_end(
r#"
#, noqa:punc-end
msgid ":tested!"
msgstr ",testé !!!"
"#,
);
assert!(diags.is_empty());
}
// Mismatching punctuation yields exactly one `Info` diagnostic per rule, whose
// message quotes both normalized punctuation runs.
#[test]
fn test_punc_error() {
let diags = check_punc_start(
r#"
msgid ":tested!"
msgstr ",testé !!!"
"#,
);
assert_eq!(diags.len(), 1);
let diag = &diags[0];
assert_eq!(diag.severity, Severity::Info);
assert_eq!(diag.message, "inconsistent leading punctuation (':' / ',')");
let diags = check_punc_end(
r#"
msgid ":tested!"
msgstr ",testé !!!"
"#,
);
assert_eq!(diags.len(), 1);
let diag = &diags[0];
assert_eq!(diag.severity, Severity::Info);
assert_eq!(
diag.message,
"inconsistent trailing punctuation ('!' / '!!!')"
);
}
}