#[cfg(feature = "options")]
use crate::Options;
use crate::{
bidi::{IsolatingRunSequence, Paragraph},
Class, Level,
};
use proptest::prelude::*;
use std::ops::Range;
// Property-based smoke tests: every case only invokes a curing macro and
// discards the result, so a case can fail only if the call panics.
proptest! {
// Generate 2000 cases per property.
#![proptest_config(ProptestConfig::with_cases(2000))]
// Feeds an arbitrary `char` to `cure_char!`.
#[test]
fn character_crash(c in any::<char>()) {
let _ = crate::cure_char!(c);
}
// Feeds a string generated from the regex `\PC*` to `cure!`.
#[test]
fn string_crash(s in "\\PC*") {
let _ = crate::cure!(&s);
}
}
/// Cures `$haystack`, searches the cured text for `$needle` and asserts that
/// the collected matches are equal to `$expected`.
///
/// Panics (through `unwrap`) if curing the input fails.
macro_rules! assert_matches {
    ($haystack:literal, $needle:literal, $expected:expr) => {{
        let cured = $crate::cure!($haystack).unwrap();
        let found: Vec<_> = cured.find($needle).collect();
        assert_eq!(found, $expected);
    }};
}
/// Cures `$haystack` and asserts that searching the cured text for `$needle`
/// yields no matches at all.
///
/// Panics (through `unwrap`) if curing the input fails.
macro_rules! assert_no_matches {
    ($haystack:literal, $needle:literal) => {{
        let cured = $crate::cure!($haystack).unwrap();
        let matches: Vec<_> = cured.find($needle).collect();
        assert!(matches.is_empty());
    }};
}
/// Checks `find` on cured text. `assert_matches!` takes the input, the word
/// searched for and the expected match ranges; `assert_no_matches!` takes an
/// input and a word that must yield no match at all.
#[test]
#[allow(clippy::single_range_in_vec_init)]
fn similar_equal() {
// Exact spellings, plus a digit standing in for a letter ("h3" for "he").
assert_matches!("h", "h", [0..1]);
assert_matches!("he", "he", [0..2]);
assert_matches!("h3", "he", [0..2]);
assert_matches!("hello", "hello", [0..5]);
// Runs of repeated letters are reported as one match covering the whole run.
assert_matches!("hhheeeeelllloo", "hello", [0..14]);
assert_matches!("?asdf-hhheeeeelllloo", "hello", [6..20]);
// Punctuation around the word is not part of the reported range.
assert_matches!("-hello", "hello", [1..6]);
assert_matches!("hello-", "hello", [0..5]);
assert_matches!("---hello", "hello", [3..8]);
assert_matches!("---hello-", "hello", [3..8]);
assert_matches!("hhheeeeelllloo!!", "hello", [0..14]);
// An incomplete spelling earlier in the input ("hel", "hell") is not reported.
assert_matches!("-!?hel$2-hello?", "hello", [9..14]);
assert_matches!("-!?hel$2-hhheeeeelllloo!!", "hello", [9..23]);
// NOTE(review): counting the characters visible in the next two literals puts
// "heellllo" at 13..21, not 14..22 — confirm the spacing inside the literals
// is intact.
assert_matches!("wow hell wow heellllo", "hello", [14..22]);
assert_matches!("wow hell wow heellllo!", "hello", [14..22]);
// With the `separators` feature, spaces, '-', '/' and ',' between the letters
// are expected to be spanned by the match.
// NOTE(review): for the literals containing "hh-he e", the expected end
// offsets are one more than the visible character count suggests (e.g. 0..22
// for a 21-character literal) — confirm the spacing inside these literals is
// intact.
#[cfg(feature = "separators")]
{
assert_matches!("hh-he e eeell/l/lo//o", "hello", [0..22]);
assert_matches!(" shhhiii/iiiiitttttt/ttttt ", "shit", [1..26]);
assert_matches!("hh-he e eeell/l/lo-?", "hello", [0..19]);
assert_matches!("shhhiii/iiiiitttttt/ttttt/", "shit", [0..25]);
assert_matches!("hh-he e ee,e ll/l/lo//o-?", "hello", [0..24]);
assert_matches!("-!?hel$2-hh-he e ee,e,ll/l/lo//o-?", "hello", [9..33]);
}
// With the `leetspeak` feature, symbol-based spellings are expected to match.
#[cfg(feature = "leetspeak")]
{
assert_matches!("|-|3|_I_0", "hello", [0..9]);
assert_matches!("|--|3e33|__|_I_I_0()O[]", "hello", [0..23]);
}
// Empty inputs or search words, pure punctuation, and partial or differently
// ordered spellings must not match.
assert_no_matches!("", "");
assert_no_matches!("h", "");
assert_no_matches!("", "h");
assert_no_matches!("", "he");
assert_no_matches!("h", "he");
assert_no_matches!("-", "hello");
assert_no_matches!("- !?", "hello");
assert_no_matches!("ello", "hello");
assert_no_matches!("eel", "ell");
assert_no_matches!("ell", "eel");
assert_no_matches!("-!?hel", "hell");
assert_no_matches!("ello?", "hello");
}
/// Checks that `censor` replaces every occurrence of the word with the given
/// character, both when occurrences are separated and when they are adjacent.
#[test]
fn censor() {
    // Cures `input`, censors "word" with '*' and hands back the cured value.
    let censor_word = |input: &str| {
        let mut cured = crate::cure!(input).unwrap();
        cured.censor("word", '*');
        cured
    };

    assert_eq!(
        censor_word("word word this is a word"),
        "**** **** this is a ****"
    );
    assert_eq!(
        censor_word("wordword this is a word"),
        "******** this is a ****"
    );
}
/// Spot-checks `Class::new` against a fixed list of code points.
#[test]
fn bidi_class() {
    // `expect_class!(code => X)` asserts that the lookup yields
    // `Some(Class::X)`; `expect_class!(code => None)` asserts that it yields
    // `None`.
    macro_rules! expect_class {
        ($code:literal => None) => {
            assert_eq!(Class::new($code), None)
        };
        ($code:literal => $class:ident) => {
            assert_eq!(Class::new($code), Some(Class::$class))
        };
    }

    expect_class!(0x0000 => BN);
    expect_class!(0x0040 => ON);
    expect_class!(0x0041 => L);
    expect_class!(0x0062 => L);
    expect_class!(0x007f => BN);
    expect_class!(0x05d0 => R);
    expect_class!(0x05d1 => R);
    expect_class!(0x0600 => AN);
    expect_class!(0x0627 => AL);
    expect_class!(0x07c0 => R);
    expect_class!(0x0860 => AL);
    expect_class!(0x08a0 => AL);
    expect_class!(0x089f => None);
    expect_class!(0x08ff => None);
    expect_class!(0x20a0 => ET);
    expect_class!(0xfb1d => R);
    expect_class!(0xfb4f => R);
    expect_class!(0xfb50 => AL);
    expect_class!(0xfdf0 => AL);
    expect_class!(0xfe70 => AL);
    expect_class!(0xfeff => BN);
    expect_class!(0x10800 => R);
    expect_class!(0x1e800 => R);
    expect_class!(0x1ee00 => AL);
    expect_class!(0x30000 => L);
}
/// Splits `levels` into consecutive index ranges ("level runs") that share
/// the same level.
///
/// A new run starts at index `i` when `levels[i]` differs from the level of
/// the current run, unless `original_classes[i].removed_by_x9()` is true; such
/// positions never start a run and therefore stay inside the current one.
/// The returned ranges are contiguous and together cover `0..levels.len()`.
/// Empty input produces no runs.
///
/// # Panics
///
/// Panics if `levels` and `original_classes` differ in length. Without this
/// check a longer `original_classes` would be silently accepted and a shorter
/// one would only fail mid-loop with an index-out-of-bounds panic.
fn level_runs(levels: &[Level], original_classes: &[Class]) -> Vec<Range<usize>> {
    assert_eq!(
        levels.len(),
        original_classes.len(),
        "levels and original_classes must have the same length"
    );

    let mut runs = Vec::new();
    if levels.is_empty() {
        return runs;
    }

    // The first position always opens the first run, whatever its class.
    let mut run_level = levels[0];
    let mut run_start = 0;
    for (index, (&level, class)) in levels.iter().zip(original_classes).enumerate().skip(1) {
        if !class.removed_by_x9() && level != run_level {
            // Close the current run and open a new one at `index`.
            runs.push(run_start..index);
            run_level = level;
            run_start = index;
        }
    }
    // The final run always extends to the end of the input.
    runs.push(run_start..levels.len());
    runs
}
/// Computes the isolating run sequences of `paragraph` for the given
/// `levels` and `classes`, and returns them ordered by their first run so
/// that tests can compare against a fixed expected order.
///
/// Panics if `isolating_run_sequences` does not succeed, or if a sequence has
/// no runs.
fn irs_sorted(
    paragraph: &Paragraph,
    levels: &[Level],
    classes: &[Class],
) -> Vec<IsolatingRunSequence> {
    let runs = level_runs(levels, classes);

    let mut sequences: Vec<IsolatingRunSequence> = Vec::new();
    paragraph
        .isolating_run_sequences(levels, &runs, classes, &mut sequences)
        .unwrap();

    // The first runs are compared as iterators (`Iterator::cmp`), i.e.
    // lexicographically over the indices each range yields.
    sequences.sort_by(|lhs, rhs| {
        let lhs_first = lhs.runs[0].clone();
        let rhs_first = rhs.runs[0].clone();
        lhs_first.cmp(rhs_first)
    });
    sequences
}
/// Asserts that the sorted isolating run sequences for the given input
/// consist of exactly the `expected` runs, one inner list per sequence.
#[allow(clippy::needless_pass_by_value)]
fn test_irs_runs(
    paragraph: &Paragraph,
    classes: &[Class],
    levels: &[Level],
    expected: Vec<Vec<Range<usize>>>,
) {
    // Only the runs are compared here; the start/end classes are ignored.
    let actual: Vec<_> = irs_sorted(paragraph, levels, classes)
        .iter()
        .map(|sequence| sequence.runs.clone())
        .collect();
    assert_eq!(actual, expected);
}
/// Asserts that the sorted isolating run sequences for the given input are
/// equal to `expected`, first by count and then element by element.
fn test_irs(
    paragraph: &Paragraph,
    classes: &[Class],
    levels: &[Level],
    expected: &[IsolatingRunSequence],
) {
    let sequences = irs_sorted(paragraph, levels, classes);
    assert_eq!(sequences.len(), expected.len());
    // Lengths are equal at this point, so zipping visits every pair.
    for (actual, wanted) in sequences.iter().zip(expected) {
        assert_eq!(actual, wanted);
    }
}
/// Checks the isolating run sequences produced for hand-written class/level
/// inputs, first by runs only and then including start/end classes.
#[test]
fn isolating_run_sequences() {
    // Builds a `&[Class]` from bare variant names.
    macro_rules! classes {
        ($($variant:tt),*) => {
            &[$(Class::$variant),*]
        };
    }
    // Builds a `&[Level]` from integer literals.
    macro_rules! levels {
        ($($value:tt),*) => {
            &[$(Level($value)),*]
        };
    }
    // Builds the expected runs: one inner `Vec` of ranges per sequence.
    macro_rules! runs {
        ($([$($from:literal..$to:literal),*]),*) => {
            vec![$(vec![$($from..$to),*]),*]
        };
    }
    // Builds expected sequences from `[[runs...], start_class, end_class],` rows.
    macro_rules! irs {
        ($(
            [[$($from:literal..$to:literal),*], $sos:ident, $eos:ident],
        )*) => {
            &[$(IsolatingRunSequence {
                runs: vec![$($from..$to),*],
                start_class: Class::$sos,
                end_class: Class::$eos,
            },)*]
        };
    }

    let paragraph = Paragraph {
        range: 0..1,
        level: Level::ltr(),
        pure_ltr: false,
        has_isolate_controls: true,
    };

    // RLE/PDF only: every expected sequence consists of a single run.
    test_irs_runs(
        &paragraph,
        classes!(L, RLE, L, PDF, RLE, L, PDF, L),
        levels!(0, 1, 1, 1, 1, 1, 1, 0),
        runs!([0..2], [2..7], [7..8]),
    );

    // RLI/PDI: the runs around the isolates are expected in one sequence, and
    // each isolate's content in a sequence of its own.
    test_irs_runs(
        &paragraph,
        classes!(L, RLI, L, PDI, RLI, L, PDI, L),
        levels!(0, 0, 1, 0, 0, 1, 0, 0),
        runs!([0..2, 3..5, 6..8], [2..3], [5..6]),
    );

    // Nested RLI/LRI with an RLE/PDF pair inside the inner isolate.
    test_irs_runs(
        &paragraph,
        classes!(L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L),
        levels!(0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0),
        runs!([0..2, 11..13], [2..4, 9..11], [4..6], [6..8], [8..9]),
    );

    // Nested RLE/LRE, this time also checking the start and end classes.
    test_irs(
        &paragraph,
        classes!(L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L),
        levels!(0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0),
        irs! {
            [[0..2], L, R],
            [[2..4], R, L],
            [[4..6], L, L],
            [[6..11], L, R],
            [[11..12], R, L],
        },
    );

    // Nested RLI/LRI, also checking the start and end classes.
    test_irs(
        &paragraph,
        classes!(L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L),
        levels!(0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0),
        irs! {
            [[0..2, 7..9, 10..12], L, L],
            [[2..4, 5..7], R, R],
            [[4..5], L, L],
            [[9..10], R, R],
        },
    );
}
/// Cures `input` with default options plus `retain_hebrew` and
/// `retain_arabic`, and asserts that the result equals `expected`.
///
/// Panics (through `unwrap`) if curing fails.
#[cfg(feature = "options")]
fn test_reorder(input: &str, expected: &str) {
    let options = Options::default().retain_hebrew().retain_arabic();
    let cured = crate::cure(input, options).unwrap();
    assert_eq!(cured, expected);
}
/// Table of `(input, expected)` pairs passed to `test_reorder`, which cures
/// the input while retaining Hebrew and Arabic characters and compares the
/// result with the expected string.
#[test]
#[cfg(feature = "options")]
fn reorder() {
// Latin-only lines, with or without digits, are expected back unchanged.
test_reorder("abc\ndef\nghi", "abc\ndef\nghi");
test_reorder("ab1\nde2\ngh3", "ab1\nde2\ngh3");
// Hebrew or Arabic letters mixed with Latin text and line breaks.
test_reorder(concat!("א", "ב", "ג", "abc"), concat!("abc", "ג", "ב", "א"));
test_reorder("abc\nابج", concat!("abc\n", "جبا"));
test_reorder(
"\u{0627}\u{0628}\u{062C}\nabc",
"\n\u{062C}\u{0628}\u{0627}abc",
);
// Digits with '.' and '-' between them are expected back unchanged.
test_reorder("1.-2", "1.-2");
test_reorder("1-.2", "1-.2");
test_reorder("abc אבג", "abc גבא");
test_reorder("123 \u{05D0}\u{05D1}\u{05D2}", "גבא 123");
// Latin text containing U+202A/U+202C or U+2066/U+2069 is expected back
// unchanged, control characters included.
test_reorder("abc\u{202A}def", "abc\u{202A}def");
test_reorder("abc\u{202A}def\u{202C}ghi", "abc\u{202A}def\u{202C}ghi");
test_reorder("abc\u{2066}def\u{2069}ghi", "abc\u{2066}def\u{2069}ghi");
test_reorder("\u{202B}abc אבג\u{202C}", "\u{202b}גבא abc\u{202c}");
// Punctuation next to Hebrew text, with and without a trailing U+200F.
test_reorder("\u{05D0}בג? אבג", "גבא ?גבא");
test_reorder("A אבג?", "A גבא?");
test_reorder("A אבג?\u{200F}", "A \u{200F}?גבא");
test_reorder("\u{05D0}בג abc", "abc גבא");
test_reorder("abc\u{2067}.-\u{2069}ghi", "abc\u{2067}-.\u{2069}ghi");
test_reorder(
"Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
"Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!",
);
// Brackets: the expected strings list the bracket characters in swapped
// form relative to the input.
test_reorder("\u{05D0}(ב)ג.", ".ג)ב(א");
test_reorder("\u{05D0}ב(גד[&ef].)gh", "gh).]ef&[דג(בא");
}