#![allow(clippy::float_cmp)]
use super::fac::{FuzzyAhoCorasick, FuzzyAhoCorasickBuilder};
use super::{FuzzyLimits, Pattern};
/// Builds the engine shared by the basic tests: patterns `saddam` and
/// `hussein`, configured with `FuzzyLimits::new().edits(2)`.
fn make_engine() -> FuzzyAhoCorasick {
    let limits = FuzzyLimits::new().edits(2);
    let patterns = ["saddam", "hussein"];
    FuzzyAhoCorasickBuilder::new().fuzzy(limits).build(patterns)
}
/// Regression: with patterns `NA` and `MENA`, a non-overlapping search of
/// `"NA MENA"` must report `MENA` matched against the text `MENA`.
#[test]
fn test_non_overlapping_regression_0() {
    let limits = FuzzyLimits::new().edits(2);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .case_insensitive(true)
        .build(["NA", "MENA"]);

    let result = engine.search_non_overlapping("NA MENA", 0.6);
    println!("Result: {result:?}");

    let mena_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "MENA" && hit.text == "MENA");
    assert!(mena_found);
}
/// Regression: the pattern list contains `KO` twice; a non-overlapping search
/// of `"KWO KO LWIN"` must report `KO` matched against the text `KWO`.
#[test]
fn test_non_overlapping_regression_2() {
    let patterns = ["KO", "KO", "LWIN"];
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(patterns);

    let result = engine.search_non_overlapping("KWO KO LWIN", 0.6);
    println!("Result: {result:#?}");

    let kwo_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "KO" && hit.text == "KWO");
    assert!(kwo_found);
}
/// Regression: `search_non_overlapping_unique` over a misspelled company name
/// must report `WASEL` for the text `WASL` and `BABEL` for the text `BBEL`.
#[test]
fn test_non_overlapping_regression_3() {
    let patterns = ["AL", "WASEL", "AND", "BABEL", "GENERAL", "TRADING", "LLC"];
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(patterns);

    let haystack = "AL WASL ANT BBEL GNERAL TRATING LC";
    let result = engine.search_non_overlapping_unique(haystack, 0.6);
    println!("Result: {result:#?}");

    let wasel_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "WASEL" && hit.text == "WASL");
    assert!(wasel_found);

    let babel_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "BABEL" && hit.text == "BBEL");
    assert!(babel_found);
}
/// With `case_insensitive(true)`, the pattern `world` must be found in
/// `"HeLlO WoRlD"`; the matched text is compared ignoring ASCII case.
#[test]
fn test_case_insensitive_ascii() {
    let builder = FuzzyAhoCorasickBuilder::new().case_insensitive(true);
    let engine = builder.build(["world"]);

    let hits = engine.search("HeLlO WoRlD", 0.9);
    let world_found = hits
        .iter()
        .any(|hit| hit.text.eq_ignore_ascii_case("world"));
    assert!(world_found);
}
/// Both patterns of the shared engine must be found in the concatenation
/// `"saddamhussein"`; `saddam` must match the exact text `saddam`.
#[test]
fn test_exact_match() {
    let engine = make_engine();
    let result = engine.search("saddamhussein", 0.5);

    let saddam_exact = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "saddam" && hit.text == "saddam");
    assert!(saddam_exact);

    let hussein_found = result.iter().any(|hit| hit.pattern.as_str() == "hussein");
    assert!(hussein_found, "Should find hussein pattern: {result:?}");
}
/// An extra `m` in `"saddammhussein"` must not prevent `saddam` from being
/// reported with the exact text `saddam`.
#[test]
fn test_extra_letter() {
    let engine = make_engine();
    let hits = engine.search("saddammhussein", 0.3);

    let saddam_exact = hits
        .iter()
        .any(|hit| hit.pattern.as_str() == "saddam" && hit.text == "saddam");
    assert!(saddam_exact);
}
/// A letter missing from the input (`"saddmhussin"`) must still yield a
/// `saddam` match whose text is `saddm`.
#[test]
fn test_missing_letter() {
    let engine = make_engine();
    let result = engine.search("saddmhussin", 0.3);
    println!("{result:?}");

    let saddm_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "saddam" && hit.text == "saddm");
    assert!(saddm_found);
}
/// The text `huzein` inside `"saddamhuzein"` must be reported as a match for
/// the pattern `hussein`.
#[test]
fn test_substitution() {
    let engine = make_engine();
    let hits = engine.search("saddamhuzein", 0.2);

    let huzein_found = hits
        .iter()
        .any(|hit| hit.pattern.as_str() == "hussein" && hit.text == "huzein");
    assert!(huzein_found);
}
/// Searches a longer paragraph: the misspelling `tincidutn` must yield a
/// `tincidunt` match, and some match must have the exact text `porta`.
#[test]
fn test_big() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(["tincidunt", "porta"]);

    let text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum eros ipsum, tincidutn eu metus ut, commodo accumsan mi. Vestibulum porta, orci nec ullamcorper posuere, eros tortor pharetra est, at porttitor mi leo a velit.";
    let result = engine.search_non_overlapping(text, 0.8);

    let tincidunt_found = result
        .iter()
        .any(|hit| hit.pattern.as_str() == "tincidunt");
    assert!(tincidunt_found, "{result:?}");

    let porta_found = result.iter().any(|hit| hit.text == "porta");
    assert!(porta_found, "{result:?}");
}
/// Compares `search` with `search_non_overlapping` for the patterns `saddam`
/// and `ddamhu`:
/// - `search` on `"saddamddamhu"` must report both patterns;
/// - `search_non_overlapping` on `"saddamhussein"` must return exactly one match;
/// - `search_non_overlapping` on `"sadam ddamhu"` must match `saddam` to `sadam`.
#[test]
fn test_overlap_vs_nonoverlap() {
    let limits = FuzzyLimits::new().edits(2);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["saddam", "ddamhu"]);

    // Plain search: both patterns must show up.
    let matches = engine.search("saddamddamhu", 0.5);
    let saddam_exact = matches
        .iter()
        .any(|hit| hit.pattern.as_str() == "saddam" && hit.text == "saddam");
    assert!(saddam_exact);
    let ddamhu_found = matches.iter().any(|hit| hit.pattern.as_str() == "ddamhu");
    assert!(ddamhu_found, "Should find ddamhu pattern: {matches:?}");

    // Non-overlapping search: a single match is expected for this input.
    let matches_nonoverlap = engine.search_non_overlapping("saddamhussein", 0.7);
    assert_eq!(matches_nonoverlap.len(), 1, "{matches_nonoverlap:?}");

    // Non-overlapping search over two separate words.
    let matches_nonoverlap_two = engine.search_non_overlapping("sadam ddamhu", 0.4);
    assert!(
        !matches_nonoverlap_two.is_empty(),
        "Should find at least one match: {matches_nonoverlap_two:?}"
    );
    let sadam_found = matches_nonoverlap_two
        .iter()
        .any(|hit| hit.pattern.as_str() == "saddam" && hit.text == "sadam");
    assert!(sadam_found, "{matches_nonoverlap_two:?}");
}
/// Regression: with no `.fuzzy(...)` limits configured, the pattern `CO` must
/// not match the text `CA`.
#[test]
fn test_regression_1() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build(["CO"]);
    let result = engine.search("CA", 0.8);
    println!("{result:?}");
    // `is_empty()` states the intent directly instead of counting an iterator,
    // and the message shows the unexpected matches when the check fails.
    assert!(result.is_empty(), "expected no matches, got {result:?}");
}
/// Regression: the pattern `TOLA`, carrying its own `edits(2)` limits, must
/// match the text `TOL` in a non-overlapping search.
#[test]
fn test_regression_2() {
    let tola = Pattern::from("TOLA").fuzzy(FuzzyLimits::new().edits(2));
    let engine = FuzzyAhoCorasickBuilder::new().build([tola]);

    let result = engine.search_non_overlapping("TOL", 0.5);
    println!("\nResult: {result:?}");

    let tol_found = result.iter().any(|hit| hit.text == "TOL");
    assert!(tol_found);
}
/// `segment_text` must turn `"sadamhusein"` into `"sadam husein"`; with extra
/// trailing text the output must still contain `sadam` and `husein`.
#[test]
fn test_segment_text() {
    let limits = FuzzyLimits::new().edits(3);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["saddam", "hussein"]);

    let segmented = engine.segment_text("sadamhusein", 0.8);
    assert_eq!(segmented, "sadam husein");

    let result = engine.segment_text("sadamhuseinaltikriti", 0.8);
    for (needle, label) in [("sadam", "sadam"), ("husein", "husein")] {
        assert!(result.contains(needle), "Should contain {label}: {result}");
    }
}
/// Segments `"someinptandm0re"` through the match collection's own
/// `segment_text()` and expects `"some inpt and m0re"`.
#[test]
fn test_segment_readme() {
    let limits = FuzzyLimits::new().edits(1);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["input", "more"]);

    let hits = engine.search_non_overlapping("someinptandm0re", 0.75);
    assert_eq!(hits.segment_text(), "some inpt and m0re");
}
/// Segmenting `"SHANEDOM INICCRAWFORD"` must keep `SHANE` and `CRAWFORD`
/// present in the output.
#[test]
fn test_segment_name() {
    let names = ["SHANE", "DOMINIC", "CRAWFORD"];
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(3))
        .build(names);

    let result = engine.segment_text("SHANEDOM INICCRAWFORD", 0.8);

    let has_shane = result.contains("SHANE");
    assert!(has_shane, "Should contain SHANE: {result}");
    let has_crawford = result.contains("CRAWFORD");
    assert!(has_crawford, "Should contain CRAWFORD: {result}");
}
/// Case-insensitive segmentation of four concatenated names must insert the
/// separators while keeping the original letter casing of the input.
#[test]
fn test_segment_text2() {
    let names = ["HASAN", "JAMAL", "HUSSEIN", "ZEINIYE"];
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build(names);

    let segmented = engine.segment_text("ZEINIYEHussEINHASaNJAMAL", 0.8);
    assert_eq!(segmented, "ZEINIYE HussEIN HASaN JAMAL");
}
/// With no `.fuzzy(...)` limits configured, `segment_text` must return the
/// misspelled input `"sadam husein"` unchanged.
#[test]
fn test_fail() {
    let engine = FuzzyAhoCorasickBuilder::new().build(["saddam", "hussein"]);
    let input = "sadam husein";
    let segmented = engine.segment_text(input, 0.8);
    assert_eq!(segmented, "sadam husein");
}
/// `replace` with a callback must swap `hair` and `bear` and leave every
/// other match (the callback returns `None` for it) untouched.
#[test]
fn test_fuzzy_replace_fn() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build(["hair", "bear", "wuzzy"]);

    let replaced = engine.replace(
        "Fuzzy Wuzzy was a hair. Fuzzy Wuzzy had no bear.",
        |hit| match hit.text {
            "hair" => Some("bear"),
            "bear" => Some("hair"),
            _ => None,
        },
        0.8,
    );

    assert_eq!(
        replaced,
        "Fuzzy Wuzzy was a bear. Fuzzy Wuzzy had no hair."
    );
}
/// When `STOCK` is contained in the longer pattern `JOINT STOCK COMPANY`, a
/// non-overlapping search must report the longer pattern and never `STOCK`.
#[test]
fn test_longer_match_preference() {
    let patterns = ["JOINT STOCK COMPANY", "STOCK"];
    let engine = FuzzyAhoCorasickBuilder::new().build(patterns);

    let hits = engine.search_non_overlapping("JOINT STOCK COMPANY GAZPROM", 0.8);

    let long_found = hits
        .iter()
        .any(|hit| hit.pattern.as_str() == "JOINT STOCK COMPANY");
    assert!(long_found);

    let no_short = hits.iter().all(|hit| hit.pattern.as_str() != "STOCK");
    assert!(no_short);
}
/// Regression: the pattern `zavod` with `edits(2).substitutions(1)` limits
/// must produce no match in the text `NARODNY`.
#[test]
fn test_regression_0() {
    let limits = FuzzyLimits::new().edits(2).substitutions(1);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .case_insensitive(true)
        .build(["zavod"]);

    let hits = engine.search_non_overlapping("NARODNY", 0.8);
    assert!(hits.is_empty());
}
/// `strip_prefix` must remove the leading fuzzy matches `LrEM ISuM Lorm` and
/// return only `"ZZZ"`.
#[test]
fn test_strip_prefix() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(["LOREM", "IPSUM"]);

    let remainder = engine.strip_prefix("LrEM ISuM Lorm ZZZ", 0.8);
    assert_eq!(remainder, "ZZZ");
}
/// `strip_postfix` must remove the trailing fuzzy matches `LrEM ISuM Lorm`
/// and return only `"ZZZ"`.
#[test]
fn test_strip_postfix() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(["LOREM", "IPSUM"]);

    let remainder = engine.strip_postfix("ZZZ LrEM ISuM Lorm", 0.8);
    assert_eq!(remainder, "ZZZ");
}
/// `split` must cut `"ZZZLrEMISuMAAA"` at the fuzzy matches and yield the
/// pieces `ZZZ` and `AAA`.
#[test]
fn test_split() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .case_insensitive(true)
        .build(["LOREM", "IPSUM"]);

    let pieces = engine.split("ZZZLrEMISuMAAA", 0.8).collect::<Vec<_>>();
    assert_eq!(pieces, ["ZZZ", "AAA"]);
}
/// The pattern `WALIJAN` (per-pattern `edits(3)` limits) must be found in the
/// input `alijan`, which lacks the leading letter.
#[test]
fn test_truncated_walijan() {
    let walijan = Pattern::from("WALIJAN").fuzzy(FuzzyLimits::new().edits(3));
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([walijan]);

    let result = engine.search("alijan", 0.7);
    println!("\nResult for alijan: {result:?}");

    let walijan_found = result.iter().any(|hit| hit.pattern.as_str() == "WALIJAN");
    assert!(
        walijan_found,
        "Should find WALIJAN in 'alijan' with deletions. Results: {result:?}"
    );
}
/// The pattern `TOLA` (per-pattern `edits(2)` limits) must produce a match
/// whose text is `OLA` when searching the input `OLA`.
#[test]
fn test_truncated_short() {
    let tola = Pattern::from("TOLA").fuzzy(FuzzyLimits::new().edits(2));
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([tola]);

    let result = engine.search("OLA", 0.5);
    println!("\nResult for OLA: {result:?}");

    let ola_found = result.iter().any(|hit| hit.text == "OLA");
    assert!(
        ola_found,
        "Should find TOLA in 'OLA' with deletion. Results: {result:?}"
    );
}
/// Same scenario as the per-pattern `TOLA` / `OLA` test, but the `edits(2)`
/// limits are set on the builder instead of on the pattern.
#[test]
fn test_truncated_with_global_limits() {
    let limits = FuzzyLimits::new().edits(2);
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .fuzzy(limits)
        .build(["TOLA"]);

    let result = engine.search("OLA", 0.5);
    println!("\nResult for OLA with global limits: {result:?}");

    let ola_found = result.iter().any(|hit| hit.text == "OLA");
    assert!(
        ola_found,
        "Should find TOLA in 'OLA' with global limits. Results: {result:?}"
    );
}
/// Same scenario as the per-pattern `WALIJAN` / `alijan` test, but the
/// `edits(3)` limits are set on the builder instead of on the pattern.
#[test]
fn test_truncated_walijan_with_global_limits() {
    let limits = FuzzyLimits::new().edits(3);
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .fuzzy(limits)
        .build(["WALIJAN"]);

    let result = engine.search("alijan", 0.7);
    println!("\nResult for alijan with global limits: {result:?}");

    let walijan_found = result.iter().any(|hit| hit.pattern.as_str() == "WALIJAN");
    assert!(
        walijan_found,
        "Should find WALIJAN in 'alijan' with global limits. Results: {result:?}"
    );
}
/// The pattern `DJAMEL` must be found in the input `Tjamel`, which differs in
/// its first letter (T instead of D).
#[test]
fn test_phonetic_td_substitution() {
    let limits = FuzzyLimits::new().edits(3);
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .fuzzy(limits)
        .build(["DJAMEL"]);

    let result = engine.search("Tjamel", 0.5);
    println!("\nResult for 'Tjamel' vs 'DJAMEL' (0.5): {result:?}");

    let djamel_found = result.iter().any(|hit| hit.pattern.as_str() == "DJAMEL");
    assert!(
        djamel_found,
        "Should find DJAMEL in 'Tjamel' with substitution. Results: {result:?}"
    );
}
/// The pattern `MOMIR` (per-pattern `edits(3)` limits) must be found in the
/// input `Mmir`, which lacks a letter in the middle.
#[test]
fn test_missing_middle_char() {
    let momir = Pattern::from("MOMIR").fuzzy(FuzzyLimits::new().edits(3));
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([momir]);

    let result = engine.search("Mmir", 0.5);
    println!("\nResult for 'Mmir' vs 'MOMIR' (0.5): {result:?}");

    let momir_found = result.iter().any(|hit| hit.pattern.as_str() == "MOMIR");
    assert!(
        momir_found,
        "Should find MOMIR in 'Mmir'. Results: {result:?}"
    );
}
/// Runs a search for the pattern `SIMIC` against the input `SIIC` and prints
/// the result.
///
/// NOTE(review): this test asserts nothing, so it only fails if the search
/// panics. Decide the expected outcome and add an assertion.
#[test]
fn test_siic_simic() {
    let simic = Pattern::from("SIMIC").fuzzy(FuzzyLimits::new().edits(3));
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([simic]);

    let result = engine.search("SIIC", 0.7);
    println!("\nResult for 'SIIC' vs 'SIMIC': {result:?}");
}
/// The pattern `AMINULLAH` (per-pattern `edits(3)` limits) must match the
/// input `Aminulah`.
///
/// This test previously only printed the result and asserted nothing. The
/// expectation added here is the one `test_aminullah_aminulah` already states
/// for the identical engine configuration, input and threshold.
#[test]
fn test_aminulah_aminullah() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([Pattern::from("AMINULLAH").fuzzy(FuzzyLimits::new().edits(3))]);
    let result = engine.search("Aminulah", 0.7);
    println!("\nResult for 'Aminulah' vs 'AMINULLAH': {result:?}");
    assert!(
        !result.is_empty(),
        "AMINULLAH should match Aminulah. Results: {result:?}"
    );
}
/// Runs a search for the pattern `JAFAR` against the input `Jaar` and prints
/// the result.
///
/// NOTE(review): this test asserts nothing, so it only fails if the search
/// panics. Decide the expected outcome and add an assertion.
#[test]
fn test_jaar_jafar() {
    let jafar = Pattern::from("JAFAR").fuzzy(FuzzyLimits::new().edits(3));
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([jafar]);

    let result = engine.search("Jaar", 0.7);
    println!("\nResult for 'Jaar' vs 'JAFAR': {result:?}");
}
/// The pattern `AMINULLAH` (per-pattern `edits(3)` limits) must produce at
/// least one match for the input `Aminulah`.
#[test]
fn test_aminullah_aminulah() {
    let engine = FuzzyAhoCorasickBuilder::new()
        .case_insensitive(true)
        .build([Pattern::from("AMINULLAH").fuzzy(FuzzyLimits::new().edits(3))]);
    let result = engine.search("Aminulah", 0.7);
    println!("Result for 'Aminulah' vs 'AMINULLAH': {result:?}");
    // Use the result's own `is_empty()` (as the other tests do) rather than
    // reaching into its `inner` field, and show the result on failure.
    assert!(
        !result.is_empty(),
        "AMINULLAH should match Aminulah. Results: {result:?}"
    );
}
/// Smoke test: searching the misspelled `"helo wrld"` for `hello` and `world`
/// must return at least one match.
#[test]
fn test_basic_search() {
    let limits = FuzzyLimits::new().edits(1);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["hello", "world"]);

    let hits = engine.search("helo wrld", 0.7);
    assert!(!hits.is_empty());
}
/// `patterns()` must expose all three registered patterns, with `foo` at
/// index 0.
#[test]
fn test_patterns_accessor() {
    let engine = FuzzyAhoCorasickBuilder::new().build(["foo", "bar", "baz"]);

    let pattern_count = engine.patterns().len();
    assert_eq!(pattern_count, 3);

    let first = engine.patterns()[0].as_str();
    assert_eq!(first, "foo");
}
/// `matched_spans()` must be non-empty for `hello` in `"hello world"`, and its
/// first entry must be `(0, 5)`.
#[test]
fn test_matched_spans() {
    let engine = FuzzyAhoCorasickBuilder::new().build(["hello"]);
    let hits = engine.search("hello world", 1.0);

    let spans = hits.matched_spans();
    assert!(!spans.is_empty());
    assert_eq!(spans[0], (0, 5));
}
/// `matched_strings()` must contain both `hello` and `world` after a
/// non-overlapping search of `"hello world"`.
#[test]
fn test_matched_strings() {
    let engine = FuzzyAhoCorasickBuilder::new().build(["hello", "world"]);
    let hits = engine.search_non_overlapping("hello world", 1.0);

    let strings = hits.matched_strings();
    let has_hello = strings.contains(&"hello");
    assert!(has_hello);
    let has_world = strings.contains(&"world");
    assert!(has_world);
}
/// After `filter` with a "pattern is `hello`" predicate, every remaining match
/// must belong to the pattern `hello`.
#[test]
fn test_filter() {
    let limits = FuzzyLimits::new().edits(1);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["hello", "world"]);

    let hits = engine.search("helo wrld", 0.7);
    let only_hello = hits.filter(|hit| hit.pattern.as_str() == "hello");

    let all_hello = only_hello
        .iter()
        .all(|hit| hit.pattern.as_str() == "hello");
    assert!(all_hello);
}
/// After `retain` with a `similarity > 0.8` predicate, every remaining match
/// must have a similarity above `0.8`.
#[test]
fn test_retain() {
    let limits = FuzzyLimits::new().edits(1);
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(limits)
        .build(["hello", "world"]);

    let mut hits = engine.search("helo wrld", 0.7);
    hits.retain(|hit| hit.similarity > 0.8);

    let all_above = hits.iter().all(|hit| hit.similarity > 0.8);
    assert!(all_above);
}
/// `segment_iter` over `"hello world"` with the single pattern `hello` must
/// yield exactly two segments.
#[test]
fn test_segment_iter() {
    let engine = FuzzyAhoCorasickBuilder::new().build(["hello"]);
    let segment_count = engine.segment_iter("hello world", 1.0).count();
    assert_eq!(segment_count, 2);
}
/// Smoke test: `search_greedy` must return at least one match when one
/// pattern (`hello`) is a prefix of another (`hello world`).
#[test]
fn test_search_greedy() {
    let patterns = ["hello", "hello world"];
    let engine = FuzzyAhoCorasickBuilder::new().build(patterns);

    let hits = engine.search_greedy("hello world", 1.0);
    assert!(!hits.is_empty());
}
/// Smoke test: `search_coverage_weighted` must return at least one match for
/// `hello` and `world` in `"hello world"`.
#[test]
fn test_search_coverage_weighted() {
    let patterns = ["hello", "world"];
    let engine = FuzzyAhoCorasickBuilder::new().build(patterns);

    let hits = engine.search_coverage_weighted("hello world", 1.0);
    assert!(!hits.is_empty());
}
/// Two patterns (`hello`, `helo`) share `custom_unique_id(1)` and `world`
/// uses id 2. `search_non_overlapping_unique` must return at most one match
/// for id 1 and at least one match for id 2.
#[test]
fn test_custom_unique_id() {
    let patterns = [
        Pattern::from("hello").custom_unique_id(1),
        Pattern::from("helo").custom_unique_id(1),
        Pattern::from("world").custom_unique_id(2),
    ];
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .build(patterns);

    let hits = engine.search_non_overlapping_unique("hello world", 0.8);

    let id1_matches: Vec<_> = hits
        .iter()
        .filter(|hit| hit.pattern.custom_unique_id == Some(1))
        .collect();
    assert!(
        id1_matches.len() <= 1,
        "Should deduplicate patterns with same custom_unique_id, got {id1_matches:?}"
    );

    let world_found = hits
        .iter()
        .any(|hit| hit.pattern.custom_unique_id == Some(2));
    assert!(world_found, "Should find 'world' match");
}
/// Patterns built without `custom_unique_id` must both be reported by
/// `search_non_overlapping_unique` on `"hello world"`.
#[test]
fn test_custom_unique_id_vs_automatic() {
    let patterns = [Pattern::from("hello"), Pattern::from("world")];
    let engine = FuzzyAhoCorasickBuilder::new()
        .fuzzy(FuzzyLimits::new().edits(1))
        .build(patterns);

    let hits = engine.search_non_overlapping_unique("hello world", 0.8);

    let hello_found = hits.iter().any(|hit| hit.pattern.as_str() == "hello");
    assert!(hello_found, "Should find hello");

    let world_found = hits.iter().any(|hit| hit.pattern.as_str() == "world");
    assert!(world_found, "Should find world");
}