1#[allow(unused_imports)]
4use super::types::*;
5#[allow(unused_imports)]
6use super::*;
7
/// Coreference resolver that detects mentions in a text and links them into
/// clusters (see `resolve`).
pub struct MentionRankingCoref {
    /// Settings read throughout the resolver (language, thresholds, clustering
    /// strategy and feature toggles).
    config: MentionRankingConfig,
    /// Optional entity extractor. When present, `detect_mentions` turns every
    /// entity it returns into a proper-noun mention.
    ner: Option<Box<dyn Model>>,
    /// Optional salience table. Keys are lowercased by `with_salience` and
    /// looked up case-insensitively by `get_salience`.
    salience_scores: Option<HashMap<String, f64>>,
}
18
19impl std::fmt::Debug for MentionRankingCoref {
20 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21 f.debug_struct("MentionRankingCoref")
22 .field("config", &self.config)
23 .field("ner", &self.ner.as_ref().map(|_| "Some(dyn Model)"))
24 .field(
25 "salience_scores",
26 &self
27 .salience_scores
28 .as_ref()
29 .map(|s| format!("{} entities", s.len())),
30 )
31 .finish()
32 }
33}
34
35impl MentionRankingCoref {
36 #[must_use]
38 pub fn new() -> Self {
39 Self::with_config(MentionRankingConfig::default())
40 }
41
42 #[must_use]
44 pub fn with_config(config: MentionRankingConfig) -> Self {
45 Self {
46 config,
47 ner: None,
48 salience_scores: None,
49 }
50 }
51
52 pub fn with_ner(mut self, ner: Box<dyn Model>) -> Self {
54 self.ner = Some(ner);
55 self
56 }
57
58 #[must_use]
83 pub fn with_salience(mut self, scores: HashMap<String, f64>) -> Self {
84 let normalized: HashMap<String, f64> = scores
86 .into_iter()
87 .map(|(k, v)| (k.to_lowercase(), v))
88 .collect();
89 self.salience_scores = Some(normalized);
90 self
91 }
92
93 fn get_salience(&self, text: &str) -> f64 {
95 self.salience_scores
96 .as_ref()
97 .and_then(|s| s.get(&text.to_lowercase()).copied())
98 .unwrap_or(0.0)
99 }
100
101 fn is_be_phrase_link(&self, text: &str, m1: &RankedMention, m2: &RankedMention) -> bool {
116 let (earlier, later) = if m1.end <= m2.start {
118 (m1, m2)
119 } else if m2.end <= m1.start {
120 (m2, m1)
121 } else {
122 return false; };
124
125 let text_chars: Vec<char> = text.chars().collect();
127 if later.start > text_chars.len() || earlier.end > text_chars.len() {
128 return false;
129 }
130
131 let between: String = text_chars
132 .get(earlier.end..later.start)
133 .unwrap_or(&[])
134 .iter()
135 .collect();
136 let between_lower = between.to_lowercase();
137
138 static BE_PATTERNS: &[&str] = &[
140 " is ",
141 " are ",
142 " was ",
143 " were ",
144 " be ",
145 " being ",
146 " been ",
147 " refers to ",
148 " means ",
149 " indicates ",
150 " represents ",
151 " also known as ",
152 " aka ",
153 " i.e. ",
154 " ie ",
155 " namely ",
156 " called ",
157 " named ",
158 " known as ",
159 " defined as ",
160 ];
161
162 BE_PATTERNS.iter().any(|p| between_lower.contains(p))
163 }
164
165 fn is_acronym_match(&self, m1: &RankedMention, m2: &RankedMention) -> bool {
179 anno_core::coalesce::similarity::is_acronym_match(&m1.text, &m2.text)
180 }
181
182 fn is_pleonastic_it(&self, text_lower: &str, it_byte_pos: usize) -> bool {
194 let after_it = &text_lower[it_byte_pos + 2..]; let after_it_trimmed = after_it.trim_start();
197
198 const WEATHER_VERBS: &[&str] = &[
200 "rain",
201 "rains",
202 "rained",
203 "raining",
204 "snow",
205 "snows",
206 "snowed",
207 "snowing",
208 "hail",
209 "hails",
210 "hailed",
211 "hailing",
212 "thunder",
213 "thunders",
214 "thundered",
215 "thundering",
216 ];
217
218 const WEATHER_ADJS: &[&str] = &[
220 "sunny", "cloudy", "foggy", "windy", "rainy", "snowy", "cold", "hot", "warm", "cool",
221 "humid", "dry", "freezing", "chilly", "muggy", "overcast",
222 ];
223
224 const MODAL_ADJS: &[&str] = &[
226 "important",
227 "necessary",
228 "possible",
229 "impossible",
230 "likely",
231 "unlikely",
232 "clear",
233 "obvious",
234 "evident",
235 "apparent",
236 "true",
237 "false",
238 "certain",
239 "uncertain",
240 "doubtful",
241 "essential",
242 "vital",
243 "crucial",
244 "critical",
245 "imperative",
246 "fortunate",
247 "unfortunate",
248 "surprising",
249 "unsurprising",
250 "strange",
251 "odd",
252 "weird",
253 "remarkable",
254 "noteworthy",
255 "known",
256 "unknown",
257 "believed",
258 "thought",
259 "said",
260 "reported",
261 "estimated",
262 "assumed",
263 "expected",
264 "hoped",
265 "feared",
266 ];
267
268 const COGNITIVE_VERBS: &[&str] = &[
270 "seems",
271 "seem",
272 "seemed",
273 "appears",
274 "appear",
275 "appeared",
276 "turns out",
277 "turned out",
278 "happens",
279 "happen",
280 "happened",
281 "follows",
282 "follow",
283 "followed",
284 "matters",
285 "matter",
286 "mattered",
287 "helps",
288 "help",
289 "helped",
290 "hurts",
291 "hurt",
292 ];
293
294 for verb in WEATHER_VERBS {
296 if let Some(after_verb) = after_it_trimmed.strip_prefix(verb) {
297 if after_verb.is_empty() || after_verb.starts_with(|c: char| !c.is_alphanumeric()) {
298 return true;
299 }
300 }
301 }
302
303 for verb in COGNITIVE_VERBS {
305 if let Some(after_verb) = after_it_trimmed.strip_prefix(verb) {
306 if after_verb.is_empty() || after_verb.starts_with(|c: char| !c.is_alphanumeric()) {
307 return true;
308 }
309 }
310 }
311
312 let copula_patterns = ["is ", "was ", "'s ", "has been ", "will be ", "would be "];
315 for copula in copula_patterns {
316 if let Some(after_copula) = after_it_trimmed.strip_prefix(copula) {
317 let after_copula = after_copula.trim_start();
318
319 for verb in WEATHER_VERBS {
321 if let Some(after_verb) = after_copula.strip_prefix(verb) {
322 if after_verb.is_empty()
323 || after_verb.starts_with(|c: char| !c.is_alphanumeric())
324 {
325 return true;
326 }
327 }
328 }
329
330 for adj in WEATHER_ADJS {
332 if let Some(after_adj) = after_copula.strip_prefix(adj) {
333 if after_adj.is_empty()
334 || after_adj.starts_with(|c: char| !c.is_alphanumeric())
335 {
336 return true;
337 }
338 }
339 }
340
341 for adj in MODAL_ADJS {
343 if let Some(after_adj) = after_copula.strip_prefix(adj) {
344 if after_adj.is_empty()
346 || after_adj.starts_with(" that")
347 || after_adj.starts_with(" to")
348 || after_adj.starts_with(|c: char| !c.is_alphanumeric())
349 {
350 return true;
351 }
352 }
353 }
354
355 let time_words = ["noon", "midnight", "morning", "evening", "night", "time"];
358 for tw in time_words {
359 if after_copula.starts_with(tw) {
360 return true;
361 }
362 }
363
364 if after_copula.starts_with(|c: char| c.is_ascii_digit()) {
366 return true;
367 }
368 }
369 }
370
371 false
372 }
373
374 fn should_filter_by_context(&self, text: &str, m1: &RankedMention, m2: &RankedMention) -> bool {
382 let text_chars: Vec<char> = text.chars().collect();
383 let char_count = text_chars.len();
384
385 let context_window = 20;
387
388 let m1_context_start = m1.start.saturating_sub(context_window);
389 let m1_context_end = (m1.end + context_window).min(char_count);
390 let m1_context: String = text_chars
391 .get(m1_context_start..m1_context_end)
392 .unwrap_or(&[])
393 .iter()
394 .collect();
395
396 let m2_context_start = m2.start.saturating_sub(context_window);
397 let m2_context_end = (m2.end + context_window).min(char_count);
398 let m2_context: String = text_chars
399 .get(m2_context_start..m2_context_end)
400 .unwrap_or(&[])
401 .iter()
402 .collect();
403
404 let date1 = Self::extract_date(&m1_context);
406 let date2 = Self::extract_date(&m2_context);
407 if let (Some(d1), Some(d2)) = (&date1, &date2) {
408 if d1 != d2 {
409 return true; }
411 }
412
413 let m1_negated = Self::has_negation_context(&m1_context);
416 let m2_negated = Self::has_negation_context(&m2_context);
417 if m1_negated != m2_negated {
418 return true;
419 }
420
421 false
422 }
423
424 fn extract_date(context: &str) -> Option<String> {
426 let date_patterns = [
428 r"\d{4}-\d{2}-\d{2}", r"\d{2}/\d{2}/\d{4}", r"\d{1,2}/\d{1,2}/\d{2,4}", ];
432
433 for pattern in &date_patterns {
434 if let Ok(re) = regex::Regex::new(pattern) {
435 if let Some(m) = re.find(context) {
436 return Some(m.as_str().to_string());
437 }
438 }
439 }
440 None
441 }
442
443 fn has_negation_context(context: &str) -> bool {
445 let lower = context.to_lowercase();
446 static NEGATION_MARKERS: &[&str] = &[
447 "not ",
448 "no ",
449 "never ",
450 "without ",
451 "denies ",
452 "denied ",
453 "negative for ",
454 "neg for ",
455 "ruled out ",
456 "r/o ",
457 ];
458 NEGATION_MARKERS.iter().any(|m| lower.contains(m))
459 }
460
461 fn are_synonyms(&self, m1: &RankedMention, m2: &RankedMention) -> bool {
489 let t1 = m1.text.to_lowercase();
490 let t2 = m2.text.to_lowercase();
491
492 if t1 == t2 {
493 return true;
494 }
495
496 let similarity = anno_core::coalesce::similarity::multilingual_similarity(&t1, &t2);
500 similarity > 0.8
501 }
502
503 pub fn resolve(&self, text: &str) -> Result<Vec<MentionCluster>> {
505 if text.trim().is_empty() {
506 return Ok(vec![]);
507 }
508
509 let mut mentions = self.detect_mentions(text)?;
511
512 if mentions.is_empty() {
513 return Ok(vec![]);
514 }
515
516 mentions.sort_by_key(|m| (m.start, m.end));
518
519 for mention in &mut mentions {
521 self.extract_features(mention);
522 }
523
524 let clusters = self.link_mentions(&mentions, text);
526
527 Ok(clusters)
528 }
529
530 fn get_pronoun_patterns(&self) -> Vec<(&'static str, Gender, Number)> {
535 let lang_code = self
536 .config
537 .language
538 .split('-')
539 .next()
540 .unwrap_or(&self.config.language)
541 .to_lowercase();
542
543 match lang_code.as_str() {
544 "es" => vec![
545 ("él", Gender::Masculine, Number::Singular),
547 ("ella", Gender::Feminine, Number::Singular),
548 ("ellos", Gender::Masculine, Number::Plural),
549 ("ellas", Gender::Feminine, Number::Plural),
550 ("lo", Gender::Masculine, Number::Singular),
551 ("la", Gender::Feminine, Number::Singular),
552 ("los", Gender::Masculine, Number::Plural),
553 ("las", Gender::Feminine, Number::Plural),
554 ("le", Gender::Unknown, Number::Singular), ("les", Gender::Unknown, Number::Plural),
556 ("su", Gender::Unknown, Number::Unknown),
557 ("sus", Gender::Unknown, Number::Plural),
558 ("suyo", Gender::Masculine, Number::Singular),
559 ("suya", Gender::Feminine, Number::Singular),
560 ("suyos", Gender::Masculine, Number::Plural),
561 ("suyas", Gender::Feminine, Number::Plural),
562 ("se", Gender::Unknown, Number::Unknown), ("nosotros", Gender::Masculine, Number::Plural),
564 ("nosotras", Gender::Feminine, Number::Plural),
565 ("vosotros", Gender::Masculine, Number::Plural),
566 ("vosotras", Gender::Feminine, Number::Plural),
567 ("usted", Gender::Unknown, Number::Singular),
568 ("ustedes", Gender::Unknown, Number::Plural),
569 ("elle", Gender::Unknown, Number::Singular), ("elles", Gender::Unknown, Number::Plural), ],
575 "fr" => vec![
576 ("il", Gender::Masculine, Number::Singular),
578 ("elle", Gender::Feminine, Number::Singular),
579 ("ils", Gender::Masculine, Number::Plural),
580 ("elles", Gender::Feminine, Number::Plural),
581 ("le", Gender::Masculine, Number::Singular),
582 ("la", Gender::Feminine, Number::Singular),
583 ("les", Gender::Unknown, Number::Plural),
584 ("lui", Gender::Unknown, Number::Singular),
585 ("leur", Gender::Unknown, Number::Plural),
586 ("son", Gender::Masculine, Number::Singular),
587 ("sa", Gender::Feminine, Number::Singular),
588 ("ses", Gender::Unknown, Number::Plural),
589 ("se", Gender::Unknown, Number::Unknown), ("nous", Gender::Unknown, Number::Plural),
591 ("vous", Gender::Unknown, Number::Unknown),
592 ("iel", Gender::Unknown, Number::Singular), ("iels", Gender::Unknown, Number::Plural), ],
598 "de" => vec![
599 ("er", Gender::Masculine, Number::Singular),
601 ("sie", Gender::Feminine, Number::Singular),
602 ("es", Gender::Neutral, Number::Singular),
603 ("sie", Gender::Unknown, Number::Plural), ("ihn", Gender::Masculine, Number::Singular),
605 ("ihr", Gender::Feminine, Number::Singular),
606 ("ihm", Gender::Masculine, Number::Singular),
607 ("ihnen", Gender::Unknown, Number::Plural),
608 ("sein", Gender::Masculine, Number::Singular),
609 ("seine", Gender::Feminine, Number::Singular),
610 ("sein", Gender::Neutral, Number::Singular),
611 ("ihre", Gender::Feminine, Number::Singular),
612 ("ihr", Gender::Unknown, Number::Plural),
613 ("sich", Gender::Unknown, Number::Unknown), ("wir", Gender::Unknown, Number::Plural),
615 ("ihr", Gender::Unknown, Number::Plural), ("sie", Gender::Unknown, Number::Plural), ("sier", Gender::Unknown, Number::Singular), ("xier", Gender::Unknown, Number::Singular), ("dier", Gender::Unknown, Number::Singular), ],
624 "ar" => vec![
625 ("هو", Gender::Masculine, Number::Singular), ("هي", Gender::Feminine, Number::Singular), ("هم", Gender::Masculine, Number::Plural), ("هن", Gender::Feminine, Number::Plural), ("هما", Gender::Unknown, Number::Plural), ],
632 "ru" => vec![
633 ("он", Gender::Masculine, Number::Singular),
635 ("она", Gender::Feminine, Number::Singular),
636 ("оно", Gender::Neutral, Number::Singular),
637 ("они", Gender::Unknown, Number::Plural),
638 ("его", Gender::Masculine, Number::Singular),
639 ("её", Gender::Feminine, Number::Singular),
640 ("их", Gender::Unknown, Number::Plural),
641 ("себя", Gender::Unknown, Number::Unknown), ("мы", Gender::Unknown, Number::Plural),
643 ("вы", Gender::Unknown, Number::Unknown),
644 ],
645 "zh" => vec![
646 ("他", Gender::Masculine, Number::Singular), ("她", Gender::Feminine, Number::Singular), ("它", Gender::Neutral, Number::Singular), ("牠", Gender::Neutral, Number::Singular), ("祂", Gender::Neutral, Number::Singular), ("怹", Gender::Unknown, Number::Singular), ("其", Gender::Unknown, Number::Singular), ("他们", Gender::Masculine, Number::Plural), ("她们", Gender::Feminine, Number::Plural), ("它们", Gender::Neutral, Number::Plural), ],
665 "ja" => vec![
666 ("彼", Gender::Masculine, Number::Singular), ("彼女", Gender::Feminine, Number::Singular), ("彼ら", Gender::Unknown, Number::Plural), ("その人", Gender::Unknown, Number::Singular), ("あの人", Gender::Unknown, Number::Singular), ],
677 "ko" => vec![
678 ("그", Gender::Masculine, Number::Singular), ("그녀", Gender::Feminine, Number::Singular), ("그들", Gender::Unknown, Number::Plural), ("그 사람", Gender::Unknown, Number::Singular), ("그분", Gender::Unknown, Number::Singular), ],
687 _ => {
688 vec![
690 ("he", Gender::Masculine, Number::Singular),
692 ("she", Gender::Feminine, Number::Singular),
693 ("it", Gender::Neutral, Number::Singular),
694 ("they", Gender::Unknown, Number::Unknown), ("him", Gender::Masculine, Number::Singular),
696 ("her", Gender::Feminine, Number::Singular),
697 ("them", Gender::Unknown, Number::Unknown), ("his", Gender::Masculine, Number::Singular),
699 ("hers", Gender::Feminine, Number::Singular),
700 ("its", Gender::Neutral, Number::Singular),
701 ("their", Gender::Unknown, Number::Unknown), ("theirs", Gender::Unknown, Number::Unknown),
703 ("themself", Gender::Unknown, Number::Singular), ("themselves", Gender::Unknown, Number::Plural), ("himself", Gender::Masculine, Number::Singular),
707 ("herself", Gender::Feminine, Number::Singular),
708 ("itself", Gender::Neutral, Number::Singular),
709 ("i", Gender::Unknown, Number::Singular),
711 ("me", Gender::Unknown, Number::Singular),
712 ("my", Gender::Unknown, Number::Singular),
713 ("mine", Gender::Unknown, Number::Singular),
714 ("myself", Gender::Unknown, Number::Singular),
715 ("we", Gender::Unknown, Number::Plural),
716 ("us", Gender::Unknown, Number::Plural),
717 ("our", Gender::Unknown, Number::Plural),
718 ("ours", Gender::Unknown, Number::Plural),
719 ("ourselves", Gender::Unknown, Number::Plural),
720 ("you", Gender::Unknown, Number::Unknown), ("your", Gender::Unknown, Number::Unknown),
722 ("yours", Gender::Unknown, Number::Unknown),
723 ("yourself", Gender::Unknown, Number::Singular),
724 ("yourselves", Gender::Unknown, Number::Plural),
725 ("ze", Gender::Unknown, Number::Singular),
727 ("hir", Gender::Unknown, Number::Singular),
728 ("hirs", Gender::Unknown, Number::Singular),
729 ("hirself", Gender::Unknown, Number::Singular),
730 ("xe", Gender::Unknown, Number::Singular),
732 ("xem", Gender::Unknown, Number::Singular),
733 ("xyr", Gender::Unknown, Number::Singular),
734 ("xyrs", Gender::Unknown, Number::Singular),
735 ("xemself", Gender::Unknown, Number::Singular),
736 ("ey", Gender::Unknown, Number::Singular), ("em", Gender::Unknown, Number::Singular),
739 ("eir", Gender::Unknown, Number::Singular),
740 ("eirs", Gender::Unknown, Number::Singular),
741 ("emself", Gender::Unknown, Number::Singular),
742 ("fae", Gender::Unknown, Number::Singular),
744 ("faer", Gender::Unknown, Number::Singular),
745 ("faers", Gender::Unknown, Number::Singular),
746 ("faerself", Gender::Unknown, Number::Singular),
747 ("this", Gender::Unknown, Number::Singular),
749 ("that", Gender::Unknown, Number::Singular),
750 ("these", Gender::Unknown, Number::Plural),
751 ("those", Gender::Unknown, Number::Plural),
752 ("someone", Gender::Unknown, Number::Singular),
754 ("somebody", Gender::Unknown, Number::Singular),
755 ("anyone", Gender::Unknown, Number::Singular),
756 ("anybody", Gender::Unknown, Number::Singular),
757 ("everyone", Gender::Unknown, Number::Singular), ("everybody", Gender::Unknown, Number::Singular),
759 ("no one", Gender::Unknown, Number::Singular),
760 ("nobody", Gender::Unknown, Number::Singular),
761 ("one", Gender::Unknown, Number::Singular),
763 ("oneself", Gender::Unknown, Number::Singular),
764 ("who", Gender::Unknown, Number::Unknown),
766 ("whom", Gender::Unknown, Number::Unknown),
767 ("whose", Gender::Unknown, Number::Unknown),
768 ("which", Gender::Unknown, Number::Unknown),
769 ("each other", Gender::Unknown, Number::Plural),
771 ("one another", Gender::Unknown, Number::Plural),
772 ]
773 }
774 }
775 }
776
777 fn detect_mentions(&self, text: &str) -> Result<Vec<RankedMention>> {
779 let mut mentions = Vec::new();
780
781 if let Some(ref ner) = self.ner {
783 let entities = ner.extract_entities(text, None)?;
784 for entity in entities {
785 mentions.push(RankedMention {
786 start: entity.start,
787 end: entity.end,
788 text: entity.text.clone(),
789 mention_type: MentionType::Proper,
790 gender: None,
791 number: None,
792 head: self.get_head(&entity.text),
793 });
794 }
795 }
796
797 let pronoun_patterns = self.get_pronoun_patterns();
814
815 let text_lower = text.to_lowercase();
924 let text_chars: Vec<char> = text.chars().collect();
925 for (pronoun, gender, number) in pronoun_patterns {
926 let mut search_start_byte = 0;
927 while let Some(pos) = text_lower[search_start_byte..].find(pronoun) {
928 let abs_byte_pos = search_start_byte + pos;
929 let end_byte_pos = abs_byte_pos + pronoun.len();
930
931 let char_pos = text[..abs_byte_pos].chars().count();
933 let end_char_pos = char_pos + pronoun.chars().count();
934
935 let is_word_start = char_pos == 0
937 || match text_chars.get(char_pos.saturating_sub(1)) {
938 None => true,
939 Some(c) => !c.is_alphanumeric(),
940 };
941 let is_word_end = end_char_pos >= text_chars.len()
942 || match text_chars.get(end_char_pos) {
943 None => true,
944 Some(c) => !c.is_alphanumeric(),
945 };
946
947 if is_word_start && is_word_end {
948 if pronoun == "it" && self.is_pleonastic_it(&text_lower, abs_byte_pos) {
951 search_start_byte = end_byte_pos;
952 continue;
953 }
954
955 let char_start = char_pos;
957 let char_end = end_char_pos;
958
959 mentions.push(RankedMention {
960 start: char_start,
961 end: char_end,
962 text: text[abs_byte_pos..end_byte_pos].to_string(),
963 mention_type: MentionType::Pronominal,
964 gender: Some(gender),
965 number: Some(number),
966 head: pronoun.to_string(),
967 });
968 }
969
970 search_start_byte = end_byte_pos;
971 }
972 }
973
974 let words: Vec<_> = text.split_whitespace().collect();
976 let mut search_byte_pos = 0; for (i, word) in words.iter().enumerate() {
979 let at_sentence_start = i == 0
981 || match text[..text.find(word).unwrap_or(0)].chars().last() {
982 None => true,
983 Some(c) => c == '.' || c == '!' || c == '?',
984 };
985
986 if !at_sentence_start
987 && word.chars().next().is_some_and(|c| c.is_uppercase())
988 && word.chars().count() > 1
989 {
991 if let Some(rel_byte_pos) = text[search_byte_pos..].find(word) {
993 let abs_byte_pos = search_byte_pos + rel_byte_pos;
994 let char_start = text[..abs_byte_pos].chars().count();
996 let char_end = char_start + word.chars().count();
997
998 mentions.push(RankedMention {
999 start: char_start,
1000 end: char_end,
1001 text: word.to_string(),
1002 mention_type: MentionType::Proper,
1003 gender: None,
1004 number: Some(Number::Singular),
1005 head: word.to_string(),
1006 });
1007 }
1008 }
1009
1010 search_byte_pos += word.len() + 1; }
1012
1013 if self.config.enable_nominal_adjective_detection {
1022 const NOMINALIZED_ADJECTIVES: &[&str] = &[
1025 "poor",
1027 "rich",
1028 "wealthy",
1029 "homeless",
1030 "unemployed",
1031 "employed",
1032 "young",
1034 "old",
1035 "elderly",
1036 "aged",
1037 "sick",
1039 "ill",
1040 "healthy",
1041 "wounded",
1042 "injured",
1043 "disabled",
1044 "blind",
1045 "deaf",
1046 "dead",
1048 "living",
1049 "deceased",
1050 "accused",
1052 "condemned",
1053 "convicted",
1054 "guilty",
1055 "innocent",
1056 "insured",
1057 "uninsured",
1058 "gifted",
1060 "talented",
1061 "educated",
1062 "literate",
1063 "illiterate",
1064 "powerful",
1066 "powerless",
1067 "oppressed",
1068 "weak",
1069 "famous",
1070 "infamous",
1071 "righteous",
1073 "wicked",
1074 "blessed",
1075 "damned",
1076 "faithful",
1077 "hungry",
1079 "needy",
1080 "privileged",
1081 "underprivileged",
1082 "disadvantaged",
1083 "marginalized",
1084 ];
1085
1086 let (determiners, adjectives): (Vec<&str>, Vec<&str>) =
1092 match self.config.language.as_str() {
1093 "de" => {
1094 let dets = vec!["die ", "diese ", "jene "];
1097 let adjs = vec![
1098 "armen",
1099 "reichen",
1100 "alten",
1101 "jungen",
1102 "kranken",
1103 "gesunden",
1104 "toten",
1105 "lebenden",
1106 "blinden",
1107 "tauben",
1108 "arbeitslosen",
1109 "obdachlosen",
1110 "mächtigen",
1111 "schwachen",
1112 "unterdrückten",
1113 ];
1114 (dets, adjs)
1115 }
1116 "fr" => {
1117 let dets = vec!["les ", "ces "];
1119 let adjs = vec![
1120 "pauvres",
1121 "riches",
1122 "vieux",
1123 "jeunes",
1124 "malades",
1125 "morts",
1126 "vivants",
1127 "aveugles",
1128 "sourds",
1129 "faibles",
1130 "puissants",
1131 "opprimés",
1132 "affamés",
1133 "marginalisés",
1134 ];
1135 (dets, adjs)
1136 }
1137 "es" => {
1138 let dets = vec!["los ", "las ", "estos ", "estas "];
1141 let adjs = vec![
1142 "pobres",
1143 "ricos",
1144 "viejos",
1145 "jóvenes",
1146 "enfermos",
1147 "muertos",
1148 "vivos",
1149 "ciegos",
1150 "sordos",
1151 "débiles",
1152 "poderosos",
1153 "oprimidos",
1154 "hambrientos",
1155 "marginados",
1156 ];
1157 (dets, adjs)
1158 }
1159 _ => {
1160 let dets = vec!["the ", "these ", "those "];
1162 (dets, NOMINALIZED_ADJECTIVES.to_vec())
1163 }
1164 };
1165
1166 for det in &determiners {
1167 for adj in &adjectives {
1168 let pattern = format!("{}{}", det, adj);
1169 let pattern_lower = pattern.to_lowercase();
1170
1171 let mut search_start = 0;
1172 while let Some(rel_pos) = text_lower[search_start..].find(&pattern_lower) {
1173 let abs_byte_pos = search_start + rel_pos;
1174 let end_byte_pos = abs_byte_pos + pattern.len();
1175
1176 let following_text = &text_lower[end_byte_pos..];
1183 let next_word: String = following_text
1184 .chars()
1185 .skip_while(|c| c.is_whitespace())
1186 .take_while(|c| c.is_alphabetic())
1187 .collect();
1188
1189 let valid_followers: Vec<&str> = match self.config.language.as_str() {
1191 "de" => vec![
1192 "sind", "waren", "haben", "hatten", "werden", "wurden", "brauchen",
1194 "müssen", "können", "sollen", "wollen", "und", "oder", "aber", "die", "welche",
1196 ],
1197 "fr" => vec![
1198 "sont",
1200 "étaient",
1201 "ont",
1202 "avaient",
1203 "seront",
1204 "peuvent",
1205 "doivent",
1206 "veulent",
1207 "méritent",
1208 "et",
1210 "ou",
1211 "mais",
1212 "qui",
1213 "que",
1214 ],
1215 "es" => vec![
1216 "son",
1218 "eran",
1219 "tienen",
1220 "tenían",
1221 "serán",
1222 "pueden",
1223 "deben",
1224 "quieren",
1225 "merecen",
1226 "necesitan",
1227 "sufren",
1228 "luchan",
1229 "reciben",
1230 "buscan",
1231 "y",
1233 "o",
1234 "pero",
1235 "que",
1236 "quienes",
1237 ],
1238 _ => vec![
1239 "are", "were", "is", "was", "be", "been", "being", "have", "has",
1241 "had", "having", "do", "does", "did", "can", "could", "will",
1242 "would", "shall", "should", "may", "might", "must", "need", "want",
1243 "get", "got", "struggle", "suffer", "deserve", "receive", "face",
1244 "lack", "seek", "and", "or", "but", "who", "whom", "whose", "that",
1245 "which", "in", "of", "from", "with", "without", "among",
1246 ],
1247 };
1248
1249 let is_valid_nominal =
1251 next_word.is_empty() || valid_followers.contains(&next_word.as_str());
1252
1253 if is_valid_nominal {
1254 let char_start = text[..abs_byte_pos].chars().count();
1256 let char_end = char_start + pattern.chars().count();
1257
1258 mentions.push(RankedMention {
1259 start: char_start,
1260 end: char_end,
1261 text: text[abs_byte_pos..end_byte_pos].to_string(),
1262 mention_type: MentionType::Nominal,
1263 gender: Some(Gender::Unknown), number: Some(Number::Plural), head: adj.to_string(), });
1267 }
1268
1269 search_start = end_byte_pos;
1270 }
1271 }
1272 }
1273 }
1274
1275 mentions.sort_by_key(|m| (m.start, std::cmp::Reverse(m.end)));
1277 let mut deduped = Vec::new();
1278 let mut covered_end = 0;
1279
1280 for mention in mentions {
1281 if mention.start >= covered_end {
1282 covered_end = mention.end;
1283 deduped.push(mention);
1284 }
1285 }
1286
1287 Ok(deduped)
1288 }
1289
1290 fn extract_features(&self, mention: &mut RankedMention) {
1292 if mention.gender.is_none() && mention.mention_type == MentionType::Proper {
1294 mention.gender = self.guess_gender(&mention.text);
1295 }
1296
1297 if mention.number.is_none() {
1299 mention.number = Some(Number::Singular); }
1301 }
1302
1303 fn guess_gender(&self, text: &str) -> Option<Gender> {
1305 let masc_names = [
1306 "john", "james", "michael", "david", "robert", "william", "richard",
1307 ];
1308 let fem_names = [
1309 "mary",
1310 "jennifer",
1311 "lisa",
1312 "sarah",
1313 "jessica",
1314 "emily",
1315 "elizabeth",
1316 ];
1317
1318 let first_word = text.split_whitespace().next()?.to_lowercase();
1319
1320 if masc_names.contains(&first_word.as_str()) {
1321 Some(Gender::Masculine)
1322 } else if fem_names.contains(&first_word.as_str()) {
1323 Some(Gender::Feminine)
1324 } else {
1325 None
1326 }
1327 }
1328
1329 fn get_head(&self, text: &str) -> String {
1331 text.split_whitespace().last().unwrap_or(text).to_string()
1333 }
1334
1335 fn link_mentions(&self, mentions: &[RankedMention], text: &str) -> Vec<MentionCluster> {
1342 match self.config.clustering_strategy {
1343 ClusteringStrategy::LeftToRight => self.link_mentions_left_to_right(mentions, text),
1344 ClusteringStrategy::EasyFirst => self.link_mentions_easy_first(mentions, text),
1345 }
1346 }
1347
1348 fn link_mentions_left_to_right(
1350 &self,
1351 mentions: &[RankedMention],
1352 text: &str,
1353 ) -> Vec<MentionCluster> {
1354 let mut mention_to_cluster: HashMap<usize, usize> = HashMap::new();
1355 let mut clusters: Vec<Vec<usize>> = Vec::new();
1356
1357 for (i, mention) in mentions.iter().enumerate() {
1358 let mut best_antecedent: Option<usize> = None;
1359 let mut best_score = self.config.link_threshold;
1360
1361 let max_antecedents = self.config.max_antecedents_for_type(mention.mention_type);
1363
1364 for j in (0..i).rev().take(max_antecedents) {
1366 let antecedent = &mentions[j];
1367
1368 let distance = mention.start.saturating_sub(antecedent.end);
1370 if distance > self.config.max_distance {
1371 break;
1372 }
1373
1374 let score = self.score_pair(mention, antecedent, distance, Some(text));
1375 if score > best_score {
1376 best_score = score;
1377 best_antecedent = Some(j);
1378 }
1379 }
1380
1381 if let Some(ant_idx) = best_antecedent {
1382 if let Some(&cluster_id) = mention_to_cluster.get(&ant_idx) {
1384 clusters[cluster_id].push(i);
1385 mention_to_cluster.insert(i, cluster_id);
1386 } else {
1387 let cluster_id = clusters.len();
1389 clusters.push(vec![ant_idx, i]);
1390 mention_to_cluster.insert(ant_idx, cluster_id);
1391 mention_to_cluster.insert(i, cluster_id);
1392 }
1393 }
1394 }
1395
1396 let clusters = if self.config.enable_global_proper_coref {
1398 self.apply_global_proper_coref(mentions, clusters)
1399 } else {
1400 clusters
1401 };
1402
1403 clusters
1405 .into_iter()
1406 .enumerate()
1407 .map(|(id, indices)| MentionCluster {
1408 id,
1409 mentions: indices.into_iter().map(|i| mentions[i].clone()).collect(),
1410 })
1411 .collect()
1412 }
1413
1414 fn link_mentions_easy_first(
1419 &self,
1420 mentions: &[RankedMention],
1421 text: &str,
1422 ) -> Vec<MentionCluster> {
1423 let mut scored_pairs: Vec<ScoredPair> = Vec::new();
1425 let mut non_coref_pairs: HashSet<(usize, usize)> = HashSet::new();
1426
1427 for (i, mention) in mentions.iter().enumerate() {
1428 let max_antecedents = self.config.max_antecedents_for_type(mention.mention_type);
1429
1430 for j in (0..i).rev().take(max_antecedents) {
1431 let antecedent = &mentions[j];
1432 let distance = mention.start.saturating_sub(antecedent.end);
1433 if distance > self.config.max_distance {
1434 break;
1435 }
1436
1437 let score = self.score_pair(mention, antecedent, distance, Some(text));
1438
1439 if self.config.use_non_coref_constraints && score < self.config.non_coref_threshold
1441 {
1442 non_coref_pairs.insert((j.min(i), j.max(i)));
1445 }
1446
1447 if score > self.config.link_threshold {
1448 scored_pairs.push(ScoredPair {
1449 mention_idx: i,
1450 antecedent_idx: j,
1451 score,
1452 });
1453 }
1454 }
1455 }
1456
1457 scored_pairs.sort_by(|a, b| {
1459 b.score
1460 .partial_cmp(&a.score)
1461 .unwrap_or(std::cmp::Ordering::Equal)
1462 });
1463
1464 let mut mention_to_cluster: HashMap<usize, usize> = HashMap::new();
1466 let mut clusters: Vec<Vec<usize>> = Vec::new();
1467 let mut processed: HashSet<usize> = HashSet::new();
1468
1469 for pair in scored_pairs {
1470 if processed.contains(&pair.mention_idx) {
1472 continue;
1473 }
1474
1475 let key = (
1477 pair.antecedent_idx.min(pair.mention_idx),
1478 pair.antecedent_idx.max(pair.mention_idx),
1479 );
1480 if self.config.use_non_coref_constraints && non_coref_pairs.contains(&key) {
1481 continue;
1482 }
1483
1484 let would_violate = if self.config.use_non_coref_constraints {
1486 self.would_violate_constraint(
1487 pair.mention_idx,
1488 pair.antecedent_idx,
1489 &mention_to_cluster,
1490 &clusters,
1491 &non_coref_pairs,
1492 )
1493 } else {
1494 false
1495 };
1496
1497 if would_violate {
1498 continue;
1499 }
1500
1501 processed.insert(pair.mention_idx);
1503
1504 if let Some(&cluster_id) = mention_to_cluster.get(&pair.antecedent_idx) {
1505 clusters[cluster_id].push(pair.mention_idx);
1506 mention_to_cluster.insert(pair.mention_idx, cluster_id);
1507 } else {
1508 let cluster_id = clusters.len();
1509 clusters.push(vec![pair.antecedent_idx, pair.mention_idx]);
1510 mention_to_cluster.insert(pair.antecedent_idx, cluster_id);
1511 mention_to_cluster.insert(pair.mention_idx, cluster_id);
1512 }
1513 }
1514
1515 let clusters = if self.config.enable_global_proper_coref {
1517 self.apply_global_proper_coref(mentions, clusters)
1518 } else {
1519 clusters
1520 };
1521
1522 clusters
1524 .into_iter()
1525 .enumerate()
1526 .map(|(id, indices)| MentionCluster {
1527 id,
1528 mentions: indices.into_iter().map(|i| mentions[i].clone()).collect(),
1529 })
1530 .collect()
1531 }
1532
1533 fn would_violate_constraint(
1535 &self,
1536 mention_idx: usize,
1537 antecedent_idx: usize,
1538 mention_to_cluster: &HashMap<usize, usize>,
1539 clusters: &[Vec<usize>],
1540 non_coref_pairs: &HashSet<(usize, usize)>,
1541 ) -> bool {
1542 let mut members = vec![mention_idx];
1544 if let Some(&cluster_id) = mention_to_cluster.get(&antecedent_idx) {
1545 members.extend(clusters[cluster_id].iter().copied());
1546 } else {
1547 members.push(antecedent_idx);
1548 }
1549
1550 for i in 0..members.len() {
1552 for j in (i + 1)..members.len() {
1553 let key = (members[i].min(members[j]), members[i].max(members[j]));
1554 if non_coref_pairs.contains(&key) {
1555 return true;
1556 }
1557 }
1558 }
1559
1560 false
1561 }
1562
1563 fn apply_global_proper_coref(
1568 &self,
1569 mentions: &[RankedMention],
1570 mut clusters: Vec<Vec<usize>>,
1571 ) -> Vec<Vec<usize>> {
1572 let mut proper_to_cluster: HashMap<String, usize> = HashMap::new();
1574 let mut cluster_to_propers: HashMap<usize, Vec<String>> = HashMap::new();
1575
1576 for (cluster_idx, cluster) in clusters.iter().enumerate() {
1577 for &mention_idx in cluster {
1578 let mention = &mentions[mention_idx];
1579 if mention.mention_type == MentionType::Proper {
1580 let normalized = mention.text.to_lowercase();
1581 proper_to_cluster.insert(normalized.clone(), cluster_idx);
1582 cluster_to_propers
1583 .entry(cluster_idx)
1584 .or_default()
1585 .push(normalized);
1586 }
1587 }
1588 }
1589
1590 let mut unclustered_propers: Vec<(usize, String)> = Vec::new();
1592 let mut mention_to_cluster: HashMap<usize, usize> = HashMap::new();
1593
1594 for (cluster_idx, cluster) in clusters.iter().enumerate() {
1595 for &mention_idx in cluster {
1596 mention_to_cluster.insert(mention_idx, cluster_idx);
1597 }
1598 }
1599
1600 for (i, mention) in mentions.iter().enumerate() {
1601 if mention.mention_type == MentionType::Proper && !mention_to_cluster.contains_key(&i) {
1602 unclustered_propers.push((i, mention.text.to_lowercase()));
1603 }
1604 }
1605
1606 for (mention_idx, normalized) in unclustered_propers {
1608 if let Some(&cluster_idx) = proper_to_cluster.get(&normalized) {
1609 clusters[cluster_idx].push(mention_idx);
1610 }
1611 }
1612
1613 let mut merged = vec![false; clusters.len()];
1616 let mut merge_map: HashMap<usize, usize> = HashMap::new();
1617
1618 for (idx, cluster) in clusters.iter().enumerate() {
1619 if merged[idx] {
1620 continue;
1621 }
1622
1623 let propers: Vec<_> = cluster
1624 .iter()
1625 .filter_map(|&i| {
1626 let m = &mentions[i];
1627 if m.mention_type == MentionType::Proper {
1628 Some(m.text.to_lowercase())
1629 } else {
1630 None
1631 }
1632 })
1633 .collect();
1634
1635 for (other_idx, other_cluster) in clusters.iter().enumerate() {
1637 if other_idx <= idx || merged[other_idx] {
1638 continue;
1639 }
1640
1641 let other_propers: Vec<_> = other_cluster
1642 .iter()
1643 .filter_map(|&i| {
1644 let m = &mentions[i];
1645 if m.mention_type == MentionType::Proper {
1646 Some(m.text.to_lowercase())
1647 } else {
1648 None
1649 }
1650 })
1651 .collect();
1652
1653 if propers.iter().any(|p| other_propers.contains(p)) {
1655 merged[other_idx] = true;
1656 merge_map.insert(other_idx, idx);
1657 }
1658 }
1659 }
1660
1661 if !merge_map.is_empty() {
1663 let mut final_clusters: Vec<Vec<usize>> = Vec::new();
1664 let mut old_to_new: HashMap<usize, usize> = HashMap::new();
1665
1666 for (old_idx, cluster) in clusters.into_iter().enumerate() {
1667 if merged[old_idx] {
1668 let mut target = merge_map[&old_idx];
1670 while let Some(&next) = merge_map.get(&target) {
1671 target = next;
1672 }
1673 if let Some(&new_idx) = old_to_new.get(&target) {
1674 final_clusters[new_idx].extend(cluster);
1675 }
1676 } else {
1677 let new_idx = final_clusters.len();
1678 old_to_new.insert(old_idx, new_idx);
1679 final_clusters.push(cluster);
1680 }
1681 }
1682
1683 final_clusters
1684 } else {
1685 clusters
1686 }
1687 }
1688
1689 fn score_pair(
1698 &self,
1699 mention: &RankedMention,
1700 antecedent: &RankedMention,
1701 distance: usize,
1702 text: Option<&str>,
1703 ) -> f64 {
1704 let mut score = 0.0;
1705
1706 if self.config.enable_context_filtering {
1711 if let Some(txt) = text {
1712 if self.should_filter_by_context(txt, mention, antecedent) {
1713 return -1.0; }
1715 }
1716 }
1717
1718 let m_lower = mention.text.to_lowercase();
1722 let a_lower = antecedent.text.to_lowercase();
1723
1724 if m_lower == a_lower {
1726 score += self.config.string_match_weight * 1.0;
1727 }
1728 else if mention.head.to_lowercase() == antecedent.head.to_lowercase() {
1730 score += self.config.string_match_weight * 0.6;
1731 }
1732 else if m_lower.contains(&a_lower) || a_lower.contains(&m_lower) {
1734 score += self.config.string_match_weight * 0.3;
1735 }
1736
1737 if self.config.enable_be_phrase_detection {
1742 if let Some(txt) = text {
1743 if self.is_be_phrase_link(txt, mention, antecedent) {
1744 score += self.config.be_phrase_weight;
1745 }
1746 }
1747 }
1748
1749 if self.config.enable_acronym_matching && self.is_acronym_match(mention, antecedent) {
1754 score += self.config.acronym_weight;
1755 }
1756
1757 if self.config.enable_synonym_matching && self.are_synonyms(mention, antecedent) {
1762 score += self.config.synonym_weight;
1763 }
1764
1765 match (mention.mention_type, antecedent.mention_type) {
1769 (MentionType::Pronominal, MentionType::Proper) => {
1770 score += self.config.type_compat_weight * 0.5;
1771 }
1772 (MentionType::Pronominal, MentionType::Pronominal) => {
1773 if mention.text.to_lowercase() == antecedent.text.to_lowercase() {
1775 score += self.config.type_compat_weight * 0.3;
1776 }
1777 }
1778 (MentionType::Proper, MentionType::Proper) => {
1779 score += self.config.type_compat_weight * 0.4;
1780 }
1781 _ => {}
1782 }
1783
1784 if let (Some(m_gender), Some(a_gender)) = (mention.gender, antecedent.gender) {
1788 if m_gender == a_gender {
1789 score += self.config.type_compat_weight * 0.3;
1790 } else if m_gender != Gender::Unknown && a_gender != Gender::Unknown {
1791 score -= self.config.type_compat_weight * 0.5; }
1793 }
1794
1795 if let (Some(m_number), Some(a_number)) = (mention.number, antecedent.number) {
1804 if m_number == a_number {
1805 score += self.config.type_compat_weight * 0.2;
1807 } else if m_number.is_compatible(&a_number) {
1808 score += self.config.type_compat_weight * 0.05;
1811 } else {
1812 score -= self.config.type_compat_weight * 0.4;
1814 }
1815 }
1816
1817 score -= self.config.distance_weight * (distance as f64).ln().max(0.0);
1821
1822 if self.config.salience_weight > 0.0 {
1826 let salience = self.get_salience(&antecedent.text);
1827 score += self.config.salience_weight * salience;
1828 }
1829
1830 score
1831 }
1832}
1833
1834impl Default for MentionRankingCoref {
1835 fn default() -> Self {
1836 Self::new()
1837 }
1838}
1839
1840impl MentionRankingCoref {
1845 pub fn resolve_to_grounded(
1874 &self,
1875 text: &str,
1876 ) -> Result<(
1877 Vec<anno_core::Signal<anno_core::Location>>,
1878 Vec<anno_core::Track>,
1879 )> {
1880 let clusters = self.resolve(text)?;
1881
1882 let mut all_signals = Vec::new();
1883 let mut all_tracks = Vec::new();
1884 let mut signal_id_offset = anno_core::SignalId::ZERO;
1885
1886 for cluster in clusters {
1887 let (track, signals) = cluster.to_track(signal_id_offset);
1888 signal_id_offset += signals.len() as u64;
1889 all_signals.extend(signals);
1890 all_tracks.push(track);
1891 }
1892
1893 Ok((all_signals, all_tracks))
1894 }
1895
1896 pub fn resolve_into_document(
1905 &self,
1906 text: &str,
1907 doc: &mut anno_core::GroundedDocument,
1908 ) -> Result<Vec<anno_core::TrackId>> {
1909 let (signals, tracks) = self.resolve_to_grounded(text)?;
1910 let mut track_ids = Vec::new();
1911
1912 for signal in signals {
1914 doc.signals.push(signal);
1915 }
1916
1917 for track in tracks {
1919 track_ids.push(track.id);
1920 doc.tracks.insert(track.id, track);
1921 }
1922
1923 Ok(track_ids)
1924 }
1925}
1926
1927use crate::Entity;
1932use anno_core::CoreferenceResolver;
1933
1934impl CoreferenceResolver for MentionRankingCoref {
1935 fn resolve(&self, entities: &[Entity]) -> Vec<Entity> {
1936 if entities.is_empty() {
1937 return vec![];
1938 }
1939
1940 let mut mentions: Vec<RankedMention> = entities
1942 .iter()
1943 .map(|e| {
1944 let mention_type = if e.text.chars().all(|c| c.is_lowercase()) {
1945 MentionType::Pronominal
1946 } else if e.text.chars().next().is_some_and(|c| c.is_uppercase()) {
1947 MentionType::Proper
1948 } else {
1949 MentionType::Nominal
1950 };
1951
1952 let gender = self.guess_gender(&e.text);
1953 let lower = e.text.to_lowercase();
1956 let number = if ["we", "us"].iter().any(|p| lower == *p) {
1957 Some(Number::Plural)
1958 } else if ["they", "them", "their", "you"].iter().any(|p| lower == *p) {
1959 Some(Number::Unknown) } else {
1961 Some(Number::Singular)
1962 };
1963
1964 RankedMention {
1965 start: e.start,
1966 end: e.end,
1967 text: e.text.clone(),
1968 mention_type,
1969 gender,
1970 number,
1971 head: self.get_head(&e.text),
1972 }
1973 })
1974 .collect();
1975
1976 mentions.sort_by_key(|m| (m.start, m.end));
1978
1979 for mention in &mut mentions {
1981 self.extract_features(mention);
1982 }
1983
1984 let clusters = self.link_mentions(&mentions, "");
1988
1989 let mut canonical_map: HashMap<(usize, usize), usize> = HashMap::new();
1991 for cluster in &clusters {
1992 for mention in &cluster.mentions {
1993 canonical_map.insert((mention.start, mention.end), cluster.id);
1994 }
1995 }
1996
1997 let max_cluster_id = clusters.iter().map(|c| c.id).max().unwrap_or(0);
1999 let mut next_singleton_id = max_cluster_id + 1;
2000
2001 entities
2003 .iter()
2004 .map(|e| {
2005 let mut entity = e.clone();
2006 if let Some(&cluster_id) = canonical_map.get(&(e.start, e.end)) {
2007 entity.canonical_id = Some(anno_core::CanonicalId::new(cluster_id as u64));
2008 } else {
2009 entity.canonical_id =
2011 Some(anno_core::CanonicalId::new(next_singleton_id as u64));
2012 next_singleton_id += 1;
2013 }
2014 entity
2015 })
2016 .collect()
2017 }
2018
2019 fn name(&self) -> &'static str {
2020 "MentionRankingCoref"
2021 }
2022}
2023
2024#[cfg(test)]
2025mod tests {
2026 use super::*;
2027
/// Smoke test: every cluster is non-empty and every span is well-formed.
#[test]
fn test_basic_resolution() {
    let resolver = MentionRankingCoref::new();
    let result = resolver.resolve("John saw Mary. He waved to her.").unwrap();

    result.iter().for_each(|cluster| {
        assert!(!cluster.mentions.is_empty());
        cluster
            .mentions
            .iter()
            .for_each(|mention| assert!(mention.start <= mention.end));
    });
}
2041
/// Empty text must resolve successfully to no clusters.
#[test]
fn test_empty_input() {
    let result = MentionRankingCoref::new().resolve("").unwrap();
    assert!(result.is_empty());
}
2048
/// Mention detection must find at least two pronominal mentions.
#[test]
fn test_pronoun_detection() {
    let detected = MentionRankingCoref::new()
        .detect_mentions("He saw her.")
        .unwrap();

    let pronoun_total = detected
        .iter()
        .filter(|m| m.mention_type == MentionType::Pronominal)
        .count();

    assert!(
        pronoun_total >= 2,
        "Should detect 'He' and 'her' as pronouns"
    );
}
2064
/// `guess_gender` on two personal names and one organisation name.
#[test]
fn test_gender_inference() {
    let resolver = MentionRankingCoref::new();

    let expectations = [
        ("John", Some(Gender::Masculine)),
        ("Mary Smith", Some(Gender::Feminine)),
        ("Google", None),
    ];

    for &(name, expected) in &expectations {
        assert_eq!(resolver.guess_gender(name), expected);
    }
}
2073
/// A masculine pronoun scored against a masculine proper name is positive.
#[test]
fn test_pair_scoring() {
    let scorer = MentionRankingCoref::new();

    // Both mentions are masculine singular and use their text as head.
    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let antecedent = masculine(0, 4, "John", MentionType::Proper);
    let pronoun = masculine(10, 12, "He", MentionType::Pronominal);

    let score = scorer.score_pair(&pronoun, &antecedent, 6, None);
    assert!(score > 0.0, "Pronoun with matching gender should link");
}
2101
/// A masculine pronoun scored against a feminine name stays below 0.5.
#[test]
fn test_gender_mismatch_penalty() {
    let scorer = MentionRankingCoref::new();
    let singular = Some(Number::Singular);

    let mary = RankedMention {
        text: "Mary".to_string(),
        head: "Mary".to_string(),
        start: 0,
        end: 4,
        mention_type: MentionType::Proper,
        gender: Some(Gender::Feminine),
        number: singular,
    };

    let he = RankedMention {
        text: "He".to_string(),
        head: "He".to_string(),
        start: 10,
        end: 12,
        mention_type: MentionType::Pronominal,
        gender: Some(Gender::Masculine),
        number: singular,
    };

    let score = scorer.score_pair(&he, &mary, 6, None);
    assert!(
        score < 0.5,
        "Gender mismatch should have low/negative score"
    );
}
2132
/// A custom `link_threshold` survives construction via `with_config`.
#[test]
fn test_config() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        link_threshold: 0.5,
        ..Default::default()
    });

    assert_eq!(coref.config.link_threshold, 0.5);
}
2143
/// Spans on mixed CJK/Latin text stay within the character count.
#[test]
fn test_unicode_offsets() {
    let text = "北京很美. He likes it.";
    let char_total = text.chars().count();

    let result = MentionRankingCoref::new().resolve(text).unwrap();

    for mention in result.iter().flat_map(|cluster| cluster.mentions.iter()) {
        assert!(mention.start <= mention.end);
        assert!(mention.end <= char_total);
    }
}
2159
/// Default antecedent limits, both as raw fields and via the per-type lookup.
#[test]
fn test_type_specific_antecedent_limits() {
    let config = MentionRankingConfig::default();

    assert_eq!(config.pronoun_max_antecedents, 30);
    assert_eq!(config.proper_max_antecedents, 300);
    assert_eq!(config.nominal_max_antecedents, 300);

    let per_type = [
        (MentionType::Pronominal, 30),
        (MentionType::Proper, 300),
        (MentionType::Nominal, 300),
        (MentionType::Zero, 300),
        (MentionType::Unknown, 300),
    ];
    for &(mention_type, limit) in &per_type {
        assert_eq!(config.max_antecedents_for_type(mention_type), limit);
    }
}
2180
/// The book-scale preset turns on the long-document features.
#[test]
fn test_book_scale_config() {
    let preset = MentionRankingConfig::book_scale();

    assert!(preset.enable_global_proper_coref);
    assert_eq!(preset.clustering_strategy, ClusteringStrategy::EasyFirst);
    assert!(preset.use_non_coref_constraints);

    assert!(preset.max_distance > 100);
}
2193
/// A custom pronoun antecedent limit is stored on the resolver's config.
#[test]
fn test_pronoun_antecedent_limit_enforced() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        pronoun_max_antecedents: 2,
        ..Default::default()
    });

    assert_eq!(coref.config.pronoun_max_antecedents, 2);
}
2207
/// The default clustering strategy is left-to-right.
#[test]
fn test_clustering_strategy_default() {
    let default_strategy = MentionRankingConfig::default().clustering_strategy;
    assert_eq!(default_strategy, ClusteringStrategy::LeftToRight);
}
2217
/// Easy-first clustering never yields an empty cluster.
#[test]
fn test_easy_first_clustering() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        clustering_strategy: ClusteringStrategy::EasyFirst,
        ..Default::default()
    });

    let result = coref.resolve("John went home. He was tired.").unwrap();
    result
        .iter()
        .for_each(|cluster| assert!(!cluster.mentions.is_empty()));
}
2232
/// At least one of the two clustering strategies produces clusters.
#[test]
fn test_left_to_right_vs_easy_first_produces_clusters() {
    let text = "John met Mary. He greeted her warmly. She smiled at him.";

    // Resolve the same text under a given clustering strategy.
    let run_with = |strategy| {
        let config = MentionRankingConfig {
            clustering_strategy: strategy,
            ..Default::default()
        };
        MentionRankingCoref::with_config(config)
            .resolve(text)
            .unwrap()
    };

    let l2r_clusters = run_with(ClusteringStrategy::LeftToRight);
    let ef_clusters = run_with(ClusteringStrategy::EasyFirst);

    assert!(
        !l2r_clusters.is_empty() || !ef_clusters.is_empty(),
        "At least one strategy should produce clusters"
    );
}
2259
/// Global proper-coref settings are stored as given.
#[test]
fn test_global_proper_coref_config() {
    let custom = MentionRankingConfig {
        global_proper_threshold: 0.8,
        enable_global_proper_coref: true,
        ..Default::default()
    };

    assert!(custom.enable_global_proper_coref);
    let drift = (custom.global_proper_threshold - 0.8).abs();
    assert!(drift < 0.001);
}
2275
/// With global proper coref enabled, a repeated name resolves to valid spans.
#[test]
fn test_global_proper_coref_same_name() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_global_proper_coref: true,
        ..Default::default()
    });

    let result = coref
        .resolve("John arrived. He was happy. Later John left.")
        .unwrap();

    for mention in result.iter().flat_map(|cluster| cluster.mentions.iter()) {
        assert!(mention.start <= mention.end);
    }
}
2299
/// Non-coreference constraint settings are stored as given.
#[test]
fn test_non_coref_constraints_config() {
    let custom = MentionRankingConfig {
        non_coref_threshold: 0.1,
        use_non_coref_constraints: true,
        ..Default::default()
    };

    assert!(custom.use_non_coref_constraints);
    let drift = (custom.non_coref_threshold - 0.1).abs();
    assert!(drift < 0.001);
}
2315
/// Easy-first clustering with non-coref constraints yields valid spans.
#[test]
fn test_easy_first_with_non_coref_constraints() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        use_non_coref_constraints: true,
        clustering_strategy: ClusteringStrategy::EasyFirst,
        ..Default::default()
    });

    let result = coref.resolve("John and Mary went to the store.").unwrap();

    for mention in result.iter().flat_map(|cluster| cluster.mentions.iter()) {
        assert!(mention.start <= mention.end);
    }
}
2335
/// End-to-end run of the book-scale preset on a short multi-sentence passage.
#[test]
fn test_full_book_scale_pipeline() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig::book_scale());

    let text = "Elizabeth Bennett was a spirited young woman. She lived at Longbourn \
        with her family. Her mother, Mrs. Bennett, was determined to see her \
        daughters married well. Elizabeth often walked in the countryside. \
        She enjoyed the solitude it offered.";
    let char_total = text.chars().count();

    let result = coref.resolve(text).unwrap();

    result.iter().for_each(|cluster| {
        assert!(!cluster.mentions.is_empty());
        cluster.mentions.iter().for_each(|mention| {
            assert!(mention.start <= mention.end);
            assert!(mention.end <= char_total);
        });
    });
}
2362
/// Detection finds both pronominal and proper mentions.
#[test]
fn test_mention_type_distribution() {
    let detected = MentionRankingCoref::new()
        .detect_mentions("Dr. Smith saw John. He examined him carefully.")
        .unwrap();

    // Count detected mentions of one type.
    let count_of = |wanted: MentionType| {
        detected
            .iter()
            .filter(|m| m.mention_type == wanted)
            .count()
    };

    assert!(count_of(MentionType::Pronominal) > 0, "Should detect pronouns");
    assert!(count_of(MentionType::Proper) > 0, "Should detect proper nouns");
}
2382
/// Salience weight is zero in the default configuration.
#[test]
fn test_salience_config_default() {
    let default_weight = MentionRankingConfig::default().salience_weight;
    assert!((default_weight - 0.0).abs() < 0.001);
}
2393
/// The builder stores the salience weight; 1.5 comes back as 1.0.
#[test]
fn test_salience_config_builder() {
    let weight_for = |requested| {
        MentionRankingConfig::default()
            .with_salience(requested)
            .salience_weight
    };

    assert!((weight_for(0.25) - 0.25).abs() < 0.001);
    assert!((weight_for(1.5) - 1.0).abs() < 0.001);
}
2403
/// The book-scale preset has a positive salience weight.
#[test]
fn test_salience_book_scale_enabled() {
    let preset_weight = MentionRankingConfig::book_scale().salience_weight;
    assert!(preset_weight > 0.0, "Book-scale should enable salience");
}
2412
/// Salience lookup ignores case on both the stored key and the query.
#[test]
fn test_with_salience_scores() {
    let scores: HashMap<String, f64> = [("john", 0.8), ("Mary", 0.6)]
        .iter()
        .map(|&(name, weight)| (name.to_string(), weight))
        .collect();
    let coref = MentionRankingCoref::new().with_salience(scores);

    let near = |actual: f64, expected: f64| (actual - expected).abs() < 0.001;

    assert!(near(coref.get_salience("john"), 0.8));
    assert!(near(coref.get_salience("John"), 0.8));
    assert!(near(coref.get_salience("JOHN"), 0.8));
    assert!(near(coref.get_salience("mary"), 0.6));

    // Names without an entry fall back to zero.
    assert!(near(coref.get_salience("unknown"), 0.0));
}
2430
/// A salient but more distant antecedent is not out-scored by more than 0.1.
#[test]
fn test_salience_boosts_antecedent_score() {
    let scores: HashMap<String, f64> = [("john", 1.0), ("mary", 0.0)]
        .iter()
        .map(|&(name, weight)| (name.to_string(), weight))
        .collect();

    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        salience_weight: 0.3,
        ..Default::default()
    })
    .with_salience(scores);

    // All three mentions are masculine singular and use their text as head.
    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let mention = masculine(20, 22, "He", MentionType::Pronominal);
    let john = masculine(0, 4, "John", MentionType::Proper);
    // "Bob" has no entry in the salience map.
    let bob = masculine(10, 13, "Bob", MentionType::Proper);

    let score_john = coref.score_pair(&mention, &john, 16, None);
    let score_bob = coref.score_pair(&mention, &bob, 7, None);

    assert!(
        score_john > score_bob - 0.1,
        "Salient antecedent should score higher: john={}, bob={}",
        score_john,
        score_bob
    );
}
2489
/// With `salience_weight == 0`, supplying salience scores changes nothing.
#[test]
fn test_salience_no_effect_when_disabled() {
    let config = MentionRankingConfig {
        salience_weight: 0.0,
        ..Default::default()
    };

    let scores: HashMap<String, f64> = std::iter::once(("john".to_string(), 1.0)).collect();

    let with_scores = MentionRankingCoref::with_config(config.clone()).with_salience(scores);
    let without_scores = MentionRankingCoref::with_config(config);

    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let mention = masculine(10, 12, "He", MentionType::Pronominal);
    let antecedent = masculine(0, 4, "John", MentionType::Proper);

    let score_without = without_scores.score_pair(&mention, &antecedent, 6, None);
    let score_with = with_scores.score_pair(&mention, &antecedent, 6, None);

    assert!(
        (score_without - score_with).abs() < 0.001,
        "Salience should have no effect when weight=0"
    );
}
2535
/// Full resolution with salience enabled yields non-empty, in-bounds clusters.
#[test]
fn test_salience_resolution_integration() {
    let scores: HashMap<String, f64> = [("president", 0.9), ("john", 0.7), ("meeting", 0.3)]
        .iter()
        .map(|&(name, weight)| (name.to_string(), weight))
        .collect();

    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        salience_weight: 0.2,
        ..Default::default()
    })
    .with_salience(scores);

    let text = "John met the President. He was nervous.";
    let char_total = text.chars().count();
    let result = coref.resolve(text).unwrap();

    result.iter().for_each(|cluster| {
        assert!(!cluster.mentions.is_empty());
        cluster.mentions.iter().for_each(|mention| {
            assert!(mention.start <= mention.end);
            assert!(mention.end <= char_total);
        });
    });
}
2563
/// Salience lookup works for CJK keys.
#[test]
fn test_salience_with_multilingual_text() {
    let entries = [("北京", 0.8), ("習近平", 0.9)];

    let scores: HashMap<String, f64> = entries
        .iter()
        .map(|&(name, weight)| (name.to_string(), weight))
        .collect();

    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        salience_weight: 0.2,
        ..Default::default()
    })
    .with_salience(scores);

    for &(name, expected) in &entries {
        assert!((coref.get_salience(name) - expected).abs() < 0.001);
    }
}
2581
/// `to_signals` numbers signals consecutively from the given id and keeps
/// surface text and location.
#[test]
fn test_mention_cluster_to_signals() {
    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let cluster = MentionCluster {
        id: 0,
        mentions: vec![
            masculine(0, 4, "John", MentionType::Proper),
            masculine(15, 17, "He", MentionType::Pronominal),
        ],
    };

    let signals = cluster.to_signals(anno_core::SignalId::new(100));

    assert_eq!(signals.len(), 2);
    assert_eq!(signals[0].id, anno_core::SignalId::new(100));
    assert_eq!(signals[1].id, anno_core::SignalId::new(101));
    assert_eq!(signals[0].surface, "John");
    assert_eq!(signals[1].surface, "He");

    let anno_core::Location::Text { start, end } = &signals[0].location else {
        panic!("Expected Text location");
    };
    assert_eq!(*start, 0);
    assert_eq!(*end, 4);
}
2628
/// `to_track` reuses the cluster id, picks "John" as canonical surface and
/// returns one signal per mention.
#[test]
fn test_mention_cluster_to_track() {
    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let cluster = MentionCluster {
        id: 42,
        mentions: vec![
            masculine(0, 4, "John", MentionType::Proper),
            masculine(15, 17, "He", MentionType::Pronominal),
        ],
    };

    let (track, signals) = cluster.to_track(anno_core::SignalId::new(0));

    assert_eq!(track.id, anno_core::TrackId::new(42));
    assert_eq!(track.canonical_surface, "John");
    assert_eq!(track.signals.len(), 2);

    assert_eq!(signals.len(), 2);
    assert_eq!(signals[0].surface, "John");
    assert_eq!(signals[1].surface, "He");
}
2667
/// The canonical mention is the proper name even when a pronoun comes first.
#[test]
fn test_canonical_mention_prefers_proper() {
    let masculine = |start, end, surface: &str, mention_type| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
        head: surface.to_string(),
    };

    let cluster = MentionCluster {
        id: 0,
        mentions: vec![
            masculine(0, 2, "He", MentionType::Pronominal),
            masculine(10, 14, "John", MentionType::Proper),
        ],
    };

    let chosen = cluster.canonical_mention().unwrap();
    assert_eq!(chosen.text, "John");
}
2699
/// Grounded resolution yields text-located signals and non-empty tracks.
#[test]
fn test_resolve_to_grounded() {
    let (signals, tracks) = MentionRankingCoref::new()
        .resolve_to_grounded("John saw Mary. He waved.")
        .unwrap();

    assert!(!signals.is_empty());

    for signal in &signals {
        let anno_core::Location::Text { start, end } = &signal.location else {
            panic!("Expected Text location");
        };
        assert!(start <= end);
    }

    tracks.iter().for_each(|track| {
        assert!(!track.signals.is_empty());
        assert!(!track.canonical_surface.is_empty());
    });
}
2725
/// Resolving into a document fills its signals and tracks, and every
/// returned track id is present in the document.
#[test]
fn test_resolve_into_document() {
    let text = "John saw Mary. He waved to her.";
    let mut doc = anno_core::GroundedDocument::new("test_doc", text);

    let track_ids = MentionRankingCoref::new()
        .resolve_into_document(text, &mut doc)
        .unwrap();

    assert!(!doc.signals.is_empty());
    assert!(!doc.tracks.is_empty());

    track_ids
        .iter()
        .for_each(|track_id| assert!(doc.tracks.contains_key(track_id)));
}
2743
/// `to_signal` copies id, surface, label, modality and span of a nominal mention.
#[test]
fn test_ranked_mention_to_signal() {
    let nominal = RankedMention {
        text: "the company".to_string(),
        head: "company".to_string(),
        start: 10,
        end: 20,
        mention_type: MentionType::Nominal,
        gender: None,
        number: Some(Number::Singular),
    };

    let signal = nominal.to_signal(anno_core::SignalId::new(999));

    assert_eq!(signal.id, anno_core::SignalId::new(999));
    assert_eq!(signal.surface, "the company");
    assert_eq!(signal.label, "nominal".into());
    assert_eq!(signal.modality, anno_core::Modality::Symbolic);

    let anno_core::Location::Text { start, end } = signal.location else {
        panic!("Expected Text location");
    };
    assert_eq!(start, 10);
    assert_eq!(end, 20);
}
2770
/// Text-located signals on CJK input stay within the character count.
#[test]
fn test_grounded_integration_unicode() {
    let text = "習近平在北京。他很忙。";
    let char_count = text.chars().count();

    let (signals, _tracks) = MentionRankingCoref::new()
        .resolve_to_grounded(text)
        .unwrap();

    // Signals with any other location kind are ignored here.
    signals.iter().for_each(|signal| {
        if let anno_core::Location::Text { start, end } = &signal.location {
            assert!(*start <= *end);
            assert!(
                *end <= char_count,
                "Signal end {} exceeds char count {}",
                end,
                char_count
            );
        }
    });
}
2792
/// "The patient is John Smith": the be-phrase is detected and lifts the
/// pair score above 0.5 under the clinical preset.
#[test]
fn test_be_phrase_detection() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig::clinical());
    let text = "The patient is John Smith. He was seen by Dr. Jones.";
    let singular = Some(Number::Singular);

    let patient = RankedMention {
        text: "patient".to_string(),
        head: "patient".to_string(),
        start: 4,
        end: 11,
        mention_type: MentionType::Nominal,
        gender: None,
        number: singular,
    };

    let john_smith = RankedMention {
        text: "John Smith".to_string(),
        head: "Smith".to_string(),
        start: 15,
        end: 25,
        mention_type: MentionType::Proper,
        gender: Some(Gender::Masculine),
        number: singular,
    };

    assert!(
        coref.is_be_phrase_link(text, &patient, &john_smith),
        "Should detect 'is' between patient and John Smith"
    );

    let score = coref.score_pair(&patient, &john_smith, 4, Some(text));
    assert!(score > 0.5, "Be-phrase should boost score: got {}", score);
}
2835
/// "John saw Mary": an ordinary verb between two names is not a be-phrase.
#[test]
fn test_be_phrase_detection_negative() {
    let coref = MentionRankingCoref::new();
    let text = "John saw Mary at the store.";

    let person = |start, end, name: &str, gender| RankedMention {
        start,
        end,
        text: name.to_string(),
        mention_type: MentionType::Proper,
        gender: Some(gender),
        number: Some(Number::Singular),
        head: name.to_string(),
    };

    let john = person(0, 4, "John", Gender::Masculine);
    let mary = person(9, 13, "Mary", Gender::Feminine);

    assert!(
        !coref.is_be_phrase_link(text, &john, &mary),
        "Should not detect be-phrase between John and Mary"
    );
}
2868
/// "MRSA" matches its hyphenated expansion.
#[test]
fn test_acronym_matching() {
    let coref = MentionRankingCoref::new();

    // Genderless singular proper mention.
    let proper = |start, end, surface: &str, head: &str| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
        head: head.to_string(),
    };

    let mrsa = proper(0, 4, "MRSA", "MRSA");
    let full = proper(
        20,
        65,
        "Methicillin-resistant Staphylococcus aureus",
        "aureus",
    );

    assert!(
        coref.is_acronym_match(&mrsa, &full),
        "MRSA should match Methicillin-resistant Staphylococcus aureus"
    );
}
2898
/// "WHO" matches "World Health Organization".
#[test]
fn test_acronym_matching_who() {
    let coref = MentionRankingCoref::new();

    // Genderless singular proper mention.
    let proper = |start, end, surface: &str, head: &str| RankedMention {
        start,
        end,
        text: surface.to_string(),
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
        head: head.to_string(),
    };

    let who = proper(0, 3, "WHO", "WHO");
    let full = proper(10, 35, "World Health Organization", "Organization");

    assert!(
        coref.is_acronym_match(&who, &full),
        "WHO should match World Health Organization"
    );
}
2928
/// An acronym must not be matched against an unrelated multi-word name.
#[test]
fn test_acronym_matching_negative() {
    // Local builder for the two genderless, singular proper-name mentions.
    let proper = |start: usize, end: usize, text: &str, head: &str| RankedMention {
        start,
        end,
        text: text.to_string(),
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
        head: head.to_string(),
    };

    let acronym = proper(0, 3, "IBM", "IBM");
    let unrelated = proper(10, 25, "Apple Inc", "Apple");

    let resolver = MentionRankingCoref::new();
    let matched = resolver.is_acronym_match(&acronym, &unrelated);
    assert!(!matched, "IBM should not match Apple Inc");
}
2958
/// With the clinical configuration, two "patient" mentions that appear next to
/// different dates are expected to be rejected by the context filter.
#[test]
fn test_context_filtering_different_dates() {
    let note = "On 2024-01-15 the patient presented. On 2024-02-20 the patient returned.";

    // NOTE(review): counting characters, "patient" sits at 18..25 and 55..62 in
    // `note`, so the spans used below look approximate - confirm they are intended.
    let patient_at = |start: usize, end: usize| RankedMention {
        start,
        end,
        text: "patient".to_string(),
        mention_type: MentionType::Nominal,
        gender: None,
        number: Some(Number::Singular),
        head: "patient".to_string(),
    };
    let first_visit = patient_at(17, 24);
    let second_visit = patient_at(50, 57);

    let resolver = MentionRankingCoref::with_config(MentionRankingConfig::clinical());
    let filtered = resolver.should_filter_by_context(note, &first_visit, &second_visit);
    assert!(
        filtered,
        "Should filter link between patients with different dates"
    );
}
2993
/// A negated mention ("not a diabetic") and a later non-negated mention of the
/// same word are expected to be kept apart by the clinical context filter.
#[test]
fn test_context_filtering_negation() {
    let text = "Patient is not a diabetic. This is important. The diabetic protocol was used.";

    // Both mentions share everything except their character span.
    let diabetic_at = |start: usize, end: usize| RankedMention {
        start,
        end,
        text: "diabetic".to_string(),
        mention_type: MentionType::Nominal,
        gender: None,
        number: Some(Number::Singular),
        head: "diabetic".to_string(),
    };
    let negated = diabetic_at(17, 25);
    let plain = diabetic_at(50, 58);

    // Sanity-check the fixture: take up to 20 characters before each mention
    // through the mention's end (clamped to the text length).
    let chars: Vec<char> = text.chars().collect();
    let window = |m: &RankedMention| -> String {
        let from = m.start.saturating_sub(20);
        let to = m.end.min(chars.len());
        chars[from..to].iter().collect()
    };
    let m1_context = window(&negated);
    let m2_context = window(&plain);
    eprintln!("m1 context: '{}'", m1_context);
    eprintln!("m2 context: '{}'", m2_context);

    assert!(
        m1_context.contains("not"),
        "m1 context should contain 'not'"
    );
    assert!(
        !m2_context.contains("not"),
        "m2 context should not contain 'not'"
    );

    let resolver = MentionRankingCoref::with_config(MentionRankingConfig::clinical());
    assert!(
        resolver.should_filter_by_context(text, &negated, &plain),
        "Should filter link between negated ('{}') and non-negated ('{}') mentions",
        m1_context,
        m2_context
    );
}
3058
/// Two mentions that differ only in letter case are expected to count as synonyms.
#[test]
fn test_synonym_matching_high_similarity() {
    let capitalised = RankedMention {
        text: "Obama".to_string(),
        head: "Obama".to_string(),
        start: 0,
        end: 5,
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
    };
    let lowercased = RankedMention {
        text: "obama".to_string(),
        head: "obama".to_string(),
        start: 10,
        end: 15,
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
    };

    let resolver = MentionRankingCoref::new();
    let synonymous = resolver.are_synonyms(&capitalised, &lowercased);
    assert!(
        synonymous,
        "Obama and obama should match (case-insensitive)"
    );
}
3091
/// Lexically unrelated words ("heart" / "cardiac") must not be reported as
/// synonyms by a default-configured resolver.
#[test]
fn test_synonym_matching_low_similarity_no_match() {
    // Local builder: singular nominal mention whose head equals its text.
    let nominal = |start: usize, end: usize, word: &str| RankedMention {
        start,
        end,
        text: word.to_string(),
        mention_type: MentionType::Nominal,
        gender: None,
        number: Some(Number::Singular),
        head: word.to_string(),
    };

    let heart = nominal(0, 5, "heart");
    let cardiac = nominal(10, 17, "cardiac");

    let resolver = MentionRankingCoref::new();
    let synonymous = resolver.are_synonyms(&heart, &cardiac);
    assert!(
        !synonymous,
        "heart/cardiac require domain-specific SynonymSource"
    );
}
3128
/// Checks the feature flags and weights of the `clinical()` configuration preset.
#[test]
fn test_clinical_config() {
    let config = MentionRankingConfig::clinical();

    // Every clinical feature switch is expected to be enabled.
    assert!(config.enable_be_phrase_detection);
    assert!(config.enable_acronym_matching);
    assert!(config.enable_context_filtering);
    assert!(config.enable_synonym_matching);

    // The associated weights are expected to exceed these lower bounds.
    assert!(config.be_phrase_weight > 0.5);
    assert!(config.acronym_weight > 0.5);
    assert!(config.synonym_weight > 0.3);
}
3144
/// End-to-end smoke test: resolving a short clinical note with the clinical
/// configuration must yield at least one cluster.
#[test]
fn test_clinical_resolution_integration() {
    let text = concat!(
        "The patient is John Smith. Pt was admitted with MRSA. ",
        "Methicillin-resistant Staphylococcus aureus was treated."
    );

    let resolver = MentionRankingCoref::with_config(MentionRankingConfig::clinical());
    let clusters = resolver.resolve(text).unwrap();

    assert!(
        !clusters.is_empty(),
        "Should find clusters in clinical text"
    );

    // Dump every cluster to stderr to help when inspecting test output.
    clusters.iter().for_each(|cluster| {
        let texts: Vec<_> = cluster.mentions.iter().map(|m| &m.text).collect();
        eprintln!("Cluster {}: {:?}", cluster.id, texts);
    });
}
3168
/// With the clinical configuration, "organism is MRSA" is expected to score
/// above 0.7 for the (organism, MRSA) pair.
#[test]
fn test_i2b2_scoring_with_all_features() {
    // "organism" covers chars 14..22 and "MRSA" chars 26..30; " is " separates them.
    let sentence = "Resolution of organism is MRSA.";

    let organism = RankedMention {
        text: "organism".to_string(),
        head: "organism".to_string(),
        start: 14,
        end: 22,
        mention_type: MentionType::Nominal,
        gender: None,
        number: Some(Number::Singular),
    };
    let mrsa = RankedMention {
        text: "MRSA".to_string(),
        head: "MRSA".to_string(),
        start: 26,
        end: 30,
        mention_type: MentionType::Proper,
        gender: None,
        number: Some(Number::Singular),
    };

    let resolver = MentionRankingCoref::with_config(MentionRankingConfig::clinical());
    let pair_score = resolver.score_pair(&organism, &mrsa, 4, Some(sentence));
    assert!(
        pair_score > 0.7,
        "Be-phrase pattern should yield high score, got {}",
        pair_score
    );
}
3205
/// With nominal-adjective detection enabled, "The poor" and "the rich" are
/// expected as mentions, and "the poor" as a plural nominal.
#[test]
fn test_nominal_adjective_detection_basic() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("The poor are struggling while the rich get richer.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.as_str()).collect();

    // Surface forms are checked case-sensitively, in order of appearance.
    for expected in ["The poor", "the rich"] {
        assert!(
            texts.contains(&expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }

    let poor_mention = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "the poor");
    assert!(poor_mention.is_some());
    let poor = poor_mention.unwrap();
    assert_eq!(poor.number, Some(Number::Plural));
    assert_eq!(poor.mention_type, MentionType::Nominal);
}
3241
/// "The poor" followed by a noun ("performance") is an ordinary adjective and
/// must not be reported as a nominal-adjective mention.
#[test]
fn test_nominal_adjective_not_before_noun() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("The poor performance was criticized.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.as_str()).collect();

    let found = texts.iter().any(|t| *t == "The poor");
    assert!(
        !found,
        "Should NOT detect 'The poor' when followed by noun: {:?}",
        texts
    );
}
3262
/// A nominal adjective directly before the final period is still expected to
/// be detected.
#[test]
fn test_nominal_adjective_at_sentence_end() {
    let mut settings = MentionRankingConfig::default();
    settings.enable_nominal_adjective_detection = true;
    let coref = MentionRankingCoref::with_config(settings);

    let mentions = coref.detect_mentions("We must help the elderly.").unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.as_str()).collect();

    let found = texts.iter().any(|t| *t == "the elderly");
    assert!(found, "Should detect 'the elderly' at end: {:?}", texts);
}
3281
/// Nominal adjectives separated by commas and "and" are each expected to be
/// detected.
#[test]
fn test_nominal_adjective_with_punctuation() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("The accused, the condemned, and the guilty were present.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.as_str()).collect();

    // Checked in order of appearance so the first missing phrase is reported.
    for expected in ["The accused", "the condemned", "the guilty"] {
        assert!(
            texts.contains(&expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3310
/// Demonstrative determiners ("These", "Those") are expected to introduce
/// nominal-adjective mentions as well.
#[test]
fn test_nominal_adjective_these_those() {
    let mut settings = MentionRankingConfig::default();
    settings.enable_nominal_adjective_detection = true;
    let coref = MentionRankingCoref::with_config(settings);

    let mentions = coref
        .detect_mentions("These homeless need shelter. Those unemployed seek work.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.as_str()).collect();

    for expected in ["These homeless", "Those unemployed"] {
        assert!(
            texts.contains(&expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3334
/// A default-configured resolver must not emit "the poor" as a mention.
#[test]
fn test_nominal_adjective_disabled_by_default() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("The poor are struggling.")
        .unwrap();

    // Case-insensitive check: no mention may read "the poor".
    let none_match = mentions
        .iter()
        .all(|m| m.text.to_lowercase() != "the poor");
    assert!(
        none_match,
        "Nominal adjective detection should be disabled by default"
    );
}
3349
/// Every detected "they" is expected to carry `Number::Unknown`, since the
/// pronoun can refer to one person or to several.
#[test]
fn test_singular_they_number_unknown() {
    let coref = MentionRankingCoref::new();

    let text = "Alex said they would come. They brought their friends.";
    let mentions = coref.detect_mentions(text).unwrap();

    // Case-insensitive so that both "they" and sentence-initial "They" are kept.
    let they_mentions: Vec<_> = mentions
        .iter()
        .filter(|m| m.text.to_lowercase() == "they")
        .collect();

    // Guard against a vacuous pass: without it the loop below would succeed
    // even if detection returned no "they" mention at all.
    assert!(
        !they_mentions.is_empty(),
        "Should detect 'they': {:?}",
        mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
    );

    for they in &they_mentions {
        assert_eq!(
            they.number,
            Some(Number::Unknown),
            "'they' should have Number::Unknown for singular/plural ambiguity"
        );
    }
}
3376
/// Possessive "their" is expected to be detected with `Number::Unknown`.
#[test]
fn test_their_number_unknown() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Someone left their umbrella.")
        .unwrap();

    let their = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "their")
        .expect("Should detect 'their'");

    assert_eq!(
        their.number,
        Some(Number::Unknown),
        "'their' should have Number::Unknown"
    );
}
3392
/// The reflexives disambiguate number: "themself" is expected to be singular
/// and "themselves" plural.
#[test]
fn test_themself_vs_themselves() {
    let resolver = MentionRankingCoref::new();
    let mentions = resolver
        .detect_mentions("The student prepared themself. The students prepared themselves.")
        .unwrap();

    // Presence is checked for both forms before any number is compared.
    let singular_form = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "themself")
        .expect("Should detect 'themself'");
    let plural_form = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "themselves")
        .expect("Should detect 'themselves'");

    assert_eq!(
        singular_form.number,
        Some(Number::Singular),
        "'themself' is explicitly singular"
    );
    assert_eq!(
        plural_form.number,
        Some(Number::Plural),
        "'themselves' is explicitly plural"
    );
}
3423
/// The ze/hir/hirs forms are expected to be detected as singular mentions, with
/// "ze" carrying `Gender::Unknown`.
#[test]
fn test_neopronoun_ze_hir() {
    let resolver = MentionRankingCoref::new();
    let mentions = resolver
        .detect_mentions("Ze told me to text hir, but I don't have hirs number.")
        .unwrap();

    // Case-insensitive lookup of the first mention whose text equals `word`.
    let lookup = |word: &str| mentions.iter().find(|m| m.text.to_lowercase() == word);

    let ze = lookup("ze").expect("Should detect 'ze'");
    let hir = lookup("hir").expect("Should detect 'hir'");
    let hirs = lookup("hirs").expect("Should detect 'hirs'");

    assert_eq!(ze.number, Some(Number::Singular));
    assert_eq!(hir.number, Some(Number::Singular));
    assert_eq!(hirs.number, Some(Number::Singular));

    assert_eq!(ze.gender, Some(Gender::Unknown));
}
3451
/// The xe/xem/xyr forms are expected to be detected; "xe" is expected to be
/// singular with `Gender::Unknown`.
#[test]
fn test_neopronoun_xe_xem() {
    let resolver = MentionRankingCoref::new();
    let mentions = resolver
        .detect_mentions("Xe said xem would bring xyr notes.")
        .unwrap();

    // Case-insensitive lookup of the first mention whose text equals `word`.
    let lookup = |word: &str| mentions.iter().find(|m| m.text.to_lowercase() == word);

    let xe = lookup("xe").expect("Should detect 'xe'");
    lookup("xem").expect("Should detect 'xem'");
    lookup("xyr").expect("Should detect 'xyr'");

    assert_eq!(xe.number, Some(Number::Singular));
    assert_eq!(xe.gender, Some(Gender::Unknown));
}
3470
/// The Spivak pronouns "ey" and "em" are expected to be detected, with "ey"
/// singular.
#[test]
fn test_neopronoun_spivak_ey_em() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Ey told me to call em later.")
        .unwrap();

    let subject = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "ey")
        .expect("Should detect 'ey' (Spivak pronoun)");
    mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "em")
        .expect("Should detect 'em' (Spivak pronoun)");

    assert_eq!(subject.number, Some(Number::Singular));
}
3486
/// "fae" and "faer" are expected to be detected, with "fae" singular.
#[test]
fn test_neopronoun_fae_faer() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Fae said faer class was cancelled.")
        .unwrap();

    let subject = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "fae")
        .expect("Should detect 'fae'");
    mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "faer")
        .expect("Should detect 'faer'");

    assert_eq!(subject.number, Some(Number::Singular));
}
3502
/// Converting a person `Entity` is expected to keep span and text, use the last
/// token as head, and produce a proper mention.
#[test]
fn test_ranked_mention_from_entity() {
    let person = crate::Entity::new(
        "Barack Obama",
        crate::EntityType::Person,
        0,
        12,
        0.95,
    );
    let converted = RankedMention::from(&person);

    // Span and surface text are carried over unchanged.
    assert_eq!(converted.start, 0);
    assert_eq!(converted.end, 12);
    assert_eq!(converted.text, "Barack Obama");

    // Derived attributes.
    assert_eq!(converted.head, "Obama");
    assert_eq!(converted.mention_type, MentionType::Proper);
}
3518
/// Converting a `RankedMention` into `anno_core::Mention` is expected to keep
/// span and text, and to wrap the mention type in `Some`.
#[test]
fn test_ranked_mention_to_coref_mention() {
    let ranked = RankedMention {
        text: "the patient".to_string(),
        head: "patient".to_string(),
        start: 10,
        end: 20,
        mention_type: MentionType::Nominal,
        gender: Some(Gender::Unknown),
        number: Some(Number::Singular),
    };

    let converted: anno_core::Mention = (&ranked).into();

    assert_eq!(converted.start, 10);
    assert_eq!(converted.end, 20);
    assert_eq!(converted.text, "the patient");
    assert_eq!(converted.mention_type, Some(MentionType::Nominal));
}
3538
/// `span()` is expected to return the `(start, end)` pair of the mention.
#[test]
fn test_ranked_mention_span() {
    let probe = RankedMention {
        text: "test".to_string(),
        head: "test".to_string(),
        start: 5,
        end: 15,
        mention_type: MentionType::Nominal,
        gender: None,
        number: None,
    };

    let span = probe.span();
    assert_eq!(span, (5, 15));
}
3553
/// With nominal-adjective detection on, "the poor" and "They" are both expected
/// to be detected, and their pair score must not be strongly negative.
#[test]
fn test_nominal_adjective_pronoun_resolution() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        link_threshold: 0.1,
        ..MentionRankingConfig::default()
    });

    let text = "We must help the poor. They deserve better.";
    let detected = coref.detect_mentions(text).unwrap();
    let detected_texts: Vec<_> = detected.iter().map(|m| m.text.as_str()).collect();

    // Locate both mentions case-insensitively; each must be present.
    let the_poor = detected
        .iter()
        .find(|m| m.text.to_lowercase() == "the poor");
    assert!(
        the_poor.is_some(),
        "Should detect 'the poor' in detect_mentions: {:?}",
        detected_texts
    );
    let they = detected.iter().find(|m| m.text.to_lowercase() == "they");
    assert!(
        they.is_some(),
        "Should detect 'They' in detect_mentions: {:?}",
        detected_texts
    );
    let (the_poor, they) = (the_poor.unwrap(), they.unwrap());

    // Distance runs from the antecedent's end to the pronoun's start.
    let gap = they.start.saturating_sub(the_poor.end);
    let score = coref.score_pair(they, the_poor, gap, Some(text));

    assert!(
        score > -0.5,
        "Score between 'They' and 'the poor' should not be strongly negative, got {}",
        score
    );
}
3613
/// "xemself" and "xe" are both expected among the detected mentions.
#[test]
fn test_neopronoun_xe_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Alex introduced xemself. Xe said xe was happy to be here.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    // Lower-cased forms, checked in this order.
    for expected in ["xemself", "xe"] {
        assert!(
            texts.iter().any(|t| t == expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3636
/// "ze" and "hir" are both expected to be detected, including in the
/// slash-separated "ze/hir" form.
#[test]
fn test_neopronoun_ze_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Jordan uses ze/hir pronouns. Hir presentation was excellent.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    for expected in ["ze", "hir"] {
        assert!(
            texts.iter().any(|t| t == expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3655
/// The object and possessive forms "em" and "eir" are expected to be detected.
#[test]
fn test_neopronoun_ey_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Sam asked em to pass eir notebook.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    for expected in ["em", "eir"] {
        assert!(
            texts.iter().any(|t| t == expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3674
/// "faer" and "fae" are both expected among the detected mentions.
#[test]
fn test_neopronoun_fae_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("River explained faer perspective. Fae was very articulate.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    for expected in ["faer", "fae"] {
        assert!(
            texts.iter().any(|t| t == expected),
            "Should detect '{}': {:?}",
            expected,
            texts
        );
    }
}
3693
/// A detected "xe" is expected to be singular and to carry `Gender::Unknown`.
#[test]
fn test_neopronoun_gender_and_number() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Xe arrived early.")
        .unwrap();

    let xe = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "xe")
        .expect("Should detect 'xe'");

    assert_eq!(
        xe.number,
        Some(Number::Singular),
        "Neopronouns are singular"
    );
    assert_eq!(
        xe.gender,
        Some(Gender::Unknown),
        "Neopronouns use Unknown gender"
    );
}
3716
/// Repeated "xe" and a later "xem" are expected to be detected, each typed as
/// `MentionType::Pronominal`.
#[test]
fn test_neopronoun_coreference_linking() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Xe said xe would be late. Xem was right.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    let xe_count = texts.iter().filter(|t| *t == "xe").count();
    assert!(
        xe_count >= 2,
        "Should detect multiple 'xe': {:?}",
        texts
    );
    assert!(
        texts.iter().any(|t| t == "xem"),
        "Should detect 'xem': {:?}",
        texts
    );

    // Only the neopronoun mentions are type-checked; any others are ignored.
    mentions
        .iter()
        .filter(|m| matches!(m.text.to_lowercase().as_str(), "xe" | "xem"))
        .for_each(|m| {
            assert_eq!(
                m.mention_type,
                MentionType::Pronominal,
                "Neopronouns should be Pronominal type"
            );
        });
}
3750
/// `Number::Dual` is expected to be compatible with plural but not singular,
/// and pair scoring is expected to rank dual-plural above dual-singular.
#[test]
fn test_dual_number_compatibility_scoring() {
    // The number relation itself is checked first, independent of any resolver.
    assert!(
        Number::Dual.is_compatible(&Number::Plural),
        "Dual should be compatible with Plural"
    );
    assert!(
        !Number::Dual.is_compatible(&Number::Singular),
        "Dual should NOT be compatible with Singular"
    );

    let dual_noun = RankedMention {
        text: "كتابان".to_string(),
        head: "كتابان".to_string(),
        start: 0,
        end: 5,
        mention_type: MentionType::Nominal,
        gender: Some(Gender::Neutral),
        number: Some(Number::Dual),
    };
    let plural_pronoun = RankedMention {
        text: "هم".to_string(),
        head: "هم".to_string(),
        start: 10,
        end: 15,
        mention_type: MentionType::Pronominal,
        gender: Some(Gender::Unknown),
        number: Some(Number::Plural),
    };
    let singular_pronoun = RankedMention {
        text: "هو".to_string(),
        head: "هو".to_string(),
        start: 20,
        end: 22,
        mention_type: MentionType::Pronominal,
        gender: Some(Gender::Masculine),
        number: Some(Number::Singular),
    };

    // Both pronouns are scored against the same dual antecedent, without text.
    let resolver = MentionRankingCoref::new();
    let with_plural = resolver.score_pair(&plural_pronoun, &dual_noun, 5, None);
    let with_singular = resolver.score_pair(&singular_pronoun, &dual_noun, 5, None);

    assert!(
        with_plural > with_singular,
        "Dual-Plural score ({}) should be higher than Dual-Singular ({})",
        with_plural,
        with_singular
    );
}
3814
/// `Number::Unknown` is expected to be compatible with every number, and a
/// "They" mention must not be heavily penalized against either a singular or a
/// plural candidate.
#[test]
fn test_number_compatibility_unknown() {
    assert!(Number::Unknown.is_compatible(&Number::Singular));
    assert!(Number::Unknown.is_compatible(&Number::Plural));
    assert!(Number::Unknown.is_compatible(&Number::Dual));
    assert!(Number::Unknown.is_compatible(&Number::Unknown));

    let pronoun = RankedMention {
        text: "They".to_string(),
        head: "They".to_string(),
        start: 0,
        end: 4,
        mention_type: MentionType::Pronominal,
        gender: Some(Gender::Unknown),
        number: Some(Number::Unknown),
    };
    let one_person = RankedMention {
        text: "Alex".to_string(),
        head: "Alex".to_string(),
        start: 10,
        end: 14,
        mention_type: MentionType::Proper,
        gender: Some(Gender::Unknown),
        number: Some(Number::Singular),
    };
    let many_people = RankedMention {
        text: "the students".to_string(),
        head: "students".to_string(),
        start: 20,
        end: 30,
        mention_type: MentionType::Nominal,
        gender: Some(Gender::Unknown),
        number: Some(Number::Plural),
    };

    // Both candidates are scored before either result is checked.
    let resolver = MentionRankingCoref::new();
    let vs_singular = resolver.score_pair(&pronoun, &one_person, 5, None);
    let vs_plural = resolver.score_pair(&pronoun, &many_people, 5, None);

    assert!(
        vs_singular > -1.0,
        "'They' ↔ singular should not be heavily penalized: {}",
        vs_singular
    );
    assert!(
        vs_plural > -1.0,
        "'They' ↔ plural should not be heavily penalized: {}",
        vs_plural
    );
}
3873
/// "It" as the subject of a weather verb is expected to be dropped as
/// non-referential.
#[test]
fn test_pleonastic_it_weather() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "It rains every day in Seattle.",
        "It is raining outside.",
        "It snows heavily in winter.",
        "It was snowing when we arrived.",
        "It thundered all night.",
    ];

    for &sentence in &samples {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        // No mention may read "it", ignoring case.
        let it_free = mentions.iter().all(|m| m.text.to_lowercase() != "it");
        assert!(
            it_free,
            "Weather 'it' should be filtered as pleonastic in: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    }
}
3902
/// "It" followed by a weather adjective (sunny, cold, ...) is expected to be
/// dropped as non-referential.
#[test]
fn test_pleonastic_it_weather_adjectives() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "It is sunny today.",
        "It was cold last night.",
        "It's foggy this morning.",
        "It will be warm tomorrow.",
    ];

    samples.iter().for_each(|&sentence| {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        let it_found = mentions.iter().any(|m| m.text.to_lowercase() == "it");
        assert!(
            !it_found,
            "Weather adjective 'it' should be filtered: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    });
}
3925
/// Extraposition constructions ("It is important that ...") are expected to
/// have their "it" dropped as non-referential.
#[test]
fn test_pleonastic_it_modal() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "It is important that we finish on time.",
        "It is likely that he will arrive late.",
        "It was clear that something was wrong.",
        "It is necessary to complete the form.",
        "It's obvious that she was upset.",
    ];

    for &sentence in &samples {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        let it_free = mentions.iter().all(|m| m.text.to_lowercase() != "it");
        assert!(
            it_free,
            "Modal 'it' should be filtered: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    }
}
3949
/// "It seems / appears / turns out / happened ..." are expected to have their
/// "it" dropped as non-referential.
#[test]
fn test_pleonastic_it_cognitive_verbs() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "It seems that the project is delayed.",
        "It appears he was mistaken.",
        "It turns out she was right.",
        "It happened that we met by chance.",
    ];

    samples.iter().for_each(|&sentence| {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        let it_found = mentions.iter().any(|m| m.text.to_lowercase() == "it");
        assert!(
            !it_found,
            "Cognitive verb 'it' should be filtered: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    });
}
3972
/// Counterpart of the pleonastic tests: an "it" that refers back to a noun must
/// still be detected.
#[test]
fn test_referential_it_not_filtered() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "I read the book. It was fascinating.",
        "The car broke down. We had to push it.",
        "She gave him a gift. He loved it.",
    ];

    for &sentence in &samples {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        let it_found = mentions.iter().any(|m| m.text.to_lowercase() == "it");
        assert!(
            it_found,
            "Referential 'it' should be detected: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    }
}
3995
/// "It" in time-of-day statements is expected to be dropped as non-referential.
#[test]
fn test_pleonastic_it_time_expressions() {
    let resolver = MentionRankingCoref::new();

    let samples = [
        "It is midnight.",
        "It was noon when we left.",
        "It is 5 o'clock.",
    ];

    samples.iter().for_each(|&sentence| {
        let mentions = resolver.detect_mentions(sentence).unwrap();
        let it_free = mentions.iter().all(|m| m.text.to_lowercase() != "it");
        assert!(
            it_free,
            "Time expression 'it' should be filtered: '{}'\nDetected: {:?}",
            sentence,
            mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
        );
    });
}
4017
/// Stand-alone demonstratives ("This", "Those") are expected to be detected.
#[test]
fn test_demonstrative_pronoun_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("I saw the problem. This was unexpected. Those are the facts.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    let has_this = texts.iter().any(|t| t == "this");
    assert!(has_this, "Should detect 'This': {:?}", texts);

    let has_those = texts.iter().any(|t| t == "those");
    assert!(has_those, "Should detect 'Those': {:?}", texts);
}
4041
/// this/that are expected to be singular and these/those plural.
#[test]
fn test_demonstrative_pronoun_number() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("This is important. These are facts. That was clear. Those were obvious.")
        .unwrap();

    // Number of the first mention matching `word` case-insensitively; the outer
    // `None` means the word was not detected at all.
    let number_of = |word: &str| {
        mentions
            .iter()
            .find(|m| m.text.to_lowercase() == word)
            .map(|m| m.number)
    };

    assert_eq!(number_of("this"), Some(Some(Number::Singular)));
    assert_eq!(number_of("these"), Some(Some(Number::Plural)));
    assert_eq!(number_of("that"), Some(Some(Number::Singular)));
    assert_eq!(number_of("those"), Some(Some(Number::Plural)));
}
4060
/// Indefinite pronouns ("Someone", "Everyone") are expected to be detected.
#[test]
fn test_indefinite_pronoun_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Someone called yesterday. Everyone was surprised.")
        .unwrap();
    let texts: Vec<_> = mentions.iter().map(|m| m.text.to_lowercase()).collect();

    let has_someone = texts.iter().any(|t| t == "someone");
    assert!(has_someone, "Should detect 'Someone': {:?}", texts);

    let has_everyone = texts.iter().any(|t| t == "everyone");
    assert!(has_everyone, "Should detect 'Everyone': {:?}", texts);
}
4084
/// "everyone" and "nobody" are expected to be detected as singular.
#[test]
fn test_indefinite_pronouns_are_singular() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("Everyone was there. Nobody left early.")
        .unwrap();

    // Presence of both pronouns is checked before either number is compared.
    let everyone = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "everyone")
        .expect("Should detect 'Everyone'");
    let nobody = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "nobody")
        .expect("Should detect 'Nobody'");

    assert_eq!(
        everyone.number,
        Some(Number::Singular),
        "'everyone' is grammatically singular"
    );
    assert_eq!(
        nobody.number,
        Some(Number::Singular),
        "'nobody' is grammatically singular"
    );
}
4113
/// Both sentence-initial impersonal "One" pronouns are expected to be detected.
#[test]
fn test_impersonal_one_detection() {
    let mentions = MentionRankingCoref::new()
        .detect_mentions("One should always be prepared. One never knows what might happen.")
        .unwrap();

    // Count, case-insensitively, the mentions whose text is exactly "one".
    let ones: Vec<_> = mentions
        .iter()
        .filter(|m| m.text.to_lowercase() == "one")
        .collect();

    assert!(
        ones.len() >= 2,
        "Should detect impersonal 'one': {:?}",
        mentions.iter().map(|m| &m.text).collect::<Vec<_>>()
    );
}
4132
/// Checks that the reflexive pronouns "himself" and "herself" appear among
/// the mentions returned by `detect_mentions`.
#[test]
fn test_reflexive_pronoun_detection() {
    let coref = MentionRankingCoref::new();
    let input = "John saw himself in the mirror. Mary hurt herself.";

    let detected = coref.detect_mentions(input).unwrap();
    let texts: Vec<_> = detected
        .iter()
        .map(|mention| mention.text.to_lowercase())
        .collect();

    let has_himself = texts.iter().any(|t| t == "himself");
    assert!(has_himself, "Should detect 'himself': {:?}", texts);

    let has_herself = texts.iter().any(|t| t == "herself");
    assert!(has_herself, "Should detect 'herself': {:?}", texts);
}
4156
/// Checks that "himself", "herself" and "itself" are detected and carry the
/// masculine, feminine and neutral `gender` values respectively.
#[test]
fn test_reflexive_pronoun_gender() {
    let coref = MentionRankingCoref::new();

    let text = "He saw himself. She saw herself. It fixed itself.";
    let mentions = coref.detect_mentions(text).unwrap();

    // `expect` verifies detection and yields the mention in one step, instead
    // of asserting `is_some()` and then calling `unwrap()`.
    let himself = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "himself")
        .expect("Should detect 'himself'");
    let herself = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "herself")
        .expect("Should detect 'herself'");
    let itself = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "itself")
        .expect("Should detect 'itself'");

    assert_eq!(himself.gender, Some(Gender::Masculine));
    assert_eq!(herself.gender, Some(Gender::Feminine));
    assert_eq!(itself.gender, Some(Gender::Neutral));
}
4176
/// Checks that the two-word reciprocals "each other" and "one another" are
/// each returned as a single mention.
#[test]
fn test_reciprocal_pronoun_detection() {
    let coref = MentionRankingCoref::new();
    let input = "John and Mary looked at each other. The teams competed against one another.";

    let detected = coref.detect_mentions(input).unwrap();
    let texts: Vec<_> = detected
        .iter()
        .map(|mention| mention.text.to_lowercase())
        .collect();

    let has_each_other = texts.iter().any(|t| t == "each other");
    assert!(has_each_other, "Should detect 'each other': {:?}", texts);

    let has_one_another = texts.iter().any(|t| t == "one another");
    assert!(has_one_another, "Should detect 'one another': {:?}", texts);
}
4200
/// Checks that "each other" is detected and that its `number` is
/// `Some(Number::Plural)`.
#[test]
fn test_reciprocal_pronouns_are_plural() {
    let coref = MentionRankingCoref::new();

    let text = "They helped each other.";
    let mentions = coref.detect_mentions(text).unwrap();

    // `expect` replaces the `is_some()` assertion followed by `unwrap()`.
    let each_other = mentions
        .iter()
        .find(|m| m.text.to_lowercase() == "each other")
        .expect("Should detect 'each other'");

    assert_eq!(
        each_other.number,
        Some(Number::Plural),
        "Reciprocals are grammatically plural"
    );
}
4219
4220 use proptest::prelude::*;
4230
4231 fn text_with_pronouns() -> impl Strategy<Value = String> {
4233 prop::collection::vec(
4234 prop_oneof![
4235 Just("he".to_string()),
4236 Just("she".to_string()),
4237 Just("they".to_string()),
4238 Just("it".to_string()),
4239 Just("the dog".to_string()),
4240 Just("John".to_string()),
4241 "[a-z]{3,10}".prop_map(|s| s),
4242 ],
4243 3..15,
4244 )
4245 .prop_map(|words| words.join(" ") + ".")
4246 }
4247
/// With nominal-adjective detection enabled and `language` set to "de",
/// some detected mention must contain "armen" (from "Die Armen").
#[test]
fn test_multilingual_nominal_adjective_german() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        language: String::from("de"),
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("Die Armen leiden unter der Krise.")
        .unwrap();

    let has_armen = mentions
        .iter()
        .map(|mention| mention.text.to_lowercase())
        .any(|lowered| lowered.contains("armen"));

    assert!(
        has_armen,
        "Should detect 'die Armen' as a nominal adjective in German"
    );
}
4272
/// With nominal-adjective detection enabled and `language` set to "fr",
/// some detected mention must contain "pauvres" (from "Les pauvres").
#[test]
fn test_multilingual_nominal_adjective_french() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        language: String::from("fr"),
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("Les pauvres ont besoin d'aide.")
        .unwrap();

    let has_pauvres = mentions
        .iter()
        .map(|mention| mention.text.to_lowercase())
        .any(|lowered| lowered.contains("pauvres"));

    assert!(
        has_pauvres,
        "Should detect 'les pauvres' as a nominal adjective in French"
    );
}
4293
/// With nominal-adjective detection enabled and `language` set to "es",
/// some detected mention must contain "pobres" (from "Los pobres").
#[test]
fn test_multilingual_nominal_adjective_spanish() {
    let coref = MentionRankingCoref::with_config(MentionRankingConfig {
        enable_nominal_adjective_detection: true,
        language: String::from("es"),
        ..MentionRankingConfig::default()
    });

    let mentions = coref
        .detect_mentions("Los pobres necesitan ayuda.")
        .unwrap();

    let has_pobres = mentions
        .iter()
        .map(|mention| mention.text.to_lowercase())
        .any(|lowered| lowered.contains("pobres"));

    assert!(
        has_pobres,
        "Should detect 'los pobres' as a nominal adjective in Spanish"
    );
}
4314
/// The default, `book_scale` and `clinical` configurations must all report
/// "en" in their `language` field.
#[test]
fn test_config_language_field() {
    assert_eq!(MentionRankingConfig::default().language, "en");
    assert_eq!(MentionRankingConfig::book_scale().language, "en");
    assert_eq!(MentionRankingConfig::clinical().language, "en");
}
4329
4330 proptest! {
4331 #![proptest_config(ProptestConfig::with_cases(50))]
4332
4333 #[test]
4337 fn mention_spans_within_bounds(text in text_with_pronouns()) {
4338 let coref = MentionRankingCoref::new();
4339 if let Ok(mentions) = coref.detect_mentions(&text) {
4340 let char_count = text.chars().count();
4341 for mention in &mentions {
4342 prop_assert!(
4343 mention.start <= mention.end,
4344 "Start {} > end {} for '{}'",
4345 mention.start, mention.end, mention.text
4346 );
4347 prop_assert!(
4348 mention.end <= char_count,
4349 "End {} > text length {} for '{}'",
4350 mention.end, char_count, mention.text
4351 );
4352 }
4353 }
4354 }
4355
4356 #[test]
4360 fn mention_text_matches_span(text in text_with_pronouns()) {
4361 let coref = MentionRankingCoref::new();
4362 if let Ok(mentions) = coref.detect_mentions(&text) {
4363 for mention in &mentions {
4364 let extracted: String = text.chars()
4365 .skip(mention.start)
4366 .take(mention.end - mention.start)
4367 .collect();
4368 prop_assert_eq!(
4370 extracted.to_lowercase(),
4371 mention.text.to_lowercase(),
4372 "Extracted text doesn't match stored text"
4373 );
4374 }
4375 }
4376 }
4377
4378 #[test]
4380 fn pronouns_are_pronominal(text in text_with_pronouns()) {
4381 let coref = MentionRankingCoref::new();
4382 if let Ok(mentions) = coref.detect_mentions(&text) {
4383 let pronouns = ["he", "she", "it", "they", "him", "her", "them"];
4384 for mention in &mentions {
4385 if pronouns.contains(&mention.text.to_lowercase().as_str()) {
4386 prop_assert_eq!(
4387 mention.mention_type,
4388 MentionType::Pronominal,
4389 "'{}' should be Pronominal",
4390 mention.text
4391 );
4392 }
4393 }
4394 }
4395 }
4396
4397 #[test]
4399 fn pronouns_have_gender(text in text_with_pronouns()) {
4400 let coref = MentionRankingCoref::new();
4401 if let Ok(mentions) = coref.detect_mentions(&text) {
4402 for mention in &mentions {
4403 if mention.mention_type == MentionType::Pronominal {
4404 prop_assert!(
4405 mention.gender.is_some(),
4406 "Pronoun '{}' should have gender",
4407 mention.text
4408 );
4409 }
4410 }
4411 }
4412 }
4413
4414 #[test]
4416 fn pronouns_have_number(text in text_with_pronouns()) {
4417 let coref = MentionRankingCoref::new();
4418 if let Ok(mentions) = coref.detect_mentions(&text) {
4419 for mention in &mentions {
4420 if mention.mention_type == MentionType::Pronominal {
4421 prop_assert!(
4422 mention.number.is_some(),
4423 "Pronoun '{}' should have number",
4424 mention.text
4425 );
4426 }
4427 }
4428 }
4429 }
4430
4431 #[test]
4433 fn clusters_partition_mentions(text in text_with_pronouns()) {
4434 let coref = MentionRankingCoref::new();
4435 if let Ok(clusters) = coref.resolve(&text) {
4436 let mut all_mentions: Vec<_> = clusters.iter()
4438 .flat_map(|c| &c.mentions)
4439 .collect();
4440
4441 let original_len = all_mentions.len();
4443 all_mentions.sort_by_key(|m| (m.start, m.end));
4444 all_mentions.dedup_by_key(|m| (m.start, m.end));
4445 prop_assert_eq!(
4446 all_mentions.len(),
4447 original_len,
4448 "Duplicate mentions across clusters"
4449 );
4450 }
4451 }
4452
4453 #[test]
4457 fn score_pair_deterministic(text in text_with_pronouns()) {
4458 let coref = MentionRankingCoref::new();
4459 if let Ok(mentions) = coref.detect_mentions(&text) {
4460 if mentions.len() >= 2 {
4461 let distance = mentions[1].start.saturating_sub(mentions[0].end);
4462 let score1 = coref.score_pair(&mentions[0], &mentions[1], distance, Some(&text));
4463 let score2 = coref.score_pair(&mentions[0], &mentions[1], distance, Some(&text));
4464 prop_assert!(
4465 (score1 - score2).abs() < 0.0001,
4466 "Scoring should be deterministic"
4467 );
4468 }
4469 }
4470 }
4471 }
4472}