use crate::{Confidence, Entity};
use serde::{Deserialize, Serialize};
/// Category of thing a discourse referent can denote.
///
/// `Nominal` is the `Default`; `ReferentType::is_abstract` reports every other
/// variant as abstract. The enum is `#[non_exhaustive]`, so downstream crates
/// must include a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[non_exhaustive]
pub enum ReferentType {
/// Default variant and the only one `is_abstract` treats as non-abstract.
#[default]
Nominal,
/// Rendered as `"event"` by `as_str`.
Event,
/// Rendered as `"fact"` by `as_str`.
Fact,
/// Rendered as `"proposition"` by `as_str`.
Proposition,
/// Rendered as `"situation"` by `as_str`.
Situation,
/// Rendered as `"manner"` by `as_str`.
Manner,
/// Rendered as `"segment"` by `as_str`.
Segment,
}
impl ReferentType {
#[must_use]
pub const fn is_abstract(&self) -> bool {
!matches!(self, ReferentType::Nominal)
}
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
ReferentType::Nominal => "nominal",
ReferentType::Event => "event",
ReferentType::Fact => "fact",
ReferentType::Proposition => "proposition",
ReferentType::Situation => "situation",
ReferentType::Manner => "manner",
ReferentType::Segment => "segment",
}
}
#[must_use]
pub const fn can_be_this_antecedent(&self) -> bool {
self.is_abstract()
}
#[must_use]
pub const fn can_be_it_antecedent(&self) -> bool {
matches!(
self,
ReferentType::Nominal | ReferentType::Event | ReferentType::Situation
)
}
}
/// A single mention of an event in text: its trigger word plus optional
/// type, arguments, polarity and tense.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventMention {
/// Surface text of the trigger.
pub trigger: String,
/// Start offset of the trigger (unit — char vs. byte — is not fixed by this type; TODO confirm with callers).
pub trigger_start: usize,
/// End offset of the trigger, in the same unit as `trigger_start`.
pub trigger_end: usize,
/// Optional event type label; `None` until set via `with_trigger_type`.
pub trigger_type: Option<String>,
/// `(role, text)` pairs, in the order supplied to `with_arguments`.
pub arguments: Vec<(String, String)>,
/// Confidence attached to the mention; `EventMention::new` sets `Confidence::ONE`.
pub confidence: Confidence,
/// Polarity of the event; `EventMention::new` uses `EventPolarity::default()`.
pub polarity: EventPolarity,
/// Optional tense; `None` until set via `with_tense`.
pub tense: Option<EventTense>,
}
/// Polarity attached to an `EventMention`. `Positive` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum EventPolarity {
/// Default polarity.
#[default]
Positive,
/// Negative polarity (presumably a negated event — confirm with producers of this value).
Negative,
/// Polarity could not be determined or is hedged (presumed meaning; not enforced here).
Uncertain,
}
/// Tense attached to an `EventMention`. There is no default; mentions store
/// it as `Option<EventTense>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EventTense {
/// Past tense.
Past,
/// Present tense.
Present,
/// Future tense.
Future,
/// Hypothetical (non-actual) framing.
Hypothetical,
}
impl EventMention {
    /// Creates a mention for `trigger` spanning `start..end`.
    ///
    /// The mention starts with no type, no arguments, no tense,
    /// `Confidence::ONE` and the default polarity.
    #[must_use]
    pub fn new(trigger: impl Into<String>, start: usize, end: usize) -> Self {
        let trigger = trigger.into();
        Self {
            trigger,
            trigger_start: start,
            trigger_end: end,
            trigger_type: None,
            arguments: Vec::new(),
            confidence: Confidence::ONE,
            polarity: EventPolarity::default(),
            tense: None,
        }
    }

    /// Sets the event type label.
    #[must_use]
    pub fn with_trigger_type(self, trigger_type: impl Into<String>) -> Self {
        Self {
            trigger_type: Some(trigger_type.into()),
            ..self
        }
    }

    /// Replaces the argument list with the given `(role, text)` pairs.
    #[must_use]
    pub fn with_arguments<S: Into<String>>(self, args: Vec<(&str, S)>) -> Self {
        let mut arguments = Vec::with_capacity(args.len());
        for (role, text) in args {
            arguments.push((role.to_owned(), text.into()));
        }
        Self { arguments, ..self }
    }

    /// Sets the confidence from a raw `f64` via `Confidence::new`.
    #[must_use]
    pub fn with_confidence(self, confidence: f64) -> Self {
        Self {
            confidence: Confidence::new(confidence),
            ..self
        }
    }

    /// Sets the polarity.
    #[must_use]
    pub fn with_polarity(self, polarity: EventPolarity) -> Self {
        Self { polarity, ..self }
    }

    /// Sets the tense.
    #[must_use]
    pub fn with_tense(self, tense: EventTense) -> Self {
        Self {
            tense: Some(tense),
            ..self
        }
    }

    /// Returns the text of the first argument whose role equals `role`,
    /// ignoring ASCII case, or `None` if there is no such argument.
    #[must_use]
    pub fn get_argument(&self, role: &str) -> Option<&str> {
        for (candidate_role, text) in &self.arguments {
            if candidate_role.eq_ignore_ascii_case(role) {
                return Some(text.as_str());
            }
        }
        None
    }
}
/// A span of text that can be referred back to, tagged with its
/// `ReferentType` and optional descriptive data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscourseReferent {
/// Kind of referent.
pub referent_type: ReferentType,
/// Start offset of the span (unit is not fixed by this type; TODO confirm with callers).
pub start: usize,
/// End offset of the span; `len`/`is_empty` treat `end <= start` as empty.
pub end: usize,
/// Optional label; preferred over `text` by `display_text`.
pub label: Option<String>,
/// Optional surface text; used by `display_text` when there is no label.
pub text: Option<String>,
/// Optional event mention attached via `with_event`.
pub event: Option<EventMention>,
/// Optional canonical identifier.
pub canonical_id: Option<crate::CanonicalId>,
/// Confidence attached to the referent; `new` sets `Confidence::ONE`.
pub confidence: Confidence,
/// Depth value; `new` and `from_entity` set 0 (what it measures is not defined here — TODO confirm).
pub depth: u32,
}
impl DiscourseReferent {
    /// Creates a referent of the given type over `start..end`.
    ///
    /// All optional fields are `None`, confidence is `Confidence::ONE` and
    /// depth is 0.
    #[must_use]
    pub fn new(referent_type: ReferentType, start: usize, end: usize) -> Self {
        Self {
            referent_type,
            start,
            end,
            depth: 0,
            confidence: Confidence::ONE,
            canonical_id: None,
            event: None,
            text: None,
            label: None,
        }
    }

    /// Builds a `Nominal` referent from an entity, copying its span, text
    /// (as both label and text), canonical id and confidence.
    #[must_use]
    pub fn from_entity(entity: &Entity) -> Self {
        let surface = entity.text.clone();
        Self {
            label: Some(surface.clone()),
            text: Some(surface),
            canonical_id: entity.canonical_id,
            confidence: entity.confidence,
            ..Self::new(ReferentType::Nominal, entity.start(), entity.end())
        }
    }

    /// Sets the label.
    #[must_use]
    pub fn with_label(self, label: impl Into<String>) -> Self {
        Self {
            label: Some(label.into()),
            ..self
        }
    }

    /// Sets the surface text.
    #[must_use]
    pub fn with_text(self, text: impl Into<String>) -> Self {
        Self {
            text: Some(text.into()),
            ..self
        }
    }

    /// Attaches an event mention.
    #[must_use]
    pub fn with_event(self, event: EventMention) -> Self {
        Self {
            event: Some(event),
            ..self
        }
    }

    /// Sets the canonical id.
    #[must_use]
    pub fn with_canonical_id(self, id: impl Into<crate::CanonicalId>) -> Self {
        Self {
            canonical_id: Some(id.into()),
            ..self
        }
    }

    /// Sets the confidence from a raw `f64` via `Confidence::new`.
    #[must_use]
    pub fn with_confidence(self, confidence: f64) -> Self {
        Self {
            confidence: Confidence::new(confidence),
            ..self
        }
    }

    /// Sets the depth.
    #[must_use]
    pub fn with_depth(self, depth: u32) -> Self {
        Self { depth, ..self }
    }

    /// Returns `(start, end)`.
    #[must_use]
    pub const fn span(&self) -> (usize, usize) {
        let Self { start, end, .. } = *self;
        (start, end)
    }

    /// Length of the span; 0 when `end` does not exceed `start`.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// `true` when the span covers nothing (`end <= start`).
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// `true` when the referent type is abstract.
    #[must_use]
    pub const fn is_abstract(&self) -> bool {
        let kind = &self.referent_type;
        kind.is_abstract()
    }

    /// Human-readable text: the label if set, else the text, else the
    /// referent type's name.
    #[must_use]
    pub fn display_text(&self) -> &str {
        if let Some(label) = self.label.as_deref() {
            return label;
        }
        if let Some(text) = self.text.as_deref() {
            return text;
        }
        self.referent_type.as_str()
    }
}
/// An occurrence of a shell noun (an abstract noun such as "fact" or
/// "problem"), with its class and optional determiner.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShellNoun {
/// Lemma of the noun.
pub lemma: String,
/// Semantic class of the noun.
pub class: ShellNounClass,
/// Determiner preceding the noun, if known; inspected by `is_demonstrative`.
pub determiner: Option<String>,
/// Start offset; `ShellNoun::new` sets 0 until `at_span` is called.
pub start: usize,
/// End offset; `ShellNoun::new` sets 0 until `at_span` is called.
pub end: usize,
/// Optional full phrase text, set via `with_full_text`.
pub full_text: Option<String>,
}
/// Semantic class of a shell noun, as assigned by `classify_shell_noun`.
///
/// The enum is `#[non_exhaustive]`, so downstream crates must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ShellNounClass {
/// E.g. "fact", "reason", "evidence".
Factual,
/// E.g. "claim", "statement", "question".
Linguistic,
/// E.g. "idea", "belief", "hope".
Mental,
/// E.g. "possibility", "need", "risk".
Modal,
/// E.g. "event", "action", "decision".
Eventive,
/// E.g. "situation", "problem", "crisis".
Circumstantial,
}
impl ShellNounClass {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
ShellNounClass::Factual => "factual",
ShellNounClass::Linguistic => "linguistic",
ShellNounClass::Mental => "mental",
ShellNounClass::Modal => "modal",
ShellNounClass::Eventive => "eventive",
ShellNounClass::Circumstantial => "circumstantial",
}
}
#[must_use]
pub fn typical_antecedent_types(&self) -> &[ReferentType] {
match self {
ShellNounClass::Factual => &[ReferentType::Fact, ReferentType::Event],
ShellNounClass::Linguistic => &[ReferentType::Proposition],
ShellNounClass::Mental => &[ReferentType::Proposition, ReferentType::Fact],
ShellNounClass::Modal => &[ReferentType::Proposition],
ShellNounClass::Eventive => &[ReferentType::Event, ReferentType::Situation],
ShellNounClass::Circumstantial => &[ReferentType::Situation],
}
}
}
impl ShellNoun {
    /// Creates a shell noun with the given lemma and class, no determiner,
    /// no full text and a `0..0` span.
    #[must_use]
    pub fn new(lemma: impl Into<String>, class: ShellNounClass) -> Self {
        let lemma = lemma.into();
        Self {
            lemma,
            class,
            start: 0,
            end: 0,
            determiner: None,
            full_text: None,
        }
    }

    /// Sets the determiner.
    #[must_use]
    pub fn with_determiner(self, det: impl Into<String>) -> Self {
        Self {
            determiner: Some(det.into()),
            ..self
        }
    }

    /// Sets the span to `start..end`.
    #[must_use]
    pub fn at_span(self, start: usize, end: usize) -> Self {
        Self { start, end, ..self }
    }

    /// Sets the full phrase text.
    #[must_use]
    pub fn with_full_text(self, text: impl Into<String>) -> Self {
        Self {
            full_text: Some(text.into()),
            ..self
        }
    }

    /// `true` when the determiner, compared case-insensitively, is one of
    /// "this", "that", "these" or "those". `false` when there is no
    /// determiner.
    #[must_use]
    pub fn is_demonstrative(&self) -> bool {
        match self.determiner.as_deref() {
            Some(det) => {
                let lowered = det.to_lowercase();
                matches!(lowered.as_str(), "this" | "that" | "these" | "those")
            }
            None => false,
        }
    }

    /// Referent types this noun's class typically points back to.
    #[must_use]
    pub fn typical_antecedent_types(&self) -> &[ReferentType] {
        let class = &self.class;
        class.typical_antecedent_types()
    }
}
/// Classifies `lemma` as a shell noun, returning its [`ShellNounClass`] or
/// `None` when the word is not in the built-in lexicon.
///
/// Matching is case-insensitive (the lemma is lowercased first) and exact:
/// inflected forms such as plurals are not recognised.
#[must_use]
pub fn classify_shell_noun(lemma: &str) -> Option<ShellNounClass> {
    match lemma.to_lowercase().as_str() {
        "fact" | "reason" | "evidence" | "proof" | "point" | "truth" | "result" | "outcome"
        | "consequence" | "effect" | "cause" => Some(ShellNounClass::Factual),
        "claim" | "statement" | "argument" | "answer" | "question" | "response" | "reply"
        | "assertion" | "allegation" | "announcement" | "explanation" | "suggestion"
        | "recommendation" | "proposal" | "promise" | "warning" | "threat" => {
            Some(ShellNounClass::Linguistic)
        }
        // "concern" was previously misspelled "concerno", so it never matched.
        "idea" | "belief" | "thought" | "view" | "opinion" | "impression" | "feeling" | "sense"
        | "notion" | "assumption" | "understanding" | "knowledge" | "memory" | "expectation"
        | "hope" | "fear" | "worry" | "concern" => Some(ShellNounClass::Mental),
        "possibility" | "chance" | "ability" | "need" | "requirement" | "necessity"
        | "obligation" | "duty" | "right" | "permission" | "opportunity" | "risk" | "danger"
        | "likelihood" | "probability" => Some(ShellNounClass::Modal),
        "event" | "incident" | "action" | "step" | "move" | "development" | "change"
        | "process" | "procedure" | "activity" | "behavior" | "decision" | "choice" | "attempt"
        | "effort" | "achievement" | "success" | "failure" => Some(ShellNounClass::Eventive),
        "situation" | "context" | "case" | "circumstance" | "condition" | "state" | "position"
        | "environment" | "scenario" | "aspect" | "factor" | "issue" | "problem" | "difficulty"
        | "challenge" | "crisis" | "dilemma" => Some(ShellNounClass::Circumstantial),
        _ => None,
    }
}
/// Returns `true` when `word` is in the shell-noun lexicon, i.e. when
/// `classify_shell_noun` assigns it a class.
#[must_use]
pub fn is_shell_noun(word: &str) -> bool {
    let class = classify_shell_noun(word);
    class.is_some()
}
/// Sentence and clause segmentation of a text.
///
/// All public offsets are **character** offsets (not byte offsets) into the
/// text passed to [`DiscourseScope::analyze`].
#[derive(Debug, Clone)]
pub struct DiscourseScope {
    /// Strictly increasing character offsets delimiting sentences. Starts
    /// with 0 and ends with the character count of the text.
    pub sentence_boundaries: Vec<usize>,
    /// Sorted, de-duplicated character offsets delimiting clauses. Includes
    /// every sentence boundary.
    pub clause_boundaries: Vec<usize>,
    /// Byte offset of each character, plus one trailing entry equal to the
    /// text's byte length.
    char_to_byte: Vec<usize>,
}

impl DiscourseScope {
    /// Segments `text` into sentences and clauses using punctuation and
    /// connective heuristics.
    #[must_use]
    pub fn analyze(text: &str) -> Self {
        Self {
            sentence_boundaries: Self::find_sentence_boundaries(text),
            clause_boundaries: Self::find_clause_boundaries(text),
            char_to_byte: Self::build_char_to_byte_map(text),
        }
    }

    /// Maps every character index to its byte offset; the final entry is
    /// `text.len()` so that an end-exclusive offset can be looked up too.
    fn build_char_to_byte_map(text: &str) -> Vec<usize> {
        text.char_indices()
            .map(|(byte_idx, _)| byte_idx)
            .chain(std::iter::once(text.len()))
            .collect()
    }

    /// Converts a character offset to a byte offset, clamping offsets past
    /// the end of the text to the text's byte length.
    fn char_to_byte_offset(&self, char_offset: usize) -> usize {
        self.char_to_byte
            .get(char_offset)
            .or_else(|| self.char_to_byte.last())
            .copied()
            .unwrap_or(0)
    }

    /// Finds sentence boundaries as character offsets.
    ///
    /// A terminator (`.`, `!`, `?`, `。`) ends a sentence when it is followed
    /// by whitespace or a quote (or nothing), and the character after that is
    /// uppercase or `"` (or absent).
    fn find_sentence_boundaries(text: &str) -> Vec<usize> {
        let chars: Vec<char> = text.chars().collect();
        let char_count = chars.len();
        let mut boundaries = vec![0];
        for (i, &c) in chars.iter().enumerate() {
            if !matches!(c, '.' | '!' | '?' | '。') {
                continue;
            }
            let boundary_ok = chars
                .get(i + 1)
                .map_or(true, |&nc| nc.is_whitespace() || nc == '"' || nc == '\'');
            let after_ok = chars
                .get(i + 2)
                .map_or(true, |&ac| ac.is_uppercase() || ac == '"');
            if boundary_ok && after_ok {
                boundaries.push(i + 1);
            }
        }
        // Make sure the final span reaches the end of the text.
        if boundaries.last() != Some(&char_count) {
            boundaries.push(char_count);
        }
        boundaries
    }

    /// Finds clause boundaries as character offsets.
    ///
    /// A boundary is placed immediately after each occurrence of a clause
    /// marker (matched ASCII-case-insensitively), and at every sentence
    /// boundary. The result is sorted and de-duplicated.
    fn find_clause_boundaries(text: &str) -> Vec<usize> {
        const CLAUSE_MARKERS: &[&str] = &[
            ", and ",
            ", but ",
            ", or ",
            ", so ",
            ", yet ",
            "; ",
            ": ",
            " -- ",
            " – ",
            " while ",
            " although ",
            " because ",
            " since ",
            " when ",
            " whereas ",
            " unless ",
            " if ",
            "、",
            ",",
        ];
        // ASCII lowercasing keeps every byte offset identical to `text`.
        // Full Unicode lowercasing can change byte lengths (e.g. 'İ'), which
        // would make offsets found in the lowered copy invalid for `text`.
        // Every marker is already lowercase ASCII or caseless, so ASCII
        // folding is all the case-insensitivity the search needs.
        let haystack = text.to_ascii_lowercase();
        let mut boundaries = vec![0];
        for &marker in CLAUSE_MARKERS {
            let mut search_from = 0;
            while let Some(rel) = haystack[search_from..].find(marker) {
                let boundary_byte = search_from + rel + marker.len();
                boundaries.push(text[..boundary_byte].chars().count());
                search_from = boundary_byte;
            }
        }
        boundaries.extend(Self::find_sentence_boundaries(text));
        boundaries.sort_unstable();
        boundaries.dedup();
        boundaries
    }

    /// Returns the first `(start, end)` window of `boundaries` with
    /// `start <= offset < end`.
    fn span_containing(boundaries: &[usize], offset: usize) -> Option<(usize, usize)> {
        boundaries
            .windows(2)
            .map(|w| (w[0], w[1]))
            .find(|&(start, end)| offset >= start && offset < end)
    }

    /// Number of sentences found.
    #[must_use]
    pub fn sentence_count(&self) -> usize {
        self.sentence_boundaries.len().saturating_sub(1)
    }

    /// Number of clauses found.
    #[must_use]
    pub fn clause_count(&self) -> usize {
        self.clause_boundaries.len().saturating_sub(1)
    }

    /// Returns the `(start, end)` character span of the sentence containing
    /// `offset`, or `None` if `offset` is at or past the end of the text.
    #[must_use]
    pub fn sentence_at(&self, offset: usize) -> Option<(usize, usize)> {
        Self::span_containing(&self.sentence_boundaries, offset)
    }

    /// Returns the `(start, end)` character span of the clause containing
    /// `offset`, or `None` if `offset` is at or past the end of the text.
    #[must_use]
    pub fn clause_at(&self, offset: usize) -> Option<(usize, usize)> {
        Self::span_containing(&self.clause_boundaries, offset)
    }

    /// Returns up to `n` clauses that precede the clause containing
    /// `offset`, nearest first. Empty when `offset` is outside the text.
    #[must_use]
    pub fn preceding_clauses(&self, offset: usize, n: usize) -> Vec<(usize, usize)> {
        let current = self
            .clause_boundaries
            .windows(2)
            .position(|w| offset >= w[0] && offset < w[1]);
        match current {
            // Every index below `idx` is a valid window index, so `i + 1`
            // is always in bounds.
            Some(idx) => (0..idx)
                .rev()
                .take(n)
                .map(|i| (self.clause_boundaries[i], self.clause_boundaries[i + 1]))
                .collect(),
            None => Vec::new(),
        }
    }

    /// Returns the slice of `text` covered by the character span
    /// `start..end`.
    ///
    /// Offsets past the end are clamped; an inverted span yields `""`.
    /// `text` should be the same string that was passed to `analyze`.
    #[must_use]
    pub fn extract_span<'a>(&self, text: &'a str, start: usize, end: usize) -> &'a str {
        let byte_start = self.char_to_byte_offset(start);
        let byte_end = self.char_to_byte_offset(end);
        text.get(byte_start..byte_end).unwrap_or("")
    }

    /// Returns candidate antecedent spans for an anaphor at
    /// `anaphor_offset`: up to three preceding clauses plus every sentence
    /// that ends at or before the anaphor's sentence start, ordered by
    /// descending start offset with adjacent duplicates removed.
    #[must_use]
    pub fn candidate_antecedent_spans(&self, anaphor_offset: usize) -> Vec<(usize, usize)> {
        let mut candidates = self.preceding_clauses(anaphor_offset, 3);
        if let Some((sent_start, _)) = self.sentence_at(anaphor_offset) {
            candidates.extend(
                self.sentence_boundaries
                    .windows(2)
                    .filter(|w| w[1] <= sent_start)
                    .map(|w| (w[0], w[1])),
            );
        }
        candidates.sort_by_key(|&(start, _)| std::cmp::Reverse(start));
        candidates.dedup();
        candidates
    }
}
/// A group of event mentions judged to refer to the same event.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EventCluster {
/// Mentions in the cluster; the first one supplies `canonical_trigger`.
pub mentions: Vec<EventMention>,
/// Cluster identifier; `EventCluster::new` sets 0 until `with_id` is called.
pub id: u64,
/// Event type; `EventCluster::new` takes it from the first mention that has a trigger type.
pub event_type: Option<String>,
/// Confidence attached to the cluster; `EventCluster::new` sets `Confidence::ONE`.
pub confidence: Confidence,
}
impl EventCluster {
    /// Builds a cluster from `mentions`.
    ///
    /// The id is 0, confidence is `Confidence::ONE`, and the event type is
    /// cloned from the first mention that carries a trigger type (if any).
    #[must_use]
    pub fn new(mentions: Vec<EventMention>) -> Self {
        let event_type = mentions.iter().find_map(|m| m.trigger_type.clone());
        Self {
            id: 0,
            confidence: Confidence::ONE,
            event_type,
            mentions,
        }
    }

    /// Number of mentions in the cluster.
    #[must_use]
    pub fn len(&self) -> usize {
        let Self { mentions, .. } = self;
        mentions.len()
    }

    /// `true` when the cluster has no mentions.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let Self { mentions, .. } = self;
        mentions.is_empty()
    }

    /// Trigger text of the first mention, or `""` for an empty cluster.
    #[must_use]
    pub fn canonical_trigger(&self) -> &str {
        match self.mentions.first() {
            Some(first) => first.trigger.as_str(),
            None => "",
        }
    }

    /// Sets the cluster id.
    #[must_use]
    pub fn with_id(self, id: u64) -> Self {
        Self { id, ..self }
    }

    /// Appends a mention. The cluster's `event_type` is not recomputed.
    pub fn add(&mut self, mention: EventMention) {
        self.mentions.push(mention);
    }
}
/// Rule-based resolver that groups event mentions into coreference clusters.
///
/// NOTE(review): the derived `Default` yields `require_type_match = false`
/// and `min_arg_overlap = 0.0`, which differs from `EventCorefResolver::new`
/// (`true` / `0.3`). Confirm which configuration callers expect.
#[derive(Debug, Clone, Default)]
pub struct EventCorefResolver {
/// When `true`, two mentions that both carry a trigger type must have the same type to corefer.
pub require_type_match: bool,
/// Minimum argument-overlap ratio required for coreference; values `<= 0.0` disable the check.
pub min_arg_overlap: f64,
}
impl EventCorefResolver {
    /// Creates a resolver that requires matching trigger types and an
    /// argument overlap of at least 0.3.
    #[must_use]
    pub fn new() -> Self {
        Self {
            require_type_match: true,
            min_arg_overlap: 0.3,
        }
    }

    /// Greedily clusters `mentions`.
    ///
    /// Each not-yet-assigned mention seeds a new cluster and absorbs every
    /// later unassigned mention that corefers with the seed. Clusters are
    /// returned in seed order with ids `0, 1, 2, …`.
    #[must_use]
    pub fn resolve(&self, mentions: &[EventMention]) -> Vec<EventCluster> {
        let mut clusters: Vec<EventCluster> = Vec::new();
        let mut assigned = vec![false; mentions.len()];
        for (i, seed) in mentions.iter().enumerate() {
            if assigned[i] {
                continue;
            }
            assigned[i] = true;
            let mut members = vec![seed.clone()];
            for (j, candidate) in mentions.iter().enumerate().skip(i + 1) {
                if !assigned[j] && self.should_corefer(seed, candidate) {
                    members.push(candidate.clone());
                    assigned[j] = true;
                }
            }
            let id = clusters.len() as u64;
            clusters.push(EventCluster::new(members).with_id(id));
        }
        clusters
    }

    /// Decides whether two mentions refer to the same event: compatible
    /// types (if required), matching triggers, and enough argument overlap.
    fn should_corefer(&self, a: &EventMention, b: &EventMention) -> bool {
        if self.require_type_match {
            // A missing type on either side is treated as compatible.
            if let (Some(ta), Some(tb)) = (&a.trigger_type, &b.trigger_type) {
                if ta != tb {
                    return false;
                }
            }
        }
        if !self.triggers_match(&a.trigger, &b.trigger) {
            return false;
        }
        if self.min_arg_overlap > 0.0 && self.compute_arg_overlap(a, b) < self.min_arg_overlap {
            return false;
        }
        true
    }

    /// Crude suffix-stripping stemmer for English triggers.
    ///
    /// Strips one family of suffix ("ation", "ion" after t/s, "ing", "ed",
    /// plural "s"), repeatedly if the suffix recurs, then collapses a
    /// doubled final letter ("runn" -> "run").
    fn simple_stem(&self, word: &str) -> String {
        let mut s = word.to_string();
        if s.ends_with("ation") {
            s = s.trim_end_matches("ation").to_string();
        } else if s.ends_with("tion") || s.ends_with("sion") {
            s = s.trim_end_matches("ion").to_string();
        } else if s.ends_with("ing") {
            s = s.trim_end_matches("ing").to_string();
        } else if s.ends_with("ed") && s.len() > 3 {
            s = s.trim_end_matches("ed").to_string();
        } else if s.ends_with('s') && s.len() > 2 && !s.ends_with("ss") {
            s = s.trim_end_matches('s').to_string();
        }
        // Compare characters, not raw bytes: a multi-byte character whose
        // last two UTF-8 bytes happen to be equal must not be mistaken for a
        // doubled letter, and doubled non-ASCII letters should be detected.
        let has_doubled_tail = {
            let mut tail = s.chars().rev();
            matches!(
                (tail.next(), tail.next(), tail.next()),
                (Some(last), Some(prev), Some(_)) if last == prev
            )
        };
        if has_doubled_tail {
            s.pop();
        }
        s
    }

    /// Returns `true` when two trigger strings are equal ignoring case, or
    /// share the same stem.
    pub fn triggers_match(&self, a: &str, b: &str) -> bool {
        let a_lower = a.to_lowercase();
        let b_lower = b.to_lowercase();
        a_lower == b_lower || self.simple_stem(&a_lower) == self.simple_stem(&b_lower)
    }

    /// Fraction of arguments shared by `a` and `b`.
    ///
    /// Counts arguments of `a` that have a same-role, similar-valued
    /// counterpart in `b`, divided by the larger argument count. Two
    /// mentions without arguments overlap fully (1.0).
    fn compute_arg_overlap(&self, a: &EventMention, b: &EventMention) -> f64 {
        let total = a.arguments.len().max(b.arguments.len());
        if total == 0 {
            return 1.0;
        }
        let matched = a
            .arguments
            .iter()
            .filter(|(role_a, val_a)| {
                b.arguments
                    .iter()
                    .any(|(role_b, val_b)| role_a == role_b && self.values_similar(val_a, val_b))
            })
            .count();
        matched as f64 / total as f64
    }

    /// Case-insensitive similarity: equal, or one value contains the other.
    fn values_similar(&self, a: &str, b: &str) -> bool {
        let a_lower = a.to_lowercase();
        let b_lower = b.to_lowercase();
        a_lower == b_lower || a_lower.contains(&b_lower) || b_lower.contains(&a_lower)
    }
}
// Unit tests for the discourse types, shell-noun lexicon, text segmentation
// and event coreference resolver defined in this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::EntityType;
// Only `Nominal` is non-abstract.
#[test]
fn test_referent_types() {
assert!(!ReferentType::Nominal.is_abstract());
assert!(ReferentType::Event.is_abstract());
assert!(ReferentType::Fact.is_abstract());
assert!(ReferentType::Proposition.is_abstract());
assert!(ReferentType::Situation.is_abstract());
}
// Builder sets trigger type and arguments; unknown roles return `None`.
#[test]
fn test_event_mention() {
let event = EventMention::new("invaded", 7, 14)
.with_trigger_type("attack")
.with_arguments(vec![("Agent", "Russia"), ("Patient", "Ukraine")]);
assert_eq!(event.trigger, "invaded");
assert_eq!(event.trigger_type.as_deref(), Some("attack"));
assert_eq!(event.get_argument("Agent"), Some("Russia"));
assert_eq!(event.get_argument("Patient"), Some("Ukraine"));
assert_eq!(event.get_argument("Location"), None);
}
// Span helpers and label-first `display_text`.
#[test]
fn test_discourse_referent() {
let event = EventMention::new("invaded", 7, 14);
let referent = DiscourseReferent::new(ReferentType::Event, 0, 30)
.with_event(event)
.with_label("Russian invasion");
assert_eq!(referent.span(), (0, 30));
assert_eq!(referent.len(), 30);
assert!(referent.is_abstract());
assert_eq!(referent.display_text(), "Russian invasion");
}
// One representative lemma per class, plus an unknown word.
#[test]
fn test_shell_noun_classification() {
assert_eq!(
classify_shell_noun("problem"),
Some(ShellNounClass::Circumstantial)
);
assert_eq!(classify_shell_noun("fact"), Some(ShellNounClass::Factual));
assert_eq!(classify_shell_noun("idea"), Some(ShellNounClass::Mental));
assert_eq!(
classify_shell_noun("possibility"),
Some(ShellNounClass::Modal)
);
assert_eq!(classify_shell_noun("event"), Some(ShellNounClass::Eventive));
assert_eq!(
classify_shell_noun("claim"),
Some(ShellNounClass::Linguistic)
);
assert_eq!(classify_shell_noun("foobar"), None);
}
// "this" is demonstrative, "the" is not.
#[test]
fn test_shell_noun_demonstrative() {
let shell = ShellNoun::new("problem", ShellNounClass::Circumstantial)
.with_determiner("this")
.at_span(32, 44);
assert!(shell.is_demonstrative());
let shell_the =
ShellNoun::new("problem", ShellNounClass::Circumstantial).with_determiner("the");
assert!(!shell_the.is_demonstrative());
}
// `ShellNoun` forwards to its class's antecedent types.
#[test]
fn test_shell_noun_typical_antecedents() {
let shell = ShellNoun::new("event", ShellNounClass::Eventive);
let types = shell.typical_antecedent_types();
assert!(types.contains(&ReferentType::Event));
assert!(types.contains(&ReferentType::Situation));
}
// Entities become `Nominal` referents with the same span.
#[test]
fn test_from_entity() {
let entity = Entity::new("Russia", EntityType::Location, 0, 6, 0.95);
let referent = DiscourseReferent::from_entity(&entity);
assert_eq!(referent.referent_type, ReferentType::Nominal);
assert_eq!(referent.start, 0);
assert_eq!(referent.end, 6);
assert!(!referent.is_abstract());
}
// Three period-terminated sentences are found.
#[test]
fn test_discourse_scope_sentences() {
let text = "Russia invaded Ukraine. This caused inflation. The crisis deepened.";
let scope = DiscourseScope::analyze(text);
assert_eq!(scope.sentence_count(), 3);
}
// ", and " and the sentence break yield at least two clauses.
#[test]
fn test_discourse_scope_clauses() {
let text = "Prices rose, and wages fell. This was unsustainable.";
let scope = DiscourseScope::analyze(text);
assert!(scope.clause_count() >= 2);
}
// Offset 24 lies in the second sentence, so the first is a preceding clause.
#[test]
fn test_discourse_scope_preceding() {
let text = "Russia invaded Ukraine. This caused inflation.";
let scope = DiscourseScope::analyze(text);
let preceding = scope.preceding_clauses(24, 2);
assert!(!preceding.is_empty(), "Should find preceding clauses");
}
// The nearest candidate for an anaphor in sentence two covers sentence one.
#[test]
fn test_candidate_antecedent_spans() {
let text = "Russia invaded Ukraine in 2022. This caused a global energy crisis.";
let scope = DiscourseScope::analyze(text);
let candidates = scope.candidate_antecedent_spans(32);
assert!(!candidates.is_empty(), "Should find candidate spans");
let first_sentence = scope.extract_span(text, candidates[0].0, candidates[0].1);
assert!(
first_sentence.contains("invaded"),
"First candidate should include the invasion"
);
}
// The first mention supplies the canonical trigger; the type comes from the mentions.
#[test]
fn test_event_cluster_creation() {
let mentions = vec![
EventMention::new("invasion", 10, 18).with_trigger_type("attack"),
EventMention::new("invaded", 50, 57).with_trigger_type("attack"),
];
let cluster = EventCluster::new(mentions);
assert_eq!(cluster.len(), 2);
assert_eq!(cluster.canonical_trigger(), "invasion");
assert_eq!(cluster.event_type, Some("attack".to_string()));
}
// Two attack mentions merge; the meeting stays separate.
#[test]
fn test_event_coref_resolver_simple() {
let resolver = EventCorefResolver::new();
let mentions = vec![
EventMention::new("attacked", 10, 18)
.with_trigger_type("attack")
.with_arguments(vec![("Agent", "Russia"), ("Patient", "Ukraine")]),
EventMention::new("attack", 50, 56)
.with_trigger_type("attack")
.with_arguments(vec![("Agent", "Russia")]),
EventMention::new("meeting", 100, 107)
.with_trigger_type("meeting")
.with_arguments(vec![("Participant", "leaders")]),
];
let clusters = resolver.resolve(&mentions);
assert_eq!(clusters.len(), 2, "Expected 2 clusters");
let attack_cluster = &clusters[0];
assert_eq!(
attack_cluster.len(),
2,
"Attack cluster should have 2 mentions"
);
let meeting_cluster = &clusters[1];
assert_eq!(
meeting_cluster.len(),
1,
"Meeting cluster should have 1 mention"
);
}
// Inflected forms of the same verb match; unrelated words do not.
#[test]
fn test_event_coref_trigger_matching() {
let resolver = EventCorefResolver::new();
assert!(resolver.triggers_match("attack", "attack"));
assert!(resolver.triggers_match("attack", "attacks"));
assert!(resolver.triggers_match("attack", "attacked"));
assert!(resolver.triggers_match("attack", "attacking"));
assert!(!resolver.triggers_match("attack", "meeting"));
assert!(!resolver.triggers_match("invade", "defend"));
}
// Empty text has a single boundary at 0, hence zero sentences and clauses.
#[test]
fn test_empty_text_discourse_scope() {
let scope = DiscourseScope::analyze("");
assert_eq!(scope.sentence_count(), 0);
assert_eq!(scope.clause_count(), 0);
}
// Text without a terminator still gets a start and an end boundary.
#[test]
fn test_single_word_text() {
let scope = DiscourseScope::analyze("Hello");
assert!(scope.sentence_boundaries.len() >= 2); }
// Smoke test only: does not pin how abbreviations such as "Dr." are split.
#[test]
fn test_abbreviation_handling() {
let text = "Dr. Smith went to the U.S. embassy. He met with officials.";
let scope = DiscourseScope::analyze(text);
assert!(scope.sentence_count() >= 1);
}
// Classification lowercases its input first.
#[test]
fn test_shell_noun_case_insensitive() {
assert_eq!(classify_shell_noun("FACT"), Some(ShellNounClass::Factual));
assert_eq!(
classify_shell_noun("Problem"),
Some(ShellNounClass::Circumstantial)
);
assert_eq!(classify_shell_noun("IDEA"), Some(ShellNounClass::Mental));
}
// A fresh mention has no arguments.
#[test]
fn test_event_mention_empty_arguments() {
let event = EventMention::new("attacked", 0, 8);
assert!(event.arguments.is_empty());
assert_eq!(event.get_argument("Agent"), None);
}
// A zero-width span is empty with length 0.
#[test]
fn test_discourse_referent_empty_span() {
let referent = DiscourseReferent::new(ReferentType::Event, 5, 5);
assert!(referent.is_empty());
assert_eq!(referent.len(), 0);
}
// `with_polarity` stores each variant unchanged.
#[test]
fn test_event_polarity_variants() {
let positive = EventMention::new("attacked", 0, 8).with_polarity(EventPolarity::Positive);
let negative = EventMention::new("attacked", 0, 8).with_polarity(EventPolarity::Negative);
let uncertain = EventMention::new("attacked", 0, 8).with_polarity(EventPolarity::Uncertain);
assert_eq!(positive.polarity, EventPolarity::Positive);
assert_eq!(negative.polarity, EventPolarity::Negative);
assert_eq!(uncertain.polarity, EventPolarity::Uncertain);
}
// `with_tense` wraps the tense in `Some`.
#[test]
fn test_event_tense_variants() {
let past = EventMention::new("attacked", 0, 8).with_tense(EventTense::Past);
let future = EventMention::new("will attack", 0, 11).with_tense(EventTense::Future);
assert_eq!(past.tense, Some(EventTense::Past));
assert_eq!(future.tense, Some(EventTense::Future));
}
// An empty cluster reports length 0 and an empty canonical trigger.
#[test]
fn test_event_cluster_empty() {
let cluster = EventCluster::new(vec![]);
assert!(cluster.is_empty());
assert_eq!(cluster.len(), 0);
assert_eq!(cluster.canonical_trigger(), "");
}
// No mentions in, no clusters out.
#[test]
fn test_event_coref_empty_input() {
let resolver = EventCorefResolver::new();
let clusters = resolver.resolve(&[]);
assert!(clusters.is_empty());
}
// A lone mention forms a singleton cluster.
#[test]
fn test_event_coref_single_mention() {
let resolver = EventCorefResolver::new();
let mentions = vec![EventMention::new("attacked", 0, 8).with_trigger_type("attack")];
let clusters = resolver.resolve(&mentions);
assert_eq!(clusters.len(), 1);
assert_eq!(clusters[0].len(), 1);
}
// "this" accepts abstract types only; "it" accepts Nominal and Event but not Fact.
#[test]
fn test_referent_type_can_be_antecedent() {
assert!(ReferentType::Event.can_be_this_antecedent());
assert!(ReferentType::Fact.can_be_this_antecedent());
assert!(ReferentType::Proposition.can_be_this_antecedent());
assert!(!ReferentType::Nominal.can_be_this_antecedent());
assert!(ReferentType::Event.can_be_it_antecedent());
assert!(ReferentType::Nominal.can_be_it_antecedent());
assert!(!ReferentType::Fact.can_be_it_antecedent());
}
// In-range offsets resolve to some sentence (the spans themselves are not pinned).
#[test]
fn test_discourse_scope_sentence_at() {
let text = "First sentence. Second sentence. Third.";
let scope = DiscourseScope::analyze(text);
let sent1 = scope.sentence_at(5); assert!(sent1.is_some());
let sent2 = scope.sentence_at(20); assert!(sent2.is_some());
}
// An in-range offset resolves to some clause.
#[test]
fn test_discourse_scope_clause_at() {
let text = "Prices rose, and wages fell.";
let scope = DiscourseScope::analyze(text);
let clause = scope.clause_at(5); assert!(clause.is_some());
}
// Table-driven check of one lemma per shell-noun class.
#[test]
fn test_shell_noun_all_classes() {
let tests = vec![
("fact", ShellNounClass::Factual),
("claim", ShellNounClass::Linguistic),
("idea", ShellNounClass::Mental),
("possibility", ShellNounClass::Modal),
("event", ShellNounClass::Eventive),
("situation", ShellNounClass::Circumstantial),
];
for (noun, expected_class) in tests {
let result = classify_shell_noun(noun);
assert_eq!(result, Some(expected_class), "Failed for noun: {}", noun);
}
}
}