use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};
/// One record of a tab-separated "mentions" table, as produced by
/// [`Mention::from_tsv`].
///
/// Each field corresponds to one TSV column; the column index and the
/// fallback applied when the raw text cannot be parsed are noted per field.
///
/// NOTE(review): the field names match the GDELT 2.0 Mentions table — confirm
/// against the actual data source before relying on that codebook's semantics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Mention {
/// Column 0. The only strictly validated column: `from_tsv` returns an error
/// when it is not a valid `i64`.
pub global_event_id: i64,
/// Column 1, read with the format `%Y%m%d%H%M%S`; `None` when the text is
/// shorter than 14 bytes or does not parse.
pub event_time_date: Option<NaiveDateTime>,
/// Column 2, read with the format `%Y%m%d%H%M%S`; `None` when the text is
/// shorter than 14 bytes or does not parse.
pub mention_time_date: Option<NaiveDateTime>,
/// Column 3. Falls back to `1` when the text is not a valid `i32`; see
/// `mention_type_name` for the label attached to each code.
pub mention_type: i32,
/// Column 4, copied verbatim (not trimmed).
pub mention_source_name: String,
/// Column 5, copied verbatim (not trimmed).
pub mention_identifier: String,
/// Column 6; `None` when the text is not a valid `i32`.
pub sentence_id: Option<i32>,
/// Column 7; `None` when the text is not a valid `i32`.
pub actor1_char_offset: Option<i32>,
/// Column 8; `None` when the text is not a valid `i32`.
pub actor2_char_offset: Option<i32>,
/// Column 9; `None` when the text is not a valid `i32`.
pub action_char_offset: Option<i32>,
/// Column 10; `true` only when the raw text is exactly `"1"`.
pub in_raw_text: bool,
/// Column 11; `0.0` when the text is blank or not a valid `f64`.
pub confidence: f64,
/// Column 12; `None` when the text is not a valid `i32`.
pub mention_doc_len: Option<i32>,
/// Column 13; `0.0` when the text is blank or not a valid `f64`.
pub mention_doc_tone: f64,
/// Optional column 14, trimmed; `None` when the column is absent or blank.
pub mention_doc_translation_info: Option<String>,
/// Optional column 15, trimmed; `None` when the column is absent or blank.
pub extras: Option<String>,
}
impl Mention {
    /// Parses one tab-separated record into a [`Mention`].
    ///
    /// The line is split on `'\t'`. Columns 0 through 13 must be present;
    /// columns 14 and 15 are optional and become `None` when missing or blank.
    ///
    /// Only column 0 (the event id) is strictly validated. Every other column
    /// degrades silently: unparseable dates and `Option<i32>` columns become
    /// `None`, the mention type falls back to `1`, and the two `f64` columns
    /// fall back to `0.0`.
    ///
    /// # Errors
    ///
    /// Returns a message string when the line has fewer than 14 columns or
    /// when column 0 is not a valid `i64`.
    pub fn from_tsv(line: &str) -> Result<Self, String> {
        let columns = line.split('\t').collect::<Vec<&str>>();
        let count = columns.len();
        if count < 14 {
            return Err(format!("Expected at least 14 fields, got {}", count));
        }

        // Small accessors for the three recurring "parse or fall back" shapes.
        let optional_int = |index: usize| parse_i32(columns[index]).ok();
        let float_or_zero = |index: usize| parse_f64(columns[index]).unwrap_or(0.0);
        let optional_text = |index: usize| columns.get(index).and_then(|raw| non_empty(raw));

        // The event id is the only column whose parse failure aborts the record.
        let global_event_id = parse_i64(columns[0])?;

        Ok(Self {
            global_event_id,
            event_time_date: parse_mention_date(columns[1]),
            mention_time_date: parse_mention_date(columns[2]),
            mention_type: parse_i32(columns[3]).unwrap_or(1),
            mention_source_name: columns[4].to_string(),
            mention_identifier: columns[5].to_string(),
            sentence_id: optional_int(6),
            actor1_char_offset: optional_int(7),
            actor2_char_offset: optional_int(8),
            action_char_offset: optional_int(9),
            in_raw_text: columns[10] == "1",
            confidence: float_or_zero(11),
            mention_doc_len: optional_int(12),
            mention_doc_tone: float_or_zero(13),
            mention_doc_translation_info: optional_text(14),
            extras: optional_text(15),
        })
    }

    /// Returns the label for `mention_type`, or `"UNKNOWN"` for any code
    /// outside `1..=6`.
    pub fn mention_type_name(&self) -> &'static str {
        // Labels for codes 1 through 6, stored at index `code - 1`.
        const NAMES: [&str; 6] = [
            "WEB",
            "CITATIONONLY",
            "CORE",
            "DTIC",
            "JSTOR",
            "NONTEXTUALSOURCE",
        ];
        match self.mention_type {
            code @ 1..=6 => NAMES[(code - 1) as usize],
            _ => "UNKNOWN",
        }
    }

    /// Returns `true` when `mention_type` is `1`, the code labelled `"WEB"`.
    pub fn is_web_mention(&self) -> bool {
        matches!(self.mention_type, 1)
    }

    /// Returns `true` when `confidence` is greater than or equal to `threshold`.
    pub fn is_confident(&self, threshold: f64) -> bool {
        threshold <= self.confidence
    }
}
/// Parses `s` as an `i64` after trimming surrounding whitespace.
///
/// On failure the error message embeds the original, untrimmed input.
fn parse_i64(s: &str) -> Result<i64, String> {
    match s.trim().parse::<i64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i64: {}", s)),
    }
}
/// Parses `s` as an `i32` after trimming surrounding whitespace.
///
/// On failure the error message embeds the original, untrimmed input.
fn parse_i32(s: &str) -> Result<i32, String> {
    match s.trim().parse::<i32>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i32: {}", s)),
    }
}
/// Parses `s` as an `f64` after trimming surrounding whitespace.
///
/// A blank input (empty or whitespace only) is treated as `0.0` rather than
/// as an error. On failure the error message embeds the original, untrimmed
/// input.
fn parse_f64(s: &str) -> Result<f64, String> {
    match s.trim() {
        "" => Ok(0.0),
        text => text
            .parse::<f64>()
            .map_err(|_| format!("Invalid f64: {}", s)),
    }
}
/// Parses a timestamp written as `%Y%m%d%H%M%S`.
///
/// Inputs shorter than 14 bytes are rejected without attempting a parse.
/// The input is not trimmed; any parse failure yields `None`.
fn parse_mention_date(s: &str) -> Option<NaiveDateTime> {
    match s.len() {
        0..=13 => None,
        _ => chrono::NaiveDateTime::parse_from_str(s, "%Y%m%d%H%M%S").ok(),
    }
}
/// Returns the trimmed text as an owned `String`, or `None` when nothing
/// remains after trimming surrounding whitespace.
fn non_empty(s: &str) -> Option<String> {
    Some(s.trim())
        .filter(|text| !text.is_empty())
        .map(String::from)
}
/// Names of the 16 mention columns, listed in the same order as the column
/// indices read by `Mention::from_tsv` (index 0 first).
pub const MENTION_COLUMNS: &[&str] = &[
    "GlobalEventID",
    "EventTimeDate",
    "MentionTimeDate",
    "MentionType",
    "MentionSourceName",
    "MentionIdentifier",
    "SentenceID",
    "Actor1CharOffset",
    "Actor2CharOffset",
    "ActionCharOffset",
    "InRawText",
    "Confidence",
    "MentionDocLen",
    "MentionDocTone",
    "MentionDocTranslationInfo",
    "Extras",
];