gdelt 0.1.0

CLI for GDELT Project - optimized for agentic usage with local data caching
//! GDELT Mentions model.
//!
//! Mentions link events to the specific articles that mention them,
//! providing provenance and context for event coding.

use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};

/// A mention of an event in a news article.
///
/// One value corresponds to one tab-separated row as read by
/// [`Mention::from_tsv`]; the fields are declared in the same order as the
/// columns listed in [`MENTION_COLUMNS`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Mention {
    /// Global Event ID (links to Event)
    pub global_event_id: i64,
    /// Event timestamp (YYYYMMDDHHMMSS); `None` when the column cannot be parsed.
    /// NOTE(review): the GDELT codebook describes this as when the mentioned
    /// event was first recorded, not when the mention was found - confirm.
    pub event_time_date: Option<NaiveDateTime>,
    /// Mention time date (YYYYMMDDHHMMSS); `None` when the column cannot be parsed
    pub mention_time_date: Option<NaiveDateTime>,
    /// Type of mention (1=WEB, 2=CITATIONONLY, 3=CORE, 4=DTIC, 5=JSTOR, 6=NONTEXTUALSOURCE).
    /// The TSV parser falls back to 1 when the column is not a valid integer.
    pub mention_type: i32,
    /// Source name
    pub mention_source_name: String,
    /// Mention identifier (usually URL)
    pub mention_identifier: String,
    /// Sentence number in source
    pub sentence_id: Option<i32>,
    /// Actor 1 character offset
    pub actor1_char_offset: Option<i32>,
    /// Actor 2 character offset
    pub actor2_char_offset: Option<i32>,
    /// Action character offset
    pub action_char_offset: Option<i32>,
    /// `true` when the InRawText column is `1`.
    /// NOTE(review): per the GDELT codebook this flag presumably means the event
    /// was found in the unaltered article text (as opposed to text rewritten by
    /// NLP preprocessing), not that the event is still in progress - confirm.
    pub in_raw_text: bool,
    /// Confidence score (0-100); the TSV parser stores 0.0 when the column is
    /// blank or unparseable
    pub confidence: f64,
    /// Mention document length
    pub mention_doc_len: Option<i32>,
    /// Mention document tone; the TSV parser stores 0.0 when the column is
    /// blank or unparseable
    pub mention_doc_tone: f64,
    /// Mention document translation info (`None` when absent or blank)
    pub mention_doc_translation_info: Option<String>,
    /// Extra data (`None` when absent or blank)
    pub extras: Option<String>,
}

impl Mention {
    /// Parse a mention from a single tab-separated line.
    ///
    /// Columns are read by position (the order is listed in `MENTION_COLUMNS`).
    /// The first 14 columns must be present; columns 15 and 16 (translation
    /// info and extras) are optional and become `None` when missing or blank.
    ///
    /// Everything except the event id is parsed leniently:
    /// - unparseable timestamps, sentence id, character offsets and document
    ///   length become `None`;
    /// - an unparseable mention type falls back to `1`;
    /// - unparseable confidence or tone falls back to `0.0`.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when the line has fewer than 14 columns
    /// or when the event id column is not a valid `i64`.
    pub fn from_tsv(line: &str) -> Result<Self, String> {
        let fields: Vec<&str> = line.split('\t').collect();
        if fields.len() < 14 {
            return Err(format!("Expected at least 14 fields, got {}", fields.len()));
        }

        Ok(Mention {
            // The event id is the only column whose failure rejects the row.
            global_event_id: parse_i64(fields[0])?,
            event_time_date: parse_mention_date(fields[1]),
            mention_time_date: parse_mention_date(fields[2]),
            mention_type: parse_i32(fields[3]).unwrap_or(1),
            mention_source_name: fields[4].to_string(),
            mention_identifier: fields[5].to_string(),
            sentence_id: parse_i32(fields[6]).ok(),
            actor1_char_offset: parse_i32(fields[7]).ok(),
            actor2_char_offset: parse_i32(fields[8]).ok(),
            action_char_offset: parse_i32(fields[9]).ok(),
            // Trim like the numeric columns do, so a padded flag ("1 ") is not
            // silently read as `false`.
            in_raw_text: fields[10].trim() == "1",
            confidence: parse_f64(fields[11]).unwrap_or(0.0),
            mention_doc_len: parse_i32(fields[12]).ok(),
            mention_doc_tone: parse_f64(fields[13]).unwrap_or(0.0),
            mention_doc_translation_info: fields.get(14).and_then(|s| non_empty(s)),
            extras: fields.get(15).and_then(|s| non_empty(s)),
        })
    }

    /// Get the symbolic name of `mention_type`.
    ///
    /// Returns `"UNKNOWN"` for any code outside `1..=6`.
    pub fn mention_type_name(&self) -> &'static str {
        match self.mention_type {
            1 => "WEB",
            2 => "CITATIONONLY",
            3 => "CORE",
            4 => "DTIC",
            5 => "JSTOR",
            6 => "NONTEXTUALSOURCE",
            _ => "UNKNOWN",
        }
    }

    /// Check if this is a web mention (`mention_type == 1`).
    pub fn is_web_mention(&self) -> bool {
        self.mention_type == 1
    }

    /// Check if confidence is at or above `threshold` (inclusive comparison).
    pub fn is_confident(&self, threshold: f64) -> bool {
        self.confidence >= threshold
    }
}

/// Parse a whitespace-padded decimal `i64`.
///
/// On failure the error message echoes the input exactly as received
/// (untrimmed).
fn parse_i64(s: &str) -> Result<i64, String> {
    match s.trim().parse::<i64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i64: {}", s)),
    }
}

/// Parse a whitespace-padded decimal `i32`.
///
/// On failure (including out-of-range values) the error message echoes the
/// input exactly as received (untrimmed).
fn parse_i32(s: &str) -> Result<i32, String> {
    match s.trim().parse::<i32>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i32: {}", s)),
    }
}

/// Parse a whitespace-padded `f64`, treating a blank value as `0.0`.
///
/// On failure the error message echoes the input exactly as received
/// (untrimmed).
fn parse_f64(s: &str) -> Result<f64, String> {
    match s.trim() {
        // Blank columns count as zero rather than as an error.
        "" => Ok(0.0),
        text => text
            .parse::<f64>()
            .map_err(|_| format!("Invalid f64: {}", s)),
    }
}

/// Parse a `YYYYMMDDHHMMSS` timestamp column.
///
/// Surrounding whitespace is ignored, consistent with the numeric column
/// parsers in this file. Returns `None` when the trimmed value is shorter than
/// 14 bytes or does not parse with the `%Y%m%d%H%M%S` layout.
fn parse_mention_date(s: &str) -> Option<NaiveDateTime> {
    let s = s.trim();
    // Cheap rejection of blank or truncated columns before running the parser.
    if s.len() < 14 {
        return None;
    }
    chrono::NaiveDateTime::parse_from_str(s, "%Y%m%d%H%M%S").ok()
}

/// Return the trimmed text as an owned `String`, or `None` when nothing
/// remains after trimming.
fn non_empty(s: &str) -> Option<String> {
    Some(s.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_owned)
}

/// Column names for mentions export, in column order.
///
/// Entry `i` names the column that `Mention::from_tsv` reads from position
/// `i` of a tab-separated row.
pub const MENTION_COLUMNS: &[&str] = &[
    "GlobalEventID",
    "EventTimeDate",
    "MentionTimeDate",
    "MentionType",
    "MentionSourceName",
    "MentionIdentifier",
    "SentenceID",
    "Actor1CharOffset",
    "Actor2CharOffset",
    "ActionCharOffset",
    "InRawText",
    "Confidence",
    "MentionDocLen",
    "MentionDocTone",
    "MentionDocTranslationInfo",
    "Extras",
];