gdelt 0.1.0

CLI for the GDELT Project, optimized for agentic usage with local data caching
//! GDELT Event model.
//!
//! Events are the core data structure in GDELT 2.0, representing
//! actions between actors extracted from news articles.

use chrono::NaiveDate;
use serde::{Deserialize, Serialize};

/// A GDELT event record
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Event {
    /// Global Event ID (unique identifier)
    pub global_event_id: i64,
    /// Date the event was added to database
    pub date_added: Option<NaiveDate>,
    /// SQL date (YYYYMMDD)
    pub sql_date: i32,
    /// Month/Year (YYYYMM)
    pub month_year: i32,
    /// Year
    pub year: i32,
    /// Fraction date (decimal year)
    pub fraction_date: f64,

    // Actor 1 (source actor)
    pub actor1_code: Option<String>,
    pub actor1_name: Option<String>,
    pub actor1_country_code: Option<String>,
    pub actor1_known_group_code: Option<String>,
    pub actor1_ethnic_code: Option<String>,
    pub actor1_religion1_code: Option<String>,
    pub actor1_religion2_code: Option<String>,
    pub actor1_type1_code: Option<String>,
    pub actor1_type2_code: Option<String>,
    pub actor1_type3_code: Option<String>,

    // Actor 2 (target actor)
    pub actor2_code: Option<String>,
    pub actor2_name: Option<String>,
    pub actor2_country_code: Option<String>,
    pub actor2_known_group_code: Option<String>,
    pub actor2_ethnic_code: Option<String>,
    pub actor2_religion1_code: Option<String>,
    pub actor2_religion2_code: Option<String>,
    pub actor2_type1_code: Option<String>,
    pub actor2_type2_code: Option<String>,
    pub actor2_type3_code: Option<String>,

    // Event classification
    pub is_root_event: bool,
    pub event_code: String,
    pub event_base_code: String,
    pub event_root_code: String,
    /// Quad class (1-4)
    pub quad_class: u8,
    /// Goldstein scale (-10 to +10)
    pub goldstein_scale: f64,
    /// Number of mentions
    pub num_mentions: i32,
    /// Number of sources
    pub num_sources: i32,
    /// Number of articles
    pub num_articles: i32,
    /// Average tone (-100 to +100)
    pub avg_tone: f64,

    // Actor 1 geography
    pub actor1_geo_type: Option<i32>,
    pub actor1_geo_fullname: Option<String>,
    pub actor1_geo_country_code: Option<String>,
    pub actor1_geo_adm1_code: Option<String>,
    pub actor1_geo_adm2_code: Option<String>,
    pub actor1_geo_lat: Option<f64>,
    pub actor1_geo_long: Option<f64>,
    pub actor1_geo_feature_id: Option<String>,

    // Actor 2 geography
    pub actor2_geo_type: Option<i32>,
    pub actor2_geo_fullname: Option<String>,
    pub actor2_geo_country_code: Option<String>,
    pub actor2_geo_adm1_code: Option<String>,
    pub actor2_geo_adm2_code: Option<String>,
    pub actor2_geo_lat: Option<f64>,
    pub actor2_geo_long: Option<f64>,
    pub actor2_geo_feature_id: Option<String>,

    // Action geography (where the event occurred)
    pub action_geo_type: Option<i32>,
    pub action_geo_fullname: Option<String>,
    pub action_geo_country_code: Option<String>,
    pub action_geo_adm1_code: Option<String>,
    pub action_geo_adm2_code: Option<String>,
    pub action_geo_lat: Option<f64>,
    pub action_geo_long: Option<f64>,
    pub action_geo_feature_id: Option<String>,

    /// Source URL
    pub source_url: Option<String>,
}

impl Event {
    /// Parse an event from a single tab-separated line of the events export.
    ///
    /// Columns are read by position, in the order listed in `EVENT_COLUMNS`.
    /// The first 58 columns are required. `ActionGeo_FeatureID` (index 58),
    /// `DATEADDED` (index 59) and `SOURCEURL` (index 60) are read only when
    /// the line is long enough to contain them.
    ///
    /// Blank text columns become `None`. Blank or malformed latitude and
    /// longitude columns also become `None`, so a missing coordinate is never
    /// reported as `0.0`. Unparseable `GoldsteinScale`, `NumMentions`,
    /// `NumSources`, `NumArticles` and `AvgTone` values fall back to zero.
    ///
    /// # Errors
    ///
    /// Returns a message when the line has fewer than 58 fields, or when
    /// `GlobalEventID`, `Day`, `MonthYear`, `Year`, `FractionDate` or
    /// `QuadClass` cannot be parsed.
    pub fn from_tsv(line: &str) -> Result<Self, String> {
        // Optional coordinate: blank or malformed text means "no value".
        // `parse_f64` cannot be used here because it maps blank to `Ok(0.0)`.
        fn optional_f64(raw: &str) -> Option<f64> {
            let text = raw.trim();
            if text.is_empty() {
                None
            } else {
                text.parse().ok()
            }
        }

        let fields: Vec<&str> = line.split('\t').collect();
        if fields.len() < 58 {
            return Err(format!("Expected at least 58 fields, got {}", fields.len()));
        }

        // Trailing columns are optional; `get` yields `None` on short lines.
        let action_geo_feature_id = fields.get(58).and_then(|raw| non_empty(raw));
        let date_added = fields.get(59).and_then(|raw| Self::parse_date_added(raw));
        // SOURCEURL is the last entry of `EVENT_COLUMNS` (index 60); index 59
        // is DATEADDED and must not be stored as the URL.
        let source_url = fields.get(60).and_then(|raw| non_empty(raw));

        Ok(Event {
            global_event_id: parse_i64(fields[0])?,
            date_added,
            sql_date: parse_i32(fields[1])?,
            month_year: parse_i32(fields[2])?,
            year: parse_i32(fields[3])?,
            fraction_date: parse_f64(fields[4])?,
            actor1_code: non_empty(fields[5]),
            actor1_name: non_empty(fields[6]),
            actor1_country_code: non_empty(fields[7]),
            actor1_known_group_code: non_empty(fields[8]),
            actor1_ethnic_code: non_empty(fields[9]),
            actor1_religion1_code: non_empty(fields[10]),
            actor1_religion2_code: non_empty(fields[11]),
            actor1_type1_code: non_empty(fields[12]),
            actor1_type2_code: non_empty(fields[13]),
            actor1_type3_code: non_empty(fields[14]),
            actor2_code: non_empty(fields[15]),
            actor2_name: non_empty(fields[16]),
            actor2_country_code: non_empty(fields[17]),
            actor2_known_group_code: non_empty(fields[18]),
            actor2_ethnic_code: non_empty(fields[19]),
            actor2_religion1_code: non_empty(fields[20]),
            actor2_religion2_code: non_empty(fields[21]),
            actor2_type1_code: non_empty(fields[22]),
            actor2_type2_code: non_empty(fields[23]),
            actor2_type3_code: non_empty(fields[24]),
            is_root_event: fields[25] == "1",
            event_code: fields[26].to_string(),
            event_base_code: fields[27].to_string(),
            event_root_code: fields[28].to_string(),
            quad_class: parse_u8(fields[29])?,
            goldstein_scale: parse_f64(fields[30]).unwrap_or(0.0),
            num_mentions: parse_i32(fields[31]).unwrap_or(0),
            num_sources: parse_i32(fields[32]).unwrap_or(0),
            num_articles: parse_i32(fields[33]).unwrap_or(0),
            avg_tone: parse_f64(fields[34]).unwrap_or(0.0),
            actor1_geo_type: parse_i32(fields[35]).ok(),
            actor1_geo_fullname: non_empty(fields[36]),
            actor1_geo_country_code: non_empty(fields[37]),
            actor1_geo_adm1_code: non_empty(fields[38]),
            actor1_geo_adm2_code: non_empty(fields[39]),
            actor1_geo_lat: optional_f64(fields[40]),
            actor1_geo_long: optional_f64(fields[41]),
            actor1_geo_feature_id: non_empty(fields[42]),
            actor2_geo_type: parse_i32(fields[43]).ok(),
            actor2_geo_fullname: non_empty(fields[44]),
            actor2_geo_country_code: non_empty(fields[45]),
            actor2_geo_adm1_code: non_empty(fields[46]),
            actor2_geo_adm2_code: non_empty(fields[47]),
            actor2_geo_lat: optional_f64(fields[48]),
            actor2_geo_long: optional_f64(fields[49]),
            actor2_geo_feature_id: non_empty(fields[50]),
            action_geo_type: parse_i32(fields[51]).ok(),
            action_geo_fullname: non_empty(fields[52]),
            action_geo_country_code: non_empty(fields[53]),
            action_geo_adm1_code: non_empty(fields[54]),
            action_geo_adm2_code: non_empty(fields[55]),
            action_geo_lat: optional_f64(fields[56]),
            action_geo_long: optional_f64(fields[57]),
            action_geo_feature_id,
            source_url,
        })
    }

    /// Get the event date as NaiveDate.
    ///
    /// Decodes `sql_date` as `YYYYMMDD`. Returns `None` when the digits do
    /// not form a valid calendar date.
    pub fn event_date(&self) -> Option<NaiveDate> {
        Self::date_from_yyyymmdd(self.sql_date)
    }

    /// Check if event is cooperative (quad class 1 or 2)
    pub fn is_cooperative(&self) -> bool {
        matches!(self.quad_class, 1 | 2)
    }

    /// Check if event is conflictual (quad class 3 or 4)
    pub fn is_conflictual(&self) -> bool {
        matches!(self.quad_class, 3 | 4)
    }

    /// Split a `YYYYMMDD` integer into a calendar date.
    ///
    /// Negative input is rejected up front so the casts below cannot wrap.
    fn date_from_yyyymmdd(value: i32) -> Option<NaiveDate> {
        if value < 0 {
            return None;
        }
        let year = value / 10_000;
        let month = (value % 10_000) / 100;
        let day = value % 100;
        NaiveDate::from_ymd_opt(year, month as u32, day as u32)
    }

    /// Read the date part of a `DATEADDED` column.
    ///
    /// Only the leading eight characters are used, and they must all be ASCII
    /// digits forming a `YYYYMMDD` date; any trailing characters are ignored.
    /// Presumably the full value is a `YYYYMMDDHHMMSS` timestamp; confirm
    /// against the GDELT codebook.
    fn parse_date_added(raw: &str) -> Option<NaiveDate> {
        let digits = raw.trim().get(..8)?;
        if !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        Self::date_from_yyyymmdd(digits.parse().ok()?)
    }
}

/// Parse a signed 64-bit integer, ignoring surrounding whitespace.
///
/// On failure the error message quotes the input exactly as given
/// (untrimmed).
fn parse_i64(s: &str) -> Result<i64, String> {
    match s.trim().parse::<i64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i64: {}", s)),
    }
}

/// Parse a signed 32-bit integer, ignoring surrounding whitespace.
///
/// On failure the error message quotes the input exactly as given
/// (untrimmed).
fn parse_i32(s: &str) -> Result<i32, String> {
    if let Ok(value) = s.trim().parse::<i32>() {
        return Ok(value);
    }
    Err(format!("Invalid i32: {}", s))
}

/// Parse an unsigned 8-bit integer, ignoring surrounding whitespace.
///
/// On failure the error message quotes the input exactly as given
/// (untrimmed).
fn parse_u8(s: &str) -> Result<u8, String> {
    let parsed: Option<u8> = s.trim().parse().ok();
    parsed.ok_or_else(|| format!("Invalid u8: {}", s))
}

/// Parse a floating-point number, ignoring surrounding whitespace.
///
/// Blank input is not an error: it yields `Ok(0.0)`. Callers that need to
/// tell "missing" from zero must check for blank text themselves.
/// On failure the error message quotes the input exactly as given
/// (untrimmed).
fn parse_f64(s: &str) -> Result<f64, String> {
    match s.trim() {
        "" => Ok(0.0),
        text => text
            .parse::<f64>()
            .map_err(|_| format!("Invalid f64: {}", s)),
    }
}

/// Return the trimmed text as an owned `String`, or `None` when nothing
/// remains after trimming whitespace.
fn non_empty(s: &str) -> Option<String> {
    Some(s.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_string)
}

/// Column names for GDELT events export, in positional order.
///
/// The index of a name in this slice is the index of the matching field in
/// a tab-split export line (61 columns, indices 0 through 60).
pub const EVENT_COLUMNS: &[&str] = &[
    // 0-4: identifier and date
    "GlobalEventID",
    "Day",
    "MonthYear",
    "Year",
    "FractionDate",
    // 5-14: actor 1
    "Actor1Code",
    "Actor1Name",
    "Actor1CountryCode",
    "Actor1KnownGroupCode",
    "Actor1EthnicCode",
    "Actor1Religion1Code",
    "Actor1Religion2Code",
    "Actor1Type1Code",
    "Actor1Type2Code",
    "Actor1Type3Code",
    // 15-24: actor 2
    "Actor2Code",
    "Actor2Name",
    "Actor2CountryCode",
    "Actor2KnownGroupCode",
    "Actor2EthnicCode",
    "Actor2Religion1Code",
    "Actor2Religion2Code",
    "Actor2Type1Code",
    "Actor2Type2Code",
    "Actor2Type3Code",
    // 25-34: event classification and counts
    "IsRootEvent",
    "EventCode",
    "EventBaseCode",
    "EventRootCode",
    "QuadClass",
    "GoldsteinScale",
    "NumMentions",
    "NumSources",
    "NumArticles",
    "AvgTone",
    // 35-42: actor 1 geography
    "Actor1Geo_Type",
    "Actor1Geo_FullName",
    "Actor1Geo_CountryCode",
    "Actor1Geo_ADM1Code",
    "Actor1Geo_ADM2Code",
    "Actor1Geo_Lat",
    "Actor1Geo_Long",
    "Actor1Geo_FeatureID",
    // 43-50: actor 2 geography
    "Actor2Geo_Type",
    "Actor2Geo_FullName",
    "Actor2Geo_CountryCode",
    "Actor2Geo_ADM1Code",
    "Actor2Geo_ADM2Code",
    "Actor2Geo_Lat",
    "Actor2Geo_Long",
    "Actor2Geo_FeatureID",
    // 51-58: action geography
    "ActionGeo_Type",
    "ActionGeo_FullName",
    "ActionGeo_CountryCode",
    "ActionGeo_ADM1Code",
    "ActionGeo_ADM2Code",
    "ActionGeo_Lat",
    "ActionGeo_Long",
    "ActionGeo_FeatureID",
    // 59-60: record metadata
    "DATEADDED",
    "SOURCEURL",
];