use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
/// One row of an event table, as produced by `Event::from_tsv` from a
/// tab-separated line.
///
/// Fields appear in the same order as the names in `EVENT_COLUMNS`, except
/// that `date_added` is declared second here while `DATEADDED` is listed
/// near the end of `EVENT_COLUMNS`.
/// NOTE(review): the column names look like the GDELT event export — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Event {
// --- Identifier and date columns ---
/// Event identifier, parsed from the first column of the row.
pub global_event_id: i64,
/// Date the record was added, when known.
/// NOTE(review): presumably corresponds to the `DATEADDED` column — confirm.
pub date_added: Option<NaiveDate>,
/// Event date packed into a single integer as `YYYYMMDD`; `event_date`
/// decodes it into a `NaiveDate`.
pub sql_date: i32,
/// Value of the `MonthYear` column.
pub month_year: i32,
/// Value of the `Year` column.
pub year: i32,
/// Value of the `FractionDate` column.
pub fraction_date: f64,
// --- Actor 1 attributes (each is `None` when the column is blank) ---
pub actor1_code: Option<String>,
pub actor1_name: Option<String>,
pub actor1_country_code: Option<String>,
pub actor1_known_group_code: Option<String>,
pub actor1_ethnic_code: Option<String>,
pub actor1_religion1_code: Option<String>,
pub actor1_religion2_code: Option<String>,
pub actor1_type1_code: Option<String>,
pub actor1_type2_code: Option<String>,
pub actor1_type3_code: Option<String>,
// --- Actor 2 attributes (each is `None` when the column is blank) ---
pub actor2_code: Option<String>,
pub actor2_name: Option<String>,
pub actor2_country_code: Option<String>,
pub actor2_known_group_code: Option<String>,
pub actor2_ethnic_code: Option<String>,
pub actor2_religion1_code: Option<String>,
pub actor2_religion2_code: Option<String>,
pub actor2_type1_code: Option<String>,
pub actor2_type2_code: Option<String>,
pub actor2_type3_code: Option<String>,
// --- Event action attributes ---
/// `true` when the `IsRootEvent` column is exactly `1`.
pub is_root_event: bool,
/// `EventCode` column, stored verbatim as text.
pub event_code: String,
/// `EventBaseCode` column, stored verbatim as text.
pub event_base_code: String,
/// `EventRootCode` column, stored verbatim as text.
pub event_root_code: String,
/// `QuadClass` column; `is_cooperative` tests for 1 or 2 and
/// `is_conflictual` tests for 3 or 4.
pub quad_class: u8,
/// `GoldsteinScale` column; `from_tsv` stores `0.0` when it cannot be parsed.
pub goldstein_scale: f64,
/// `NumMentions` column; `from_tsv` stores `0` when it cannot be parsed.
pub num_mentions: i32,
/// `NumSources` column; `from_tsv` stores `0` when it cannot be parsed.
pub num_sources: i32,
/// `NumArticles` column; `from_tsv` stores `0` when it cannot be parsed.
pub num_articles: i32,
/// `AvgTone` column; `from_tsv` stores `0.0` when it cannot be parsed.
pub avg_tone: f64,
// --- Actor 1 geography (all optional) ---
pub actor1_geo_type: Option<i32>,
pub actor1_geo_fullname: Option<String>,
pub actor1_geo_country_code: Option<String>,
pub actor1_geo_adm1_code: Option<String>,
pub actor1_geo_adm2_code: Option<String>,
pub actor1_geo_lat: Option<f64>,
pub actor1_geo_long: Option<f64>,
pub actor1_geo_feature_id: Option<String>,
// --- Actor 2 geography (all optional) ---
pub actor2_geo_type: Option<i32>,
pub actor2_geo_fullname: Option<String>,
pub actor2_geo_country_code: Option<String>,
pub actor2_geo_adm1_code: Option<String>,
pub actor2_geo_adm2_code: Option<String>,
pub actor2_geo_lat: Option<f64>,
pub actor2_geo_long: Option<f64>,
pub actor2_geo_feature_id: Option<String>,
// --- Action geography (all optional) ---
pub action_geo_type: Option<i32>,
pub action_geo_fullname: Option<String>,
pub action_geo_country_code: Option<String>,
pub action_geo_adm1_code: Option<String>,
pub action_geo_adm2_code: Option<String>,
pub action_geo_lat: Option<f64>,
pub action_geo_long: Option<f64>,
pub action_geo_feature_id: Option<String>,
// --- Provenance ---
/// Source URL for the record, when present.
/// NOTE(review): presumably corresponds to the `SOURCEURL` column — confirm.
pub source_url: Option<String>,
}
impl Event {
pub fn from_tsv(line: &str) -> Result<Self, String> {
let fields: Vec<&str> = line.split('\t').collect();
if fields.len() < 58 {
return Err(format!("Expected at least 58 fields, got {}", fields.len()));
}
Ok(Event {
global_event_id: parse_i64(fields[0])?,
date_added: None, sql_date: parse_i32(fields[1])?,
month_year: parse_i32(fields[2])?,
year: parse_i32(fields[3])?,
fraction_date: parse_f64(fields[4])?,
actor1_code: non_empty(fields[5]),
actor1_name: non_empty(fields[6]),
actor1_country_code: non_empty(fields[7]),
actor1_known_group_code: non_empty(fields[8]),
actor1_ethnic_code: non_empty(fields[9]),
actor1_religion1_code: non_empty(fields[10]),
actor1_religion2_code: non_empty(fields[11]),
actor1_type1_code: non_empty(fields[12]),
actor1_type2_code: non_empty(fields[13]),
actor1_type3_code: non_empty(fields[14]),
actor2_code: non_empty(fields[15]),
actor2_name: non_empty(fields[16]),
actor2_country_code: non_empty(fields[17]),
actor2_known_group_code: non_empty(fields[18]),
actor2_ethnic_code: non_empty(fields[19]),
actor2_religion1_code: non_empty(fields[20]),
actor2_religion2_code: non_empty(fields[21]),
actor2_type1_code: non_empty(fields[22]),
actor2_type2_code: non_empty(fields[23]),
actor2_type3_code: non_empty(fields[24]),
is_root_event: fields[25] == "1",
event_code: fields[26].to_string(),
event_base_code: fields[27].to_string(),
event_root_code: fields[28].to_string(),
quad_class: parse_u8(fields[29])?,
goldstein_scale: parse_f64(fields[30]).unwrap_or(0.0),
num_mentions: parse_i32(fields[31]).unwrap_or(0),
num_sources: parse_i32(fields[32]).unwrap_or(0),
num_articles: parse_i32(fields[33]).unwrap_or(0),
avg_tone: parse_f64(fields[34]).unwrap_or(0.0),
actor1_geo_type: parse_i32(fields[35]).ok(),
actor1_geo_fullname: non_empty(fields[36]),
actor1_geo_country_code: non_empty(fields[37]),
actor1_geo_adm1_code: non_empty(fields[38]),
actor1_geo_adm2_code: non_empty(fields[39]),
actor1_geo_lat: parse_f64(fields[40]).ok(),
actor1_geo_long: parse_f64(fields[41]).ok(),
actor1_geo_feature_id: non_empty(fields[42]),
actor2_geo_type: parse_i32(fields[43]).ok(),
actor2_geo_fullname: non_empty(fields[44]),
actor2_geo_country_code: non_empty(fields[45]),
actor2_geo_adm1_code: non_empty(fields[46]),
actor2_geo_adm2_code: non_empty(fields[47]),
actor2_geo_lat: parse_f64(fields[48]).ok(),
actor2_geo_long: parse_f64(fields[49]).ok(),
actor2_geo_feature_id: non_empty(fields[50]),
action_geo_type: parse_i32(fields[51]).ok(),
action_geo_fullname: non_empty(fields[52]),
action_geo_country_code: non_empty(fields[53]),
action_geo_adm1_code: non_empty(fields[54]),
action_geo_adm2_code: non_empty(fields[55]),
action_geo_lat: parse_f64(fields[56]).ok(),
action_geo_long: parse_f64(fields[57]).ok(),
action_geo_feature_id: if fields.len() > 58 { non_empty(fields[58]) } else { None },
source_url: if fields.len() > 59 { non_empty(fields[59]) } else { None },
})
}
pub fn event_date(&self) -> Option<NaiveDate> {
let year = self.sql_date / 10000;
let month = (self.sql_date % 10000) / 100;
let day = self.sql_date % 100;
NaiveDate::from_ymd_opt(year, month as u32, day as u32)
}
pub fn is_cooperative(&self) -> bool {
self.quad_class == 1 || self.quad_class == 2
}
pub fn is_conflictual(&self) -> bool {
self.quad_class == 3 || self.quad_class == 4
}
}
/// Parses `s` as an `i64`, ignoring surrounding whitespace.
///
/// On failure the error message quotes the original, untrimmed input.
fn parse_i64(s: &str) -> Result<i64, String> {
    match s.trim().parse::<i64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid i64: {}", s)),
    }
}
/// Parses `s` as an `i32`, ignoring surrounding whitespace.
///
/// On failure the error message quotes the original, untrimmed input.
fn parse_i32(s: &str) -> Result<i32, String> {
    let parsed = s.trim().parse::<i32>();
    parsed.or_else(|_| Err(format!("Invalid i32: {}", s)))
}
/// Parses `s` as a `u8`, ignoring surrounding whitespace.
///
/// On failure the error message quotes the original, untrimmed input.
fn parse_u8(s: &str) -> Result<u8, String> {
    let text = s.trim();
    if let Ok(value) = text.parse::<u8>() {
        return Ok(value);
    }
    Err(format!("Invalid u8: {}", s))
}
/// Parses `s` as an `f64`, ignoring surrounding whitespace.
///
/// A blank (empty or whitespace-only) input is treated as `0.0` rather than
/// as an error. On failure the error message quotes the original, untrimmed
/// input.
fn parse_f64(s: &str) -> Result<f64, String> {
    match s.trim() {
        "" => Ok(0.0),
        text => text
            .parse::<f64>()
            .map_err(|_| format!("Invalid f64: {}", s)),
    }
}
/// Returns the whitespace-trimmed text as an owned `String`, or `None` when
/// nothing remains after trimming.
fn non_empty(s: &str) -> Option<String> {
    Some(s.trim())
        .filter(|text| !text.is_empty())
        .map(String::from)
}
/// Names of the 61 columns of an event row, in positional order
/// (index 0 is `GlobalEventID`, index 60 is `SOURCEURL`).
pub const EVENT_COLUMNS: &[&str] = &[
    // 0-4: identifier and date columns
    "GlobalEventID",
    "Day",
    "MonthYear",
    "Year",
    "FractionDate",
    // 5-14: actor 1 attributes
    "Actor1Code",
    "Actor1Name",
    "Actor1CountryCode",
    "Actor1KnownGroupCode",
    "Actor1EthnicCode",
    "Actor1Religion1Code",
    "Actor1Religion2Code",
    "Actor1Type1Code",
    "Actor1Type2Code",
    "Actor1Type3Code",
    // 15-24: actor 2 attributes
    "Actor2Code",
    "Actor2Name",
    "Actor2CountryCode",
    "Actor2KnownGroupCode",
    "Actor2EthnicCode",
    "Actor2Religion1Code",
    "Actor2Religion2Code",
    "Actor2Type1Code",
    "Actor2Type2Code",
    "Actor2Type3Code",
    // 25-34: event action attributes
    "IsRootEvent",
    "EventCode",
    "EventBaseCode",
    "EventRootCode",
    "QuadClass",
    "GoldsteinScale",
    "NumMentions",
    "NumSources",
    "NumArticles",
    "AvgTone",
    // 35-42: actor 1 geography
    "Actor1Geo_Type",
    "Actor1Geo_FullName",
    "Actor1Geo_CountryCode",
    "Actor1Geo_ADM1Code",
    "Actor1Geo_ADM2Code",
    "Actor1Geo_Lat",
    "Actor1Geo_Long",
    "Actor1Geo_FeatureID",
    // 43-50: actor 2 geography
    "Actor2Geo_Type",
    "Actor2Geo_FullName",
    "Actor2Geo_CountryCode",
    "Actor2Geo_ADM1Code",
    "Actor2Geo_ADM2Code",
    "Actor2Geo_Lat",
    "Actor2Geo_Long",
    "Actor2Geo_FeatureID",
    // 51-58: action geography
    "ActionGeo_Type",
    "ActionGeo_FullName",
    "ActionGeo_CountryCode",
    "ActionGeo_ADM1Code",
    "ActionGeo_ADM2Code",
    "ActionGeo_Lat",
    "ActionGeo_Long",
    "ActionGeo_FeatureID",
    // 59-60: provenance
    "DATEADDED",
    "SOURCEURL",
];