#![allow(dead_code)]
use crate::api::client::GdeltClient;
use crate::data::masterfile::MasterFileList;
use crate::db::CacheDb;
use crate::error::Result;
use tracing::{debug, info, warn};
/// Summary of a single sync pass, as produced by [`run_sync`].
///
/// `Default` yields an empty summary (all counters zero, no errors), so
/// callers do not have to spell out every field to start a fresh tally.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SyncResult {
    /// Number of files whose download succeeded during the pass.
    pub files_downloaded: u64,
    /// Events tally; `run_sync` currently adds one per downloaded events file.
    pub events_imported: u64,
    /// GKG tally; `run_sync` currently adds one per downloaded GKG file.
    pub gkg_imported: u64,
    /// Mentions tally; `run_sync` currently adds one per downloaded mentions file.
    pub mentions_imported: u64,
    /// One `"<filename>: <error>"` message for every download that failed.
    pub errors: Vec<String>,
}
/// Runs one GDELT synchronization pass and reports what was handled.
///
/// Fetches the latest master file list, skips every entry the cache already
/// marks as processed, downloads the remaining ones and marks each
/// successfully downloaded file as processed.
///
/// The downloaded payload is only measured here (its length is logged); the
/// per-type counters advance by one for each downloaded file of that type.
///
/// # Errors
///
/// Returns an error if the client cannot be created, the cache cannot be
/// opened, the master file list cannot be fetched, or a cache lookup/update
/// fails. A failed download does not abort the pass: it is logged and
/// recorded in [`SyncResult::errors`].
pub async fn run_sync() -> Result<SyncResult> {
    use crate::data::masterfile::FileType;

    info!("Starting GDELT data sync");

    let client = GdeltClient::new()?;
    let cache = CacheDb::open()?;

    let latest = MasterFileList::fetch_latest(&client).await?;
    info!("Found {} files in latest update", latest.entries.len());

    let mut summary = SyncResult {
        files_downloaded: 0,
        events_imported: 0,
        gkg_imported: 0,
        mentions_imported: 0,
        errors: Vec::new(),
    };

    for entry in &latest.entries {
        let file_id = entry.file_id();

        if cache.is_file_processed(&file_id)? {
            debug!("Skipping already processed file: {}", entry.filename());
            continue;
        }

        info!("Processing new file: {}", entry.filename());

        // A failed download is recorded and the pass moves on to the next entry.
        let data = match client.download_file(&entry.url).await {
            Ok(bytes) => bytes,
            Err(e) => {
                warn!("Failed to download {}: {}", entry.filename(), e);
                summary.errors.push(format!("{}: {}", entry.filename(), e));
                continue;
            }
        };

        debug!("Downloaded {} bytes", data.len());
        summary.files_downloaded += 1;

        match entry.file_type {
            FileType::Events => summary.events_imported += 1,
            FileType::Gkg => summary.gkg_imported += 1,
            FileType::Mentions => summary.mentions_imported += 1,
            FileType::Unknown => warn!("Unknown file type: {}", entry.filename()),
        }

        // Every downloaded file is marked, including ones of unknown type.
        cache.mark_file_processed(&file_id, entry.size)?;
    }

    info!(
        "Sync complete: {} files, {} events, {} GKG, {} mentions",
        summary.files_downloaded,
        summary.events_imported,
        summary.gkg_imported,
        summary.mentions_imported
    );

    Ok(summary)
}
/// Decides whether a new sync should run.
///
/// Returns `true` when no previous sync is recorded (`last_sync` is `None`),
/// when at least `interval_secs` whole seconds have elapsed since `last_sync`,
/// or when `last_sync` lies in the future relative to the current UTC time.
pub fn sync_needed(last_sync: Option<chrono::DateTime<chrono::Utc>>, interval_secs: u64) -> bool {
    let last = match last_sync {
        Some(last) => last,
        None => return true,
    };

    let elapsed_secs = (chrono::Utc::now() - last).num_seconds();

    // A timestamp ahead of the clock cannot be trusted (e.g. the clock was
    // adjusted), so ask for a sync explicitly instead of letting a negative
    // value wrap around in an unsigned cast.
    if elapsed_secs < 0 {
        return true;
    }

    // Non-negative by the check above, so the cast is lossless.
    let elapsed = elapsed_secs as u64;
    elapsed >= interval_secs
}