gdelt 0.1.0

CLI for GDELT Project - optimized for agentic usage with local data caching
//! Data synchronization for the daemon.

#![allow(dead_code)]

use crate::api::client::GdeltClient;
use crate::data::masterfile::MasterFileList;
use crate::db::CacheDb;
use crate::error::Result;
use tracing::{debug, info, warn};

/// Summary of a single sync pass.
///
/// `Default` yields an empty summary (all counters zero, no errors), which is
/// the natural starting point when accumulating results.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SyncResult {
    /// Number of files that were downloaded successfully.
    pub files_downloaded: u64,
    /// Counter for the events file type. As populated by `run_sync`, this is
    /// bumped once per downloaded events file, not once per parsed record.
    pub events_imported: u64,
    /// Counter for the GKG file type (same per-file granularity as above).
    pub gkg_imported: u64,
    /// Counter for the mentions file type (same per-file granularity as above).
    pub mentions_imported: u64,
    /// Human-readable descriptions of non-fatal failures. `run_sync` stores
    /// one `"<filename>: <error>"` string per failed download.
    pub errors: Vec<String>,
}

/// Synchronize against the latest GDELT update list.
///
/// Creates a client and opens the cache, fetches the latest master file list,
/// and then walks its entries: entries the cache already reports as processed
/// are skipped, every other entry is downloaded, tallied by file type and
/// marked as processed in the cache.
///
/// A failed download does not abort the pass; it is logged and recorded in
/// [`SyncResult::errors`], and the entry is left unmarked.
///
/// # Errors
///
/// Returns early if the client cannot be created, the cache cannot be opened,
/// the latest update list cannot be fetched, or a cache lookup/update fails.
pub async fn run_sync() -> Result<SyncResult> {
    info!("Starting GDELT data sync");

    let client = GdeltClient::new()?;
    let cache = CacheDb::open()?;

    // The "latest" list names the files published in the most recent update.
    let latest = MasterFileList::fetch_latest(&client).await?;
    info!("Found {} files in latest update", latest.entries.len());

    // Running tallies; assembled into a `SyncResult` once the pass is done.
    let mut files_downloaded: u64 = 0;
    let mut events_imported: u64 = 0;
    let mut gkg_imported: u64 = 0;
    let mut mentions_imported: u64 = 0;
    let mut errors: Vec<String> = Vec::new();

    for entry in &latest.entries {
        let file_id = entry.file_id();

        // Nothing to do for files the cache already knows about.
        if cache.is_file_processed(&file_id)? {
            debug!("Skipping already processed file: {}", entry.filename());
            continue;
        }

        info!("Processing new file: {}", entry.filename());

        // Fetch the payload; on failure note the error and move on to the
        // next entry without marking this one as processed.
        let data = match client.download_file(&entry.url).await {
            Ok(bytes) => bytes,
            Err(err) => {
                warn!("Failed to download {}: {}", entry.filename(), err);
                errors.push(format!("{}: {}", entry.filename(), err));
                continue;
            }
        };

        debug!("Downloaded {} bytes", data.len());
        files_downloaded += 1;

        // Placeholder import step: only a per-type tally is kept for now;
        // the payload is not decompressed or parsed here.
        match entry.file_type {
            crate::data::masterfile::FileType::Events => events_imported += 1,
            crate::data::masterfile::FileType::Gkg => gkg_imported += 1,
            crate::data::masterfile::FileType::Mentions => mentions_imported += 1,
            crate::data::masterfile::FileType::Unknown => {
                warn!("Unknown file type: {}", entry.filename());
            }
        }

        // Every successfully downloaded file is recorded, whatever its type.
        cache.mark_file_processed(&file_id, entry.size)?;
    }

    info!(
        "Sync complete: {} files, {} events, {} GKG, {} mentions",
        files_downloaded, events_imported, gkg_imported, mentions_imported
    );

    Ok(SyncResult {
        files_downloaded,
        events_imported,
        gkg_imported,
        mentions_imported,
        errors,
    })
}

/// Decide whether a new sync should run.
///
/// Returns `true` when:
/// - `last_sync` is `None` (no sync has ever been recorded), or
/// - at least `interval_secs` whole seconds have elapsed since `last_sync`, or
/// - `last_sync` lies in the future relative to the current UTC time (for
///   example after a clock adjustment), because the elapsed time cannot be
///   trusted in that case.
///
/// Otherwise returns `false`.
pub fn sync_needed(last_sync: Option<chrono::DateTime<chrono::Utc>>, interval_secs: u64) -> bool {
    let last = match last_sync {
        Some(last) => last,
        None => return true, // Never synced
    };

    let elapsed_secs: i64 = (chrono::Utc::now() - last).num_seconds();

    // A negative value means the recorded timestamp is ahead of the clock.
    // Handle it explicitly instead of letting an `as u64` cast wrap it into
    // an arbitrary huge number.
    if elapsed_secs < 0 {
        return true;
    }

    // Lossless: `elapsed_secs` is known to be non-negative here.
    (elapsed_secs as u64) >= interval_secs
}