gdelt 0.1.0

CLI for the GDELT Project, optimized for agentic usage with local data caching
//! Base HTTP client for GDELT APIs.

#![allow(dead_code)]

use crate::error::{GdeltError, Result};
use reqwest::{Client, Response};
use std::time::Duration;
use tracing::{debug, instrument};

/// Base URLs for the GDELT query APIs and the raw-data download listings.
pub mod endpoints {
    /// Base URL of the v2 DOC API.
    pub const DOC_API: &str = "https://api.gdeltproject.org/api/v2/doc/doc";
    /// Base URL of the v2 GEO API.
    pub const GEO_API: &str = "https://api.gdeltproject.org/api/v2/geo/geo";
    /// Base URL of the v2 TV API.
    pub const TV_API: &str = "https://api.gdeltproject.org/api/v2/tv/tv";
    /// Base URL of the v2 Context API.
    pub const CONTEXT_API: &str = "https://api.gdeltproject.org/api/v2/context/context";

    // Data download URLs (plain-text listings served from the GDELT v2 data host).

    /// URL of the master file list.
    pub const MASTER_FILE_LIST: &str = "http://data.gdeltproject.org/gdeltv2/masterfilelist.txt";
    /// URL of the master file list for the translation feed.
    pub const MASTER_FILE_LIST_TRANSLATION: &str =
        "http://data.gdeltproject.org/gdeltv2/masterfilelist-translation.txt";
    /// URL of the "last update" listing.
    pub const LAST_UPDATE: &str = "http://data.gdeltproject.org/gdeltv2/lastupdate.txt";
}

/// Configuration for the GDELT HTTP client
///
/// Consumed by `GdeltClient::with_config`; `Default` provides a ready-to-use baseline.
#[derive(Debug, Clone)]
pub struct ClientConfig {
    /// Request timeout handed to the underlying HTTP client builder
    pub timeout: Duration,
    /// Number of retries after the initial request (so up to `retries + 1` requests are sent)
    pub retries: u32,
    /// Value sent as the `User-Agent` header
    pub user_agent: String,
    /// Wait and retry when rate limited (shows countdown)
    pub wait_on_rate_limit: bool,
    /// Maximum wait time when rate limited (seconds)
    pub max_rate_limit_wait: u64,
}

impl Default for ClientConfig {
    /// Baseline settings: 30 s timeout, 3 retries, a `gdelt-cli/<crate version>` user agent,
    /// and no waiting on rate limits (with a 120 s cap should waiting be enabled).
    fn default() -> Self {
        // The crate version is baked in at compile time.
        let user_agent = format!("gdelt-cli/{}", env!("CARGO_PKG_VERSION"));
        let timeout = Duration::from_secs(30);

        Self {
            timeout,
            retries: 3,
            user_agent,
            wait_on_rate_limit: false,
            max_rate_limit_wait: 120,
        }
    }
}

/// Main HTTP client for GDELT APIs
///
/// Pairs a `reqwest` client with the `ClientConfig` that drives its retry and
/// rate-limit handling.
#[derive(Debug, Clone)]
pub struct GdeltClient {
    /// Underlying HTTP client, built from `config` in `with_config`
    client: Client,
    /// Settings consulted on every request (retry count, rate-limit behavior)
    config: ClientConfig,
}

impl GdeltClient {
    /// Create a new GDELT client with default configuration
    pub fn new() -> Result<Self> {
        Self::with_config(ClientConfig::default())
    }

    /// Create a new GDELT client with custom configuration
    pub fn with_config(config: ClientConfig) -> Result<Self> {
        let client = Client::builder()
            .timeout(config.timeout)
            .user_agent(&config.user_agent)
            .gzip(true)
            .brotli(true)
            .build()
            .map_err(GdeltError::Network)?;

        Ok(Self { client, config })
    }

    /// Create client with custom timeout
    pub fn with_timeout(timeout: Duration) -> Result<Self> {
        let mut config = ClientConfig::default();
        config.timeout = timeout;
        Self::with_config(config)
    }

    /// Get the underlying HTTP client
    pub fn inner(&self) -> &Client {
        &self.client
    }

    /// Execute a GET request with retries
    #[instrument(skip(self), fields(url = %url))]
    pub async fn get(&self, url: &str) -> Result<Response> {
        let mut last_error = None;
        let mut rate_limit_retries = 0;
        const MAX_RATE_LIMIT_RETRIES: u32 = 3;

        for attempt in 0..=self.config.retries {
            if attempt > 0 {
                debug!("Retry attempt {} for {}", attempt, url);
                tokio::time::sleep(Duration::from_millis(500 * 2u64.pow(attempt - 1))).await;
            }

            match self.client.get(url).send().await {
                Ok(response) => {
                    let status = response.status();

                    if status.is_success() {
                        return Ok(response);
                    }

                    if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
                        let retry_after = response
                            .headers()
                            .get("retry-after")
                            .and_then(|v| v.to_str().ok())
                            .and_then(|v| v.parse().ok())
                            .unwrap_or(60);

                        // If wait_on_rate_limit is enabled, wait and retry
                        if self.config.wait_on_rate_limit && rate_limit_retries < MAX_RATE_LIMIT_RETRIES {
                            let wait_time = retry_after.min(self.config.max_rate_limit_wait);

                            eprintln!(
                                "Rate limited. Waiting {}s before retrying... (attempt {}/{})",
                                wait_time,
                                rate_limit_retries + 1,
                                MAX_RATE_LIMIT_RETRIES
                            );

                            // Show countdown
                            for remaining in (1..=wait_time).rev() {
                                if remaining % 10 == 0 || remaining <= 5 {
                                    eprint!("\r  {} seconds remaining...  ", remaining);
                                }
                                tokio::time::sleep(Duration::from_secs(1)).await;
                            }
                            eprintln!("\r  Retrying now...              ");

                            rate_limit_retries += 1;
                            continue;
                        }

                        return Err(GdeltError::RateLimited { retry_after_secs: retry_after });
                    }

                    if status.is_server_error() && attempt < self.config.retries {
                        last_error = Some(GdeltError::Api {
                            message: format!("Server error: {}", status),
                            status_code: Some(status.as_u16()),
                        });
                        continue;
                    }

                    return Err(GdeltError::Api {
                        message: format!("Request failed: {}", status),
                        status_code: Some(status.as_u16()),
                    });
                }
                Err(e) => {
                    if e.is_timeout() && attempt < self.config.retries {
                        last_error = Some(GdeltError::Network(e));
                        continue;
                    }
                    return Err(GdeltError::Network(e));
                }
            }
        }

        Err(last_error.unwrap_or_else(|| GdeltError::Other("Unknown error".to_string())))
    }

    /// Execute a GET request and parse JSON response
    pub async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> Result<T> {
        let response = self.get(url).await?;

        // Check content type to provide better error messages
        let content_type = response
            .headers()
            .get("content-type")
            .and_then(|v| v.to_str().ok())
            .map(|s| s.to_string())
            .unwrap_or_default();

        let text = response.text().await.map_err(GdeltError::Network)?;

        // If response is HTML, it's likely an error page
        if content_type.contains("text/html") || text.trim().starts_with("<!DOCTYPE") || text.trim().starts_with("<html") {
            return Err(GdeltError::Api {
                message: "API returned HTML instead of JSON. The endpoint may be unavailable or deprecated.".to_string(),
                status_code: None,
            });
        }

        // If response is plain text (not JSON), it's likely an API error message
        if !text.trim().starts_with('{') && !text.trim().starts_with('[') {
            let error_msg = text.trim();
            // Truncate long error messages
            let display_msg = if error_msg.len() > 200 {
                format!("{}...", &error_msg[..200])
            } else {
                error_msg.to_string()
            };
            return Err(GdeltError::Api {
                message: format!("API error: {}", display_msg),
                status_code: None,
            });
        }

        serde_json::from_str(&text).map_err(GdeltError::Json)
    }

    /// Execute a GET request and return text
    pub async fn get_text(&self, url: &str) -> Result<String> {
        let response = self.get(url).await?;
        response.text().await.map_err(GdeltError::Network)
    }

    /// Execute a GET request and return bytes
    pub async fn get_bytes(&self, url: &str) -> Result<bytes::Bytes> {
        let response = self.get(url).await?;
        response.bytes().await.map_err(GdeltError::Network)
    }

    /// Download a file from a URL (alias for get_bytes)
    pub async fn download_file(&self, url: &str) -> Result<bytes::Bytes> {
        self.get_bytes(url).await
    }

    /// Build a URL with query parameters
    pub fn build_url(base: &str, params: &[(&str, &str)]) -> String {
        if params.is_empty() {
            return base.to_string();
        }

        let query: Vec<String> = params
            .iter()
            .filter(|(_, v)| !v.is_empty())
            .map(|(k, v)| format!("{}={}", k, urlencoding::encode(v)))
            .collect();

        if query.is_empty() {
            base.to_string()
        } else {
            format!("{}?{}", base, query.join("&"))
        }
    }
}

impl Default for GdeltClient {
    fn default() -> Self {
        Self::new().expect("Failed to create default GDELT client")
    }
}

/// URL encoding helper
mod urlencoding {
    /// Form-urlencode `s` using `url::form_urlencoded::byte_serialize`.
    pub fn encode(s: &str) -> String {
        let mut encoded = String::new();
        encoded.extend(url::form_urlencoded::byte_serialize(s.as_bytes()));
        encoded
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL shared by every `build_url` test.
    const BASE: &str = "https://example.com";

    /// With no parameters the base URL is returned untouched.
    #[test]
    fn test_build_url_no_params() {
        assert_eq!(GdeltClient::build_url(BASE, &[]), "https://example.com");
    }

    /// Parameters are joined with `&` after a single `?`, in the order given.
    #[test]
    fn test_build_url_with_params() {
        let params = [("q", "test"), ("n", "10")];
        assert_eq!(
            GdeltClient::build_url(BASE, &params),
            "https://example.com?q=test&n=10"
        );
    }

    /// Values are form-urlencoded, so a space becomes `+`.
    #[test]
    fn test_build_url_encodes_spaces() {
        let params = [("q", "hello world")];
        assert_eq!(
            GdeltClient::build_url(BASE, &params),
            "https://example.com?q=hello+world"
        );
    }
}