gdelt 0.1.0

CLI for GDELT Project - optimized for agentic usage with local data caching
//! DOC 2.0 API client for searching news articles.
//!
//! The DOC API provides access to GDELT's full-text search capabilities
//! for news articles from around the world.

use crate::api::client::{endpoints, GdeltClient};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use tracing::instrument;

/// DOC 2.0 API client
///
/// Thin wrapper around a [`GdeltClient`]: the methods on this type build a
/// DOC API request URL and fetch the response as JSON through that client.
#[derive(Debug, Clone)]
pub struct DocApi {
    /// Underlying client used to issue requests (via `get_json`).
    client: GdeltClient,
}

impl DocApi {
    /// Create a new DOC API client
    pub fn new(client: GdeltClient) -> Self {
        Self { client }
    }

    /// Search for articles
    ///
    /// Builds an `ArtList` request from `params` and fetches it as JSON.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying client's `get_json` reports.
    #[instrument(skip(self))]
    pub async fn search(&self, params: DocSearchParams) -> Result<DocSearchResponse> {
        let url = self.build_search_url(&params);
        self.client.get_json(&url).await
    }

    /// Get timeline data (volume or tone over time)
    ///
    /// The timeline variant is selected by `params.mode`; see
    /// `build_timeline_url` for the accepted short names.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying client's `get_json` reports.
    #[instrument(skip(self))]
    pub async fn timeline(&self, params: DocTimelineParams) -> Result<DocTimelineResponse> {
        let url = self.build_timeline_url(&params);
        self.client.get_json(&url).await
    }

    /// Get word cloud data
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying client's `get_json` reports.
    #[instrument(skip(self))]
    pub async fn wordcloud(&self, params: DocWordcloudParams) -> Result<DocWordcloudResponse> {
        let url = self.build_wordcloud_url(&params);
        self.client.get_json(&url).await
    }

    /// Get tone/sentiment distribution chart
    ///
    /// Returns a histogram of article tones from -100 (very negative) to +100 (very positive).
    /// This is useful for understanding the overall sentiment of coverage on a topic.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying client's `get_json` reports.
    #[instrument(skip(self))]
    pub async fn tonechart(&self, params: DocTonechartParams) -> Result<DocTonechartResponse> {
        let url = self.build_tonechart_url(&params);
        self.client.get_json(&url).await
    }

    /// Build the `ArtList` request URL for an article search.
    ///
    /// `query`, `mode` and `format` are always present; every optional field
    /// of `params` is appended only when it is `Some`, in a fixed order.
    fn build_search_url(&self, params: &DocSearchParams) -> String {
        // Numeric values are rendered up front into owned strings that outlive
        // `query_params`, so the vector can borrow them as `&str`. This
        // replaces the previous `Box::leak`, which leaked one allocation per
        // call for each numeric parameter that was set.
        let max_records = params.max_records.map(|n| n.to_string());
        let tone_min = params.tone_min.map(|t| t.to_string());
        let tone_max = params.tone_max.map(|t| t.to_string());

        let mut query_params = vec![
            ("query", params.query.as_str()),
            ("mode", "ArtList"),
            ("format", "json"),
        ];

        // NOTE(review): the public DOC 2.0 documentation appears to describe
        // language/country/domain/theme/tone filters as operators inside
        // `query` rather than standalone URL parameters — confirm the endpoint
        // honors them in this form.
        let optional = [
            ("timespan", params.timespan.as_deref()),
            ("startdatetime", params.start_datetime.as_deref()),
            ("enddatetime", params.end_datetime.as_deref()),
            ("maxrecords", max_records.as_deref()),
            ("sort", params.sort.as_deref()),
            ("sourcelang", params.source_lang.as_deref()),
            ("sourcecountry", params.source_country.as_deref()),
            ("domain", params.domain.as_deref()),
            ("theme", params.theme.as_deref()),
            ("tonemin", tone_min.as_deref()),
            ("tonemax", tone_max.as_deref()),
        ];
        for (key, value) in optional {
            if let Some(value) = value {
                query_params.push((key, value));
            }
        }

        GdeltClient::build_url(endpoints::DOC_API, &query_params)
    }

    /// Build the request URL for a timeline query.
    ///
    /// `params.mode` accepts the short names `vol`, `volraw`, `tone`, `lang`
    /// and `sourcecountry`. A missing or unrecognized mode falls back to the
    /// volume timeline (`TimelineVol`).
    fn build_timeline_url(&self, params: &DocTimelineParams) -> String {
        let mode = match params.mode.as_deref() {
            Some("volraw") => "TimelineVolRaw",
            Some("tone") => "TimelineTone",
            Some("lang") => "TimelineLang",
            Some("sourcecountry") => "TimelineSourceCountry",
            // `Some("vol")`, `None`, and any unknown value all select the default.
            _ => "TimelineVol",
        };

        // Owned rendering of the smoothing window; it must outlive
        // `query_params`, which borrows it (no `Box::leak` needed).
        let smooth = params.smooth.map(|s| s.to_string());

        let mut query_params = vec![
            ("query", params.query.as_str()),
            ("mode", mode),
            ("format", "json"),
        ];

        if let Some(timespan) = params.timespan.as_deref() {
            query_params.push(("timespan", timespan));
        }
        // NOTE(review): the public DOC 2.0 documentation appears to name this
        // parameter `timelinesmooth` — confirm `smooth` is accepted.
        if let Some(smooth) = smooth.as_deref() {
            query_params.push(("smooth", smooth));
        }

        GdeltClient::build_url(endpoints::DOC_API, &query_params)
    }

    /// Build the request URL for a word cloud query.
    ///
    /// Unlike the other builders, `timespan` is always sent; it defaults to
    /// `24h` when `params.timespan` is `None`.
    fn build_wordcloud_url(&self, params: &DocWordcloudParams) -> String {
        let query_params = vec![
            ("query", params.query.as_str()),
            ("mode", "WordCloudJson"),
            ("format", "json"),
            ("timespan", params.timespan.as_deref().unwrap_or("24h")),
        ];

        GdeltClient::build_url(endpoints::DOC_API, &query_params)
    }

    /// Build the request URL for a tone chart query.
    ///
    /// `timespan` is appended only when `params.timespan` is `Some`.
    fn build_tonechart_url(&self, params: &DocTonechartParams) -> String {
        let mut query_params = vec![
            ("query", params.query.as_str()),
            ("mode", "ToneChart"),
            ("format", "json"),
        ];

        if let Some(timespan) = params.timespan.as_deref() {
            query_params.push(("timespan", timespan));
        }

        GdeltClient::build_url(endpoints::DOC_API, &query_params)
    }
}

/// Parameters for DOC search
///
/// Only `query` is always sent. Each optional field is added to the request
/// URL by `DocApi::build_search_url` only when it is `Some`.
#[derive(Debug, Clone, Default, Serialize)]
pub struct DocSearchParams {
    /// Search query, sent as the `query` parameter.
    pub query: String,
    /// Sent as the `timespan` parameter when set.
    pub timespan: Option<String>,
    /// Sent as the `startdatetime` parameter when set.
    pub start_datetime: Option<String>,
    /// Sent as the `enddatetime` parameter when set.
    pub end_datetime: Option<String>,
    /// Sent as the `maxrecords` parameter when set.
    pub max_records: Option<u32>,
    /// Sent as the `sort` parameter when set.
    pub sort: Option<String>,
    /// Sent as the `sourcelang` parameter when set.
    pub source_lang: Option<String>,
    /// Sent as the `sourcecountry` parameter when set.
    pub source_country: Option<String>,
    /// Sent as the `domain` parameter when set.
    pub domain: Option<String>,
    /// Sent as the `theme` parameter when set.
    pub theme: Option<String>,
    /// Sent as the `tonemin` parameter when set.
    pub tone_min: Option<f64>,
    /// Sent as the `tonemax` parameter when set.
    pub tone_max: Option<f64>,
}

/// Parameters for timeline queries
#[derive(Debug, Clone, Default, Serialize)]
pub struct DocTimelineParams {
    /// Search query, sent as the `query` parameter.
    pub query: String,
    /// Short timeline mode name: `vol`, `volraw`, `tone`, `lang` or
    /// `sourcecountry`. `None` or any other value selects `TimelineVol`.
    pub mode: Option<String>,
    /// Sent as the `timespan` parameter when set.
    pub timespan: Option<String>,
    /// Sent as the `smooth` parameter when set.
    pub smooth: Option<u8>,
}

/// Parameters for word cloud queries
#[derive(Debug, Clone, Default, Serialize)]
pub struct DocWordcloudParams {
    /// Search query, sent as the `query` parameter.
    pub query: String,
    /// Sent as the `timespan` parameter; `24h` is used when this is `None`.
    pub timespan: Option<String>,
}

/// Parameters for tone chart queries
#[derive(Debug, Clone, Default, Serialize)]
pub struct DocTonechartParams {
    /// Search query, sent as the `query` parameter.
    pub query: String,
    /// Sent as the `timespan` parameter when set; omitted otherwise.
    pub timespan: Option<String>,
}

/// Response from DOC search
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocSearchResponse {
    /// Matching articles; an absent `articles` key deserializes to an empty list.
    #[serde(default)]
    pub articles: Vec<Article>,
}

/// A news article from GDELT
///
/// `url` and `title` are required when deserializing; every other field
/// falls back to its default (`None` or an empty string) when absent.
///
/// Note: The `tone` field is not returned by the GDELT DOC API in ArtList mode.
/// To get tone/sentiment data, use:
/// - `gdelt doc timeline --mode tone` for aggregate tone over time
/// - `gdelt analytics sentiment` with local database for detailed sentiment analysis
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Article {
    /// Article URL (required).
    pub url: String,
    /// Mobile variant of the URL, if provided.
    #[serde(default)]
    pub url_mobile: Option<String>,
    /// Article title (required).
    pub title: String,
    /// Value of the `seendate` field; empty when absent.
    /// NOTE(review): presumably a timestamp string — format not shown here; confirm.
    #[serde(default)]
    pub seendate: String,
    /// Value of the `socialimage` field, if provided.
    #[serde(default)]
    pub socialimage: Option<String>,
    /// Value of the `domain` field; empty when absent.
    #[serde(default)]
    pub domain: String,
    /// Value of the `language` field; empty when absent.
    #[serde(default)]
    pub language: String,
    /// Value of the `sourcecountry` field; empty when absent.
    #[serde(default)]
    pub sourcecountry: String,
    /// Note: This field is typically not populated in ArtList mode.
    /// Use ToneChart mode or local analytics for tone data.
    #[serde(default)]
    pub tone: Option<f64>,
}

/// Response from timeline queries
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocTimelineResponse {
    /// Timeline entries; an absent `timeline` key deserializes to an empty list.
    #[serde(default)]
    pub timeline: Vec<TimelineEntry>,
}

/// A single timeline entry
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TimelineEntry {
    /// Value of the `date` field (required when deserializing).
    pub date: String,
    /// Value of the `value` field; `0.0` when absent.
    #[serde(default)]
    pub value: f64,
    /// Value of the `series` field, if provided.
    #[serde(default)]
    pub series: Option<String>,
}

/// Response from word cloud queries
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocWordcloudResponse {
    /// Word cloud entries; an absent `wordcloud` key deserializes to an empty list.
    #[serde(default)]
    pub wordcloud: Vec<WordCloudEntry>,
}

/// A word cloud entry
///
/// Both fields are required when deserializing.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WordCloudEntry {
    /// The word or term.
    pub word: String,
    /// Weight associated with the word.
    /// NOTE(review): scale and units are not shown here — confirm against the API.
    pub weight: f64,
}

/// Response from tone chart queries
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DocTonechartResponse {
    /// Histogram bins; an absent `tonechart` key deserializes to an empty list.
    #[serde(default)]
    pub tonechart: Vec<ToneBin>,
}

/// A tone histogram bin
///
/// `bin` is required when deserializing; `count` and `toparts` default to
/// `0` and an empty list when absent.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ToneBin {
    /// Tone value from -100 (very negative) to +100 (very positive)
    pub bin: i32,
    /// Number of articles with this tone
    #[serde(default)]
    pub count: u32,
    /// Sample articles at this tone level
    #[serde(default)]
    pub toparts: Vec<ToneArticle>,
}

/// A sample article from tone chart
///
/// Both fields default to an empty string when absent from the response.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ToneArticle {
    /// Article URL.
    #[serde(default)]
    pub url: String,
    /// Article title.
    #[serde(default)]
    pub title: String,
}