gdelt 0.1.0

CLI for GDELT Project - optimized for agentic usage with local data caching
//! Report generation for GDELT analytics.

#![allow(dead_code)]

use crate::db::AnalyticsDb;
use crate::error::Result;
use chrono::{NaiveDate, Utc};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

use super::entities::{extract_entities, EntitiesConfig, EntityResult, EntityType};
use super::sentiment::{analyze_sentiment, SentimentConfig, SentimentResult};
use super::trends::{analyze_trends, Granularity, TrendsConfig, TrendsResult};

/// Report configuration
///
/// Input to [`generate_report`]: selects the topics to analyse and the
/// reporting period.
#[derive(Debug, Clone)]
pub struct ReportConfig {
    /// Topics to analyse. Drives the trend and sentiment sections; when empty,
    /// `generate_report` skips trend analysis and produces no sentiment entries.
    pub topics: Vec<String>,
    /// First day of the reporting period. `generate_report` parses it with the
    /// `%Y-%m-%d` format and also forwards the raw string to the analysis helpers.
    pub start_date: String,
    /// Last day of the reporting period, handled the same way as `start_date`.
    pub end_date: String,
    /// Optional output location. Not read anywhere in this file; presumably
    /// consumed by the caller that writes the report out (TODO confirm).
    pub output_path: Option<PathBuf>,
    /// Requested output format. Not read anywhere in this file; presumably the
    /// caller uses it to pick a renderer (TODO confirm).
    pub format: ReportFormat,
}

/// Report output format
///
/// NOTE(review): the mapping from variant to renderer is not visible in this
/// file; the per-variant notes below are assumptions to confirm at the call site.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReportFormat {
    /// Presumably the serde-serialized [`Report`].
    Json,
    /// Presumably the output of `Report::to_markdown`.
    Markdown,
    /// Presumably the output of `Report::to_html`.
    Html,
}

/// Generated report
///
/// Assembled by [`generate_report`]. Derives serde's `Serialize`/`Deserialize`
/// and can be rendered with `to_markdown` / `to_html`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Report {
    /// Report title
    pub title: String,
    /// Generation timestamp (formatted as `%Y-%m-%d %H:%M:%S UTC`)
    pub generated_at: String,
    /// Date range covered
    pub date_range: DateRange,
    /// Summary statistics
    pub summary: ReportSummary,
    /// Trend analysis results (empty when no topics were configured)
    pub trends: Vec<TrendsResult>,
    /// Top entities
    pub entities: Vec<EntityResult>,
    /// Sentiment analysis (only topics whose analysis succeeded are included)
    pub sentiment: Vec<SentimentResult>,
}

/// Date range for the report
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateRange {
    /// Start date, copied verbatim from `ReportConfig::start_date`.
    pub start: String,
    /// End date, copied verbatim from `ReportConfig::end_date`.
    pub end: String,
    /// Length of the period in days as computed by `generate_report`
    /// (never less than 1).
    pub days: i64,
}

/// Report summary statistics
///
/// Populated by `generate_summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReportSummary {
    /// Total events in period (rows of `events` whose `sql_date` lies within
    /// the requested bounds, both inclusive)
    pub total_events: i64,
    /// Total GKG records. NOTE: counted over the whole `gkg` table; the query
    /// is not restricted to the report period.
    pub total_gkg: i64,
    /// Average daily events (`total_events` divided by the period length in days)
    pub avg_daily_events: f64,
    /// Overall tone (`AVG(avg_tone)` over the period's events; `0.0` when the
    /// query yields no numeric value)
    pub overall_tone: f64,
    /// Top event categories (at most five, ordered by descending count)
    pub top_categories: Vec<CategoryCount>,
}

/// Event category count
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoryCount {
    /// Human-readable category label produced by `format_event_category`
    /// from the event root code.
    pub category: String,
    /// Number of events with that root code in the report period.
    pub count: i64,
}

/// Generate a report
///
/// Builds a [`Report`] for the period and topics described by `config`:
///
/// 1. summary statistics via `generate_summary`,
/// 2. per-topic trends via `analyze_trends` (skipped when `config.topics` is empty),
/// 3. top entities via `extract_entities` (all entity types, `min_count` 10, `limit` 20),
/// 4. per-topic sentiment via `analyze_sentiment`.
///
/// The period length (`DateRange::days`) counts both the start and the end
/// day, matching the inclusive `>= start AND <= end` filter used by the
/// summary queries. It is never less than 1.
///
/// A date that does not parse as `%Y-%m-%d` falls back to today's UTC date
/// for the day count only; the raw strings from `config` are still what gets
/// forwarded to the analysis helpers and echoed in the report.
///
/// # Errors
///
/// Propagates errors from `generate_summary`, `analyze_trends` and
/// `extract_entities`. Sentiment analysis is best-effort: a topic whose
/// analysis fails is omitted from the report instead of failing it.
pub fn generate_report(db: &AnalyticsDb, config: &ReportConfig) -> Result<Report> {
    // Take a single clock snapshot so the timestamp and the date fallbacks agree.
    let now = Utc::now();
    let generated_at = now.format("%Y-%m-%d %H:%M:%S UTC").to_string();
    let today = now.naive_utc().date();

    // Parse dates
    let start = NaiveDate::parse_from_str(&config.start_date, "%Y-%m-%d").unwrap_or(today);
    let end = NaiveDate::parse_from_str(&config.end_date, "%Y-%m-%d").unwrap_or(today);

    // Both bounds are inclusive, so a range from day D to day D+1 spans two
    // days. Clamp to 1 so an inverted range cannot yield a zero or negative
    // divisor for the daily average.
    let days = ((end - start).num_days() + 1).max(1);

    let date_range = DateRange {
        start: config.start_date.clone(),
        end: config.end_date.clone(),
        days,
    };

    // Generate summary
    let summary = generate_summary(db, &config.start_date, &config.end_date, days)?;

    // Generate trends for topics
    let trends = if config.topics.is_empty() {
        Vec::new()
    } else {
        let trends_config = TrendsConfig {
            topics: config.topics.clone(),
            granularity: Granularity::Day,
            start_date: Some(config.start_date.clone()),
            end_date: Some(config.end_date.clone()),
            normalize: true,
            detect_anomalies: true,
            ..Default::default()
        };
        analyze_trends(db, &trends_config)?
    };

    // Extract top entities
    let entities_config = EntitiesConfig {
        entity_type: EntityType::All,
        min_count: 10,
        limit: 20,
        start_date: Some(config.start_date.clone()),
        end_date: Some(config.end_date.clone()),
    };
    let entities = extract_entities(db, &entities_config)?;

    // Sentiment analysis for topics. Best-effort by design: `.ok()` drops a
    // topic whose analysis fails rather than aborting the whole report.
    let sentiment: Vec<SentimentResult> = config
        .topics
        .iter()
        .filter_map(|topic| {
            let sentiment_config = SentimentConfig {
                topic: topic.clone(),
                start_date: Some(config.start_date.clone()),
                end_date: Some(config.end_date.clone()),
                ..Default::default()
            };
            analyze_sentiment(db, &sentiment_config).ok()
        })
        .collect();

    let title = format!(
        "GDELT Report: {} to {}",
        config.start_date, config.end_date
    );

    Ok(Report {
        title,
        generated_at,
        date_range,
        summary,
        trends,
        entities,
        sentiment,
    })
}

/// Compute the headline statistics for the report period.
///
/// Issues three queries through `db.query`:
///
/// * event count and average tone for events whose `sql_date` lies between
///   `start_date` and `end_date` (both inclusive),
/// * the row count of the `gkg` table,
/// * the five most frequent non-null `event_root_code` values in the period.
///
/// `start_date` / `end_date` are expected in `YYYY-MM-DD` (or `YYYYMMDD`)
/// form. Because they are interpolated into the SQL text, every character
/// that is not an ASCII digit is removed first, so caller-supplied text can
/// never contribute SQL syntax. `days` is the divisor for the daily average;
/// a non-positive value yields an average of `0.0`.
///
/// # Errors
///
/// Propagates any error returned by `db.query`. That includes the syntax
/// error produced when a date contains no digits at all.
fn generate_summary(
    db: &AnalyticsDb,
    start_date: &str,
    end_date: &str,
    days: i64,
) -> Result<ReportSummary> {
    // Reduce the bounds to bare integer literals before they touch the SQL text.
    let start_int = sql_date_literal(start_date);
    let end_int = sql_date_literal(end_date);

    // Get total events
    let events_sql = format!(
        "SELECT COUNT(*), AVG(avg_tone) FROM events WHERE sql_date >= {} AND sql_date <= {}",
        start_int, end_int
    );
    let events_result = db.query(&events_sql)?;
    let total_events = events_result
        .rows
        .first()
        .and_then(|r| r.get(0))
        .and_then(|v| v.as_i64())
        .unwrap_or(0);
    // AVG over zero rows is not a number; report a neutral 0.0 in that case.
    let overall_tone = events_result
        .rows
        .first()
        .and_then(|r| r.get(1))
        .and_then(|v| v.as_f64())
        .unwrap_or(0.0);

    // Get total GKG
    // NOTE(review): unlike the event queries this count is not restricted to
    // the report period. Confirm whether a date filter is intended here.
    let gkg_sql = "SELECT COUNT(*) FROM gkg";
    let gkg_result = db.query(gkg_sql)?;
    let total_gkg = gkg_result
        .rows
        .first()
        .and_then(|r| r.get(0))
        .and_then(|v| v.as_i64())
        .unwrap_or(0);

    let avg_daily_events = if days > 0 {
        total_events as f64 / days as f64
    } else {
        0.0
    };

    // Get top event categories
    let categories_sql = format!(
        r#"
        SELECT event_root_code, COUNT(*) as cnt
        FROM events
        WHERE sql_date >= {} AND sql_date <= {}
        AND event_root_code IS NOT NULL
        GROUP BY event_root_code
        ORDER BY cnt DESC
        LIMIT 5
        "#,
        start_int, end_int
    );
    let categories_result = db.query(&categories_sql)?;
    let top_categories: Vec<CategoryCount> = categories_result
        .rows
        .iter()
        .map(|row| {
            let code = row.get(0).and_then(|v| v.as_str()).unwrap_or("");
            let count = row.get(1).and_then(|v| v.as_i64()).unwrap_or(0);
            CategoryCount {
                category: format_event_category(code),
                count,
            }
        })
        .collect();

    Ok(ReportSummary {
        total_events,
        total_gkg,
        avg_daily_events,
        overall_tone,
        top_categories,
    })
}

/// Turn a date string such as `2024-01-31` into the digits-only form
/// (`20240131`) compared against `sql_date`, discarding every character that
/// is not an ASCII digit.
fn sql_date_literal(date: &str) -> String {
    date.chars().filter(char::is_ascii_digit).collect()
}

/// Translate a two-digit event root code into a human-readable label.
///
/// Codes `"01"` through `"20"` map to fixed names (the labels appear to
/// follow the CAMEO root-code taxonomy used by GDELT; confirm against the
/// codebook). The match is exact and textual: any other input, including
/// un-padded forms such as `"1"` and the empty string, yields
/// `"Category <code>"`.
fn format_event_category(code: &str) -> String {
    // (root code, display label), in ascending code order.
    const ROOT_CODE_LABELS: [(&str, &str); 20] = [
        ("01", "Public Statements"),
        ("02", "Appeals"),
        ("03", "Cooperation Intent"),
        ("04", "Consultations"),
        ("05", "Diplomatic Cooperation"),
        ("06", "Material Cooperation"),
        ("07", "Aid Provision"),
        ("08", "Yielding"),
        ("09", "Investigations"),
        ("10", "Demands"),
        ("11", "Disapproval"),
        ("12", "Rejections"),
        ("13", "Threats"),
        ("14", "Protests"),
        ("15", "Force Posture"),
        ("16", "Reduced Relations"),
        ("17", "Coercion"),
        ("18", "Assaults"),
        ("19", "Fighting"),
        ("20", "Mass Violence"),
    ];

    ROOT_CODE_LABELS
        .iter()
        .find(|(root, _)| *root == code)
        .map(|(_, label)| (*label).to_owned())
        .unwrap_or_else(|| format!("Category {}", code))
}

impl Report {
    /// Render the report as markdown
    ///
    /// Sections are emitted in a fixed order (title, summary, top event
    /// categories, trend analysis, top entities, sentiment analysis).
    /// Every section after the summary is omitted when it has no data. The
    /// entity table lists at most the first 15 entities.
    pub fn to_markdown(&self) -> String {
        let mut md = String::new();

        md.push_str(&format!("# {}\n\n", self.title));
        md.push_str(&format!("*Generated: {}*\n\n", self.generated_at));

        // Summary
        md.push_str("## Summary\n\n");
        md.push_str(&format!(
            "- **Date Range**: {} to {} ({} days)\n",
            self.date_range.start, self.date_range.end, self.date_range.days
        ));
        md.push_str(&format!("- **Total Events**: {}\n", self.summary.total_events));
        md.push_str(&format!("- **Total GKG Records**: {}\n", self.summary.total_gkg));
        md.push_str(&format!(
            "- **Average Daily Events**: {:.1}\n",
            self.summary.avg_daily_events
        ));
        md.push_str(&format!("- **Overall Tone**: {:.2}\n\n", self.summary.overall_tone));

        // Top Categories
        if !self.summary.top_categories.is_empty() {
            md.push_str("### Top Event Categories\n\n");
            md.push_str("| Category | Count |\n");
            md.push_str("|----------|-------|\n");
            for cat in &self.summary.top_categories {
                md.push_str(&format!("| {} | {} |\n", cat.category, cat.count));
            }
            md.push('\n');
        }

        // Trends
        if !self.trends.is_empty() {
            md.push_str("## Trend Analysis\n\n");
            for trend in &self.trends {
                md.push_str(&format!("### {}\n\n", trend.topic));
                md.push_str(&format!("- Total Count: {}\n", trend.total_count));
                md.push_str(&format!("- Average per Period: {:.1}\n", trend.avg_count));
                md.push_str(&format!("- Standard Deviation: {:.1}\n", trend.std_dev));
                if !trend.anomalies.is_empty() {
                    md.push_str(&format!(
                        "- **Anomalies Detected**: {} periods\n",
                        trend.anomalies.len()
                    ));
                }
                md.push('\n');
            }
        }

        // Entities
        if !self.entities.is_empty() {
            md.push_str("## Top Entities\n\n");
            md.push_str("| Entity | Type | Count | Avg Tone |\n");
            md.push_str("|--------|------|-------|----------|\n");
            for entity in self.entities.iter().take(15) {
                // A missing tone is shown as a dash rather than a number.
                let tone = entity
                    .avg_tone
                    .map(|t| format!("{:.2}", t))
                    .unwrap_or_else(|| "-".to_string());
                md.push_str(&format!(
                    "| {} | {:?} | {} | {} |\n",
                    entity.name, entity.entity_type, entity.count, tone
                ));
            }
            md.push('\n');
        }

        // Sentiment
        if !self.sentiment.is_empty() {
            md.push_str("## Sentiment Analysis\n\n");
            for sent in &self.sentiment {
                md.push_str(&format!("### {}\n\n", sent.topic));
                md.push_str(&format!("- Overall Tone: {:.2}\n", sent.overall_tone));
                md.push_str(&format!("- Tone Trend: {:.4} (per period)\n", sent.tone_trend));
                md.push_str(&format!("- Records Analyzed: {}\n\n", sent.total_count));
            }
        }

        md
    }

    /// Render the report as HTML
    ///
    /// Produces a standalone HTML page whose body is the markdown rendering
    /// shown verbatim inside a `<pre>` element (the markdown is not converted
    /// to HTML elements). The title and the markdown text are HTML-escaped
    /// before insertion, because topics and entity names originate from
    /// ingested data and may contain `<`, `>`, `&` or quotes.
    pub fn to_html(&self) -> String {
        // Simple HTML wrapper around markdown
        let markdown = self.to_markdown();
        format!(
            r#"<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{}</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 40px auto; padding: 20px; }}
        table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background-color: #f5f5f5; }}
        h1 {{ color: #333; }}
        h2 {{ color: #555; border-bottom: 1px solid #eee; padding-bottom: 5px; }}
        h3 {{ color: #666; }}
    </style>
</head>
<body>
<pre style="white-space: pre-wrap;">
{}
</pre>
</body>
</html>"#,
            escape_html(&self.title),
            escape_html(&markdown)
        )
    }
}

/// Replace the characters that are significant in HTML text and attribute
/// values (`&`, `<`, `>`, `"`, `'`) with their entity references.
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}