gdelt 0.1.0

CLI for GDELT Project - optimized for agentic usage with local data caching
//! GKG query command handlers.

use crate::cli::args::{GkgCommand, GlobalArgs};
use crate::cli::output::OutputWriter;
use crate::db::duckdb::{AnalyticsDb, validation};
use crate::error::{ExitStatus, Result};

/// Handle GKG commands
pub async fn handle_gkg(cmd: GkgCommand, global: &GlobalArgs) -> Result<ExitStatus> {
    let db = AnalyticsDb::open()?;
    let output = OutputWriter::new(global);

    match cmd {
        GkgCommand::Query(args) => {
            // Build parameterized query
            let mut where_clauses = Vec::new();
            let mut params: Vec<Box<dyn duckdb::ToSql>> = Vec::new();

            let has_theme_filter = args.theme.is_some();
            let has_person_filter = args.person.is_some();
            let has_org_filter = args.org.is_some();
            let has_date_filter = args.start.is_some() || args.end.is_some();

            if let Some(ref theme) = args.theme {
                let validated = validation::validate_theme(theme)?;
                where_clauses.push("list_contains(themes, ?)".to_string());
                params.push(Box::new(validated));
            }
            if let Some(ref person) = args.person {
                let validated = validation::validate_entity_name(person)?;
                where_clauses.push("list_contains(persons, ?)".to_string());
                params.push(Box::new(validated));
            }
            if let Some(ref org) = args.org {
                let validated = validation::validate_entity_name(org)?;
                where_clauses.push("list_contains(organizations, ?)".to_string());
                params.push(Box::new(validated));
            }
            if let Some(ref start) = args.start {
                let date_int = validation::validate_date(start)?;
                where_clauses.push("date >= ?".to_string());
                params.push(Box::new(date_int as i64));
            }
            if let Some(ref end) = args.end {
                let date_int = validation::validate_date(end)?;
                where_clauses.push("date <= ?".to_string());
                params.push(Box::new(date_int as i64));
            }

            let where_clause = if where_clauses.is_empty() {
                String::new()
            } else {
                format!("WHERE {}", where_clauses.join(" AND "))
            };

            let sql = format!(
                r#"
                SELECT gkg_record_id, date, source_common_name, document_identifier,
                       themes, persons, organizations, tone
                FROM gkg
                {}
                ORDER BY date DESC
                LIMIT {}
                "#,
                where_clause,
                args.limit
            );

            let params_refs: Vec<&dyn duckdb::ToSql> = params.iter().map(|p| p.as_ref()).collect();
            let result = db.query_with_params(&sql, &params_refs)?;

            // If no results, check if we have any data and provide guidance
            if result.rows.is_empty() && !global.quiet {
                let stats = db.stats()?;
                if stats.gkg_count == 0 {
                    eprintln!("No results found. No local GKG data available.");
                    eprintln!();
                    eprintln!("Sync data first:");
                    eprintln!("  gdelt data sync");
                    eprintln!("  gdelt data download gkg --start 2024-01-01 --end 2024-01-31");
                } else {
                    eprintln!("No results found for your query.");
                    eprintln!();
                    eprintln!("Try broadening your search:");
                    if has_theme_filter || has_person_filter || has_org_filter {
                        eprintln!("  - Remove filters (--theme, --person, --org)");
                    }
                    if has_date_filter {
                        eprintln!("  - Expand date range (--start, --end)");
                    }
                    eprintln!();
                    eprintln!("Check available data range: gdelt data status");
                }
            }

            output.write_value(&result)?;

            Ok(ExitStatus::Success)
        }

        GkgCommand::Themes(args) => {
            // Build parameterized query for date filters
            let mut where_clauses = Vec::new();
            let mut params: Vec<Box<dyn duckdb::ToSql>> = Vec::new();

            if let Some(ref start) = args.start {
                let date_int = validation::validate_date(start)?;
                where_clauses.push("date >= ?".to_string());
                params.push(Box::new(date_int as i64));
            }
            if let Some(ref end) = args.end {
                let date_int = validation::validate_date(end)?;
                where_clauses.push("date <= ?".to_string());
                params.push(Box::new(date_int as i64));
            }

            let where_clause = if where_clauses.is_empty() {
                String::new()
            } else {
                format!("WHERE {}", where_clauses.join(" AND "))
            };

            // Validate and add pattern filter
            let (pattern_filter, pattern_param) = if let Some(ref p) = args.pattern {
                let validated = validation::validate_pattern(p)?;
                let like_pattern = format!("%{}%", validated.to_uppercase());
                ("WHERE theme LIKE ?".to_string(), Some(like_pattern))
            } else {
                (String::new(), None)
            };

            if let Some(ref pattern) = pattern_param {
                params.push(Box::new(pattern.clone()));
            }

            let sql = format!(
                r#"
                WITH theme_counts AS (
                    SELECT unnest(themes) as theme, COUNT(*) as count
                    FROM gkg
                    {}
                    GROUP BY theme
                )
                SELECT theme, count
                FROM theme_counts
                {}
                HAVING count >= {}
                ORDER BY count DESC
                LIMIT 100
                "#,
                where_clause,
                pattern_filter,
                args.min_count
            );

            let params_refs: Vec<&dyn duckdb::ToSql> = params.iter().map(|p| p.as_ref()).collect();
            let result = db.query_with_params(&sql, &params_refs)?;
            output.write_value(&result)?;

            Ok(ExitStatus::Success)
        }

        GkgCommand::Persons(args) => {
            let mut params: Vec<Box<dyn duckdb::ToSql>> = Vec::new();

            // Validate and add pattern filter
            let pattern_filter = if let Some(ref p) = args.pattern {
                let validated = validation::validate_pattern(p)?;
                let like_pattern = format!("%{}%", validated);
                params.push(Box::new(like_pattern));
                "AND person LIKE ?".to_string()
            } else {
                String::new()
            };

            let sql = format!(
                r#"
                WITH person_counts AS (
                    SELECT unnest(persons) as person, COUNT(*) as count
                    FROM gkg
                    WHERE persons IS NOT NULL
                    GROUP BY person
                )
                SELECT person, count
                FROM person_counts
                WHERE count >= {} {}
                ORDER BY count DESC
                LIMIT {}
                "#,
                args.min_count,
                pattern_filter,
                args.limit
            );

            let params_refs: Vec<&dyn duckdb::ToSql> = params.iter().map(|p| p.as_ref()).collect();
            let result = db.query_with_params(&sql, &params_refs)?;
            output.write_value(&result)?;

            Ok(ExitStatus::Success)
        }

        GkgCommand::Organizations(args) => {
            let mut params: Vec<Box<dyn duckdb::ToSql>> = Vec::new();

            // Validate and add pattern filter
            let pattern_filter = if let Some(ref p) = args.pattern {
                let validated = validation::validate_pattern(p)?;
                let like_pattern = format!("%{}%", validated);
                params.push(Box::new(like_pattern));
                "AND org LIKE ?".to_string()
            } else {
                String::new()
            };

            let sql = format!(
                r#"
                WITH org_counts AS (
                    SELECT unnest(organizations) as org, COUNT(*) as count
                    FROM gkg
                    WHERE organizations IS NOT NULL
                    GROUP BY org
                )
                SELECT org, count
                FROM org_counts
                WHERE count >= {} {}
                ORDER BY count DESC
                LIMIT {}
                "#,
                args.min_count,
                pattern_filter,
                args.limit
            );

            let params_refs: Vec<&dyn duckdb::ToSql> = params.iter().map(|p| p.as_ref()).collect();
            let result = db.query_with_params(&sql, &params_refs)?;
            output.write_value(&result)?;

            Ok(ExitStatus::Success)
        }

        GkgCommand::Locations(args) => {
            let mut where_clauses = vec!["locations IS NOT NULL".to_string()];
            let mut params: Vec<Box<dyn duckdb::ToSql>> = Vec::new();

            if let Some(ref country) = args.country {
                let validated = validation::validate_country_code(country)?;
                where_clauses.push("list_contains(locations, ?)".to_string());
                params.push(Box::new(validated));
            }

            // Validate and add pattern filter
            let pattern_filter = if let Some(ref p) = args.pattern {
                let validated = validation::validate_pattern(p)?;
                let like_pattern = format!("%{}%", validated);
                params.push(Box::new(like_pattern));
                "AND loc LIKE ?".to_string()
            } else {
                String::new()
            };

            let sql = format!(
                r#"
                WITH loc_counts AS (
                    SELECT unnest(locations) as loc, COUNT(*) as count
                    FROM gkg
                    WHERE {}
                    GROUP BY loc
                )
                SELECT loc as location, count
                FROM loc_counts
                WHERE count >= 5 {}
                ORDER BY count DESC
                LIMIT {}
                "#,
                where_clauses.join(" AND "),
                pattern_filter,
                args.limit
            );

            let params_refs: Vec<&dyn duckdb::ToSql> = params.iter().map(|p| p.as_ref()).collect();
            let result = db.query_with_params(&sql, &params_refs)?;
            output.write_value(&result)?;

            Ok(ExitStatus::Success)
        }
    }
}