#![allow(dead_code)]
use crate::error::{GdeltError, Result};
use reqwest::{Client, Response};
use std::time::Duration;
use tracing::{debug, instrument};
/// URLs of the GDELT services and data files referenced by this client.
///
/// The four `*_API` constants point at `api.gdeltproject.org` over HTTPS; the
/// three file-list constants point at `data.gdeltproject.org` over plain HTTP.
pub mod endpoints {
/// GDELT v2 DOC API endpoint.
pub const DOC_API: &str = "https://api.gdeltproject.org/api/v2/doc/doc";
/// GDELT v2 GEO API endpoint.
pub const GEO_API: &str = "https://api.gdeltproject.org/api/v2/geo/geo";
/// GDELT v2 TV API endpoint.
pub const TV_API: &str = "https://api.gdeltproject.org/api/v2/tv/tv";
/// GDELT v2 Context API endpoint.
pub const CONTEXT_API: &str = "https://api.gdeltproject.org/api/v2/context/context";
/// Plain-text master file list for the GDELT v2 data files.
// NOTE(review): served over http:// rather than https:// — confirm this is intentional.
pub const MASTER_FILE_LIST: &str = "http://data.gdeltproject.org/gdeltv2/masterfilelist.txt";
/// Plain-text master file list for the GDELT v2 translation data files.
pub const MASTER_FILE_LIST_TRANSLATION: &str = "http://data.gdeltproject.org/gdeltv2/masterfilelist-translation.txt";
/// Plain-text "last update" file for the GDELT v2 data files.
pub const LAST_UPDATE: &str = "http://data.gdeltproject.org/gdeltv2/lastupdate.txt";
}
/// Tunable settings for [`GdeltClient`].
#[derive(Debug, Clone)]
pub struct ClientConfig {
/// Timeout handed to the `reqwest` client builder.
pub timeout: Duration,
/// Upper bound on how many times `GdeltClient::get` re-sends a request
/// after the initial attempt when it hits a server error or a timeout.
pub retries: u32,
/// Value passed to the `reqwest` client builder as the user agent.
pub user_agent: String,
/// When `true`, `GdeltClient::get` sleeps and retries on HTTP 429 instead
/// of returning a rate-limit error straight away.
pub wait_on_rate_limit: bool,
/// Cap, in seconds, on a single rate-limit wait; the server-provided
/// `retry-after` value is clamped to this.
pub max_rate_limit_wait: u64,
}
impl Default for ClientConfig {
    /// Builds the stock configuration: a 30 second timeout, 3 retries, a
    /// `gdelt-cli/<crate version>` user agent, no waiting on rate limits, and
    /// a 120 second cap on any single rate-limit wait.
    fn default() -> Self {
        // The crate version is baked in at compile time by Cargo.
        let user_agent = format!("gdelt-cli/{}", env!("CARGO_PKG_VERSION"));
        let timeout = Duration::from_secs(30);

        Self {
            timeout,
            retries: 3,
            user_agent,
            wait_on_rate_limit: false,
            max_rate_limit_wait: 120,
        }
    }
}
/// HTTP client wrapper used to talk to the GDELT endpoints.
///
/// Pairs a configured `reqwest::Client` with the [`ClientConfig`] it was
/// built from, so request methods can consult the retry and rate-limit
/// settings.
#[derive(Debug, Clone)]
pub struct GdeltClient {
// Underlying reqwest client, built once in `with_config`.
client: Client,
// Settings this client was constructed with.
config: ClientConfig,
}
impl GdeltClient {
/// Creates a client configured with [`ClientConfig::default`].
///
/// # Errors
/// Propagates any error returned by [`GdeltClient::with_config`].
pub fn new() -> Result<Self> {
    let defaults = ClientConfig::default();
    Self::with_config(defaults)
}
/// Creates a client from an explicit configuration.
///
/// The underlying `reqwest` client is built with the configured timeout and
/// user agent, and with gzip and brotli response decoding enabled.
///
/// # Errors
/// Returns `GdeltError::Network` if the `reqwest` builder fails.
pub fn with_config(config: ClientConfig) -> Result<Self> {
    let builder = Client::builder()
        .timeout(config.timeout)
        .user_agent(&config.user_agent)
        .gzip(true)
        .brotli(true);

    match builder.build() {
        Ok(client) => Ok(Self { client, config }),
        Err(source) => Err(GdeltError::Network(source)),
    }
}
pub fn with_timeout(timeout: Duration) -> Result<Self> {
let mut config = ClientConfig::default();
config.timeout = timeout;
Self::with_config(config)
}
/// Returns a reference to the underlying `reqwest::Client`.
///
/// Requests sent directly through the returned client do not go through
/// the retry and rate-limit handling implemented in `get`.
pub fn inner(&self) -> &Client {
&self.client
}
/// Sends a GET request to `url`, retrying transient failures.
///
/// Two independent retry budgets apply:
///
/// * **Transient failures** (5xx responses and request timeouts) are retried
///   up to `config.retries` times with exponential backoff
///   (500 ms, 1 s, 2 s, ...).
/// * **Rate limiting** (HTTP 429) is retried up to three times, but only
///   when `config.wait_on_rate_limit` is set. Each wait lasts the server's
///   `retry-after` value in seconds (60 if the header is missing or not a
///   plain integer), capped at `config.max_rate_limit_wait`. Progress is
///   reported on stderr.
///
/// A rate-limit wait does not consume a transient-failure retry, so the
/// request is always re-sent after a wait has been served.
///
/// # Errors
/// * `GdeltError::RateLimited` on 429 when waiting is disabled or the
///   rate-limit budget is exhausted.
/// * `GdeltError::Api` on any other non-success status once it is not (or no
///   longer) retryable.
/// * `GdeltError::Network` on transport errors, including a timeout once the
///   retry budget is exhausted.
#[instrument(skip(self), fields(url = %url))]
pub async fn get(&self, url: &str) -> Result<Response> {
    const MAX_RATE_LIMIT_RETRIES: u32 = 3;
    const DEFAULT_RETRY_AFTER_SECS: u64 = 60;
    const BASE_BACKOFF_MS: u64 = 500;

    // Number of transient-failure retries performed so far.
    let mut attempt: u32 = 0;
    // Number of rate-limit waits performed so far.
    let mut rate_limit_retries: u32 = 0;

    loop {
        match self.client.get(url).send().await {
            Ok(response) => {
                let status = response.status();
                if status.is_success() {
                    return Ok(response);
                }

                if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
                    let retry_after = response
                        .headers()
                        .get("retry-after")
                        .and_then(|v| v.to_str().ok())
                        .and_then(|v| v.parse::<u64>().ok())
                        .unwrap_or(DEFAULT_RETRY_AFTER_SECS);

                    if self.config.wait_on_rate_limit
                        && rate_limit_retries < MAX_RATE_LIMIT_RETRIES
                    {
                        let wait_time = retry_after.min(self.config.max_rate_limit_wait);
                        eprintln!(
                            "Rate limited. Waiting {}s before retrying... (attempt {}/{})",
                            wait_time,
                            rate_limit_retries + 1,
                            MAX_RATE_LIMIT_RETRIES
                        );
                        // Count down one second at a time so the user sees progress.
                        for remaining in (1..=wait_time).rev() {
                            if remaining % 10 == 0 || remaining <= 5 {
                                eprint!("\r {} seconds remaining... ", remaining);
                            }
                            tokio::time::sleep(Duration::from_secs(1)).await;
                        }
                        eprintln!("\r Retrying now... ");
                        rate_limit_retries += 1;
                        // Re-send immediately: the wait above already served as
                        // the delay, and it must not eat into the transient budget.
                        continue;
                    }

                    return Err(GdeltError::RateLimited {
                        retry_after_secs: retry_after,
                    });
                }

                if !status.is_server_error() || attempt >= self.config.retries {
                    return Err(GdeltError::Api {
                        message: format!("Request failed: {}", status),
                        status_code: Some(status.as_u16()),
                    });
                }
                // Retryable 5xx: fall through to the backoff below.
            }
            Err(e) => {
                if !e.is_timeout() || attempt >= self.config.retries {
                    return Err(GdeltError::Network(e));
                }
                // Retryable timeout: fall through to the backoff below.
            }
        }

        attempt += 1;
        debug!("Retry attempt {} for {}", attempt, url);
        // Saturating arithmetic keeps very large `retries` values from
        // overflowing the exponential backoff computation.
        let backoff_ms = 2u64
            .saturating_pow(attempt - 1)
            .saturating_mul(BASE_BACKOFF_MS);
        tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
    }
}
/// Fetches `url` and deserializes the response body as JSON into `T`.
///
/// Before parsing, the body is sanity-checked so that non-JSON replies
/// produce a readable error instead of a raw parse failure:
///
/// * an HTML reply (by `content-type` or by a leading `<!DOCTYPE` / `<html`)
///   is reported as an unavailable or deprecated endpoint;
/// * any other body that does not start with `{` or `[` is reported as an
///   API error carrying the (possibly truncated) body text.
///
/// # Errors
/// * Any error from [`GdeltClient::get`].
/// * `GdeltError::Network` if reading the body fails.
/// * `GdeltError::Api` for the non-JSON replies described above.
/// * `GdeltError::Json` if the body is not valid JSON for `T`.
pub async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> Result<T> {
    // Maximum number of body bytes echoed back in an error message.
    const MAX_ERROR_PREVIEW_BYTES: usize = 200;

    let response = self.get(url).await?;
    // Inspect the header before `text()` consumes the response.
    let html_content_type = response
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
        .map_or(false, |ct| ct.contains("text/html"));
    let text = response.text().await.map_err(GdeltError::Network)?;
    let body = text.trim();

    if html_content_type || body.starts_with("<!DOCTYPE") || body.starts_with("<html") {
        return Err(GdeltError::Api {
            message: "API returned HTML instead of JSON. The endpoint may be unavailable or deprecated.".to_string(),
            status_code: None,
        });
    }

    if !body.starts_with('{') && !body.starts_with('[') {
        let display_msg = if body.len() > MAX_ERROR_PREVIEW_BYTES {
            // Back off to the nearest char boundary: slicing through the
            // middle of a multi-byte UTF-8 character would panic.
            let mut end = MAX_ERROR_PREVIEW_BYTES;
            while !body.is_char_boundary(end) {
                end -= 1;
            }
            format!("{}...", &body[..end])
        } else {
            body.to_string()
        };
        return Err(GdeltError::Api {
            message: format!("API error: {}", display_msg),
            status_code: None,
        });
    }

    serde_json::from_str(&text).map_err(GdeltError::Json)
}
/// Fetches `url` and returns the response body as text.
///
/// # Errors
/// Propagates errors from [`GdeltClient::get`]; a failure while reading the
/// body is returned as `GdeltError::Network`.
pub async fn get_text(&self, url: &str) -> Result<String> {
    self.get(url)
        .await?
        .text()
        .await
        .map_err(GdeltError::Network)
}
/// Fetches `url` and returns the raw response body.
///
/// # Errors
/// Propagates errors from [`GdeltClient::get`]; a failure while reading the
/// body is returned as `GdeltError::Network`.
pub async fn get_bytes(&self, url: &str) -> Result<bytes::Bytes> {
    let body = self.get(url).await?.bytes().await;
    match body {
        Ok(payload) => Ok(payload),
        Err(source) => Err(GdeltError::Network(source)),
    }
}
/// Downloads the resource at `url` and returns its bytes.
///
/// This is a thin alias for [`GdeltClient::get_bytes`].
///
/// # Errors
/// Same as [`GdeltClient::get_bytes`].
pub async fn download_file(&self, url: &str) -> Result<bytes::Bytes> {
    let payload = self.get_bytes(url).await?;
    Ok(payload)
}
pub fn build_url(base: &str, params: &[(&str, &str)]) -> String {
if params.is_empty() {
return base.to_string();
}
let query: Vec<String> = params
.iter()
.filter(|(_, v)| !v.is_empty())
.map(|(k, v)| format!("{}={}", k, urlencoding::encode(v)))
.collect();
if query.is_empty() {
base.to_string()
} else {
format!("{}?{}", base, query.join("&"))
}
}
}
impl Default for GdeltClient {
    /// Builds a client via [`GdeltClient::new`].
    ///
    /// # Panics
    /// Panics if the underlying client cannot be constructed, since `Default`
    /// has no way to report the error.
    fn default() -> Self {
        let built = Self::new();
        built.expect("Failed to create default GDELT client")
    }
}
/// Minimal URL-encoding helper built on the `url` crate.
mod urlencoding {
    use url::form_urlencoded::byte_serialize;

    /// Form-urlencodes `s` (spaces become `+`) and returns the result as an
    /// owned string.
    pub fn encode(s: &str) -> String {
        let mut encoded = String::with_capacity(s.len());
        // The serializer yields the output in string chunks; stitch them together.
        for chunk in byte_serialize(s.as_bytes()) {
            encoded.push_str(chunk);
        }
        encoded
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL shared by every `build_url` test.
    const BASE: &str = "https://example.com";

    /// With no parameters the base URL comes back untouched.
    #[test]
    fn test_build_url_no_params() {
        assert_eq!(GdeltClient::build_url(BASE, &[]), "https://example.com");
    }

    /// Parameters are joined as `key=value` pairs in the order given.
    #[test]
    fn test_build_url_with_params() {
        let params = [("q", "test"), ("n", "10")];
        assert_eq!(
            GdeltClient::build_url(BASE, &params),
            "https://example.com?q=test&n=10"
        );
    }

    /// Spaces in values are form-encoded as `+`.
    #[test]
    fn test_build_url_encodes_spaces() {
        let params = [("q", "hello world")];
        assert_eq!(
            GdeltClient::build_url(BASE, &params),
            "https://example.com?q=hello+world"
        );
    }
}