use crate::cache::{create_cache_entry, generate_cache_key, hash_string, Cache, MemoryCache};
use crate::error::{Error, Result};
use crate::types::*;
use crate::version::{build_user_agent, check_api_version_compatibility};
use rand::Rng;
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, CONTENT_TYPE, USER_AGENT};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::time::sleep;
use tracing::warn;
/// Computes the delay to wait before retry number `attempt` (1-based).
///
/// The base delay doubles with every attempt (1s, 2s, 4s, ...) and is capped at
/// 30 seconds. A random jitter of up to 25% of the base delay is added on top.
///
/// `attempt == 0` is treated like the first attempt, and arbitrarily large
/// attempt numbers simply yield the capped delay.
fn calculate_backoff(attempt: u32) -> Duration {
    // 2^5 = 32 already exceeds the 30s cap, so clamping the exponent changes no
    // in-range result while ruling out both the `attempt - 1` underflow and the
    // `pow` overflow for very large attempt counts.
    let exponent = attempt.saturating_sub(1).min(5);
    let base_secs = 2u64.pow(exponent).min(30);
    let jitter_ms = rand::rng().random_range(0..=(base_secs * 250));
    Duration::from_millis(base_secs * 1000 + jitter_ms)
}
/// Base URL a new `ClientBuilder` starts with.
const DEFAULT_BASE_URL: &str = "https://api.refyne.uk";
/// Request timeout, in seconds, a new `ClientBuilder` starts with.
const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// Retry limit a new `ClientBuilder` starts with.
const DEFAULT_MAX_RETRIES: u32 = 3;
/// Collects the settings used to construct a `Client`.
pub struct ClientBuilder {
// API key; `build` rejects an empty value.
api_key: String,
// Base URL that request paths are appended to (the setter strips trailing slashes).
base_url: String,
// Timeout handed to the underlying `reqwest` client.
timeout: Duration,
// Retry limit copied into the built `Client`.
max_retries: u32,
// Custom cache implementation; `build` falls back to `MemoryCache::default()` when `None`.
cache: Option<Arc<dyn Cache>>,
// Whether the built client uses the cache at all.
cache_enabled: bool,
// Optional suffix passed to `build_user_agent`.
user_agent_suffix: Option<String>,
}
impl ClientBuilder {
pub fn new(api_key: impl Into<String>) -> Self {
Self {
api_key: api_key.into(),
base_url: DEFAULT_BASE_URL.to_string(),
timeout: Duration::from_secs(DEFAULT_TIMEOUT_SECS),
max_retries: DEFAULT_MAX_RETRIES,
cache: None,
cache_enabled: true,
user_agent_suffix: None,
}
}
pub fn base_url(mut self, url: impl Into<String>) -> Self {
self.base_url = url.into().trim_end_matches('/').to_string();
self
}
pub fn timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
pub fn max_retries(mut self, retries: u32) -> Self {
self.max_retries = retries;
self
}
pub fn cache(mut self, cache: Arc<dyn Cache>) -> Self {
self.cache = Some(cache);
self
}
pub fn cache_enabled(mut self, enabled: bool) -> Self {
self.cache_enabled = enabled;
self
}
pub fn user_agent_suffix(mut self, suffix: impl Into<String>) -> Self {
self.user_agent_suffix = Some(suffix.into());
self
}
pub fn build(self) -> Result<Client> {
if self.api_key.is_empty() {
return Err(Error::Config("API key is required".into()));
}
if !self.base_url.starts_with("https://") {
warn!(
base_url = %self.base_url,
"API base URL is not using HTTPS. This is insecure."
);
}
let http_client = reqwest::Client::builder()
.timeout(self.timeout)
.build()
.map_err(Error::Http)?;
let cache: Arc<dyn Cache> = self
.cache
.unwrap_or_else(|| Arc::new(MemoryCache::default()));
let user_agent = build_user_agent(self.user_agent_suffix.as_deref());
let auth_hash = hash_string(&self.api_key);
Ok(Client {
api_key: self.api_key,
base_url: self.base_url,
http_client,
cache,
cache_enabled: self.cache_enabled,
user_agent,
max_retries: self.max_retries,
auth_hash,
api_version_checked: Arc::new(AtomicBool::new(false)),
})
}
}
/// API client holding the connection settings, HTTP client and response cache.
pub struct Client {
// Sent as `Authorization: Bearer <api_key>` on every request.
api_key: String,
// Prefix that request paths are appended to.
base_url: String,
// Underlying `reqwest` client.
http_client: reqwest::Client,
// Cache consulted and filled for GET requests.
cache: Arc<dyn Cache>,
// When false, the cache is neither read nor written.
cache_enabled: bool,
// Value of the `User-Agent` header.
user_agent: String,
// Retry limit for network errors, 429 and 5xx responses.
max_retries: u32,
// Result of `hash_string(api_key)`; passed to `generate_cache_key`.
auth_hash: String,
// Set to true once the first response has been inspected for `X-API-Version`.
api_version_checked: Arc<AtomicBool>,
}
impl Client {
    /// Returns a [`ClientBuilder`] seeded with `api_key`.
    pub fn builder(api_key: impl Into<String>) -> ClientBuilder {
        ClientBuilder::new(api_key)
    }

    /// Returns a view grouping the job-related calls.
    pub fn jobs(&self) -> JobsClient<'_> {
        JobsClient { client: self }
    }

    /// Returns a view grouping the schema-related calls.
    pub fn schemas(&self) -> SchemasClient<'_> {
        SchemasClient { client: self }
    }

    /// Returns a view grouping the site-related calls.
    pub fn sites(&self) -> SitesClient<'_> {
        SitesClient { client: self }
    }

    /// Returns a view grouping the API-key calls.
    pub fn keys(&self) -> KeysClient<'_> {
        KeysClient { client: self }
    }

    /// Returns a view grouping the LLM configuration calls.
    pub fn llm(&self) -> LlmClient<'_> {
        LlmClient { client: self }
    }

    /// Returns a view grouping the webhook calls.
    pub fn webhooks(&self) -> WebhooksClient<'_> {
        WebhooksClient { client: self }
    }

    /// Sends `POST /api/v1/extract` with `request` as the JSON body.
    pub async fn extract(&self, request: ExtractRequest) -> Result<ExtractResponse> {
        self.post("/api/v1/extract", &request).await
    }

    /// Sends `POST /api/v1/crawl` with `request` as the JSON body.
    pub async fn crawl(&self, request: CrawlRequest) -> Result<CrawlJobCreated> {
        self.post("/api/v1/crawl", &request).await
    }

    /// Sends `POST /api/v1/analyze` with `request` as the JSON body.
    pub async fn analyze(&self, request: AnalyzeRequest) -> Result<AnalyzeResponse> {
        self.post("/api/v1/analyze", &request).await
    }

    /// Sends `GET /api/v1/usage`.
    pub async fn get_usage(&self) -> Result<GetUsageOutputBody> {
        self.get("/api/v1/usage").await
    }

    /// Sends `GET /api/v1/jobs`, adding `limit` / `offset` query parameters
    /// for whichever of the two is `Some`.
    pub async fn list_jobs(&self, limit: Option<u32>, offset: Option<u32>) -> Result<JobList> {
        let path = Self::with_pagination("/api/v1/jobs".to_string(), limit, offset);
        self.get(&path).await
    }

    /// Sends `GET /api/v1/jobs/{id}`, bypassing the cache lookup.
    pub async fn get_job(&self, id: &str) -> Result<Job> {
        self.get_skip_cache(&format!("/api/v1/jobs/{}", id)).await
    }

    /// Sends `GET /api/v1/jobs/{id}/results`, bypassing the cache lookup.
    /// `merge = true` appends `?merge=true` to the request.
    pub async fn get_job_results(&self, id: &str, merge: bool) -> Result<JobResults> {
        let path = if merge {
            format!("/api/v1/jobs/{}/results?merge=true", id)
        } else {
            format!("/api/v1/jobs/{}/results", id)
        };
        self.get_skip_cache(&path).await
    }

    /// Sends `GET /api/v1/jobs/{id}/download`.
    pub async fn download_job(&self, id: &str) -> Result<GetJobResultsDownloadOutputBody> {
        self.get(&format!("/api/v1/jobs/{}/download", id)).await
    }

    /// Sends `GET /api/v1/jobs/{id}/crawl-map`.
    pub async fn get_job_crawl_map(&self, id: &str) -> Result<GetCrawlMapOutputBody> {
        self.get(&format!("/api/v1/jobs/{}/crawl-map", id)).await
    }

    /// Sends `GET /api/v1/jobs/{id}/debug-capture`.
    pub async fn get_job_debug_capture(&self, id: &str) -> Result<GetJobDebugCaptureOutputBody> {
        self.get(&format!("/api/v1/jobs/{}/debug-capture", id)).await
    }

    /// Sends `GET /api/v1/jobs/{id}/webhooks`.
    pub async fn get_job_webhook_deliveries(
        &self,
        id: &str,
    ) -> Result<GetJobWebhookDeliveriesOutputBody> {
        self.get(&format!("/api/v1/jobs/{}/webhooks", id)).await
    }

    /// Sends `GET /api/v1/schemas`.
    pub async fn list_schemas(&self) -> Result<SchemaList> {
        self.get("/api/v1/schemas").await
    }

    /// Sends `GET /api/v1/schemas/{id}`.
    pub async fn get_schema(&self, id: &str) -> Result<Schema> {
        self.get(&format!("/api/v1/schemas/{}", id)).await
    }

    /// Sends `POST /api/v1/schemas` with `request` as the JSON body.
    pub async fn create_schema(&self, request: CreateSchemaRequest) -> Result<Schema> {
        self.post("/api/v1/schemas", &request).await
    }

    /// Sends `PUT /api/v1/schemas/{id}` with `request` as the JSON body.
    pub async fn update_schema(&self, id: &str, request: CreateSchemaRequest) -> Result<Schema> {
        self.put(&format!("/api/v1/schemas/{}", id), &request).await
    }

    /// Sends `DELETE /api/v1/schemas/{id}`.
    pub async fn delete_schema(&self, id: &str) -> Result<()> {
        self.delete(&format!("/api/v1/schemas/{}", id)).await
    }

    /// Sends `GET /api/v1/sites`.
    pub async fn list_sites(&self) -> Result<SiteList> {
        self.get("/api/v1/sites").await
    }

    /// Sends `GET /api/v1/sites/{id}`.
    pub async fn get_site(&self, id: &str) -> Result<Site> {
        self.get(&format!("/api/v1/sites/{}", id)).await
    }

    /// Sends `POST /api/v1/sites` with `request` as the JSON body.
    pub async fn create_site(&self, request: CreateSiteRequest) -> Result<Site> {
        self.post("/api/v1/sites", &request).await
    }

    /// Sends `PUT /api/v1/sites/{id}` with `request` as the JSON body.
    pub async fn update_site(&self, id: &str, request: CreateSiteRequest) -> Result<Site> {
        self.put(&format!("/api/v1/sites/{}", id), &request).await
    }

    /// Sends `DELETE /api/v1/sites/{id}`.
    pub async fn delete_site(&self, id: &str) -> Result<()> {
        self.delete(&format!("/api/v1/sites/{}", id)).await
    }

    /// Sends `GET /api/v1/keys`.
    pub async fn list_keys(&self) -> Result<ApiKeyList> {
        self.get("/api/v1/keys").await
    }

    /// Sends `POST /api/v1/keys` with the JSON body `{"name": name}`.
    pub async fn create_key(&self, name: &str) -> Result<ApiKeyCreated> {
        self.post("/api/v1/keys", &serde_json::json!({"name": name}))
            .await
    }

    /// Sends `DELETE /api/v1/keys/{id}`.
    pub async fn revoke_key(&self, id: &str) -> Result<()> {
        self.delete(&format!("/api/v1/keys/{}", id)).await
    }

    /// Sends `GET /api/v1/llm/providers`.
    pub async fn list_providers(&self) -> Result<ProvidersResponse> {
        self.get("/api/v1/llm/providers").await
    }

    /// Sends `GET /api/v1/llm/keys`.
    pub async fn list_llm_keys(&self) -> Result<LlmKeyList> {
        self.get("/api/v1/llm/keys").await
    }

    /// Sends `PUT /api/v1/llm/keys` with `request` as the JSON body.
    pub async fn upsert_llm_key(&self, request: UpsertLlmKeyRequest) -> Result<LlmKey> {
        self.put("/api/v1/llm/keys", &request).await
    }

    /// Sends `DELETE /api/v1/llm/keys/{id}`.
    pub async fn delete_llm_key(&self, id: &str) -> Result<()> {
        self.delete(&format!("/api/v1/llm/keys/{}", id)).await
    }

    /// Sends `GET /api/v1/llm/chain`.
    pub async fn get_llm_chain(&self) -> Result<LlmChain> {
        self.get("/api/v1/llm/chain").await
    }

    /// Sends `PUT /api/v1/llm/chain` with the JSON body `{"chain": chain}`.
    pub async fn set_llm_chain(&self, chain: Vec<LlmChainEntry>) -> Result<()> {
        self.put("/api/v1/llm/chain", &serde_json::json!({"chain": chain}))
            .await
    }

    /// Sends `GET /api/v1/llm/models/{provider}`.
    pub async fn list_models(&self, provider: &str) -> Result<ModelList> {
        self.get(&format!("/api/v1/llm/models/{}", provider)).await
    }

    /// Sends `GET /api/v1/webhooks`.
    pub async fn list_webhooks(&self) -> Result<ListWebhooksOutputBody> {
        self.get("/api/v1/webhooks").await
    }

    /// Sends `GET /api/v1/webhooks/{id}`.
    pub async fn get_webhook(&self, id: &str) -> Result<WebhookResponse> {
        self.get(&format!("/api/v1/webhooks/{}", id)).await
    }

    /// Sends `POST /api/v1/webhooks` with `input` as the JSON body.
    pub async fn create_webhook(&self, input: WebhookInput) -> Result<WebhookResponse> {
        self.post("/api/v1/webhooks", &input).await
    }

    /// Sends `PUT /api/v1/webhooks/{id}` with `input` as the JSON body.
    pub async fn update_webhook(&self, id: &str, input: WebhookInput) -> Result<WebhookResponse> {
        self.put(&format!("/api/v1/webhooks/{}", id), &input).await
    }

    /// Sends `DELETE /api/v1/webhooks/{id}`.
    pub async fn delete_webhook(&self, id: &str) -> Result<()> {
        self.delete(&format!("/api/v1/webhooks/{}", id)).await
    }

    /// Sends `GET /api/v1/webhooks/{id}/deliveries`, adding `limit` / `offset`
    /// query parameters for whichever of the two is `Some`.
    pub async fn list_webhook_deliveries(
        &self,
        id: &str,
        limit: Option<u32>,
        offset: Option<u32>,
    ) -> Result<ListWebhookDeliveriesOutputBody> {
        let path = Self::with_pagination(
            format!("/api/v1/webhooks/{}/deliveries", id),
            limit,
            offset,
        );
        self.get(&path).await
    }

    /// Sends `GET /health`.
    pub async fn health(&self) -> Result<HealthCheckOutputBody> {
        self.get("/health").await
    }

    /// Sends `GET /api/v1/cleaners`.
    pub async fn list_cleaners(&self) -> Result<ListCleanersOutputBody> {
        self.get("/api/v1/cleaners").await
    }

    /// Sends `GET /api/v1/pricing/tiers`.
    pub async fn get_pricing_tiers(&self) -> Result<ListTierLimitsOutputBody> {
        self.get("/api/v1/pricing/tiers").await
    }

    /// Appends `?limit=..&offset=..` to `path`, including only the parameters
    /// that are `Some`; returns `path` unchanged when both are `None`.
    fn with_pagination(mut path: String, limit: Option<u32>, offset: Option<u32>) -> String {
        let mut params = Vec::with_capacity(2);
        if let Some(l) = limit {
            params.push(format!("limit={}", l));
        }
        if let Some(o) = offset {
            params.push(format!("offset={}", o));
        }
        if !params.is_empty() {
            let query = params.join("&");
            path.push('?');
            path.push_str(&query);
        }
        path
    }

    /// GET that may be answered from the cache.
    async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
        self.request("GET", path, None::<&()>, false).await
    }

    /// GET that always goes to the network (the cache lookup is skipped).
    async fn get_skip_cache<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
        self.request("GET", path, None::<&()>, true).await
    }

    /// POST with `body` serialized as JSON.
    async fn post<T: serde::de::DeserializeOwned, B: serde::Serialize>(
        &self,
        path: &str,
        body: &B,
    ) -> Result<T> {
        self.request("POST", path, Some(body), false).await
    }

    /// PUT with `body` serialized as JSON.
    async fn put<T: serde::de::DeserializeOwned, B: serde::Serialize>(
        &self,
        path: &str,
        body: &B,
    ) -> Result<T> {
        self.request("PUT", path, Some(body), false).await
    }

    /// DELETE without a body. The response body is not read on success; a
    /// non-success status is converted with `Error::from_response`.
    async fn delete(&self, path: &str) -> Result<()> {
        let url = format!("{}{}", self.base_url, path);
        let response = self
            .execute_with_retry("DELETE", &url, None::<&()>, 1)
            .await?;
        if !response.status().is_success() {
            return Err(Error::from_response(response).await);
        }
        Ok(())
    }

    /// Performs a request against `base_url + path` and deserializes the JSON
    /// response into `T`.
    ///
    /// For GET requests with caching enabled, a cached entry is returned
    /// without touching the network unless `skip_cache` is set. A successful
    /// GET response is offered to the cache either way (`create_cache_entry`
    /// decides from the `Cache-Control` header whether an entry is produced).
    ///
    /// The first response seen by this client is also checked for the
    /// `X-API-Version` header.
    ///
    /// # Errors
    ///
    /// Returns whatever `execute_with_retry` or the version check fails with,
    /// the error built by `Error::from_response` for non-success statuses,
    /// [`Error::Http`] if the body cannot be read as JSON, and [`Error::Json`]
    /// if the JSON does not match `T`.
    async fn request<T, B>(
        &self,
        method: &str,
        path: &str,
        body: Option<&B>,
        skip_cache: bool,
    ) -> Result<T>
    where
        T: serde::de::DeserializeOwned,
        B: serde::Serialize,
    {
        let url = format!("{}{}", self.base_url, path);
        let cache_key = generate_cache_key(method, &url, Some(&self.auth_hash));
        let cacheable = method == "GET" && self.cache_enabled;

        if cacheable && !skip_cache {
            if let Some(entry) = self.cache.get(&cache_key) {
                return serde_json::from_value(entry.value).map_err(Error::Json);
            }
        }

        let response = self.execute_with_retry(method, &url, body, 1).await?;

        // `swap` returns the previous value, so only the first caller to get
        // here performs the version check.
        if !self.api_version_checked.swap(true, Ordering::SeqCst) {
            if let Some(api_version) = response.headers().get("X-API-Version") {
                if let Ok(v) = api_version.to_str() {
                    check_api_version_compatibility(v)?;
                }
            } else {
                warn!("API did not return X-API-Version header");
            }
        }

        if !response.status().is_success() {
            return Err(Error::from_response(response).await);
        }

        // Copy the header out first: reading the body consumes the response.
        let cache_control = response
            .headers()
            .get("Cache-Control")
            .and_then(|v| v.to_str().ok())
            .map(|s| s.to_string());
        let value: serde_json::Value = response.json().await.map_err(Error::Http)?;

        if cacheable {
            if let Some(entry) = create_cache_entry(value.clone(), cache_control.as_deref()) {
                self.cache.set(&cache_key, entry);
            }
        }
        serde_json::from_value(value).map_err(Error::Json)
    }

    /// Sends the request, retrying while `attempt <= max_retries`.
    ///
    /// `attempt` is the 1-based number of the first attempt (callers pass `1`).
    /// Retries happen on:
    /// * network errors other than timeouts (timeouts fail immediately with
    ///   [`Error::Timeout`]), after a `calculate_backoff` delay;
    /// * HTTP 429, after the number of seconds given by `Retry-After`
    ///   (1 second when the header is missing or is not an integer);
    /// * HTTP 5xx, after a `calculate_backoff` delay.
    ///
    /// Once retries are exhausted the last response is returned as-is, or
    /// [`Error::Http`] for a network error.
    ///
    /// # Errors
    ///
    /// Also returns [`Error::Config`] when the API key or user agent cannot be
    /// encoded as an HTTP header value, or `method` is not a valid HTTP method.
    async fn execute_with_retry<B: serde::Serialize>(
        &self,
        method: &str,
        url: &str,
        body: Option<&B>,
        attempt: u32,
    ) -> Result<reqwest::Response> {
        // These conversions depend on user-supplied configuration, so report
        // a failure as an error instead of panicking.
        let http_method = method
            .parse::<reqwest::Method>()
            .map_err(|_| Error::Config("Invalid HTTP method".into()))?;
        let authorization = HeaderValue::from_str(&format!("Bearer {}", self.api_key))
            .map_err(|_| {
                Error::Config(
                    "API key contains characters that are not valid in an HTTP header".into(),
                )
            })?;
        let user_agent = HeaderValue::from_str(&self.user_agent).map_err(|_| {
            Error::Config(
                "User agent contains characters that are not valid in an HTTP header".into(),
            )
        })?;

        let mut headers = HeaderMap::new();
        headers.insert(AUTHORIZATION, authorization);
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
        headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
        headers.insert(USER_AGENT, user_agent);

        let mut attempt = attempt;
        loop {
            // A request builder is consumed by `send`, so build one per attempt.
            let mut req = self
                .http_client
                .request(http_method.clone(), url)
                .headers(headers.clone());
            if let Some(b) = body {
                req = req.json(b);
            }
            let may_retry = attempt <= self.max_retries;

            let response = match req.send().await {
                Ok(r) => r,
                Err(e) if e.is_timeout() => return Err(Error::Timeout),
                Err(e) if may_retry => {
                    let backoff = calculate_backoff(attempt);
                    warn!(
                        error = %e,
                        attempt = attempt,
                        max_retries = self.max_retries,
                        "Network error. Retrying in {:?}",
                        backoff
                    );
                    sleep(backoff).await;
                    attempt += 1;
                    continue;
                }
                Err(e) => return Err(Error::Http(e)),
            };

            let status = response.status();
            if may_retry && status.as_u16() == 429 {
                let retry_after: u64 = response
                    .headers()
                    .get("Retry-After")
                    .and_then(|v| v.to_str().ok())
                    .and_then(|v| v.parse().ok())
                    .unwrap_or(1);
                warn!(
                    retry_after = retry_after,
                    attempt = attempt,
                    max_retries = self.max_retries,
                    "Rate limited. Retrying"
                );
                sleep(Duration::from_secs(retry_after)).await;
                attempt += 1;
                continue;
            }
            if may_retry && status.is_server_error() {
                let backoff = calculate_backoff(attempt);
                warn!(
                    status = %status,
                    attempt = attempt,
                    max_retries = self.max_retries,
                    "Server error. Retrying in {:?}",
                    backoff
                );
                sleep(backoff).await;
                attempt += 1;
                continue;
            }
            return Ok(response);
        }
    }
}
/// Borrowed view over a `Client` that groups the job-related calls.
pub struct JobsClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> JobsClient<'a> {
pub async fn list(&self, limit: Option<u32>, offset: Option<u32>) -> Result<JobList> {
self.client.list_jobs(limit, offset).await
}
pub async fn get(&self, id: &str) -> Result<Job> {
self.client.get_job(id).await
}
pub async fn get_results(&self, id: &str, merge: bool) -> Result<JobResults> {
self.client.get_job_results(id, merge).await
}
pub async fn download(&self, id: &str) -> Result<GetJobResultsDownloadOutputBody> {
self.client.download_job(id).await
}
pub async fn get_crawl_map(&self, id: &str) -> Result<GetCrawlMapOutputBody> {
self.client.get_job_crawl_map(id).await
}
pub async fn get_debug_capture(&self, id: &str) -> Result<GetJobDebugCaptureOutputBody> {
self.client.get_job_debug_capture(id).await
}
pub async fn get_webhook_deliveries(
&self,
id: &str,
) -> Result<GetJobWebhookDeliveriesOutputBody> {
self.client.get_job_webhook_deliveries(id).await
}
}
/// Borrowed view over a `Client` that groups the schema-related calls.
pub struct SchemasClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> SchemasClient<'a> {
pub async fn list(&self) -> Result<SchemaList> {
self.client.list_schemas().await
}
pub async fn get(&self, id: &str) -> Result<Schema> {
self.client.get_schema(id).await
}
pub async fn create(&self, request: CreateSchemaRequest) -> Result<Schema> {
self.client.create_schema(request).await
}
pub async fn update(&self, id: &str, request: CreateSchemaRequest) -> Result<Schema> {
self.client.update_schema(id, request).await
}
pub async fn delete(&self, id: &str) -> Result<()> {
self.client.delete_schema(id).await
}
}
/// Borrowed view over a `Client` that groups the site-related calls.
pub struct SitesClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> SitesClient<'a> {
pub async fn list(&self) -> Result<SiteList> {
self.client.list_sites().await
}
pub async fn get(&self, id: &str) -> Result<Site> {
self.client.get_site(id).await
}
pub async fn create(&self, request: CreateSiteRequest) -> Result<Site> {
self.client.create_site(request).await
}
pub async fn update(&self, id: &str, request: CreateSiteRequest) -> Result<Site> {
self.client.update_site(id, request).await
}
pub async fn delete(&self, id: &str) -> Result<()> {
self.client.delete_site(id).await
}
}
/// Borrowed view over a `Client` that groups the API-key calls.
pub struct KeysClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> KeysClient<'a> {
pub async fn list(&self) -> Result<ApiKeyList> {
self.client.list_keys().await
}
pub async fn create(&self, name: &str) -> Result<ApiKeyCreated> {
self.client.create_key(name).await
}
pub async fn revoke(&self, id: &str) -> Result<()> {
self.client.revoke_key(id).await
}
}
/// Borrowed view over a `Client` that groups the LLM configuration calls.
pub struct LlmClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> LlmClient<'a> {
pub async fn list_providers(&self) -> Result<ProvidersResponse> {
self.client.list_providers().await
}
pub async fn list_models(&self, provider: &str) -> Result<ModelList> {
self.client.list_models(provider).await
}
pub async fn list_keys(&self) -> Result<LlmKeyList> {
self.client.list_llm_keys().await
}
pub async fn upsert_key(&self, request: UpsertLlmKeyRequest) -> Result<LlmKey> {
self.client.upsert_llm_key(request).await
}
pub async fn delete_key(&self, id: &str) -> Result<()> {
self.client.delete_llm_key(id).await
}
pub async fn get_chain(&self) -> Result<LlmChain> {
self.client.get_llm_chain().await
}
pub async fn set_chain(&self, chain: Vec<LlmChainEntry>) -> Result<()> {
self.client.set_llm_chain(chain).await
}
}
/// Borrowed view over a `Client` that groups the webhook calls.
pub struct WebhooksClient<'a> {
// Client that every method forwards to.
client: &'a Client,
}
impl<'a> WebhooksClient<'a> {
pub async fn list(&self) -> Result<ListWebhooksOutputBody> {
self.client.list_webhooks().await
}
pub async fn get(&self, id: &str) -> Result<WebhookResponse> {
self.client.get_webhook(id).await
}
pub async fn create(&self, input: WebhookInput) -> Result<WebhookResponse> {
self.client.create_webhook(input).await
}
pub async fn update(&self, id: &str, input: WebhookInput) -> Result<WebhookResponse> {
self.client.update_webhook(id, input).await
}
pub async fn delete(&self, id: &str) -> Result<()> {
self.client.delete_webhook(id).await
}
pub async fn list_deliveries(
&self,
id: &str,
limit: Option<u32>,
offset: Option<u32>,
) -> Result<ListWebhookDeliveriesOutputBody> {
self.client.list_webhook_deliveries(id, limit, offset).await
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // An empty API key must be rejected with a configuration error.
    #[test]
    fn test_client_builder_requires_api_key() {
        let outcome = ClientBuilder::new("").build();
        assert!(outcome.is_err());
        match outcome {
            Err(Error::Config(msg)) => assert!(msg.contains("API key is required")),
            _ => panic!("Expected Config error"),
        }
    }

    // A fresh builder carries the module-level defaults.
    #[test]
    fn test_client_builder_default_values() {
        let fresh = ClientBuilder::new("test-key");
        assert_eq!(fresh.base_url, DEFAULT_BASE_URL);
        assert_eq!(fresh.timeout, Duration::from_secs(DEFAULT_TIMEOUT_SECS));
        assert_eq!(fresh.max_retries, DEFAULT_MAX_RETRIES);
        assert!(fresh.cache_enabled);
    }

    // A custom base URL is stored without its trailing slash.
    #[test]
    fn test_client_builder_custom_base_url() {
        let built = ClientBuilder::new("test-key")
            .base_url("https://custom.api.com/")
            .build();
        assert!(built.is_ok());
        assert_eq!(built.unwrap().base_url, "https://custom.api.com");
    }

    // The stored base URL never ends with a slash.
    #[test]
    fn test_client_builder_strips_trailing_slash() {
        let built = ClientBuilder::new("test-key")
            .base_url("https://api.example.com/")
            .build();
        assert!(built.is_ok());
        assert!(!built.unwrap().base_url.ends_with('/'));
    }

    // The timeout setter overrides the default.
    #[test]
    fn test_client_builder_custom_timeout() {
        let sixty_secs = Duration::from_secs(60);
        let configured = ClientBuilder::new("test-key").timeout(sixty_secs);
        assert_eq!(configured.timeout, Duration::from_secs(60));
    }

    // The retry setter overrides the default.
    #[test]
    fn test_client_builder_custom_max_retries() {
        let configured = ClientBuilder::new("test-key").max_retries(5);
        assert_eq!(configured.max_retries, 5);
    }

    // Disabling the cache on the builder carries over to the client.
    #[test]
    fn test_client_builder_cache_disabled() {
        let built = ClientBuilder::new("test-key").cache_enabled(false).build();
        assert!(built.is_ok());
        assert!(!built.unwrap().cache_enabled);
    }

    // The user-agent suffix ends up in the client's user agent string.
    #[test]
    fn test_client_builder_custom_user_agent_suffix() {
        let built = ClientBuilder::new("test-key")
            .user_agent_suffix("MyApp/1.0")
            .build();
        assert!(built.is_ok());
        assert!(built.unwrap().user_agent.contains("MyApp/1.0"));
    }

    // `Client::builder` is a working shortcut for `ClientBuilder::new`.
    #[test]
    fn test_client_builder_static_method() {
        assert!(Client::builder("test-key").build().is_ok());
    }

    // Every sub-client accessor can be called on a built client.
    #[test]
    fn test_client_has_sub_clients() {
        let api = Client::builder("test-key").build().unwrap();
        let _ = api.jobs();
        let _ = api.schemas();
        let _ = api.sites();
        let _ = api.keys();
        let _ = api.llm();
        let _ = api.webhooks();
    }

    // Pins the published default values.
    #[test]
    fn test_constants() {
        assert_eq!(DEFAULT_BASE_URL, "https://api.refyne.uk");
        assert_eq!(DEFAULT_TIMEOUT_SECS, 30);
        assert_eq!(DEFAULT_MAX_RETRIES, 3);
    }
}