opencellid 0.2.0

Rust client library for the OpenCellID API — sync and async clients with tracing, structured errors, and bounded I/O.
Documentation
//! Client builder and shared configuration.

use std::sync::Arc;
use std::time::Duration;

use url::Url;

use crate::error::{Error, Result};

// Async client implementation; compiled only when the `async` feature is on.
#[cfg(feature = "async")]
pub(crate) mod async_client;
// Blocking client implementation; compiled only when the `blocking` feature is on.
#[cfg(feature = "blocking")]
pub(crate) mod blocking;

/// Base URL used when the builder is not given one. Ends with `/`, which
/// `finalize_config` requires of every base URL.
const DEFAULT_BASE_URL: &str = "https://opencellid.org/";
/// `User-Agent` used when the caller does not set one:
/// `opencellid-rs/<crate version>`, resolved at compile time.
const DEFAULT_USER_AGENT: &str = concat!("opencellid-rs/", env!("CARGO_PKG_VERSION"));
/// Default per-request timeout in seconds, used when the builder's `timeout`
/// is left unset.
const DEFAULT_TIMEOUT_SECS: u64 = 30;

/// Maximum body size accepted by the OpenCellID upload endpoints (2 MiB).
pub(crate) const MAX_UPLOAD_BYTES: usize = 2 * 1024 * 1024;

/// Maximum response body (8 MiB) the client will buffer for non-dump
/// endpoints. Acts as a defence against a malicious or misconfigured upstream
/// returning unbounded data; OpenCellID's own answers fit comfortably under
/// this.
pub(crate) const MAX_RESPONSE_BYTES: usize = 8 * 1024 * 1024;

/// Cap on `MeasurementsPayload::measurements.len()` enforced before
/// serialising — prevents unbounded `Vec<u8>` allocation when a caller hands
/// us a huge batch.
pub(crate) const MAX_MEASUREMENTS_PER_UPLOAD: usize = 8_000;

/// Default ceiling on dump downloads (5 GiB). Acts as a defence against a
/// runaway upstream; the user can override it via
/// [`ClientBuilder::max_dump_bytes`].
pub(crate) const DEFAULT_MAX_DUMP_BYTES: u64 = 5 * 1024 * 1024 * 1024;

/// How many bytes of the response head the dump path reads before deciding
/// gzip vs JSON-error envelope. 8 KiB comfortably covers any plausible error
/// body advertised by OpenCellID.
pub(crate) const DOWNLOAD_SNIFF_BYTES: usize = 8 * 1024;

/// Streaming buffer size (64 KiB) for the blocking dump path.
#[cfg(feature = "blocking")]
pub(crate) const DUMP_READ_BUF_BYTES: usize = 64 * 1024;

/// Default connect timeout in seconds — fail fast on unreachable hosts even if
/// the overall request timeout is generous.
const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10;

/// Default per-request timeout for dump downloads, in seconds (1 hour). The
/// world export is hundreds of megabytes; the regular per-request `timeout`
/// (30 s by default) would abort the body read mid-stream. Override via
/// [`ClientBuilder::download_timeout`].
const DEFAULT_DOWNLOAD_TIMEOUT_SECS: u64 = 60 * 60;

/// Configuration shared by all client variants.
///
/// Produced by `ClientBuilder::finalize_config`, which validates the builder's
/// options and fills in defaults for everything left unset.
///
/// `Debug` is implemented manually to redact the API key.
///
/// `api_key` is held as `Arc<str>` so cheap clones survive across the client's
/// `Inner`. `reqwest::multipart::Form::text` requires `Cow<'static, str>` and
/// has no `'static`-friendly path for runtime-supplied keys, so each upload pays
/// for one `String` copy of the key.
#[derive(Clone)]
pub(crate) struct ClientConfig {
    /// OpenCellID API key. Never printed by the `Debug` impl.
    pub api_key: Arc<str>,
    /// Base URL for API requests; its path is validated to end with `/`.
    pub base_url: Url,
    /// Per-request timeout for regular (non-dump) calls.
    pub timeout: Duration,
    /// Separate, more generous timeout for dump downloads.
    pub download_timeout: Duration,
    /// Upper bound on the size of a dump download body, in bytes.
    pub max_dump_bytes: u64,
    /// Value of the HTTP `User-Agent` header.
    pub user_agent: String,
}

impl std::fmt::Debug for ClientConfig {
    /// Render every field verbatim except the API key, which is always shown
    /// as the placeholder `***`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure exhaustively so that a newly added field cannot be
        // silently left out of the debug output.
        let Self {
            api_key: _,
            base_url,
            timeout,
            download_timeout,
            max_dump_bytes,
            user_agent,
        } = self;

        let mut out = f.debug_struct("ClientConfig");
        out.field("api_key", &"***");
        out.field("base_url", base_url);
        out.field("timeout", timeout);
        out.field("download_timeout", download_timeout);
        out.field("max_dump_bytes", max_dump_bytes);
        out.field("user_agent", user_agent);
        out.finish()
    }
}

/// Builder for the OpenCellID clients.
///
/// Use [`ClientBuilder::build`] for the async [`crate::Client`] and
/// [`ClientBuilder::build_blocking`] for the [`crate::BlockingClient`]. Both
/// flavours can coexist in the same program.
///
/// `Debug` is implemented manually so that a builder printed in logs or panic
/// messages never reveals the API key.
#[derive(Clone)]
pub struct ClientBuilder {
    /// API key; required before building.
    api_key: Option<String>,
    /// Base URL override; `None` means the library default.
    base_url: Option<Url>,
    /// Per-request timeout override for regular calls.
    timeout: Option<Duration>,
    /// Timeout override for dump downloads.
    download_timeout: Option<Duration>,
    /// Override for the cap on dump body size, in bytes.
    max_dump_bytes: Option<u64>,
    /// `User-Agent` header override.
    user_agent: Option<String>,
}

impl std::fmt::Debug for ClientBuilder {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the `Some`/`None` shape so it stays visible whether a key has
        // been supplied, but never print the key itself.
        let api_key = self.api_key.as_ref().map(|_| "***");
        f.debug_struct("ClientBuilder")
            .field("api_key", &api_key)
            .field("base_url", &self.base_url)
            .field("timeout", &self.timeout)
            .field("download_timeout", &self.download_timeout)
            .field("max_dump_bytes", &self.max_dump_bytes)
            .field("user_agent", &self.user_agent)
            .finish()
    }
}

impl Default for ClientBuilder {
    fn default() -> Self {
        Self::new()
    }
}

impl ClientBuilder {
    /// Idle lifetime of pooled connections; shared by both client flavours so
    /// they cannot drift apart.
    #[cfg(any(feature = "async", feature = "blocking"))]
    const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(90);

    /// TCP keepalive duration handed to reqwest; shared by both client
    /// flavours.
    #[cfg(any(feature = "async", feature = "blocking"))]
    const TCP_KEEPALIVE: Duration = Duration::from_secs(60);

    /// Create an empty builder. The API key must be set before `build`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            api_key: None,
            base_url: None,
            timeout: None,
            download_timeout: None,
            max_dump_bytes: None,
            user_agent: None,
        }
    }

    /// Set the OpenCellID API key (required).
    ///
    /// A key that is empty or whitespace-only is rejected when the client is
    /// built. Surrounding whitespace is otherwise kept as given.
    #[must_use]
    pub fn api_key(mut self, key: impl Into<String>) -> Self {
        self.api_key = Some(key.into());
        self
    }

    /// Override the base URL. Defaults to `https://opencellid.org/`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Url`] if the string is not a valid URL.
    ///
    /// # Security
    ///
    /// The base URL controls where API keys are sent. Allowing user input here is
    /// an SSRF risk; if you accept it from configuration, validate the host
    /// against an allow-list. The library only requires that the URL parse, be
    /// usable as a base for joining endpoint paths, and end with `/` (the
    /// latter two are checked when the client is built); it does not enforce
    /// HTTPS or block private addresses.
    pub fn base_url(mut self, url: impl AsRef<str>) -> Result<Self> {
        self.base_url = Some(Url::parse(url.as_ref())?);
        Ok(self)
    }

    /// Set the per-request timeout (applied to every HTTP call by reqwest
    /// **except** dump downloads, which have their own timeout — see
    /// [`Self::download_timeout`]).
    #[must_use]
    pub fn timeout(mut self, timeout: Duration) -> Self {
        self.timeout = Some(timeout);
        self
    }

    /// Set the per-request timeout used for `download_dump` /
    /// `download_dump_to_path` / `list_daily_diffs` (default: 1 hour).
    ///
    /// Because the world export is hundreds of megabytes and the regular
    /// `timeout` would normally be tens of seconds, downloads use a separate,
    /// generous budget. Bump this if you expect very slow connections, or
    /// lower it if you only ever fetch country exports.
    #[must_use]
    pub fn download_timeout(mut self, timeout: Duration) -> Self {
        self.download_timeout = Some(timeout);
        self
    }

    /// Override the defensive cap on dump body size (default: 5 GiB).
    ///
    /// `download_dump` and `download_dump_to_path` reject responses larger
    /// than this. Useful in environments with limited disk where the world
    /// export is undesirable.
    #[must_use]
    pub fn max_dump_bytes(mut self, bytes: u64) -> Self {
        self.max_dump_bytes = Some(bytes);
        self
    }

    /// Set the HTTP `User-Agent` header.
    #[must_use]
    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
        self.user_agent = Some(ua.into());
        self
    }

    /// Validate the collected options and fill in defaults for unset ones.
    ///
    /// # Errors
    ///
    /// * [`Error::MissingConfig`] if no API key was supplied.
    /// * [`Error::InvalidInput`] if the API key is blank, if the base URL is
    ///   a cannot-be-a-base URL (e.g. `mailto:` or `data:`), or if its path
    ///   does not end with `/`.
    fn finalize_config(self) -> Result<ClientConfig> {
        let api_key = self.api_key.ok_or(Error::MissingConfig("api_key"))?;
        if api_key.trim().is_empty() {
            return Err(Error::InvalidInput("api_key must not be empty".into()));
        }

        let base_url = match self.base_url {
            Some(url) => url,
            None => Url::parse(DEFAULT_BASE_URL)?,
        };
        // Opaque URLs such as `data:x/` can have a "path" ending in '/', yet
        // nothing can be joined onto them. Reject them here instead of letting
        // every later request fail.
        if base_url.cannot_be_a_base() {
            return Err(Error::InvalidInput(
                "base_url must be a hierarchical URL (scheme://host/path/) that endpoint paths can be joined onto"
                    .into(),
            ));
        }
        if !base_url.path().ends_with('/') {
            return Err(Error::InvalidInput(
                "base_url must end with '/' so that endpoint paths are joined correctly".into(),
            ));
        }

        Ok(ClientConfig {
            api_key: Arc::<str>::from(api_key),
            base_url,
            timeout: self.timeout.unwrap_or(Duration::from_secs(DEFAULT_TIMEOUT_SECS)),
            download_timeout: self
                .download_timeout
                .unwrap_or(Duration::from_secs(DEFAULT_DOWNLOAD_TIMEOUT_SECS)),
            max_dump_bytes: self.max_dump_bytes.unwrap_or(DEFAULT_MAX_DUMP_BYTES),
            user_agent: self.user_agent.unwrap_or_else(|| DEFAULT_USER_AGENT.to_string()),
        })
    }

    /// Build the async [`crate::Client`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::MissingConfig`] / [`Error::InvalidInput`] for builder
    /// misconfiguration, or [`Error::Transport`] if the underlying HTTP client
    /// cannot be built (TLS misconfiguration, etc.).
    #[cfg(feature = "async")]
    #[cfg_attr(docsrs, doc(cfg(feature = "async")))]
    pub fn build(self) -> Result<crate::Client> {
        let cfg = self.finalize_config()?;
        let http = reqwest::Client::builder()
            .timeout(cfg.timeout)
            .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
            .pool_idle_timeout(Self::POOL_IDLE_TIMEOUT)
            .tcp_keepalive(Self::TCP_KEEPALIVE)
            .user_agent(&cfg.user_agent)
            .build()?;
        Ok(crate::Client::from_parts(cfg, http))
    }

    /// Build the [`crate::BlockingClient`].
    ///
    /// # Errors
    ///
    /// See [`Self::build`].
    #[cfg(feature = "blocking")]
    #[cfg_attr(docsrs, doc(cfg(feature = "blocking")))]
    pub fn build_blocking(self) -> Result<crate::BlockingClient> {
        let cfg = self.finalize_config()?;
        let http = reqwest::blocking::Client::builder()
            .timeout(cfg.timeout)
            .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
            .pool_idle_timeout(Self::POOL_IDLE_TIMEOUT)
            .tcp_keepalive(Self::TCP_KEEPALIVE)
            .user_agent(&cfg.user_agent)
            .build()?;
        Ok(crate::BlockingClient::from_parts(cfg, http))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Building without any API key reports the missing field by name.
    #[test]
    fn missing_api_key_errors() {
        let failure = ClientBuilder::new().finalize_config().unwrap_err();
        match failure {
            Error::MissingConfig(field) => assert_eq!(field, "api_key"),
            other => panic!("unexpected: {other:?}"),
        }
    }

    /// A whitespace-only key counts as empty.
    #[test]
    fn empty_api_key_errors() {
        let builder = ClientBuilder::new().api_key("   ");
        let outcome = builder.finalize_config();
        assert!(matches!(outcome, Err(Error::InvalidInput(_))));
    }

    /// Options left unset fall back to the module defaults.
    #[test]
    fn defaults_are_applied() {
        let builder = ClientBuilder::new().api_key("k");
        let cfg = builder.finalize_config().unwrap();
        assert_eq!(cfg.base_url.as_str(), DEFAULT_BASE_URL);
        assert_eq!(cfg.timeout, Duration::from_secs(DEFAULT_TIMEOUT_SECS));
        assert!(cfg.user_agent.starts_with("opencellid-rs/"));
    }

    /// A base URL whose path lacks the trailing slash is rejected.
    #[test]
    fn base_url_must_end_with_slash() {
        let builder = ClientBuilder::new()
            .api_key("k")
            .base_url("https://example.com/api")
            .unwrap();
        let outcome = builder.finalize_config();
        assert!(matches!(outcome, Err(Error::InvalidInput(_))));
    }

    /// Explicitly set timeout and user agent reach the final config.
    #[test]
    fn timeout_and_user_agent_propagate() {
        let wanted_timeout = Duration::from_millis(2_500);
        let builder = ClientBuilder::new()
            .api_key("k")
            .timeout(wanted_timeout)
            .user_agent("myapp/0.1");
        let cfg = builder.finalize_config().unwrap();
        assert_eq!(cfg.timeout, Duration::from_millis(2_500));
        assert_eq!(cfg.user_agent, "myapp/0.1");
    }

    /// Calling a setter twice keeps only the most recent value.
    #[test]
    fn last_setter_wins_for_api_key() {
        let builder = ClientBuilder::new().api_key("first").api_key("second");
        let cfg = builder.finalize_config().unwrap();
        assert_eq!(&*cfg.api_key, "second");
    }

    /// The finished config never prints the key through `Debug`.
    #[test]
    fn config_debug_redacts_api_key() {
        let builder = ClientBuilder::new().api_key("SECRET");
        let cfg = builder.finalize_config().unwrap();
        let printed = format!("{cfg:?}");
        assert!(!printed.contains("SECRET"), "debug output leaked api_key: {printed}");
        assert!(printed.contains("***"));
    }
}