Skip to main content

stygian_graph/adapters/
rest_api.rs

1//! REST API scraping adapter with authentication and pagination support.
2//!
3//! Implements [`crate::ports::ScrapingService`] for structured REST JSON APIs. Supports:
4//!
5//! - HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`, `HEAD`
6//! - Authentication: Bearer token, HTTP Basic, API key (header or query param)
7//! - Automatic pagination: offset/page, cursor, or RFC 8288 `Link` header
8//! - JSON response data extraction via dot-separated path
9//! - Custom request headers and query string parameters
10//! - Configurable retries with exponential backoff
11//!
12//! All per-request options live in `ServiceInput::params`; see the
13//! `RestApiAdapter::execute` docs for the full contract.
14//!
15//! # Example
16//!
17//! ```no_run
18//! use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
19//! use stygian_graph::ports::{ScrapingService, ServiceInput};
20//! use serde_json::json;
21//! use std::time::Duration;
22//!
23//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
24//! let adapter = RestApiAdapter::with_config(RestApiConfig {
25//!     timeout:      Duration::from_secs(20),
26//!     max_retries:  2,
27//!     ..Default::default()
28//! });
29//!
30//! let input = ServiceInput {
31//!     url: "https://api.github.com/repos/rust-lang/rust/issues".to_string(),
32//!     params: json!({
33//!         "auth": { "type": "bearer", "token": "ghp_..." },
34//!         "query": { "state": "open", "per_page": "30" },
35//!         "pagination": { "strategy": "link_header", "max_pages": 5 },
36//!         "response": { "data_path": "" }
37//!     }),
38//! };
39//! // let output = adapter.execute(input).await.unwrap();
40//! # });
41//! ```
42
43use std::collections::HashMap;
44use std::time::Duration;
45
46use async_trait::async_trait;
47use reqwest::{Client, Method, Proxy, header};
48use serde_json::{Value, json};
49use tracing::{debug, info, warn};
50
51use crate::domain::error::{Result, ServiceError, StygianError};
52use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
53
54// ─── Config ───────────────────────────────────────────────────────────────────
55
/// Adapter-wide settings for [`RestApiAdapter`].
///
/// These values apply to every request the adapter sends; anything that is
/// specific to a single request is read from `ServiceInput.params` instead.
///
/// # Example
///
/// ```
/// use stygian_graph::adapters::rest_api::RestApiConfig;
/// use std::time::Duration;
///
/// let cfg = RestApiConfig {
///     timeout:          Duration::from_secs(20),
///     max_retries:      2,
///     retry_base_delay: Duration::from_millis(500),
///     proxy_url:        None,
/// };
/// ```
#[derive(Debug, Clone)]
pub struct RestApiConfig {
    /// How long a single request may take before it is aborted (default: 30 s).
    pub timeout: Duration,
    /// How many times one page request is retried after a transient error (default: 3).
    pub max_retries: u32,
    /// Delay before the first retry; doubled for each further retry (default: 1 s).
    pub retry_base_delay: Duration,
    /// Optional HTTP/HTTPS/SOCKS5 proxy URL.
    pub proxy_url: Option<String>,
}

impl Default for RestApiConfig {
    /// 30 s timeout, 3 retries, 1 s base backoff delay, no proxy.
    fn default() -> Self {
        let timeout = Duration::from_secs(30);
        let retry_base_delay = Duration::from_secs(1);

        Self {
            timeout,
            max_retries: 3,
            retry_base_delay,
            proxy_url: None,
        }
    }
}
95
96// ─── Internal request model ───────────────────────────────────────────────────
97
/// Authentication scheme, parsed from `params.auth`.
///
/// Produced by `RestApiAdapter::parse_auth` and applied to every outgoing
/// request in `RestApiAdapter::do_send`.
#[derive(Debug, Clone)]
enum AuthScheme {
    /// No authentication; the request is sent without credentials.
    None,
    /// `Authorization: Bearer <token>` (also selected by the `"oauth2"` auth type).
    Bearer(String),
    /// HTTP Basic authentication with the given username and password.
    Basic { username: String, password: String },
    /// Arbitrary header: `<header>: <key>`
    ApiKeyHeader { header: String, key: String },
    /// Append `?<param>=<key>` to the query string.
    ApiKeyQuery { param: String, key: String },
}
112
/// Request body variant.
///
/// `params.body` (any non-null JSON value) takes precedence; `params.body_raw`
/// is only consulted when `params.body` is null or absent.
#[derive(Debug, Clone)]
enum RequestBody {
    /// Structured body, handed to the request builder's `json` method.
    Json(Value),
    /// String body, sent as given.
    Raw(String),
}
119
/// How to advance to the next page.
///
/// Parsed from `params.pagination.strategy`; unknown or missing strategies
/// fall back to [`PaginationStrategy::None`].
#[derive(Debug, Clone)]
enum PaginationStrategy {
    /// Single request — no pagination.
    None,
    /// Increment a page/offset query parameter.
    Offset {
        /// Query parameter that carries the page number (default: `page`).
        page_param: String,
        /// Query parameter that carries the page size; only sent when
        /// `page_size` is also set.
        page_size_param: Option<String>,
        /// Page size value; only sent when `page_size_param` is also set.
        page_size: Option<u64>,
        /// Page number for the next request; starts at `start_page`
        /// (default: 1) and is incremented after every fetched page.
        current_page: u64,
    },
    /// Follow a cursor embedded in the response JSON.
    Cursor {
        /// Query parameter name that carries the cursor on subsequent requests.
        cursor_param: String,
        /// Dot-separated path into the response JSON where the next cursor lives.
        cursor_field: String,
    },
    /// Follow RFC 8288 `Link: <URL>; rel="next"` response header.
    LinkHeader,
}
142
/// Fully-parsed per-request specification, derived from `ServiceInput.params`.
///
/// Built once per `execute` call by `RestApiAdapter::parse_spec` and reused for
/// every page request of that call.
#[derive(Debug, Clone)]
struct RequestSpec {
    /// HTTP method (`params.method`, default `GET`).
    method: Method,
    /// Additional request headers from `params.headers`; non-string values are dropped.
    extra_headers: HashMap<String, String>,
    /// Static query parameters from `params.query`, sent with each request.
    query_params: HashMap<String, String>,
    /// Optional request body (`params.body` or `params.body_raw`).
    body: Option<RequestBody>,
    /// Authentication scheme from `params.auth`.
    auth: AuthScheme,
    /// Value of the `Accept` header (`params.accept`, default `application/json`).
    accept: String,
    /// Dot-separated path into the JSON response to extract as data.
    /// `None` means use the full response body.
    data_path: Option<String>,
    /// Return paged data as a flat JSON array even when only one page was fetched.
    collect_as_array: bool,
    /// How to move from one page to the next.
    pagination: PaginationStrategy,
    /// Upper bound on the number of pages fetched (default: 1).
    max_pages: usize,
}
160
161// ─── Adapter ──────────────────────────────────────────────────────────────────
162
/// REST API scraping adapter.
///
/// Thread-safe and cheaply cloneable — the inner `reqwest::Client` uses `Arc`
/// internally. Build once, share across tasks.
///
/// # Example
///
/// ```
/// use stygian_graph::adapters::rest_api::RestApiAdapter;
///
/// let adapter = RestApiAdapter::new();
/// ```
#[derive(Clone)]
pub struct RestApiAdapter {
    /// HTTP client built from `config` in `with_config` (timeout, compression, TLS, proxy).
    client: Client,
    /// Adapter-level settings; the retry count and backoff base are read on every request.
    config: RestApiConfig,
}
180
181impl RestApiAdapter {
182    /// Create a new adapter with default configuration.
183    ///
184    /// # Example
185    ///
186    /// ```
187    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
188    /// let adapter = RestApiAdapter::new();
189    /// ```
190    pub fn new() -> Self {
191        Self::with_config(RestApiConfig::default())
192    }
193
194    /// Create an adapter with custom configuration.
195    ///
196    /// # Panics
197    ///
198    /// Panics only if TLS is unavailable on the host (extremely rare).
199    ///
200    /// # Example
201    ///
202    /// ```
203    /// use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
204    /// use std::time::Duration;
205    ///
206    /// let adapter = RestApiAdapter::with_config(RestApiConfig {
207    ///     timeout: Duration::from_secs(10),
208    ///     ..Default::default()
209    /// });
210    /// ```
211    pub fn with_config(config: RestApiConfig) -> Self {
212        let mut builder = Client::builder()
213            .timeout(config.timeout)
214            .gzip(true)
215            .brotli(true)
216            .use_rustls_tls();
217
218        if let Some(ref proxy_url) = config.proxy_url
219            && let Ok(proxy) = Proxy::all(proxy_url)
220        {
221            builder = builder.proxy(proxy);
222        }
223
224        // SAFETY: TLS via rustls is always available; build() can only fail if the
225        // TLS backend is completely absent, which cannot happen with use_rustls_tls().
226        #[allow(clippy::expect_used)]
227        let client = builder.build().expect("TLS backend unavailable");
228
229        Self { client, config }
230    }
231
232    /// Resolve a dot-separated path into a JSON [`Value`].
233    ///
234    /// Returns `None` if any path segment is missing.
235    ///
236    /// # Example
237    ///
238    /// ```
239    /// use serde_json::json;
240    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
241    ///
242    /// let v = json!({"meta": {"next": "abc123"}});
243    /// assert_eq!(
244    ///     RestApiAdapter::extract_path(&v, "meta.next"),
245    ///     Some(&json!("abc123"))
246    /// );
247    /// assert!(RestApiAdapter::extract_path(&v, "meta.gone").is_none());
248    /// ```
249    pub fn extract_path<'a>(value: &'a Value, path: &str) -> Option<&'a Value> {
250        let mut current = value;
251        for segment in path.split('.') {
252            current = current.get(segment)?;
253        }
254        Some(current)
255    }
256
257    /// Parse an RFC 8288 `Link` header and return the `rel="next"` URL, if any.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
263    ///
264    /// let link = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
265    /// assert_eq!(
266    ///     RestApiAdapter::parse_link_next(link),
267    ///     Some("https://api.example.com/items?page=2".to_owned())
268    /// );
269    /// ```
270    pub fn parse_link_next(link_header: &str) -> Option<String> {
271        for part in link_header.split(',') {
272            let part = part.trim();
273            let mut url: Option<String> = None;
274            let mut is_next = false;
275            for segment in part.split(';') {
276                let segment = segment.trim();
277                if segment.starts_with('<') && segment.ends_with('>') {
278                    url = Some(segment[1..segment.len() - 1].to_owned());
279                } else if segment.trim_start_matches("rel=").trim_matches('"') == "next" {
280                    is_next = true;
281                }
282            }
283            if is_next {
284                return url;
285            }
286        }
287        None
288    }
289
290    /// Parse `ServiceInput.params` into a `RequestSpec`.
291    #[allow(clippy::indexing_slicing)]
292    fn parse_spec(params: &Value) -> Result<RequestSpec> {
293        let method_str = params["method"].as_str().unwrap_or("GET").to_uppercase();
294        let method = match method_str.as_str() {
295            "GET" => Method::GET,
296            "POST" => Method::POST,
297            "PUT" => Method::PUT,
298            "PATCH" => Method::PATCH,
299            "DELETE" => Method::DELETE,
300            "HEAD" => Method::HEAD,
301            other => {
302                return Err(StygianError::from(ServiceError::Unavailable(format!(
303                    "unknown HTTP method: {other}"
304                ))));
305            }
306        };
307
308        let extra_headers = params["headers"]
309            .as_object()
310            .map(|obj| {
311                obj.iter()
312                    .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_owned())))
313                    .collect()
314            })
315            .unwrap_or_default();
316
317        let query_params = params["query"]
318            .as_object()
319            .map(|obj| {
320                obj.iter()
321                    .filter_map(|(k, v)| {
322                        let s = if v.is_string() {
323                            v.as_str().map(ToOwned::to_owned)
324                        } else {
325                            Some(v.to_string())
326                        };
327                        s.map(|val| (k.clone(), val))
328                    })
329                    .collect()
330            })
331            .unwrap_or_default();
332
333        let body = if params["body"].is_null() {
334            params["body_raw"]
335                .as_str()
336                .map(|raw| RequestBody::Raw(raw.to_owned()))
337        } else {
338            Some(RequestBody::Json(params["body"].clone()))
339        };
340
341        let accept = params["accept"]
342            .as_str()
343            .unwrap_or("application/json")
344            .to_owned();
345
346        let auth = Self::parse_auth(&params["auth"]);
347
348        let data_path = match params["response"]["data_path"].as_str() {
349            Some("") | None => None,
350            Some(p) => Some(p.to_owned()),
351        };
352        let collect_as_array = params["response"]["collect_as_array"]
353            .as_bool()
354            .unwrap_or(false);
355
356        let max_pages = params["pagination"]["max_pages"]
357            .as_u64()
358            .map_or(1, |n| usize::try_from(n).unwrap_or(usize::MAX));
359
360        let pagination = Self::parse_pagination(&params["pagination"]);
361
362        Ok(RequestSpec {
363            method,
364            extra_headers,
365            query_params,
366            body,
367            auth,
368            accept,
369            data_path,
370            collect_as_array,
371            pagination,
372            max_pages,
373        })
374    }
375
376    /// Parse `params.auth` into an [`AuthScheme`].
377    #[allow(clippy::indexing_slicing)]
378    fn parse_auth(auth: &Value) -> AuthScheme {
379        match auth["type"].as_str().unwrap_or("none") {
380            "bearer" | "oauth2" => auth["token"]
381                .as_str()
382                .map_or(AuthScheme::None, |t| AuthScheme::Bearer(t.to_owned())),
383            "basic" => AuthScheme::Basic {
384                username: auth["username"].as_str().unwrap_or("").to_owned(),
385                password: auth["password"].as_str().unwrap_or("").to_owned(),
386            },
387            "api_key_header" => AuthScheme::ApiKeyHeader {
388                header: auth["header"].as_str().unwrap_or("X-Api-Key").to_owned(),
389                key: auth["key"].as_str().unwrap_or("").to_owned(),
390            },
391            "api_key_query" => AuthScheme::ApiKeyQuery {
392                param: auth["param"].as_str().unwrap_or("api_key").to_owned(),
393                key: auth["key"].as_str().unwrap_or("").to_owned(),
394            },
395            _ => AuthScheme::None,
396        }
397    }
398
399    /// Parse `params.pagination` into a [`PaginationStrategy`].
400    #[allow(clippy::indexing_slicing)]
401    fn parse_pagination(pag: &Value) -> PaginationStrategy {
402        match pag["strategy"].as_str().unwrap_or("none") {
403            "offset" => PaginationStrategy::Offset {
404                page_param: pag["page_param"].as_str().unwrap_or("page").to_owned(),
405                page_size_param: pag["page_size_param"].as_str().map(ToOwned::to_owned),
406                page_size: pag["page_size"].as_u64(),
407                current_page: pag["start_page"].as_u64().unwrap_or(1),
408            },
409            "cursor" => PaginationStrategy::Cursor {
410                cursor_param: pag["cursor_param"].as_str().unwrap_or("cursor").to_owned(),
411                cursor_field: pag["cursor_field"]
412                    .as_str()
413                    .unwrap_or("next_cursor")
414                    .to_owned(),
415            },
416            "link_header" => PaginationStrategy::LinkHeader,
417            _ => PaginationStrategy::None,
418        }
419    }
420
421    /// Extract the data portion of a parsed response using `spec.data_path`.
422    fn extract_data(response: &Value, spec: &RequestSpec) -> Value {
423        spec.data_path
424            .as_deref()
425            .and_then(|path| Self::extract_path(response, path))
426            .cloned()
427            .unwrap_or_else(|| response.clone())
428    }
429
430    /// Execute a single HTTP request, retrying on transient failures.
431    async fn send_one(
432        &self,
433        url: &str,
434        spec: &RequestSpec,
435        extra_query: &HashMap<String, String>,
436    ) -> Result<(Value, Option<String>)> {
437        let mut last_err: Option<StygianError> = None;
438
439        for attempt in 0..=self.config.max_retries {
440            if attempt > 0 {
441                let delay = self.config.retry_base_delay * 2u32.saturating_pow(attempt - 1);
442                tokio::time::sleep(delay).await;
443                debug!(url, attempt, "REST API retry");
444            }
445
446            match self.do_send(url, spec, extra_query).await {
447                Ok(r) => return Ok(r),
448                Err(e) if is_retryable(&e) && attempt < self.config.max_retries => {
449                    last_err = Some(e);
450                }
451                Err(e) => return Err(e),
452            }
453        }
454
455        Err(last_err.unwrap_or_else(|| {
456            StygianError::from(ServiceError::Unavailable("max retries exceeded".into()))
457        }))
458    }
459
460    /// Perform exactly one HTTP round-trip (no retry).
461    ///
462    /// Returns the parsed JSON response body and the raw `Link` header value (if present).
463    async fn do_send(
464        &self,
465        url: &str,
466        spec: &RequestSpec,
467        extra_query: &HashMap<String, String>,
468    ) -> Result<(Value, Option<String>)> {
469        let mut req = self.client.request(spec.method.clone(), url);
470
471        // Accept header
472        req = req.header(header::ACCEPT, spec.accept.as_str());
473
474        // Auth — header-based schemes
475        req = match &spec.auth {
476            AuthScheme::Bearer(token) => req.bearer_auth(token),
477            AuthScheme::Basic { username, password } => req.basic_auth(username, Some(password)),
478            AuthScheme::ApiKeyHeader { header: hdr, key } => req.header(hdr.as_str(), key.as_str()),
479            AuthScheme::ApiKeyQuery { .. } | AuthScheme::None => req,
480        };
481
482        // Custom headers
483        for (k, v) in &spec.extra_headers {
484            req = req.header(k.as_str(), v.as_str());
485        }
486
487        // Merge query params: static + per-page extra + API key query (if applicable)
488        let mut merged: HashMap<String, String> = spec.query_params.clone();
489        merged.extend(extra_query.iter().map(|(k, v)| (k.clone(), v.clone())));
490        if let AuthScheme::ApiKeyQuery { param, key } = &spec.auth {
491            merged.insert(param.clone(), key.clone());
492        }
493        if !merged.is_empty() {
494            let pairs: Vec<(&String, &String)> = merged.iter().collect();
495            req = req.query(&pairs);
496        }
497
498        // Body
499        req = match &spec.body {
500            Some(RequestBody::Json(v)) => req.json(v),
501            Some(RequestBody::Raw(s)) => req.body(s.clone()),
502            None => req,
503        };
504
505        let response = req
506            .send()
507            .await
508            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
509
510        let status = response.status();
511
512        // Capture Link header before consuming the response
513        let link_header = response
514            .headers()
515            .get("link")
516            .and_then(|v| v.to_str().ok())
517            .map(ToOwned::to_owned);
518
519        // 429 — log retry-after hint
520        if status.as_u16() == 429 {
521            let retry_after = response
522                .headers()
523                .get("retry-after")
524                .and_then(|v| v.to_str().ok())
525                .and_then(|s| s.parse::<u64>().ok())
526                .unwrap_or(5);
527            warn!(url, retry_after, "REST API rate-limited (429)");
528            return Err(StygianError::from(ServiceError::Unavailable(format!(
529                "HTTP 429 rate-limited; retry-after={retry_after}s"
530            ))));
531        }
532
533        if !status.is_success() {
534            let snippet: String = response
535                .text()
536                .await
537                .unwrap_or_default()
538                .chars()
539                .take(200)
540                .collect();
541            return Err(StygianError::from(ServiceError::Unavailable(format!(
542                "HTTP {status}: {snippet}"
543            ))));
544        }
545
546        let body = response
547            .text()
548            .await
549            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
550
551        // Parse as JSON when possible; wrap plain text as a JSON string otherwise.
552        let parsed: Value = serde_json::from_str(&body).unwrap_or(Value::String(body));
553
554        Ok((parsed, link_header))
555    }
556}
557
558impl Default for RestApiAdapter {
559    fn default() -> Self {
560        Self::new()
561    }
562}
563
564// ─── Helpers ──────────────────────────────────────────────────────────────────
565
566/// Returns `true` for transient errors that are worth retrying.
567fn is_retryable(err: &StygianError) -> bool {
568    let StygianError::Service(ServiceError::Unavailable(msg)) = err else {
569        return false;
570    };
571    msg.contains("429")
572        || msg.contains("500")
573        || msg.contains("502")
574        || msg.contains("503")
575        || msg.contains("504")
576        || msg.contains("connection")
577        || msg.contains("timed out")
578}
579
580// ─── ScrapingService ──────────────────────────────────────────────────────────
581
582#[async_trait]
583impl ScrapingService for RestApiAdapter {
584    /// Execute one or more REST API requests and return the aggregated result.
585    ///
586    /// # `ServiceInput.url`
587    ///
588    /// Base URL of the REST endpoint (including path; query string is optional).
589    ///
590    /// # `ServiceInput.params` contract
591    ///
592    /// ```json
593    /// {
594    ///   "method":   "GET",
595    ///   "body":     { "key": "value" },
596    ///   "body_raw": "raw body string",
597    ///   "headers":  { "X-Custom-Header": "value" },
598    ///   "query":    { "state": "open", "per_page": "30" },
599    ///   "accept":   "application/json",
600    ///
601    ///   "auth": {
602    ///     "type":     "bearer",
603    ///     "token":    "...",
604    ///     "username": "user",
605    ///     "password": "pass",
606    ///     "header":   "X-Api-Key",
607    ///     "param":    "api_key",
608    ///     "key":      "sk-..."
609    ///   },
610    ///
611    ///   "response": {
612    ///     "data_path":        "items",
613    ///     "collect_as_array": true
614    ///   },
615    ///
616    ///   "pagination": {
617    ///     "strategy":        "link_header",
618    ///     "max_pages":       10,
619    ///     "page_param":      "page",
620    ///     "page_size_param": "per_page",
621    ///     "page_size":       100,
622    ///     "start_page":      1,
623    ///     "cursor_param":    "cursor",
624    ///     "cursor_field":    "meta.next_cursor"
625    ///   }
626    /// }
627    /// ```
628    ///
629    /// # Auth `type` values
630    ///
631    /// | `type` | Required fields | Description |
632    /// | --- | --- | --- |
633    /// | `"bearer"` / `"oauth2"` | `token` | `Authorization: Bearer <token>` |
634    /// | `"basic"` | `username`, `password` | HTTP Basic |
635    /// | `"api_key_header"` | `header`, `key` | Custom header |
636    /// | `"api_key_query"` | `param`, `key` | Query string |
637    /// | `"none"` or absent | — | No auth |
638    ///
639    /// # Pagination strategies
640    ///
641    /// | `strategy` | Description |
642    /// | --- | --- |
643    /// | `"none"` | Single request (default) |
644    /// | `"offset"` | Increment `page_param` from `start_page` |
645    /// | `"cursor"` | Extract next cursor at `cursor_field` in each response; pass it as `cursor_param` |
646    /// | `"link_header"` | Follow RFC 8288 `Link: <url>; rel="next"` header |
647    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
648        let spec = Self::parse_spec(&input.params)?;
649
650        let mut accumulated: Vec<Value> = Vec::new();
651        let mut page_count: usize = 0;
652        let mut current_url = input.url.clone();
653        let mut pagination = spec.pagination.clone();
654        let mut extra_query: HashMap<String, String> = HashMap::new();
655
656        // Cursor state lives outside the loop so it persists across pages.
657        let mut cursor_state: Option<String> = None;
658
659        info!(url = %input.url, "REST API execute start");
660
661        loop {
662            if page_count >= spec.max_pages {
663                debug!(%current_url, page_count, "REST API: max_pages reached");
664                break;
665            }
666
667            // Build per-page query additions
668            extra_query.clear();
669            match &pagination {
670                PaginationStrategy::Offset {
671                    page_param,
672                    page_size_param,
673                    page_size,
674                    current_page,
675                } => {
676                    extra_query.insert(page_param.clone(), current_page.to_string());
677                    if let (Some(size_param), Some(size)) = (page_size_param, page_size) {
678                        extra_query.insert(size_param.clone(), size.to_string());
679                    }
680                }
681                PaginationStrategy::Cursor { cursor_param, .. } => {
682                    if let Some(ref cursor) = cursor_state {
683                        extra_query.insert(cursor_param.clone(), cursor.clone());
684                    }
685                }
686                PaginationStrategy::None | PaginationStrategy::LinkHeader => {}
687            }
688
689            let (response, link_header) = self.send_one(&current_url, &spec, &extra_query).await?;
690
691            let page_data = Self::extract_data(&response, &spec);
692
693            // Accumulate — empty array responses signal end-of-pagination.
694            match &page_data {
695                Value::Array(items) => {
696                    if items.is_empty() {
697                        debug!("REST API: empty page, stopping pagination");
698                        break;
699                    }
700                    accumulated.extend(items.iter().cloned());
701                }
702                other => {
703                    accumulated.push(other.clone());
704                }
705            }
706            page_count += 1;
707
708            // Advance pagination state
709            let stop = match &mut pagination {
710                PaginationStrategy::None => true,
711                PaginationStrategy::Offset { current_page, .. } => {
712                    *current_page += 1;
713                    false
714                }
715                PaginationStrategy::Cursor { cursor_field, .. } => {
716                    Self::extract_path(&response, cursor_field.as_str())
717                        .and_then(Value::as_str)
718                        .filter(|s| !s.is_empty())
719                        .map(ToOwned::to_owned)
720                        .is_none_or(|cursor| {
721                            cursor_state = Some(cursor);
722                            false
723                        })
724                }
725                PaginationStrategy::LinkHeader => link_header
726                    .as_deref()
727                    .and_then(Self::parse_link_next)
728                    .is_none_or(|next_url| {
729                        current_url = next_url;
730                        false
731                    }),
732            };
733            if stop {
734                break;
735            }
736        }
737
738        // Serialise accumulated results
739        let data_value = if spec.collect_as_array || accumulated.len() > 1 {
740            Value::Array(accumulated)
741        } else {
742            accumulated.into_iter().next().unwrap_or(Value::Null)
743        };
744
745        let data_str = match &data_value {
746            Value::String(s) => s.clone(),
747            other => serde_json::to_string_pretty(other).unwrap_or_default(),
748        };
749
750        let metadata = json!({
751            "url":        input.url,
752            "page_count": page_count,
753        });
754
755        info!(%input.url, page_count, "REST API execute done");
756
757        Ok(ServiceOutput {
758            data: data_str,
759            metadata,
760        })
761    }
762
763    fn name(&self) -> &'static str {
764        "rest-api"
765    }
766}
767
768// ─── Tests ────────────────────────────────────────────────────────────────────
769
770#[cfg(test)]
771#[allow(clippy::unwrap_used, clippy::panic, clippy::indexing_slicing)]
772mod tests {
773    use super::*;
774    use serde_json::json;
775
776    // ── parse_auth ─────────────────────────────────────────────────────────────
777
778    #[test]
779    fn parse_auth_bearer() {
780        let auth = json!({"type": "bearer", "token": "tok123"});
781        match RestApiAdapter::parse_auth(&auth) {
782            AuthScheme::Bearer(t) => assert_eq!(t, "tok123"),
783            other => panic!("unexpected: {other:?}"),
784        }
785    }
786
787    #[test]
788    fn parse_auth_oauth2_alias() {
789        let auth = json!({"type": "oauth2", "token": "oauth_tok"});
790        match RestApiAdapter::parse_auth(&auth) {
791            AuthScheme::Bearer(t) => assert_eq!(t, "oauth_tok"),
792            other => panic!("unexpected: {other:?}"),
793        }
794    }
795
796    #[test]
797    fn parse_auth_basic() {
798        let auth = json!({"type": "basic", "username": "alice", "password": "s3cr3t"});
799        match RestApiAdapter::parse_auth(&auth) {
800            AuthScheme::Basic { username, password } => {
801                assert_eq!(username, "alice");
802                assert_eq!(password, "s3cr3t");
803            }
804            other => panic!("unexpected: {other:?}"),
805        }
806    }
807
808    #[test]
809    fn parse_auth_api_key_header() {
810        let auth = json!({"type": "api_key_header", "header": "X-Token", "key": "k123"});
811        match RestApiAdapter::parse_auth(&auth) {
812            AuthScheme::ApiKeyHeader { header, key } => {
813                assert_eq!(header, "X-Token");
814                assert_eq!(key, "k123");
815            }
816            other => panic!("unexpected: {other:?}"),
817        }
818    }
819
820    #[test]
821    fn parse_auth_api_key_query() {
822        let auth = json!({"type": "api_key_query", "param": "api_key", "key": "qk"});
823        match RestApiAdapter::parse_auth(&auth) {
824            AuthScheme::ApiKeyQuery { param, key } => {
825                assert_eq!(param, "api_key");
826                assert_eq!(key, "qk");
827            }
828            other => panic!("unexpected: {other:?}"),
829        }
830    }
831
832    #[test]
833    fn parse_auth_none_default() {
834        let auth = json!(null);
835        assert!(matches!(
836            RestApiAdapter::parse_auth(&auth),
837            AuthScheme::None
838        ));
839    }
840
841    // ── extract_path ───────────────────────────────────────────────────────────
842
843    #[test]
844    fn extract_path_top_level() {
845        let v = json!({"items": [1, 2, 3]});
846        assert_eq!(
847            RestApiAdapter::extract_path(&v, "items"),
848            Some(&json!([1, 2, 3]))
849        );
850    }
851
852    #[test]
853    fn extract_path_nested() {
854        let v = json!({"meta": {"next_cursor": "abc"}});
855        assert_eq!(
856            RestApiAdapter::extract_path(&v, "meta.next_cursor"),
857            Some(&json!("abc"))
858        );
859    }
860
861    #[test]
862    fn extract_path_missing() {
863        let v = json!({"a": {"b": 1}});
864        assert!(RestApiAdapter::extract_path(&v, "a.c").is_none());
865    }
866
867    // ── parse_link_next ────────────────────────────────────────────────────────
868
869    #[test]
870    fn parse_link_next_present() {
871        let h = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
872        assert_eq!(
873            RestApiAdapter::parse_link_next(h),
874            Some("https://api.example.com/items?page=2".to_owned())
875        );
876    }
877
878    #[test]
879    fn parse_link_next_absent() {
880        let h = r#"<https://api.example.com/items?page=1>; rel="prev""#;
881        assert!(RestApiAdapter::parse_link_next(h).is_none());
882    }
883
884    #[test]
885    fn parse_link_next_single() {
886        let h = r#"<https://api.example.com/items?page=3>; rel="next""#;
887        assert_eq!(
888            RestApiAdapter::parse_link_next(h),
889            Some("https://api.example.com/items?page=3".to_owned())
890        );
891    }
892
893    // ── parse_spec ─────────────────────────────────────────────────────────────
894
895    #[test]
896    fn parse_spec_defaults() {
897        let spec = RestApiAdapter::parse_spec(&json!({})).unwrap();
898        assert_eq!(spec.method, Method::GET);
899        assert_eq!(spec.accept, "application/json");
900        assert_eq!(spec.max_pages, 1);
901        assert!(spec.data_path.is_none());
902        assert!(!spec.collect_as_array);
903        assert!(matches!(spec.pagination, PaginationStrategy::None));
904    }
905
906    #[test]
907    fn parse_spec_post_with_body_and_headers() {
908        let params = json!({
909            "method":  "POST",
910            "body":    { "key": "value" },
911            "headers": { "X-Foo": "bar" },
912            "query":   { "limit": "10" }
913        });
914        let spec = RestApiAdapter::parse_spec(&params).unwrap();
915        assert_eq!(spec.method, Method::POST);
916        assert_eq!(spec.extra_headers.get("X-Foo"), Some(&"bar".to_string()));
917        assert_eq!(spec.query_params.get("limit"), Some(&"10".to_string()));
918        assert!(matches!(spec.body, Some(RequestBody::Json(_))));
919    }
920
921    #[test]
922    fn parse_spec_unknown_method_returns_error() {
923        let result = RestApiAdapter::parse_spec(&json!({"method": "BREW"}));
924        assert!(result.is_err());
925    }
926
927    #[test]
928    fn parse_spec_cursor_pagination() {
929        let params = json!({
930            "pagination": {
931                "strategy":     "cursor",
932                "cursor_param": "after",
933                "cursor_field": "page_info.end_cursor",
934                "max_pages":    10
935            }
936        });
937        let spec = RestApiAdapter::parse_spec(&params).unwrap();
938        assert_eq!(spec.max_pages, 10);
939        match spec.pagination {
940            PaginationStrategy::Cursor {
941                cursor_param,
942                cursor_field,
943            } => {
944                assert_eq!(cursor_param, "after");
945                assert_eq!(cursor_field, "page_info.end_cursor");
946            }
947            other => panic!("unexpected: {other:?}"),
948        }
949    }
950
951    #[test]
952    fn parse_spec_offset_pagination() {
953        let params = json!({
954            "pagination": {
955                "strategy":        "offset",
956                "page_param":      "page",
957                "page_size_param": "per_page",
958                "page_size":       50,
959                "start_page":      1,
960                "max_pages":       3
961            }
962        });
963        let spec = RestApiAdapter::parse_spec(&params).unwrap();
964        assert_eq!(spec.max_pages, 3);
965        match spec.pagination {
966            PaginationStrategy::Offset {
967                page_size,
968                current_page,
969                page_param,
970                ..
971            } => {
972                assert_eq!(page_size, Some(50));
973                assert_eq!(current_page, 1);
974                assert_eq!(page_param, "page");
975            }
976            other => panic!("unexpected: {other:?}"),
977        }
978    }
979
980    #[test]
981    fn parse_spec_link_header_pagination() {
982        let params = json!({
983            "pagination": { "strategy": "link_header", "max_pages": 5 }
984        });
985        let spec = RestApiAdapter::parse_spec(&params).unwrap();
986        assert_eq!(spec.max_pages, 5);
987        assert!(matches!(spec.pagination, PaginationStrategy::LinkHeader));
988    }
989
990    #[test]
991    fn parse_spec_data_path_and_collect_as_array() {
992        let params = json!({
993            "response": { "data_path": "data.items", "collect_as_array": true }
994        });
995        let spec = RestApiAdapter::parse_spec(&params).unwrap();
996        assert_eq!(spec.data_path, Some("data.items".to_owned()));
997        assert!(spec.collect_as_array);
998    }
999
1000    #[test]
1001    fn parse_spec_empty_data_path_is_none() {
1002        let params = json!({ "response": { "data_path": "" } });
1003        let spec = RestApiAdapter::parse_spec(&params).unwrap();
1004        assert!(spec.data_path.is_none());
1005    }
1006
1007    // ── adapter_name ───────────────────────────────────────────────────────────
1008
1009    #[test]
1010    fn adapter_name() {
1011        assert_eq!(RestApiAdapter::new().name(), "rest-api");
1012    }
1013
1014    // ── is_retryable ────────────────────────────────────────────────────────────
1015
1016    #[test]
1017    fn is_retryable_429() {
1018        let e = StygianError::from(ServiceError::Unavailable(
1019            "HTTP 429 rate-limited".to_string(),
1020        ));
1021        assert!(is_retryable(&e));
1022    }
1023
1024    #[test]
1025    fn is_retryable_503() {
1026        let e = StygianError::from(ServiceError::Unavailable(
1027            "HTTP 503 Service Unavailable".to_string(),
1028        ));
1029        assert!(is_retryable(&e));
1030    }
1031
1032    #[test]
1033    fn is_retryable_404_not_retryable() {
1034        let e = StygianError::from(ServiceError::Unavailable("HTTP 404 Not Found".to_string()));
1035        assert!(!is_retryable(&e));
1036    }
1037
1038    // ── integration ────────────────────────────────────────────────────────────
1039
1040    /// Real HTTP integration test — requires `REST_API_TEST_URL` env var.
1041    ///
1042    /// Run with: `REST_API_TEST_URL=https://httpbin.org/get cargo test -- --ignored`
1043    #[tokio::test]
1044    #[ignore = "requires live REST API endpoint; set REST_API_TEST_URL env var"]
1045    async fn integration_get_httpbin() {
1046        let url = std::env::var("REST_API_TEST_URL")
1047            .unwrap_or_else(|_| "https://httpbin.org/get".to_string());
1048
1049        let adapter = RestApiAdapter::new();
1050        let input = ServiceInput {
1051            url,
1052            params: json!({}),
1053        };
1054        let output = adapter.execute(input).await.unwrap();
1055        assert!(!output.data.is_empty());
1056        assert_eq!(output.metadata["page_count"], 1);
1057    }
1058}