Skip to main content

stygian_graph/adapters/
rest_api.rs

1//! REST API scraping adapter with authentication and pagination support.
2//!
3//! Implements [`crate::ports::ScrapingService`] for structured REST JSON APIs. Supports:
4//!
5//! - HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`, `HEAD`
6//! - Authentication: Bearer token, HTTP Basic, API key (header or query param)
7//! - Automatic pagination: offset/page, cursor, or RFC 8288 `Link` header
8//! - JSON response data extraction via dot-separated path
9//! - Custom request headers and query string parameters
10//! - Configurable retries with exponential backoff
11//!
12//! All per-request options live in `ServiceInput::params`; see the
13//! `RestApiAdapter::execute` docs for the full contract.
14//!
15//! # Example
16//!
17//! ```no_run
18//! use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
19//! use stygian_graph::ports::{ScrapingService, ServiceInput};
20//! use serde_json::json;
21//! use std::time::Duration;
22//!
23//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
24//! let adapter = RestApiAdapter::with_config(RestApiConfig {
25//!     timeout:      Duration::from_secs(20),
26//!     max_retries:  2,
27//!     ..Default::default()
28//! });
29//!
30//! let input = ServiceInput {
31//!     url: "https://api.github.com/repos/rust-lang/rust/issues".to_string(),
32//!     params: json!({
33//!         "auth": { "type": "bearer", "token": "ghp_..." },
34//!         "query": { "state": "open", "per_page": "30" },
35//!         "pagination": { "strategy": "link_header", "max_pages": 5 },
36//!         "response": { "data_path": "" }
37//!     }),
38//! };
39//! // let output = adapter.execute(input).await.unwrap();
40//! # });
41//! ```
42
43use std::collections::HashMap;
44use std::time::Duration;
45
46use async_trait::async_trait;
47use reqwest::{Client, Method, Proxy, header};
48use serde_json::{Value, json};
49use tracing::{debug, info, warn};
50
51use crate::domain::error::{Result, ServiceError, StygianError};
52use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
53
54// ─── Config ───────────────────────────────────────────────────────────────────
55
/// Configuration for [`RestApiAdapter`].
///
/// Adapter-level defaults; per-request settings come from `ServiceInput.params`.
///
/// Implements `PartialEq`/`Eq` so configurations can be compared directly
/// (e.g. in tests or when deciding whether a client needs rebuilding).
///
/// # Example
///
/// ```
/// use stygian_graph::adapters::rest_api::RestApiConfig;
/// use std::time::Duration;
///
/// let cfg = RestApiConfig {
///     timeout:          Duration::from_secs(20),
///     max_retries:      2,
///     retry_base_delay: Duration::from_millis(500),
///     proxy_url:        None,
/// };
/// assert_eq!(cfg, cfg.clone());
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RestApiConfig {
    /// Per-request timeout (default: 30 s).
    pub timeout: Duration,
    /// Maximum retry attempts per page request on transient errors (default: 3).
    pub max_retries: u32,
    /// Base delay for exponential backoff (default: 1 s).
    pub retry_base_delay: Duration,
    /// Optional HTTP/HTTPS/SOCKS5 proxy URL.
    pub proxy_url: Option<String>,
}
84
85impl Default for RestApiConfig {
86    fn default() -> Self {
87        Self {
88            timeout: Duration::from_secs(30),
89            max_retries: 3,
90            retry_base_delay: Duration::from_secs(1),
91            proxy_url: None,
92        }
93    }
94}
95
96// ─── Internal request model ───────────────────────────────────────────────────
97
/// Authentication scheme, parsed from `params.auth`.
///
/// `Debug` is implemented by hand so that secrets (tokens, passwords, API
/// keys) never end up in logs or panic messages; only the non-sensitive
/// parts (variant name, username, header/parameter name) are printed.
#[derive(Clone)]
enum AuthScheme {
    /// No authentication.
    None,
    /// `Authorization: Bearer <token>`
    Bearer(String),
    /// HTTP Basic authentication.
    Basic { username: String, password: String },
    /// Arbitrary header: `<header>: <key>`
    ApiKeyHeader { header: String, key: String },
    /// Append `?<param>=<key>` to the query string.
    ApiKeyQuery { param: String, key: String },
}

impl std::fmt::Debug for AuthScheme {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Placeholder printed instead of any credential material.
        const REDACTED: &str = "<redacted>";

        match self {
            Self::None => f.write_str("None"),
            Self::Bearer(_) => f.debug_tuple("Bearer").field(&REDACTED).finish(),
            Self::Basic { username, .. } => f
                .debug_struct("Basic")
                .field("username", username)
                .field("password", &REDACTED)
                .finish(),
            Self::ApiKeyHeader { header, .. } => f
                .debug_struct("ApiKeyHeader")
                .field("header", header)
                .field("key", &REDACTED)
                .finish(),
            Self::ApiKeyQuery { param, .. } => f
                .debug_struct("ApiKeyQuery")
                .field("param", param)
                .field("key", &REDACTED)
                .finish(),
        }
    }
}
112
/// Request body variant, parsed from `params.body_raw` / `params.body`.
///
/// A non-empty `body_raw` string takes precedence over `body`.
#[derive(Debug, Clone)]
enum RequestBody {
    /// Structured JSON value taken from `params.body`; attached with
    /// the request builder's `json` method.
    Json(Value),
    /// Raw string taken from `params.body_raw`; attached unchanged as the body.
    Raw(String),
}
119
/// How to advance to the next page.
///
/// Parsed from `params.pagination.strategy`; unknown or missing strategies
/// map to [`PaginationStrategy::None`].
#[derive(Debug, Clone)]
enum PaginationStrategy {
    /// Single request — no pagination.
    None,
    /// Increment a page/offset query parameter.
    Offset {
        /// Query parameter that carries the page number (default `"page"`).
        page_param: String,
        /// Query parameter that carries the page size; only sent when
        /// `page_size` is also set.
        page_size_param: Option<String>,
        /// Page size value; only sent when `page_size_param` is also set.
        page_size: Option<u64>,
        /// Page number sent with the next request. Starts at
        /// `params.pagination.start_page` (default 1) and is incremented by
        /// one after every fetched page.
        current_page: u64,
    },
    /// Follow a cursor embedded in the response JSON.
    Cursor {
        /// Query parameter name that carries the cursor on subsequent requests.
        cursor_param: String,
        /// Dot-separated path into the response JSON where the next cursor lives.
        cursor_field: String,
    },
    /// Follow RFC 8288 `Link: <URL>; rel="next"` response header.
    LinkHeader,
}
142
/// Fully-parsed per-request specification, derived from `ServiceInput.params`.
#[derive(Debug, Clone)]
struct RequestSpec {
    /// HTTP method (`params.method`, default `GET`).
    method: Method,
    /// Extra request headers from `params.headers`; non-string values are dropped.
    extra_headers: HashMap<String, String>,
    /// Static query-string parameters from `params.query`; non-string JSON
    /// values are stored in their JSON text form.
    query_params: HashMap<String, String>,
    /// Optional request body (`params.body_raw` or `params.body`).
    body: Option<RequestBody>,
    /// Authentication scheme parsed from `params.auth`.
    auth: AuthScheme,
    /// Value of the `Accept` header (`params.accept`, default `application/json`).
    accept: String,
    /// Dot-separated path into the JSON response to extract as data.
    /// `None` means use the full response body.
    data_path: Option<String>,
    /// Return paged data as a flat JSON array even when only one page was fetched.
    collect_as_array: bool,
    /// How to move from one page to the next.
    pagination: PaginationStrategy,
    /// Upper bound on the number of pages fetched
    /// (`params.pagination.max_pages`, default 1).
    max_pages: usize,
}
160
161// ─── Adapter ──────────────────────────────────────────────────────────────────
162
163/// REST API scraping adapter.
164///
165/// Thread-safe and cheaply cloneable — the inner `reqwest::Client` uses `Arc`
166/// internally. Build once, share across tasks.
167///
168/// # Example
169///
170/// ```
171/// use stygian_graph::adapters::rest_api::RestApiAdapter;
172///
173/// let adapter = RestApiAdapter::new();
174/// ```
175#[derive(Clone)]
176pub struct RestApiAdapter {
177    client: Client,
178    config: RestApiConfig,
179}
180
181impl RestApiAdapter {
182    /// Create a new adapter with default configuration.
183    ///
184    /// # Example
185    ///
186    /// ```
187    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
188    /// let adapter = RestApiAdapter::new();
189    /// ```
190    pub fn new() -> Self {
191        Self::with_config(RestApiConfig::default())
192    }
193
194    /// Create an adapter with custom configuration.
195    ///
196    /// # Panics
197    ///
198    /// Panics only if TLS is unavailable on the host (extremely rare).
199    ///
200    /// # Example
201    ///
202    /// ```
203    /// use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
204    /// use std::time::Duration;
205    ///
206    /// let adapter = RestApiAdapter::with_config(RestApiConfig {
207    ///     timeout: Duration::from_secs(10),
208    ///     ..Default::default()
209    /// });
210    /// ```
211    pub fn with_config(config: RestApiConfig) -> Self {
212        let mut builder = Client::builder()
213            .timeout(config.timeout)
214            .gzip(true)
215            .brotli(true)
216            .use_rustls_tls();
217
218        if let Some(ref proxy_url) = config.proxy_url
219            && let Ok(proxy) = Proxy::all(proxy_url)
220        {
221            builder = builder.proxy(proxy);
222        }
223
224        // SAFETY: TLS via rustls is always available; build() can only fail if the
225        // TLS backend is completely absent, which cannot happen with use_rustls_tls().
226        #[allow(clippy::expect_used)]
227        let client = builder.build().expect("TLS backend unavailable");
228
229        Self { client, config }
230    }
231
232    /// Resolve a dot-separated path into a JSON [`Value`].
233    ///
234    /// Returns `None` if any path segment is missing.
235    ///
236    /// # Example
237    ///
238    /// ```
239    /// use serde_json::json;
240    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
241    ///
242    /// let v = json!({"meta": {"next": "abc123"}});
243    /// assert_eq!(
244    ///     RestApiAdapter::extract_path(&v, "meta.next"),
245    ///     Some(&json!("abc123"))
246    /// );
247    /// assert!(RestApiAdapter::extract_path(&v, "meta.gone").is_none());
248    /// ```
249    pub fn extract_path<'a>(value: &'a Value, path: &str) -> Option<&'a Value> {
250        let mut current = value;
251        for segment in path.split('.') {
252            current = current.get(segment)?;
253        }
254        Some(current)
255    }
256
257    /// Parse an RFC 8288 `Link` header and return the `rel="next"` URL, if any.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
263    ///
264    /// let link = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
265    /// assert_eq!(
266    ///     RestApiAdapter::parse_link_next(link),
267    ///     Some("https://api.example.com/items?page=2".to_owned())
268    /// );
269    /// ```
270    pub fn parse_link_next(link_header: &str) -> Option<String> {
271        for part in link_header.split(',') {
272            let part = part.trim();
273            let mut url: Option<String> = None;
274            let mut is_next = false;
275            for segment in part.split(';') {
276                let segment = segment.trim();
277                if segment.starts_with('<') && segment.ends_with('>') {
278                    url = Some(segment[1..segment.len() - 1].to_owned());
279                } else if segment.trim_start_matches("rel=").trim_matches('"') == "next" {
280                    is_next = true;
281                }
282            }
283            if is_next {
284                return url;
285            }
286        }
287        None
288    }
289
290    /// Parse `ServiceInput.params` into a `RequestSpec`.
291    #[allow(clippy::indexing_slicing)]
292    fn parse_spec(params: &Value) -> Result<RequestSpec> {
293        let method_str = params["method"].as_str().unwrap_or("GET").to_uppercase();
294        let method = match method_str.as_str() {
295            "GET" => Method::GET,
296            "POST" => Method::POST,
297            "PUT" => Method::PUT,
298            "PATCH" => Method::PATCH,
299            "DELETE" => Method::DELETE,
300            "HEAD" => Method::HEAD,
301            other => {
302                return Err(StygianError::from(ServiceError::Unavailable(format!(
303                    "unknown HTTP method: {other}"
304                ))));
305            }
306        };
307
308        let extra_headers = params["headers"]
309            .as_object()
310            .map(|obj| {
311                obj.iter()
312                    .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_owned())))
313                    .collect()
314            })
315            .unwrap_or_default();
316
317        let query_params = params["query"]
318            .as_object()
319            .map(|obj| {
320                obj.iter()
321                    .filter_map(|(k, v)| {
322                        let s = if v.is_string() {
323                            v.as_str().map(ToOwned::to_owned)
324                        } else {
325                            Some(v.to_string())
326                        };
327                        s.map(|val| (k.clone(), val))
328                    })
329                    .collect()
330            })
331            .unwrap_or_default();
332
333        // body_raw takes precedence over body (raw string vs structured JSON).
334        let body = if let Some(raw) = params["body_raw"].as_str().filter(|s| !s.is_empty()) {
335            Some(RequestBody::Raw(raw.to_owned()))
336        } else if !params["body"].is_null() {
337            Some(RequestBody::Json(params["body"].clone()))
338        } else {
339            None
340        };
341
342        let accept = params["accept"]
343            .as_str()
344            .unwrap_or("application/json")
345            .to_owned();
346
347        let auth = Self::parse_auth(&params["auth"]);
348
349        let data_path = match params["response"]["data_path"].as_str() {
350            Some("") | None => None,
351            Some(p) => Some(p.to_owned()),
352        };
353        let collect_as_array = params["response"]["collect_as_array"]
354            .as_bool()
355            .unwrap_or(false);
356
357        let max_pages = params["pagination"]["max_pages"]
358            .as_u64()
359            .map_or(1, |n| usize::try_from(n).unwrap_or(usize::MAX));
360
361        let pagination = Self::parse_pagination(&params["pagination"]);
362
363        Ok(RequestSpec {
364            method,
365            extra_headers,
366            query_params,
367            body,
368            auth,
369            accept,
370            data_path,
371            collect_as_array,
372            pagination,
373            max_pages,
374        })
375    }
376
377    /// Parse `params.auth` into an [`AuthScheme`].
378    #[allow(clippy::indexing_slicing)]
379    fn parse_auth(auth: &Value) -> AuthScheme {
380        match auth["type"].as_str().unwrap_or("none") {
381            "bearer" | "oauth2" => auth["token"]
382                .as_str()
383                .map_or(AuthScheme::None, |t| AuthScheme::Bearer(t.to_owned())),
384            "basic" => AuthScheme::Basic {
385                username: auth["username"].as_str().unwrap_or("").to_owned(),
386                password: auth["password"].as_str().unwrap_or("").to_owned(),
387            },
388            "api_key_header" => AuthScheme::ApiKeyHeader {
389                header: auth["header"].as_str().unwrap_or("X-Api-Key").to_owned(),
390                key: auth["key"].as_str().unwrap_or("").to_owned(),
391            },
392            "api_key_query" => AuthScheme::ApiKeyQuery {
393                param: auth["param"].as_str().unwrap_or("api_key").to_owned(),
394                key: auth["key"].as_str().unwrap_or("").to_owned(),
395            },
396            _ => AuthScheme::None,
397        }
398    }
399
400    /// Parse `params.pagination` into a [`PaginationStrategy`].
401    #[allow(clippy::indexing_slicing)]
402    fn parse_pagination(pag: &Value) -> PaginationStrategy {
403        match pag["strategy"].as_str().unwrap_or("none") {
404            "offset" => PaginationStrategy::Offset {
405                page_param: pag["page_param"].as_str().unwrap_or("page").to_owned(),
406                page_size_param: pag["page_size_param"].as_str().map(ToOwned::to_owned),
407                page_size: pag["page_size"].as_u64(),
408                current_page: pag["start_page"].as_u64().unwrap_or(1),
409            },
410            "cursor" => PaginationStrategy::Cursor {
411                cursor_param: pag["cursor_param"].as_str().unwrap_or("cursor").to_owned(),
412                cursor_field: pag["cursor_field"]
413                    .as_str()
414                    .unwrap_or("next_cursor")
415                    .to_owned(),
416            },
417            "link_header" => PaginationStrategy::LinkHeader,
418            _ => PaginationStrategy::None,
419        }
420    }
421
422    /// Extract the data portion of a parsed response using `spec.data_path`.
423    fn extract_data(response: &Value, spec: &RequestSpec) -> Value {
424        spec.data_path
425            .as_deref()
426            .and_then(|path| Self::extract_path(response, path))
427            .cloned()
428            .unwrap_or_else(|| response.clone())
429    }
430
431    /// Execute a single HTTP request, retrying on transient failures.
432    async fn send_one(
433        &self,
434        url: &str,
435        spec: &RequestSpec,
436        extra_query: &HashMap<String, String>,
437    ) -> Result<(Value, Option<String>)> {
438        let mut last_err: Option<StygianError> = None;
439
440        for attempt in 0..=self.config.max_retries {
441            if attempt > 0 {
442                // Honour server Retry-After when available; otherwise exponential backoff.
443                let delay = match &last_err {
444                    Some(StygianError::Service(ServiceError::RateLimited { retry_after_ms })) => {
445                        Duration::from_millis(*retry_after_ms)
446                    }
447                    _ => self.config.retry_base_delay * 2u32.saturating_pow(attempt - 1),
448                };
449                tokio::time::sleep(delay).await;
450                debug!(url, attempt, ?delay, "REST API retry");
451            }
452
453            match self.do_send(url, spec, extra_query).await {
454                Ok(r) => return Ok(r),
455                Err(e) if is_retryable(&e) && attempt < self.config.max_retries => {
456                    last_err = Some(e);
457                }
458                Err(e) => return Err(e),
459            }
460        }
461
462        Err(last_err.unwrap_or_else(|| {
463            StygianError::from(ServiceError::Unavailable("max retries exceeded".into()))
464        }))
465    }
466
467    /// Perform exactly one HTTP round-trip (no retry).
468    ///
469    /// Returns the parsed JSON response body and the raw `Link` header value (if present).
470    async fn do_send(
471        &self,
472        url: &str,
473        spec: &RequestSpec,
474        extra_query: &HashMap<String, String>,
475    ) -> Result<(Value, Option<String>)> {
476        let mut req = self.client.request(spec.method.clone(), url);
477
478        // Accept header
479        req = req.header(header::ACCEPT, spec.accept.as_str());
480
481        // Auth — header-based schemes
482        req = match &spec.auth {
483            AuthScheme::Bearer(token) => req.bearer_auth(token),
484            AuthScheme::Basic { username, password } => req.basic_auth(username, Some(password)),
485            AuthScheme::ApiKeyHeader { header: hdr, key } => req.header(hdr.as_str(), key.as_str()),
486            AuthScheme::ApiKeyQuery { .. } | AuthScheme::None => req,
487        };
488
489        // Custom headers
490        for (k, v) in &spec.extra_headers {
491            req = req.header(k.as_str(), v.as_str());
492        }
493
494        // Merge query params: static + per-page extra + API key query (if applicable)
495        let mut merged: HashMap<String, String> = spec.query_params.clone();
496        merged.extend(extra_query.iter().map(|(k, v)| (k.clone(), v.clone())));
497        if let AuthScheme::ApiKeyQuery { param, key } = &spec.auth {
498            merged.insert(param.clone(), key.clone());
499        }
500        if !merged.is_empty() {
501            let pairs: Vec<(&String, &String)> = merged.iter().collect();
502            req = req.query(&pairs);
503        }
504
505        // Body
506        req = match &spec.body {
507            Some(RequestBody::Json(v)) => req.json(v),
508            Some(RequestBody::Raw(s)) => req.body(s.clone()),
509            None => req,
510        };
511
512        let response = req
513            .send()
514            .await
515            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
516
517        let status = response.status();
518
519        // Capture Link header before consuming the response
520        let link_header = response
521            .headers()
522            .get("link")
523            .and_then(|v| v.to_str().ok())
524            .map(ToOwned::to_owned);
525
526        // 429 — honour server Retry-After hint via dedicated error variant.
527        if status.as_u16() == 429 {
528            let retry_after_secs = response
529                .headers()
530                .get("retry-after")
531                .and_then(|v| v.to_str().ok())
532                .and_then(|s| s.parse::<u64>().ok())
533                .unwrap_or(5);
534            warn!(url, retry_after_secs, "REST API rate-limited (429)");
535            return Err(StygianError::from(ServiceError::RateLimited {
536                retry_after_ms: retry_after_secs.saturating_mul(1000),
537            }));
538        }
539
540        if !status.is_success() {
541            let snippet: String = response
542                .text()
543                .await
544                .unwrap_or_default()
545                .chars()
546                .take(200)
547                .collect();
548            return Err(StygianError::from(ServiceError::Unavailable(format!(
549                "HTTP {status}: {snippet}"
550            ))));
551        }
552
553        let body = response
554            .text()
555            .await
556            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
557
558        // Parse as JSON when possible; wrap plain text as a JSON string otherwise.
559        let parsed: Value = serde_json::from_str(&body).unwrap_or(Value::String(body));
560
561        Ok((parsed, link_header))
562    }
563}
564
565impl Default for RestApiAdapter {
566    fn default() -> Self {
567        Self::new()
568    }
569}
570
571// ─── Helpers ──────────────────────────────────────────────────────────────────
572
573/// Returns `true` for transient errors that are worth retrying.
574fn is_retryable(err: &StygianError) -> bool {
575    match err {
576        StygianError::Service(ServiceError::RateLimited { .. }) => true,
577        StygianError::Service(ServiceError::Unavailable(msg)) => {
578            msg.contains("429")
579                || msg.contains("500")
580                || msg.contains("502")
581                || msg.contains("503")
582                || msg.contains("504")
583                || msg.contains("connection")
584                || msg.contains("timed out")
585        }
586        _ => false,
587    }
588}
589
590// ─── ScrapingService ──────────────────────────────────────────────────────────
591
592#[async_trait]
593impl ScrapingService for RestApiAdapter {
594    /// Execute one or more REST API requests and return the aggregated result.
595    ///
596    /// # `ServiceInput.url`
597    ///
598    /// Base URL of the REST endpoint (including path; query string is optional).
599    ///
600    /// # `ServiceInput.params` contract
601    ///
602    /// ```json
603    /// {
604    ///   "method":   "GET",
605    ///   "body":     { "key": "value" },
606    ///   "body_raw": "raw body string",
607    ///   "headers":  { "X-Custom-Header": "value" },
608    ///   "query":    { "state": "open", "per_page": "30" },
609    ///   "accept":   "application/json",
610    ///
611    ///   "auth": {
612    ///     "type":     "bearer",
613    ///     "token":    "...",
614    ///     "username": "user",
615    ///     "password": "pass",
616    ///     "header":   "X-Api-Key",
617    ///     "param":    "api_key",
618    ///     "key":      "sk-..."
619    ///   },
620    ///
621    ///   "response": {
622    ///     "data_path":        "items",
623    ///     "collect_as_array": true
624    ///   },
625    ///
626    ///   "pagination": {
627    ///     "strategy":        "link_header",
628    ///     "max_pages":       10,
629    ///     "page_param":      "page",
630    ///     "page_size_param": "per_page",
631    ///     "page_size":       100,
632    ///     "start_page":      1,
633    ///     "cursor_param":    "cursor",
634    ///     "cursor_field":    "meta.next_cursor"
635    ///   }
636    /// }
637    /// ```
638    ///
639    /// # Auth `type` values
640    ///
641    /// | `type` | Required fields | Description |
642    /// | --- | --- | --- |
643    /// | `"bearer"` / `"oauth2"` | `token` | `Authorization: Bearer <token>` |
644    /// | `"basic"` | `username`, `password` | HTTP Basic |
645    /// | `"api_key_header"` | `header`, `key` | Custom header |
646    /// | `"api_key_query"` | `param`, `key` | Query string |
647    /// | `"none"` or absent | — | No auth |
648    ///
649    /// # Pagination strategies
650    ///
651    /// | `strategy` | Description |
652    /// | --- | --- |
653    /// | `"none"` | Single request (default) |
654    /// | `"offset"` | Increment `page_param` from `start_page` |
655    /// | `"cursor"` | Extract next cursor at `cursor_field` in each response; pass it as `cursor_param` |
656    /// | `"link_header"` | Follow RFC 8288 `Link: <url>; rel="next"` header |
657    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
658        let spec = Self::parse_spec(&input.params)?;
659
660        let mut accumulated: Vec<Value> = Vec::new();
661        let mut page_count: usize = 0;
662        let mut current_url = input.url.clone();
663        let mut pagination = spec.pagination.clone();
664        let mut extra_query: HashMap<String, String> = HashMap::new();
665
666        // Cursor state lives outside the loop so it persists across pages.
667        let mut cursor_state: Option<String> = None;
668
669        info!(url = %input.url, "REST API execute start");
670
671        loop {
672            if page_count >= spec.max_pages {
673                debug!(%current_url, page_count, "REST API: max_pages reached");
674                break;
675            }
676
677            // Build per-page query additions
678            extra_query.clear();
679            match &pagination {
680                PaginationStrategy::Offset {
681                    page_param,
682                    page_size_param,
683                    page_size,
684                    current_page,
685                } => {
686                    extra_query.insert(page_param.clone(), current_page.to_string());
687                    if let (Some(size_param), Some(size)) = (page_size_param, page_size) {
688                        extra_query.insert(size_param.clone(), size.to_string());
689                    }
690                }
691                PaginationStrategy::Cursor { cursor_param, .. } => {
692                    if let Some(ref cursor) = cursor_state {
693                        extra_query.insert(cursor_param.clone(), cursor.clone());
694                    }
695                }
696                PaginationStrategy::None | PaginationStrategy::LinkHeader => {}
697            }
698
699            let (response, link_header) = self.send_one(&current_url, &spec, &extra_query).await?;
700
701            let page_data = Self::extract_data(&response, &spec);
702
703            // Accumulate — empty array responses signal end-of-pagination.
704            match &page_data {
705                Value::Array(items) => {
706                    if items.is_empty() {
707                        debug!("REST API: empty page, stopping pagination");
708                        break;
709                    }
710                    accumulated.extend(items.iter().cloned());
711                }
712                other => {
713                    accumulated.push(other.clone());
714                }
715            }
716            page_count += 1;
717
718            // Advance pagination state
719            let stop = match &mut pagination {
720                PaginationStrategy::None => true,
721                PaginationStrategy::Offset { current_page, .. } => {
722                    *current_page += 1;
723                    false
724                }
725                PaginationStrategy::Cursor { cursor_field, .. } => {
726                    Self::extract_path(&response, cursor_field.as_str())
727                        .and_then(Value::as_str)
728                        .filter(|s| !s.is_empty())
729                        .map(ToOwned::to_owned)
730                        .is_none_or(|cursor| {
731                            cursor_state = Some(cursor);
732                            false
733                        })
734                }
735                PaginationStrategy::LinkHeader => link_header
736                    .as_deref()
737                    .and_then(Self::parse_link_next)
738                    .is_none_or(|next_url| {
739                        current_url = next_url;
740                        false
741                    }),
742            };
743            if stop {
744                break;
745            }
746        }
747
748        // Serialise accumulated results
749        let data_value = if spec.collect_as_array || accumulated.len() > 1 {
750            Value::Array(accumulated)
751        } else {
752            accumulated.into_iter().next().unwrap_or(Value::Null)
753        };
754
755        let data_str = match &data_value {
756            Value::String(s) => s.clone(),
757            other => serde_json::to_string_pretty(other).unwrap_or_default(),
758        };
759
760        let metadata = json!({
761            "url":        input.url,
762            "page_count": page_count,
763        });
764
765        info!(%input.url, page_count, "REST API execute done");
766
767        Ok(ServiceOutput {
768            data: data_str,
769            metadata,
770        })
771    }
772
773    fn name(&self) -> &'static str {
774        "rest-api"
775    }
776}
777
778// ─── Tests ────────────────────────────────────────────────────────────────────
779
780#[cfg(test)]
781#[allow(clippy::unwrap_used, clippy::panic, clippy::indexing_slicing)]
782mod tests {
783    use super::*;
784    use serde_json::json;
785
786    // ── parse_auth ─────────────────────────────────────────────────────────────
787
788    #[test]
789    fn parse_auth_bearer() {
790        let auth = json!({"type": "bearer", "token": "tok123"});
791        match RestApiAdapter::parse_auth(&auth) {
792            AuthScheme::Bearer(t) => assert_eq!(t, "tok123"),
793            other => panic!("unexpected: {other:?}"),
794        }
795    }
796
797    #[test]
798    fn parse_auth_oauth2_alias() {
799        let auth = json!({"type": "oauth2", "token": "oauth_tok"});
800        match RestApiAdapter::parse_auth(&auth) {
801            AuthScheme::Bearer(t) => assert_eq!(t, "oauth_tok"),
802            other => panic!("unexpected: {other:?}"),
803        }
804    }
805
806    #[test]
807    fn parse_auth_basic() {
808        let auth = json!({"type": "basic", "username": "alice", "password": "s3cr3t"});
809        match RestApiAdapter::parse_auth(&auth) {
810            AuthScheme::Basic { username, password } => {
811                assert_eq!(username, "alice");
812                assert_eq!(password, "s3cr3t");
813            }
814            other => panic!("unexpected: {other:?}"),
815        }
816    }
817
818    #[test]
819    fn parse_auth_api_key_header() {
820        let auth = json!({"type": "api_key_header", "header": "X-Token", "key": "k123"});
821        match RestApiAdapter::parse_auth(&auth) {
822            AuthScheme::ApiKeyHeader { header, key } => {
823                assert_eq!(header, "X-Token");
824                assert_eq!(key, "k123");
825            }
826            other => panic!("unexpected: {other:?}"),
827        }
828    }
829
830    #[test]
831    fn parse_auth_api_key_query() {
832        let auth = json!({"type": "api_key_query", "param": "api_key", "key": "qk"});
833        match RestApiAdapter::parse_auth(&auth) {
834            AuthScheme::ApiKeyQuery { param, key } => {
835                assert_eq!(param, "api_key");
836                assert_eq!(key, "qk");
837            }
838            other => panic!("unexpected: {other:?}"),
839        }
840    }
841
842    #[test]
843    fn parse_auth_none_default() {
844        let auth = json!(null);
845        assert!(matches!(
846            RestApiAdapter::parse_auth(&auth),
847            AuthScheme::None
848        ));
849    }
850
851    // ── extract_path ───────────────────────────────────────────────────────────
852
853    #[test]
854    fn extract_path_top_level() {
855        let v = json!({"items": [1, 2, 3]});
856        assert_eq!(
857            RestApiAdapter::extract_path(&v, "items"),
858            Some(&json!([1, 2, 3]))
859        );
860    }
861
862    #[test]
863    fn extract_path_nested() {
864        let v = json!({"meta": {"next_cursor": "abc"}});
865        assert_eq!(
866            RestApiAdapter::extract_path(&v, "meta.next_cursor"),
867            Some(&json!("abc"))
868        );
869    }
870
871    #[test]
872    fn extract_path_missing() {
873        let v = json!({"a": {"b": 1}});
874        assert!(RestApiAdapter::extract_path(&v, "a.c").is_none());
875    }
876
877    // ── parse_link_next ────────────────────────────────────────────────────────
878
879    #[test]
880    fn parse_link_next_present() {
881        let h = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
882        assert_eq!(
883            RestApiAdapter::parse_link_next(h),
884            Some("https://api.example.com/items?page=2".to_owned())
885        );
886    }
887
888    #[test]
889    fn parse_link_next_absent() {
890        let h = r#"<https://api.example.com/items?page=1>; rel="prev""#;
891        assert!(RestApiAdapter::parse_link_next(h).is_none());
892    }
893
894    #[test]
895    fn parse_link_next_single() {
896        let h = r#"<https://api.example.com/items?page=3>; rel="next""#;
897        assert_eq!(
898            RestApiAdapter::parse_link_next(h),
899            Some("https://api.example.com/items?page=3".to_owned())
900        );
901    }
902
903    // ── parse_spec ─────────────────────────────────────────────────────────────
904
905    #[test]
906    fn parse_spec_defaults() {
907        let spec = RestApiAdapter::parse_spec(&json!({})).unwrap();
908        assert_eq!(spec.method, Method::GET);
909        assert_eq!(spec.accept, "application/json");
910        assert_eq!(spec.max_pages, 1);
911        assert!(spec.data_path.is_none());
912        assert!(!spec.collect_as_array);
913        assert!(matches!(spec.pagination, PaginationStrategy::None));
914    }
915
916    #[test]
917    fn parse_spec_post_with_body_and_headers() {
918        let params = json!({
919            "method":  "POST",
920            "body":    { "key": "value" },
921            "headers": { "X-Foo": "bar" },
922            "query":   { "limit": "10" }
923        });
924        let spec = RestApiAdapter::parse_spec(&params).unwrap();
925        assert_eq!(spec.method, Method::POST);
926        assert_eq!(spec.extra_headers.get("X-Foo"), Some(&"bar".to_string()));
927        assert_eq!(spec.query_params.get("limit"), Some(&"10".to_string()));
928        assert!(matches!(spec.body, Some(RequestBody::Json(_))));
929    }
930
931    #[test]
932    fn parse_spec_unknown_method_returns_error() {
933        let result = RestApiAdapter::parse_spec(&json!({"method": "BREW"}));
934        assert!(result.is_err());
935    }
936
937    #[test]
938    fn parse_spec_cursor_pagination() {
939        let params = json!({
940            "pagination": {
941                "strategy":     "cursor",
942                "cursor_param": "after",
943                "cursor_field": "page_info.end_cursor",
944                "max_pages":    10
945            }
946        });
947        let spec = RestApiAdapter::parse_spec(&params).unwrap();
948        assert_eq!(spec.max_pages, 10);
949        match spec.pagination {
950            PaginationStrategy::Cursor {
951                cursor_param,
952                cursor_field,
953            } => {
954                assert_eq!(cursor_param, "after");
955                assert_eq!(cursor_field, "page_info.end_cursor");
956            }
957            other => panic!("unexpected: {other:?}"),
958        }
959    }
960
961    #[test]
962    fn parse_spec_offset_pagination() {
963        let params = json!({
964            "pagination": {
965                "strategy":        "offset",
966                "page_param":      "page",
967                "page_size_param": "per_page",
968                "page_size":       50,
969                "start_page":      1,
970                "max_pages":       3
971            }
972        });
973        let spec = RestApiAdapter::parse_spec(&params).unwrap();
974        assert_eq!(spec.max_pages, 3);
975        match spec.pagination {
976            PaginationStrategy::Offset {
977                page_size,
978                current_page,
979                page_param,
980                ..
981            } => {
982                assert_eq!(page_size, Some(50));
983                assert_eq!(current_page, 1);
984                assert_eq!(page_param, "page");
985            }
986            other => panic!("unexpected: {other:?}"),
987        }
988    }
989
990    #[test]
991    fn parse_spec_link_header_pagination() {
992        let params = json!({
993            "pagination": { "strategy": "link_header", "max_pages": 5 }
994        });
995        let spec = RestApiAdapter::parse_spec(&params).unwrap();
996        assert_eq!(spec.max_pages, 5);
997        assert!(matches!(spec.pagination, PaginationStrategy::LinkHeader));
998    }
999
1000    #[test]
1001    fn parse_spec_data_path_and_collect_as_array() {
1002        let params = json!({
1003            "response": { "data_path": "data.items", "collect_as_array": true }
1004        });
1005        let spec = RestApiAdapter::parse_spec(&params).unwrap();
1006        assert_eq!(spec.data_path, Some("data.items".to_owned()));
1007        assert!(spec.collect_as_array);
1008    }
1009
1010    #[test]
1011    fn parse_spec_empty_data_path_is_none() {
1012        let params = json!({ "response": { "data_path": "" } });
1013        let spec = RestApiAdapter::parse_spec(&params).unwrap();
1014        assert!(spec.data_path.is_none());
1015    }
1016
1017    // ── adapter_name ───────────────────────────────────────────────────────────
1018
1019    #[test]
1020    fn adapter_name() {
1021        assert_eq!(RestApiAdapter::new().name(), "rest-api");
1022    }
1023
1024    // ── is_retryable ────────────────────────────────────────────────────────────
1025
1026    #[test]
1027    fn is_retryable_429() {
1028        let e = StygianError::from(ServiceError::Unavailable(
1029            "HTTP 429 rate-limited".to_string(),
1030        ));
1031        assert!(is_retryable(&e));
1032    }
1033
1034    #[test]
1035    fn is_retryable_503() {
1036        let e = StygianError::from(ServiceError::Unavailable(
1037            "HTTP 503 Service Unavailable".to_string(),
1038        ));
1039        assert!(is_retryable(&e));
1040    }
1041
1042    #[test]
1043    fn is_retryable_404_not_retryable() {
1044        let e = StygianError::from(ServiceError::Unavailable("HTTP 404 Not Found".to_string()));
1045        assert!(!is_retryable(&e));
1046    }
1047
1048    // ── integration ────────────────────────────────────────────────────────────
1049
1050    /// Real HTTP integration test — requires `REST_API_TEST_URL` env var.
1051    ///
1052    /// Run with: `REST_API_TEST_URL=https://httpbin.org/get cargo test -- --ignored`
1053    #[tokio::test]
1054    #[ignore = "requires live REST API endpoint; set REST_API_TEST_URL env var"]
1055    async fn integration_get_httpbin() {
1056        let url = std::env::var("REST_API_TEST_URL")
1057            .unwrap_or_else(|_| "https://httpbin.org/get".to_string());
1058
1059        let adapter = RestApiAdapter::new();
1060        let input = ServiceInput {
1061            url,
1062            params: json!({}),
1063        };
1064        let output = adapter.execute(input).await.unwrap();
1065        assert!(!output.data.is_empty());
1066        assert_eq!(output.metadata["page_count"], 1);
1067    }
1068}