Skip to main content

faucet_source_rest/pagination/
mod.rs

1//! Pagination strategies for REST APIs.
2
3pub mod cursor;
4pub mod link_header;
5pub mod next_link_body;
6pub mod offset;
7pub mod page;
8
9use faucet_core::FaucetError;
10use reqwest::header::HeaderMap;
11use schemars::JsonSchema;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14use std::collections::HashMap;
15
/// Supported pagination strategies.
///
/// Serialized as an internally tagged enum: the variant name is stored under
/// the `type` key next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type")]
pub enum PaginationStyle {
    /// No pagination: `advance` always reports that there is no next page and
    /// `apply_params` adds nothing to the request.
    None,
    /// Token-based pagination. `next_token_path` is handed to
    /// `cursor::advance` together with the response body to obtain the next
    /// token; `param_name` is handed to `cursor::apply_params` together with
    /// that token when the following request is built.
    // NOTE(review): `next_token_path` is presumably a JSONPath expression like
    // `next_link_path` below — confirm against `cursor.rs`.
    Cursor {
        next_token_path: String,
        param_name: String,
    },
    /// The next page's link is taken from the response headers via
    /// `link_header::extract_next_link`; no request parameters are added.
    LinkHeader,
    /// The full URL of the next page is embedded in the response body.
    /// `next_link_path` is a JSONPath expression pointing to that URL field
    /// (e.g. `"$.next_link"`).  Pagination stops when the field is absent,
    /// null, or an empty string.
    NextLinkInBody {
        next_link_path: String,
    },
    /// Page-number pagination. All four fields are forwarded to
    /// `page::apply_params` together with the number of pages advanced so far.
    /// Pagination stops when a page yields zero records or when a page's body
    /// is identical to the previous page's body.
    PageNumber {
        param_name: String,
        start_page: usize,
        page_size: Option<usize>,
        page_size_param: Option<String>,
    },
    /// Offset/limit pagination. `offset_param`, `limit_param` and `limit` are
    /// forwarded to `offset::apply_params` together with the current offset;
    /// `limit` and the optional `total_path` are forwarded to
    /// `offset::advance`, which decides whether another page exists.
    Offset {
        offset_param: String,
        limit_param: String,
        limit: usize,
        total_path: Option<String>,
    },
}
46
/// Internal state tracked across pages.
///
/// The derived `Default` is the starting state: both counters are zero and
/// every optional field is `None`.
#[derive(Debug, Default)]
pub struct PaginationState {
    /// Number of `PageNumber` advances so far: incremented by one on every
    /// `PaginationStyle::advance` call for that style and forwarded to
    /// `page::apply_params`.
    pub page: usize,
    /// Token slot for the `Cursor` style: written by `cursor::advance` and
    /// forwarded to `cursor::apply_params`.
    pub next_token: Option<String>,
    /// Offset slot for the `Offset` style: updated by `offset::advance` and
    /// forwarded to `offset::apply_params`.
    pub offset: usize,
    /// Next-page link for the `LinkHeader` and `NextLinkInBody` styles.
    /// `LinkHeader` resets it to `None` when the response carries no further
    /// link.
    pub next_link: Option<String>,
    /// The previous page's token/link, used for loop detection.
    /// If `advance()` produces the same value twice in a row, pagination
    /// is stuck and we stop rather than looping forever.
    #[doc(hidden)]
    pub previous_token: Option<String>,
    /// Fingerprint of the previous page's body, used by `PageNumber` loop
    /// detection: APIs that clamp an out-of-range page to the last page and
    /// re-return it (non-empty) would otherwise loop until `max_pages`.
    #[doc(hidden)]
    pub previous_page_fingerprint: Option<u64>,
}
65
66/// Cheap, stable fingerprint of a response body for content-stagnation
67/// loop detection.
68fn body_fingerprint(body: &Value) -> u64 {
69    use std::hash::{Hash, Hasher};
70    let mut h = std::collections::hash_map::DefaultHasher::new();
71    body.to_string().hash(&mut h);
72    h.finish()
73}
74
75impl PaginationStyle {
76    pub fn apply_params(&self, params: &mut HashMap<String, String>, state: &PaginationState) {
77        match self {
78            PaginationStyle::None => {}
79            PaginationStyle::Cursor { param_name, .. } => {
80                cursor::apply_params(params, param_name, &state.next_token);
81            }
82            PaginationStyle::LinkHeader => {}
83            PaginationStyle::NextLinkInBody { .. } => {}
84            PaginationStyle::PageNumber {
85                param_name,
86                start_page,
87                page_size,
88                page_size_param,
89            } => {
90                page::apply_params(
91                    params,
92                    param_name,
93                    *start_page,
94                    state.page,
95                    *page_size,
96                    page_size_param.as_deref(),
97                );
98            }
99            PaginationStyle::Offset {
100                offset_param,
101                limit_param,
102                limit,
103                ..
104            } => {
105                offset::apply_params(params, offset_param, limit_param, state.offset, *limit);
106            }
107        }
108    }
109
110    /// Advance pagination state based on the response body and headers.
111    /// Returns `true` if there is a next page to fetch.
112    ///
113    /// Includes **loop detection**: if a cursor or next-link value is identical
114    /// to the previous page's value, pagination stops with a warning instead of
115    /// looping forever.
116    pub fn advance(
117        &self,
118        body: &Value,
119        headers: &HeaderMap,
120        state: &mut PaginationState,
121        record_count: usize,
122    ) -> Result<bool, FaucetError> {
123        match self {
124            PaginationStyle::None => Ok(false),
125            PaginationStyle::Cursor {
126                next_token_path, ..
127            } => {
128                let has_next = cursor::advance(body, next_token_path, &mut state.next_token)?;
129                if has_next {
130                    if state.next_token == state.previous_token {
131                        tracing::warn!(
132                            "pagination loop detected: cursor {:?} repeated — stopping",
133                            state.next_token
134                        );
135                        return Ok(false);
136                    }
137                    state.previous_token = state.next_token.clone();
138                }
139                Ok(has_next)
140            }
141            PaginationStyle::LinkHeader => match link_header::extract_next_link(headers) {
142                Some(link) => {
143                    if Some(&link) == state.previous_token.as_ref() {
144                        tracing::warn!(
145                            "pagination loop detected: link {link:?} repeated — stopping"
146                        );
147                        state.next_link = None;
148                        return Ok(false);
149                    }
150                    state.previous_token = Some(link.clone());
151                    state.next_link = Some(link);
152                    Ok(true)
153                }
154                None => {
155                    state.next_link = None;
156                    Ok(false)
157                }
158            },
159            PaginationStyle::NextLinkInBody { next_link_path } => {
160                let has_next = next_link_body::advance(body, next_link_path, &mut state.next_link)?;
161                if has_next {
162                    if state.next_link == state.previous_token {
163                        tracing::warn!(
164                            "pagination loop detected: next_link {:?} repeated — stopping",
165                            state.next_link
166                        );
167                        return Ok(false);
168                    }
169                    state.previous_token = state.next_link.clone();
170                }
171                Ok(has_next)
172            }
173            PaginationStyle::PageNumber { .. } => {
174                state.page += 1;
175                if record_count == 0 {
176                    return Ok(false);
177                }
178                // Content-stagnation guard: some APIs clamp an out-of-range
179                // page to the last page and return it again (non-empty), which
180                // would loop until `max_pages` and duplicate records. Stop if
181                // this page's body is identical to the previous one (#78/#15).
182                let fp = body_fingerprint(body);
183                if state.previous_page_fingerprint == Some(fp) {
184                    tracing::warn!(
185                        "pagination loop detected: PageNumber returned an identical page — stopping"
186                    );
187                    return Ok(false);
188                }
189                state.previous_page_fingerprint = Some(fp);
190                Ok(true)
191            }
192            PaginationStyle::Offset {
193                limit, total_path, ..
194            } => offset::advance(
195                body,
196                &mut state.offset,
197                record_count,
198                *limit,
199                total_path.as_deref(),
200            ),
201        }
202    }
203}