1use anyhow::{anyhow, Result};
18use reqwest::header::HeaderMap;
19use serde_json::Value as JsonValue;
20use url::Url;
21
22use crate::config::PaginationConfig;
23
/// What a paginator asks for after inspecting a response.
#[derive(Debug)]
pub enum NextPage {
    /// `(name, value)` query parameter pairs describing the next request.
    QueryParams(Vec<(String, String)>),
    /// A complete URL to request next.
    NewUrl(String),
}
32
33pub trait Paginator: Send + Sync {
35 fn initial_params(&self) -> Vec<(String, String)>;
37
38 fn next_page(
41 &mut self,
42 response_body: &JsonValue,
43 response_headers: &HeaderMap,
44 items_count: usize,
45 ) -> Result<Option<NextPage>>;
46}
47
48fn validate_pagination_url(next_url: &str, origin_host: &str) -> Result<String> {
51 let parsed =
52 Url::parse(next_url).map_err(|e| anyhow!("Invalid pagination URL '{next_url}': {e}"))?;
53
54 let scheme = parsed.scheme();
55 if scheme != "http" && scheme != "https" {
56 return Err(anyhow!(
57 "Pagination URL has disallowed scheme '{scheme}': {next_url}"
58 ));
59 }
60
61 let host = parsed
62 .host_str()
63 .ok_or_else(|| anyhow!("Pagination URL has no host: {next_url}"))?;
64
65 if host != origin_host {
66 return Err(anyhow!(
67 "Pagination URL host '{host}' does not match origin host '{origin_host}' (SSRF protection)"
68 ));
69 }
70
71 Ok(next_url.to_string())
72}
73
74pub fn extract_origin_host(url: &str) -> Option<String> {
76 Url::parse(url)
77 .ok()
78 .and_then(|u| u.host_str().map(|h| h.to_string()))
79}
80
81pub fn create_paginator(config: &PaginationConfig, origin_host: String) -> Box<dyn Paginator> {
83 match config {
84 PaginationConfig::OffsetLimit {
85 offset_param,
86 limit_param,
87 page_size,
88 total_path,
89 } => Box::new(OffsetLimitPaginator {
90 offset_param: offset_param.clone(),
91 limit_param: limit_param.clone(),
92 page_size: *page_size,
93 total_path: total_path.clone(),
94 current_offset: 0,
95 }),
96 PaginationConfig::PageNumber {
97 page_param,
98 page_size_param,
99 page_size,
100 total_pages_path,
101 } => Box::new(PageNumberPaginator {
102 page_param: page_param.clone(),
103 page_size_param: page_size_param.clone(),
104 page_size: *page_size,
105 total_pages_path: total_pages_path.clone(),
106 current_page: 1,
107 }),
108 PaginationConfig::Cursor {
109 cursor_param,
110 cursor_path,
111 has_more_path,
112 page_size_param,
113 page_size,
114 } => Box::new(CursorPaginator {
115 cursor_param: cursor_param.clone(),
116 cursor_path: cursor_path.clone(),
117 has_more_path: has_more_path.clone(),
118 page_size_param: page_size_param.clone(),
119 page_size: *page_size,
120 }),
121 PaginationConfig::LinkHeader {
122 page_size_param,
123 page_size,
124 } => Box::new(LinkHeaderPaginator {
125 page_size_param: page_size_param.clone(),
126 page_size: *page_size,
127 origin_host: origin_host.clone(),
128 }),
129 PaginationConfig::NextUrl {
130 next_url_path,
131 base_url,
132 } => Box::new(NextUrlPaginator {
133 next_url_path: next_url_path.clone(),
134 base_url: base_url.clone(),
135 origin_host,
136 }),
137 }
138}
139
/// Paginator for APIs addressed with an offset and a limit.
struct OffsetLimitPaginator {
    /// Name of the query parameter that carries the offset.
    offset_param: String,
    /// Name of the query parameter that carries the page size.
    limit_param: String,
    /// Number of items requested per page.
    page_size: u64,
    /// Optional JSON path to a total item count in the response body.
    total_path: Option<String>,
    /// Offset that the next request will use.
    current_offset: u64,
}
149
150impl Paginator for OffsetLimitPaginator {
151 fn initial_params(&self) -> Vec<(String, String)> {
152 vec![
153 (self.offset_param.clone(), "0".to_string()),
154 (self.limit_param.clone(), self.page_size.to_string()),
155 ]
156 }
157
158 fn next_page(
159 &mut self,
160 response_body: &JsonValue,
161 _response_headers: &HeaderMap,
162 items_count: usize,
163 ) -> Result<Option<NextPage>> {
164 self.current_offset += self.page_size;
165
166 if (items_count as u64) < self.page_size {
168 return Ok(None);
169 }
170
171 if let Some(ref total_path) = self.total_path {
173 if let Some(total) = extract_json_path_u64(response_body, total_path) {
174 if self.current_offset >= total {
175 return Ok(None);
176 }
177 }
178 }
179
180 Ok(Some(NextPage::QueryParams(vec![
181 (self.offset_param.clone(), self.current_offset.to_string()),
182 (self.limit_param.clone(), self.page_size.to_string()),
183 ])))
184 }
185}
186
/// Paginator for APIs addressed with a page number.
struct PageNumberPaginator {
    /// Name of the query parameter that carries the page number.
    page_param: String,
    /// Name of the query parameter that carries the page size.
    page_size_param: String,
    /// Number of items requested per page.
    page_size: u64,
    /// Optional JSON path to a total page count in the response body.
    total_pages_path: Option<String>,
    /// Page number that the next request will use.
    current_page: u64,
}
196
197impl Paginator for PageNumberPaginator {
198 fn initial_params(&self) -> Vec<(String, String)> {
199 vec![
200 (self.page_param.clone(), "1".to_string()),
201 (self.page_size_param.clone(), self.page_size.to_string()),
202 ]
203 }
204
205 fn next_page(
206 &mut self,
207 response_body: &JsonValue,
208 _response_headers: &HeaderMap,
209 items_count: usize,
210 ) -> Result<Option<NextPage>> {
211 self.current_page += 1;
212
213 if (items_count as u64) < self.page_size {
215 return Ok(None);
216 }
217
218 if let Some(ref total_path) = self.total_pages_path {
220 if let Some(total_pages) = extract_json_path_u64(response_body, total_path) {
221 if self.current_page > total_pages {
222 return Ok(None);
223 }
224 }
225 }
226
227 Ok(Some(NextPage::QueryParams(vec![
228 (self.page_param.clone(), self.current_page.to_string()),
229 (self.page_size_param.clone(), self.page_size.to_string()),
230 ])))
231 }
232}
233
/// Paginator for APIs that hand back a cursor in the response body.
struct CursorPaginator {
    /// Name of the query parameter that carries the cursor.
    cursor_param: String,
    /// JSON path to the cursor value in the response body.
    cursor_path: String,
    /// Optional JSON path to a boolean "more results" flag.
    has_more_path: Option<String>,
    /// Optional name of the query parameter that carries the page size.
    page_size_param: Option<String>,
    /// Optional page size; only sent when `page_size_param` is also set.
    page_size: Option<u64>,
}
243
244impl Paginator for CursorPaginator {
245 fn initial_params(&self) -> Vec<(String, String)> {
246 let mut params = Vec::new();
247 if let (Some(ref param), Some(size)) = (&self.page_size_param, self.page_size) {
248 params.push((param.clone(), size.to_string()));
249 }
250 params
251 }
252
253 fn next_page(
254 &mut self,
255 response_body: &JsonValue,
256 _response_headers: &HeaderMap,
257 items_count: usize,
258 ) -> Result<Option<NextPage>> {
259 if let Some(ref has_more_path) = self.has_more_path {
261 if let Some(has_more) = extract_json_path_bool(response_body, has_more_path) {
262 if !has_more {
263 return Ok(None);
264 }
265 }
266 }
267
268 if items_count == 0 {
270 return Ok(None);
271 }
272
273 let cursor = extract_json_path_string(response_body, &self.cursor_path);
275 match cursor {
276 Some(cursor_value) if !cursor_value.is_empty() => {
277 let mut params = vec![(self.cursor_param.clone(), cursor_value)];
278 if let (Some(ref param), Some(size)) = (&self.page_size_param, self.page_size) {
279 params.push((param.clone(), size.to_string()));
280 }
281 Ok(Some(NextPage::QueryParams(params)))
282 }
283 _ => Ok(None),
284 }
285 }
286}
287
/// Paginator for APIs that advertise the next page in a `link` response header.
struct LinkHeaderPaginator {
    /// Optional name of the query parameter that carries the page size.
    page_size_param: Option<String>,
    /// Optional page size; only sent when `page_size_param` is also set.
    page_size: Option<u64>,
    /// Host that every followed URL must match.
    origin_host: String,
}
295
296impl Paginator for LinkHeaderPaginator {
297 fn initial_params(&self) -> Vec<(String, String)> {
298 let mut params = Vec::new();
299 if let (Some(ref param), Some(size)) = (&self.page_size_param, self.page_size) {
300 params.push((param.clone(), size.to_string()));
301 }
302 params
303 }
304
305 fn next_page(
306 &mut self,
307 _response_body: &JsonValue,
308 response_headers: &HeaderMap,
309 items_count: usize,
310 ) -> Result<Option<NextPage>> {
311 if items_count == 0 {
312 return Ok(None);
313 }
314
315 let next_url = parse_link_header_next(response_headers);
316 match next_url {
317 Some(url) => {
318 let validated = validate_pagination_url(&url, &self.origin_host)?;
319 Ok(Some(NextPage::NewUrl(validated)))
320 }
321 None => Ok(None),
322 }
323 }
324}
325
/// Paginator for APIs that return the next page's URL in the response body.
struct NextUrlPaginator {
    /// JSON path to the next-page URL in the response body.
    next_url_path: String,
    /// Optional prefix for values that do not start with `http://` or `https://`.
    base_url: Option<String>,
    /// Host that every followed URL must match.
    origin_host: String,
}
333
334impl Paginator for NextUrlPaginator {
335 fn initial_params(&self) -> Vec<(String, String)> {
336 Vec::new()
337 }
338
339 fn next_page(
340 &mut self,
341 response_body: &JsonValue,
342 _response_headers: &HeaderMap,
343 _items_count: usize,
344 ) -> Result<Option<NextPage>> {
345 let next_url = extract_json_path_string(response_body, &self.next_url_path);
346 match next_url {
347 Some(url) if !url.is_empty() => {
348 let full_url = if url.starts_with("http://") || url.starts_with("https://") {
350 url
351 } else if let Some(ref base) = self.base_url {
352 format!("{}{}", base.trim_end_matches('/'), url)
353 } else {
354 url
355 };
356 let validated = validate_pagination_url(&full_url, &self.origin_host)?;
357 Ok(Some(NextPage::NewUrl(validated)))
358 }
359 _ => Ok(None),
360 }
361 }
362}
363
364pub fn extract_json_path_string(value: &JsonValue, path: &str) -> Option<String> {
369 let result = navigate_path(value, path)?;
370 match result {
371 JsonValue::String(s) => Some(s.clone()),
372 JsonValue::Number(n) => Some(n.to_string()),
373 JsonValue::Bool(b) => Some(b.to_string()),
374 JsonValue::Null => None,
375 _ => Some(result.to_string()),
376 }
377}
378
379pub fn extract_json_path_u64(value: &JsonValue, path: &str) -> Option<u64> {
381 let result = navigate_path(value, path)?;
382 result.as_u64()
383}
384
385pub fn extract_json_path_bool(value: &JsonValue, path: &str) -> Option<bool> {
387 let result = navigate_path(value, path)?;
388 result.as_bool()
389}
390
391pub fn navigate_path<'a>(value: &'a JsonValue, path: &str) -> Option<&'a JsonValue> {
394 let path = path
395 .strip_prefix("$.")
396 .unwrap_or(path.strip_prefix("$").unwrap_or(path));
397
398 if path.is_empty() {
399 return Some(value);
400 }
401
402 let mut current = value;
403 for segment in split_path_segments(path) {
404 current = navigate_segment(current, &segment)?;
405 }
406 Some(current)
407}
408
/// Breaks a path into key segments and bracket segments.
///
/// `.` separates keys and produces no empty segments. `[` starts a bracket
/// segment that runs up to the next `]` (or to the end of the input when the
/// bracket is never closed) and is emitted re-wrapped as `[content]`.
fn split_path_segments(path: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut stream = path.chars();

    while let Some(ch) = stream.next() {
        if ch != '.' && ch != '[' {
            pending.push(ch);
            continue;
        }

        // Both delimiters end the key collected so far.
        if !pending.is_empty() {
            pieces.push(std::mem::take(&mut pending));
        }

        if ch == '[' {
            // `take_while` also consumes the closing `]`.
            let inner: String = stream.by_ref().take_while(|&c| c != ']').collect();
            pieces.push(format!("[{inner}]"));
        }
    }

    if !pending.is_empty() {
        pieces.push(pending);
    }

    pieces
}
452
453fn navigate_segment<'a>(value: &'a JsonValue, segment: &str) -> Option<&'a JsonValue> {
455 if let Some(index_str) = segment.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
456 let arr = value.as_array()?;
458 if arr.is_empty() {
459 return None;
460 }
461 let index: i64 = index_str.parse().ok()?;
462 let len = arr.len() as i64;
463 let actual_index = if index < 0 {
464 if -index > len {
466 return None;
467 }
468 (len + index) as usize
469 } else {
470 index as usize
471 };
472 arr.get(actual_index)
473 } else {
474 value.get(segment)
476 }
477}
478
479fn parse_link_header_next(headers: &HeaderMap) -> Option<String> {
483 let link_header = headers.get("link")?.to_str().ok()?;
484
485 for part in split_link_header(link_header) {
487 let part = part.trim();
488 if has_rel_next(part) {
490 if let Some(start) = part.find('<') {
492 if let Some(end) = part.find('>') {
493 return Some(part[start + 1..end].to_string());
494 }
495 }
496 }
497 }
498
499 None
500}
501
/// Reports whether a single link entry declares the `next` relation.
///
/// Only the parameters after the `<target>` are inspected, so a `;` inside
/// the target URL is never mistaken for a parameter. If the entry contains no
/// `>`, the whole entry is treated as parameters.
///
/// The parameter name and the relation are matched ASCII case-insensitively.
/// The value may be double-quoted, single-quoted or a bare token, may be
/// surrounded by whitespace, and may list several space-separated relations
/// (for example `rel="prev next"`).
fn has_rel_next(part: &str) -> bool {
    // '>' is one byte, so `end + 1` is always a valid char boundary.
    let params = part.find('>').map_or(part, |end| &part[end + 1..]);

    params.split(';').any(|param| {
        param.split_once('=').map_or(false, |(name, value)| {
            if !name.trim().eq_ignore_ascii_case("rel") {
                return false;
            }

            let value = value.trim();
            // Remove one pair of matching quotes, if present.
            let unquoted = value
                .strip_prefix('"')
                .and_then(|v| v.strip_suffix('"'))
                .or_else(|| value.strip_prefix('\'').and_then(|v| v.strip_suffix('\'')))
                .unwrap_or(value);

            unquoted
                .split_ascii_whitespace()
                .any(|relation| relation.eq_ignore_ascii_case("next"))
        })
    })
}
513
/// Splits a `Link` header value into its individual link entries.
///
/// A comma separates entries only when it sits outside the `<...>` target and
/// outside a double-quoted parameter value (for example `title="a, b"`);
/// backslash escapes inside quoted values are honoured. Entries are returned
/// untrimmed, and the result always has at least one element (the whole
/// input when there is no separator).
fn split_link_header(header: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0u32;
    let mut in_quotes = false;
    let mut escaped = false;
    let mut start = 0;

    for (i, c) in header.char_indices() {
        if in_quotes {
            // Inside a quoted value only the closing quote is significant;
            // a backslash protects the character that follows it.
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_quotes = false;
            }
            continue;
        }

        match c {
            '<' => depth = depth.saturating_add(1),
            '>' => depth = depth.saturating_sub(1),
            // Quotes only open a quoted value outside the target.
            '"' if depth == 0 => in_quotes = true,
            ',' if depth == 0 => {
                parts.push(&header[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }

    parts.push(&header[start..]);
    parts
}
534
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Builds a header map holding a single `link` header with the given value.
    fn headers_with_link(value: &str) -> HeaderMap {
        let mut headers = HeaderMap::new();
        headers.insert("link", value.parse().unwrap());
        headers
    }

    // Builds a `NextUrl` paginator that reads `$.next` and has no base URL.
    fn next_only_paginator(origin: &str) -> Box<dyn Paginator> {
        let config = PaginationConfig::NextUrl {
            next_url_path: "$.next".to_string(),
            base_url: None,
        };
        create_paginator(&config, origin.to_string())
    }

    #[test]
    fn test_extract_simple_path() {
        let doc = json!({"data": {"total": 100}});
        let total = extract_json_path_u64(&doc, "$.data.total");
        assert_eq!(total, Some(100));
    }

    #[test]
    fn test_extract_array_last() {
        let doc = json!({"data": [{"id": "a"}, {"id": "b"}, {"id": "c"}]});
        let last_id = extract_json_path_string(&doc, "$.data[-1].id");
        assert_eq!(last_id, Some("c".to_string()));
    }

    #[test]
    fn test_extract_bool() {
        let doc = json!({"has_more": true});
        let flag = extract_json_path_bool(&doc, "$.has_more");
        assert_eq!(flag, Some(true));
    }

    #[test]
    fn test_extract_missing_path() {
        let doc = json!({"data": {}});
        let missing = extract_json_path_string(&doc, "$.nonexistent");
        assert_eq!(missing, None);
    }

    #[test]
    fn test_parse_link_header() {
        let headers = headers_with_link(
            r#"<https://api.github.com/repos?page=3>; rel="next", <https://api.github.com/repos?page=50>; rel="last""#,
        );
        assert_eq!(
            parse_link_header_next(&headers),
            Some("https://api.github.com/repos?page=3".to_string())
        );
    }

    #[test]
    fn test_parse_link_header_no_next() {
        let headers = headers_with_link(r#"<https://api.github.com/repos?page=1>; rel="first""#);
        assert_eq!(parse_link_header_next(&headers), None);
    }

    #[test]
    fn test_offset_limit_paginator() {
        let config = PaginationConfig::OffsetLimit {
            offset_param: "offset".to_string(),
            limit_param: "limit".to_string(),
            page_size: 10,
            total_path: None,
        };
        let mut paginator = create_paginator(&config, "example.com".to_string());

        let expected_initial = vec![
            ("offset".to_string(), "0".to_string()),
            ("limit".to_string(), "10".to_string()),
        ];
        assert_eq!(paginator.initial_params(), expected_initial);

        let no_headers = HeaderMap::new();
        let empty_body = json!({});

        // A full page means another request follows.
        let after_full_page = paginator.next_page(&empty_body, &no_headers, 10).unwrap();
        assert!(after_full_page.is_some());

        // A short page ends pagination.
        let after_short_page = paginator.next_page(&empty_body, &no_headers, 5).unwrap();
        assert!(after_short_page.is_none());
    }

    #[test]
    fn test_cursor_paginator_with_has_more() {
        let config = PaginationConfig::Cursor {
            cursor_param: "starting_after".to_string(),
            cursor_path: "$.data[-1].id".to_string(),
            has_more_path: Some("$.has_more".to_string()),
            page_size_param: Some("limit".to_string()),
            page_size: Some(10),
        };
        let mut paginator = create_paginator(&config, "example.com".to_string());
        let no_headers = HeaderMap::new();

        let first_page = json!({"data": [{"id": "a"}, {"id": "b"}], "has_more": true});
        let after_first = paginator.next_page(&first_page, &no_headers, 2).unwrap();
        assert!(after_first.is_some());

        let last_page = json!({"data": [{"id": "c"}], "has_more": false});
        let after_last = paginator.next_page(&last_page, &no_headers, 1).unwrap();
        assert!(after_last.is_none());
    }

    #[test]
    fn test_next_url_paginator() {
        let config = PaginationConfig::NextUrl {
            next_url_path: "$.nextRecordsUrl".to_string(),
            base_url: Some("https://instance.salesforce.com".to_string()),
        };
        let mut paginator = create_paginator(&config, "instance.salesforce.com".to_string());
        let no_headers = HeaderMap::new();

        // A relative URL is joined onto the configured base.
        let with_next = json!({"nextRecordsUrl": "/services/data/v56.0/query/abc-123"});
        let outcome = paginator.next_page(&with_next, &no_headers, 10).unwrap();
        match outcome {
            Some(NextPage::NewUrl(url)) => {
                assert_eq!(
                    url,
                    "https://instance.salesforce.com/services/data/v56.0/query/abc-123"
                );
            }
            _ => panic!("Expected NewUrl"),
        }

        // No next URL in the body ends pagination.
        let without_next = json!({"records": []});
        let outcome = paginator.next_page(&without_next, &no_headers, 0).unwrap();
        assert!(outcome.is_none());
    }

    #[test]
    fn test_negative_index_out_of_bounds() {
        let doc = json!({"data": [{"id": "a"}, {"id": "b"}]});

        // One step past the front of the array is out of range.
        let past_front = extract_json_path_string(&doc, "$.data[-3].id");
        assert_eq!(past_front, None);

        // Exactly the array length from the end is the first element.
        let first = extract_json_path_string(&doc, "$.data[-2].id");
        assert_eq!(first, Some("a".to_string()));
    }

    #[test]
    fn test_navigate_path_top_level_array() {
        let doc = json!([{"id": "1"}, {"id": "2"}]);
        let root = navigate_path(&doc, "$");
        assert!(root.is_some());
        assert!(root.unwrap().is_array());
    }

    #[test]
    fn test_ssrf_protection_rejects_different_host() {
        let mut paginator = next_only_paginator("api.example.com");
        let no_headers = HeaderMap::new();

        let hostile = json!({"next": "http://169.254.169.254/latest/meta-data/"});
        let outcome = paginator.next_page(&hostile, &no_headers, 10);
        assert!(outcome.is_err(), "Should reject URL to different host");

        let err_msg = outcome.unwrap_err().to_string();
        assert!(
            err_msg.contains("SSRF protection"),
            "Error should mention SSRF: {err_msg}"
        );
    }

    #[test]
    fn test_ssrf_protection_allows_same_host() {
        let mut paginator = next_only_paginator("api.example.com");
        let no_headers = HeaderMap::new();

        let same_host = json!({"next": "https://api.example.com/page/2"});
        let outcome = paginator.next_page(&same_host, &no_headers, 10).unwrap();
        assert!(matches!(outcome, Some(NextPage::NewUrl(_))));
    }

    #[test]
    fn test_ssrf_protection_rejects_non_http_scheme() {
        let mut paginator = next_only_paginator("api.example.com");
        let no_headers = HeaderMap::new();

        let file_url = json!({"next": "file:///etc/passwd"});
        let outcome = paginator.next_page(&file_url, &no_headers, 10);
        assert!(outcome.is_err(), "Should reject non-HTTP scheme");
    }
}