1use crate::browser::PageHandle;
7use crate::error::{Error, NavigationError, Result};
8use std::time::Duration;
9use tracing::{debug, info, instrument, warn};
10
/// Tunable settings for a single [`PageNavigator::goto`] call.
#[derive(Debug, Clone)]
pub struct NavigationOptions {
    /// Timeout in milliseconds. It is applied separately to the page load and
    /// to the subsequent readiness wait, so one attempt may take up to twice
    /// this long.
    pub timeout_ms: u64,
    /// Readiness condition awaited after the page load completes.
    pub wait_until: WaitUntil,
    /// Number of additional attempts made after the first one fails
    /// (total attempts = `retries + 1`).
    pub retries: u32,
    /// Pause in milliseconds before each retry attempt.
    pub retry_delay_ms: u64,
    /// When `true`, a successful navigation is followed by a short randomized
    /// delay and a small smooth scroll.
    pub human_like: bool,
}
25
26impl Default for NavigationOptions {
27 fn default() -> Self {
28 Self {
29 timeout_ms: 30000,
30 wait_until: WaitUntil::NetworkIdle0,
31 retries: 3,
32 retry_delay_ms: 1000,
33 human_like: true,
34 }
35 }
36}
37
/// Readiness condition awaited after a page load before navigation is
/// reported as complete.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait until `document.readyState` is `complete` (the `load` event).
    Load,
    /// Wait until the document has left the `loading` state
    /// (the `DOMContentLoaded` event).
    DomContentLoaded,
    /// Wait for the `load` event plus a fixed 500 ms settle delay.
    /// NOTE(review): network activity is not actually inspected, and this
    /// variant currently behaves exactly like `NetworkIdle2`.
    NetworkIdle0,
    /// Wait for the `load` event plus a fixed 500 ms settle delay
    /// (same script as `NetworkIdle0`).
    NetworkIdle2,
}
50
/// Outcome of a successful navigation.
///
/// Derives `Clone`, `PartialEq` and `Eq` so callers can store, duplicate and
/// compare results directly instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NavigationResult {
    /// URL reported by the page after navigation; falls back to the
    /// requested URL when the page reports none.
    pub final_url: String,
    /// HTTP status of the main document, when known. The navigator in this
    /// file does not capture it yet and always reports `None`.
    pub status: Option<u16>,
    /// Value of `document.title`, or `None` if it could not be read.
    pub title: Option<String>,
    /// Wall-clock duration of the whole navigation, in milliseconds.
    pub duration_ms: u64,
}
63
/// Stateless helpers for validating and classifying navigation URLs.
pub struct UrlValidator;

impl UrlValidator {
    /// Maximum accepted URL length. The comparison uses the UTF-8 byte
    /// length of the URL.
    const MAX_URL_LEN: usize = 2048;

    /// Checks that `url` is non-empty, starts with `http://`, `https://` or
    /// `file://` (case-sensitive), and is at most 2048 bytes long.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message describing the first failed check.
    pub fn validate(url: &str) -> std::result::Result<(), String> {
        if url.is_empty() {
            return Err("URL cannot be empty".to_string());
        }

        let has_allowed_scheme = ["http://", "https://", "file://"]
            .iter()
            .any(|prefix| url.starts_with(prefix));
        if !has_allowed_scheme {
            return Err(format!(
                "URL must start with http://, https://, or file://: {}",
                url
            ));
        }

        if url.len() > Self::MAX_URL_LEN {
            return Err(format!(
                "URL exceeds maximum length of {} characters",
                Self::MAX_URL_LEN
            ));
        }

        Ok(())
    }

    /// Returns `true` when the host of `url` is `localhost`
    /// (case-insensitive, optional trailing dot), a loopback IP address
    /// (`127.0.0.0/8`, `::1`), or the unspecified address (`0.0.0.0`, `::`).
    ///
    /// Only the host component is inspected, so look-alikes such as
    /// `http://localhost.evil.com` or a `://localhost` inside a path or
    /// query are not treated as local. URLs without a scheme yield `false`.
    pub fn is_localhost(url: &str) -> bool {
        let host = match Self::host_of(url) {
            Some(host) => host,
            None => return false,
        };
        // A fully-qualified name may carry a trailing root dot.
        let host = host.strip_suffix('.').unwrap_or(host);
        if host.eq_ignore_ascii_case("localhost") {
            return true;
        }

        // IPv6 literals are bracketed in URLs; strip the brackets to parse.
        let bare = host
            .strip_prefix('[')
            .and_then(|inner| inner.strip_suffix(']'))
            .unwrap_or(host);
        bare.parse::<std::net::IpAddr>()
            .map_or(false, |ip| ip.is_loopback() || ip.is_unspecified())
    }

    /// Returns `true` for every URL that [`Self::is_localhost`] rejects.
    pub fn is_external(url: &str) -> bool {
        !Self::is_localhost(url)
    }

    /// Extracts the host component of `url`, without userinfo or port.
    ///
    /// IPv6 literals keep their brackets (`"[::1]"`). Returns `None` when
    /// `url` has no `scheme://` prefix. A URL with an empty authority, such
    /// as `file:///path`, yields `Some("")`.
    pub fn extract_host(url: &str) -> Option<String> {
        Self::host_of(url).map(str::to_string)
    }

    /// Borrows the host portion of `url`, or `None` if it lacks a valid
    /// `scheme://` prefix.
    fn host_of(url: &str) -> Option<&str> {
        let (scheme, rest) = url.split_once("://")?;

        // Reject a "://" that merely occurs inside a path or query of a
        // schemeless string: a scheme is ALPHA *( ALPHA / DIGIT / + / - / . ).
        let mut scheme_chars = scheme.chars();
        let scheme_ok = scheme_chars
            .next()
            .map_or(false, |c| c.is_ascii_alphabetic())
            && scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
        if !scheme_ok {
            return None;
        }

        // The authority ends at the first path, query or fragment delimiter.
        let authority = rest
            .split(|c: char| matches!(c, '/' | '?' | '#'))
            .next()
            .unwrap_or(rest);
        // Drop any `user:pass@` prefix.
        let host_port = authority.rsplit('@').next().unwrap_or(authority);

        if host_port.starts_with('[') {
            // Bracketed IPv6 literal: colons inside the brackets are part of
            // the address, so cut after the closing bracket instead.
            let end = host_port.find(']').map_or(host_port.len(), |i| i + 1);
            Some(&host_port[..end])
        } else {
            Some(host_port.split(':').next().unwrap_or(host_port))
        }
    }
}
138
/// Fixed-window request counter: at most `max_requests` calls to
/// [`RateLimiter::check`] succeed per window of `window_secs` seconds.
///
/// A window of zero seconds is always considered expired, so such a limiter
/// never blocks.
#[derive(Debug)]
pub struct RateLimiter {
    /// Requests allowed per window.
    max_requests: u32,
    /// Window length in seconds.
    window_secs: u64,
    /// Requests counted in the current window.
    request_count: u32,
    /// Start of the current window.
    window_start: std::time::Instant,
}

impl RateLimiter {
    /// Creates a limiter whose first window starts now.
    pub fn new(max_requests: u32, window_secs: u64) -> Self {
        Self {
            max_requests,
            window_secs,
            request_count: 0,
            window_start: std::time::Instant::now(),
        }
    }

    /// Reports whether the current window has run its full length at `now`.
    fn window_expired(&self, now: std::time::Instant) -> bool {
        now.duration_since(self.window_start) >= std::time::Duration::from_secs(self.window_secs)
    }

    /// Records one request if the budget allows it.
    ///
    /// Starts a fresh window first when the current one has expired. Returns
    /// `true` (and counts the request) while fewer than `max_requests` have
    /// been counted in the window, `false` otherwise.
    pub fn check(&mut self) -> bool {
        let now = std::time::Instant::now();
        if self.window_expired(now) {
            self.window_start = now;
            self.request_count = 0;
        }

        if self.request_count < self.max_requests {
            self.request_count += 1;
            true
        } else {
            false
        }
    }

    /// Returns how many more requests the next `check` calls would accept
    /// right now.
    ///
    /// An expired window counts as already reset, so the value is not stale
    /// between the end of a window and the next call to [`Self::check`].
    pub fn remaining(&self) -> u32 {
        if self.window_expired(std::time::Instant::now()) {
            self.max_requests
        } else {
            self.max_requests.saturating_sub(self.request_count)
        }
    }

    /// Clears the counter and starts a new window now.
    pub fn reset(&mut self) {
        self.request_count = 0;
        self.window_start = std::time::Instant::now();
    }
}
193
194pub struct PageNavigator;
196
197impl PageNavigator {
198 #[instrument(skip(page))]
200 pub async fn goto(
201 page: &PageHandle,
202 url: &str,
203 options: Option<NavigationOptions>,
204 ) -> Result<NavigationResult> {
205 let opts = options.unwrap_or_default();
206 let start = std::time::Instant::now();
207
208 if !url.starts_with("http://")
210 && !url.starts_with("https://")
211 && !url.starts_with("file://")
212 {
213 return Err(NavigationError::InvalidUrl(format!(
214 "URL must start with http://, https://, or file://: {}",
215 url
216 ))
217 .into());
218 }
219
220 info!("Navigating to: {}", url);
221
222 let mut last_error = None;
223 for attempt in 0..=opts.retries {
224 if attempt > 0 {
225 warn!("Navigation retry attempt {} of {}", attempt, opts.retries);
226 tokio::time::sleep(Duration::from_millis(opts.retry_delay_ms)).await;
227 }
228
229 match Self::navigate_once(&page.page, url, &opts).await {
230 Ok(result) => {
231 page.set_url(result.final_url.clone()).await;
233
234 if opts.human_like {
236 Self::simulate_human_behavior(&page.page).await?;
237 }
238
239 let duration_ms = start.elapsed().as_millis() as u64;
240 return Ok(NavigationResult {
241 final_url: result.final_url,
242 status: result.status,
243 title: result.title,
244 duration_ms,
245 });
246 }
247 Err(e) => {
248 warn!("Navigation attempt {} failed: {}", attempt + 1, e);
249 last_error = Some(e);
250 }
251 }
252 }
253
254 Err(last_error.unwrap_or_else(|| {
255 NavigationError::LoadFailed("Navigation failed after all retries".to_string()).into()
256 }))
257 }
258
259 async fn navigate_once(
261 page: &chromiumoxide::Page,
262 url: &str,
263 opts: &NavigationOptions,
264 ) -> Result<NavigationResult> {
265 let timeout = Duration::from_millis(opts.timeout_ms);
267
268 let nav_future = page.goto(url);
269 let _response = tokio::time::timeout(timeout, nav_future)
270 .await
271 .map_err(|_| NavigationError::Timeout(opts.timeout_ms))?
272 .map_err(|e| NavigationError::LoadFailed(e.to_string()))?;
273
274 Self::wait_for_ready(page, opts).await?;
276
277 let final_url = page
279 .url()
280 .await
281 .map_err(|e| Error::cdp(e.to_string()))?
282 .unwrap_or_else(|| url.to_string());
283
284 let title = page
285 .evaluate("document.title")
286 .await
287 .ok()
288 .and_then(|v| v.into_value::<String>().ok());
289
290 let status: Option<u16> = None;
292
293 debug!("Navigation complete: {} -> {}", url, final_url);
294
295 Ok(NavigationResult {
296 final_url,
297 status,
298 title,
299 duration_ms: 0, })
301 }
302
303 async fn wait_for_ready(page: &chromiumoxide::Page, opts: &NavigationOptions) -> Result<()> {
305 let script = match opts.wait_until {
306 WaitUntil::Load => {
307 r#"
308 new Promise(resolve => {
309 if (document.readyState === 'complete') {
310 resolve(true);
311 } else {
312 window.addEventListener('load', () => resolve(true));
313 }
314 })
315 "#
316 }
317 WaitUntil::DomContentLoaded => {
318 r#"
319 new Promise(resolve => {
320 if (document.readyState !== 'loading') {
321 resolve(true);
322 } else {
323 document.addEventListener('DOMContentLoaded', () => resolve(true));
324 }
325 })
326 "#
327 }
328 WaitUntil::NetworkIdle0 | WaitUntil::NetworkIdle2 => {
329 r#"
332 new Promise(resolve => {
333 if (document.readyState === 'complete') {
334 setTimeout(() => resolve(true), 500);
335 } else {
336 window.addEventListener('load', () => {
337 setTimeout(() => resolve(true), 500);
338 });
339 }
340 })
341 "#
342 }
343 };
344
345 let timeout = Duration::from_millis(opts.timeout_ms);
346 tokio::time::timeout(timeout, page.evaluate(script))
347 .await
348 .map_err(|_| NavigationError::Timeout(opts.timeout_ms))?
349 .map_err(|e| Error::cdp(e.to_string()))?;
350
351 Ok(())
352 }
353
354 async fn simulate_human_behavior(page: &chromiumoxide::Page) -> Result<()> {
356 let delay = rand::random::<u64>() % 500 + 200;
358 tokio::time::sleep(Duration::from_millis(delay)).await;
359
360 let scroll_script = r#"
362 window.scrollTo({
363 top: Math.random() * 100 + 50,
364 behavior: 'smooth'
365 });
366 "#;
367
368 let _ = page.evaluate(scroll_script).await;
369
370 tokio::time::sleep(Duration::from_millis(200)).await;
372
373 Ok(())
374 }
375
376 #[instrument(skip(page))]
378 pub async fn back(page: &PageHandle) -> Result<()> {
379 page.page
380 .evaluate("window.history.back()")
381 .await
382 .map_err(|e| Error::cdp(e.to_string()))?;
383
384 tokio::time::sleep(Duration::from_millis(500)).await;
385 Ok(())
386 }
387
388 #[instrument(skip(page))]
390 pub async fn forward(page: &PageHandle) -> Result<()> {
391 page.page
392 .evaluate("window.history.forward()")
393 .await
394 .map_err(|e| Error::cdp(e.to_string()))?;
395
396 tokio::time::sleep(Duration::from_millis(500)).await;
397 Ok(())
398 }
399
400 #[instrument(skip(page))]
402 pub async fn reload(page: &PageHandle) -> Result<()> {
403 page.page
404 .reload()
405 .await
406 .map_err(|e| Error::cdp(e.to_string()))?;
407
408 Ok(())
409 }
410
411 #[instrument(skip(page))]
413 pub async fn wait_for_selector(
414 page: &PageHandle,
415 selector: &str,
416 timeout_ms: u64,
417 ) -> Result<()> {
418 let script = format!(
419 r#"
420 new Promise((resolve, reject) => {{
421 const timeout = {};
422 const start = Date.now();
423
424 function check() {{
425 const el = document.querySelector('{}');
426 if (el) {{
427 resolve(true);
428 }} else if (Date.now() - start > timeout) {{
429 reject(new Error('Timeout waiting for selector'));
430 }} else {{
431 requestAnimationFrame(check);
432 }}
433 }}
434 check();
435 }})
436 "#,
437 timeout_ms,
438 selector.replace('\'', "\\'")
439 );
440
441 let timeout = Duration::from_millis(timeout_ms + 1000);
442 tokio::time::timeout(timeout, page.page.evaluate(script.as_str()))
443 .await
444 .map_err(|_| NavigationError::Timeout(timeout_ms))?
445 .map_err(|e| Error::cdp(e.to_string()))?;
446
447 Ok(())
448 }
449}
450
// Unit tests for the pure (browser-independent) parts of this module:
// option defaults, URL validation and classification, host extraction, the
// rate limiter, and the result struct. Nothing here launches a browser.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- NavigationOptions / WaitUntil ----

    #[test]
    fn test_navigation_options_default() {
        let opts = NavigationOptions::default();
        assert_eq!(opts.timeout_ms, 30000);
        assert_eq!(opts.retries, 3);
        assert!(opts.human_like);
        assert_eq!(opts.retry_delay_ms, 1000);
    }

    #[test]
    fn test_wait_until_variants() {
        assert_ne!(WaitUntil::Load, WaitUntil::DomContentLoaded);
        assert_eq!(WaitUntil::NetworkIdle0, WaitUntil::NetworkIdle0);
    }

    // ---- UrlValidator::validate ----

    #[test]
    fn test_url_validation_valid_http() {
        assert!(UrlValidator::validate("http://example.com").is_ok());
    }

    #[test]
    fn test_url_validation_valid_https() {
        assert!(UrlValidator::validate("https://example.com").is_ok());
    }

    #[test]
    fn test_url_validation_valid_file() {
        assert!(UrlValidator::validate("file:///path/to/file.html").is_ok());
    }

    #[test]
    fn test_url_validation_empty() {
        let result = UrlValidator::validate("");
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("empty"));
    }

    #[test]
    fn test_url_validation_no_protocol() {
        let result = UrlValidator::validate("example.com");
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must start with"));
    }

    #[test]
    fn test_url_validation_invalid_protocol() {
        let result = UrlValidator::validate("ftp://example.com");
        assert!(result.is_err());
    }

    #[test]
    fn test_url_validation_too_long() {
        // 3000 path characters push the URL past the 2048 limit.
        let long_url = format!("https://example.com/{}", "a".repeat(3000));
        let result = UrlValidator::validate(&long_url);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("maximum length"));
    }

    // ---- UrlValidator::is_localhost / is_external ----

    #[test]
    fn test_localhost_check_127001() {
        assert!(UrlValidator::is_localhost("http://127.0.0.1:8080"));
        assert!(UrlValidator::is_localhost("https://127.0.0.1/path"));
    }

    #[test]
    fn test_localhost_check_localhost() {
        assert!(UrlValidator::is_localhost("http://localhost:3000"));
        assert!(UrlValidator::is_localhost("https://localhost/api"));
    }

    #[test]
    fn test_localhost_check_ipv6_loopback() {
        assert!(UrlValidator::is_localhost("http://[::1]:8080"));
    }

    #[test]
    fn test_localhost_check_zero_addr() {
        assert!(UrlValidator::is_localhost("http://0.0.0.0:8080"));
    }

    #[test]
    fn test_localhost_check_external() {
        assert!(!UrlValidator::is_localhost("https://example.com"));
        assert!(!UrlValidator::is_localhost("https://google.com"));
        // Private-network addresses are not considered localhost.
        assert!(!UrlValidator::is_localhost("http://192.168.1.1"));
    }

    #[test]
    fn test_is_external() {
        assert!(UrlValidator::is_external("https://example.com"));
        assert!(!UrlValidator::is_external("http://localhost:8080"));
        assert!(!UrlValidator::is_external("http://127.0.0.1"));
    }

    // ---- UrlValidator::extract_host ----

    #[test]
    fn test_extract_host_simple() {
        assert_eq!(
            UrlValidator::extract_host("https://example.com/path"),
            Some("example.com".to_string())
        );
    }

    #[test]
    fn test_extract_host_with_port() {
        // The port is stripped from the returned host.
        assert_eq!(
            UrlValidator::extract_host("http://localhost:8080/api"),
            Some("localhost".to_string())
        );
    }

    #[test]
    fn test_extract_host_no_path() {
        assert_eq!(
            UrlValidator::extract_host("https://google.com"),
            Some("google.com".to_string())
        );
    }

    #[test]
    fn test_extract_host_no_protocol() {
        assert_eq!(UrlValidator::extract_host("example.com"), None);
    }

    // ---- RateLimiter ----
    // All limiter tests use a 60-second window so it cannot expire mid-test.

    #[test]
    fn test_rate_limiter_allows_under_limit() {
        let mut limiter = RateLimiter::new(5, 60);

        assert!(limiter.check());
        assert!(limiter.check());
        assert!(limiter.check());
        assert!(limiter.check());
        assert!(limiter.check());
    }

    #[test]
    fn test_rate_limiter_blocks_over_limit() {
        let mut limiter = RateLimiter::new(3, 60);

        // The first three requests fit the budget.
        assert!(limiter.check());
        assert!(limiter.check());
        assert!(limiter.check());

        // Every further request in the same window is refused.
        assert!(!limiter.check());
        assert!(!limiter.check());
    }

    #[test]
    fn test_rate_limiter_remaining() {
        let mut limiter = RateLimiter::new(5, 60);

        assert_eq!(limiter.remaining(), 5);
        limiter.check();
        assert_eq!(limiter.remaining(), 4);
        limiter.check();
        limiter.check();
        assert_eq!(limiter.remaining(), 2);
    }

    #[test]
    fn test_rate_limiter_reset() {
        let mut limiter = RateLimiter::new(3, 60);

        limiter.check();
        limiter.check();
        limiter.check();
        assert_eq!(limiter.remaining(), 0);
        assert!(!limiter.check());

        // reset() restores the full budget immediately.
        limiter.reset();
        assert_eq!(limiter.remaining(), 3);
        assert!(limiter.check());
    }

    #[test]
    fn test_rate_limiter_single_request() {
        let mut limiter = RateLimiter::new(1, 60);
        assert!(limiter.check());
        assert!(!limiter.check());
    }

    #[test]
    fn test_rate_limiter_zero_remaining_after_exhaustion() {
        let mut limiter = RateLimiter::new(2, 60);
        limiter.check();
        limiter.check();
        assert_eq!(limiter.remaining(), 0);
    }

    // ---- NavigationResult ----

    #[test]
    fn test_navigation_result_structure() {
        let result = NavigationResult {
            final_url: "https://example.com".to_string(),
            status: Some(200),
            title: Some("Example".to_string()),
            duration_ms: 150,
        };

        assert_eq!(result.final_url, "https://example.com");
        assert_eq!(result.status, Some(200));
        assert_eq!(result.title, Some("Example".to_string()));
        assert_eq!(result.duration_ms, 150);
    }

    #[test]
    fn test_navigation_result_without_status() {
        let result = NavigationResult {
            final_url: "https://example.com".to_string(),
            status: None,
            title: None,
            duration_ms: 100,
        };

        assert!(result.status.is_none());
        assert!(result.title.is_none());
    }

    // ---- URL edge cases ----

    #[test]
    fn test_url_validation_with_query_params() {
        assert!(UrlValidator::validate("https://example.com?foo=bar&baz=123").is_ok());
    }

    #[test]
    fn test_url_validation_with_fragment() {
        assert!(UrlValidator::validate("https://example.com#section").is_ok());
    }

    #[test]
    fn test_url_validation_with_auth() {
        assert!(UrlValidator::validate("https://user:pass@example.com").is_ok());
    }

    #[test]
    fn test_localhost_case_insensitive() {
        assert!(UrlValidator::is_localhost("http://LOCALHOST:8080"));
        assert!(UrlValidator::is_localhost("http://LocalHost:8080"));
    }

    #[test]
    fn test_localhost_in_path_not_matched() {
        // "localhost" appearing only as a path segment must not count.
        assert!(!UrlValidator::is_localhost(
            "https://example.com/localhost/api"
        ));
    }
}