web_capture/
stackoverflow.rs1use crate::{Result, WebCaptureError};
9use std::time::Duration;
10use tokio::time::sleep;
11use url::Url;
12
/// Number of retries performed after the initial StackPrinter request.
///
/// The fetch loop iterates over `0..=STACKPRINTER_RETRIES`, so at most
/// `STACKPRINTER_RETRIES + 1` requests are issued.
const STACKPRINTER_RETRIES: usize = 3;
/// Delay, in milliseconds, slept before the first retry.
///
/// The fetch loop multiplies this by `2^attempt`, so the wait doubles before
/// each subsequent retry.
const STACKPRINTER_RETRY_BASE_DELAY_MS: u64 = 1_000;
15
16#[must_use]
18pub fn is_stackoverflow_question_url(url: &str) -> bool {
19 stackoverflow_question_id(url).is_some()
20}
21
22#[must_use]
24pub fn stackprinter_url(url: &str) -> Option<String> {
25 let question_id = stackoverflow_question_id(url)?;
26 Some(format!(
27 "https://stackprinter.appspot.com/export?question={question_id}&service=stackoverflow&language=en&hideAnswers=false&showAll=true&width=640"
28 ))
29}
30
31pub async fn fetch_stackoverflow_html(url: &str) -> Result<String> {
38 let stackprinter =
39 stackprinter_url(url).ok_or_else(|| WebCaptureError::InvalidUrl(url.to_string()))?;
40 let mut last_error = None;
41
42 for attempt in 0..=STACKPRINTER_RETRIES {
43 match fetch_stackprinter_html_once(&stackprinter).await {
44 Ok(html) if !is_stackprinter_transient_error(&html) => return Ok(html),
45 Ok(_) => {
46 last_error = Some("StackPrinter returned a transient error page".to_string());
47 }
48 Err(error) => {
49 last_error = Some(error.to_string());
50 }
51 }
52
53 if attempt < STACKPRINTER_RETRIES {
54 let delay_factor = 2_u64.pow(u32::try_from(attempt).expect("retry attempt fits u32"));
55 sleep(Duration::from_millis(
56 STACKPRINTER_RETRY_BASE_DELAY_MS * delay_factor,
57 ))
58 .await;
59 }
60 }
61
62 Err(WebCaptureError::FetchError(last_error.unwrap_or_else(
63 || "StackPrinter failed without an error message".to_string(),
64 )))
65}
66
67async fn fetch_stackprinter_html_once(stackprinter: &str) -> Result<String> {
68 let response = reqwest::get(stackprinter)
69 .await
70 .and_then(reqwest::Response::error_for_status)
71 .map_err(|error| WebCaptureError::FetchError(error.to_string()))?;
72 response
73 .text()
74 .await
75 .map_err(|error| WebCaptureError::FetchError(error.to_string()))
76}
77
/// Reports whether `html` contains one of the phrases treated as a
/// StackPrinter transient error page.
///
/// The match is a case-sensitive substring search.
#[must_use]
fn is_stackprinter_transient_error(html: &str) -> bool {
    const TRANSIENT_MARKERS: [&str; 2] = ["Ooooops", "Please try again later"];
    TRANSIENT_MARKERS
        .iter()
        .any(|marker| html.contains(*marker))
}
82
83fn stackoverflow_question_id(url: &str) -> Option<String> {
84 let parsed = Url::parse(url).ok()?;
85 let host = parsed.host_str()?.trim_start_matches("www.");
86 if host != "stackoverflow.com" {
87 return None;
88 }
89
90 let mut segments = parsed.path_segments()?;
91 if segments.next()? != "questions" {
92 return None;
93 }
94
95 let question_id = segments.next()?;
96 if question_id
97 .chars()
98 .all(|character| character.is_ascii_digit())
99 {
100 Some(question_id.to_string())
101 } else {
102 None
103 }
104}