use crate::client::FetchOptions;
use crate::error::FetchError;
use crate::fetchers::Fetcher;
use crate::types::{FetchRequest, FetchResponse};
use crate::DEFAULT_USER_AGENT;
use async_trait::async_trait;
use reqwest::header::{HeaderValue, USER_AGENT};
use serde::Deserialize;
use std::time::Duration;
use url::Url;
/// Timeout used for both the connect phase and the whole request when calling the API.
const API_TIMEOUT: Duration = Duration::from_secs(10);
/// Page size requested from the answers endpoint, i.e. the most answers fetched per question.
const MAX_ANSWERS: usize = 10;
/// Fetcher for question pages on Stack Overflow and other Stack Exchange sites.
///
/// The type is a stateless unit struct; its job is to recognise question URLs
/// and reduce them to a `(site, question_id)` pair.
pub struct StackOverflowFetcher;

impl StackOverflowFetcher {
    /// Creates a fetcher.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` when `site` is a dot-separated list of non-empty labels
    /// made only of ASCII letters, digits and `-`.
    ///
    /// The site name is later placed into a query string, so anything that
    /// could be read as query syntax (`&`, `=`, ...) or an empty name is refused.
    fn is_valid_site_name(site: &str) -> bool {
        site.split('.').all(|label| {
            !label.is_empty()
                && label
                    .bytes()
                    .all(|b| b.is_ascii_alphanumeric() || b == b'-')
        })
    }

    /// Extracts `(site, question_id)` from a question URL.
    ///
    /// Recognised hosts are the fixed list below (with or without `www.`) and
    /// any `<site>.stackexchange.com` whose `<site>` part passes
    /// [`Self::is_valid_site_name`]. The path must start with
    /// `questions/<digits>`; any further segments (such as the title slug) are
    /// ignored.
    ///
    /// Returns `None` for every other URL, including ids that are not plain
    /// decimal digits or do not fit in a `u64`.
    fn parse_url(url: &Url) -> Option<(String, u64)> {
        let host = url.host_str()?;
        let site = match host {
            "stackoverflow.com" | "www.stackoverflow.com" => "stackoverflow",
            "serverfault.com" | "www.serverfault.com" => "serverfault",
            "superuser.com" | "www.superuser.com" => "superuser",
            "askubuntu.com" | "www.askubuntu.com" => "askubuntu",
            "mathoverflow.net" | "www.mathoverflow.net" => "mathoverflow",
            _ => host
                .strip_suffix(".stackexchange.com")
                .filter(|sub| Self::is_valid_site_name(sub))?,
        };

        // Walk the path lazily: only the first two segments matter.
        let mut segments = url.path_segments()?;
        if segments.next()? != "questions" {
            return None;
        }
        let id_segment = segments.next()?;
        // `u64::from_str` tolerates a leading `+`; a question id is digits only.
        if !id_segment.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        let id: u64 = id_segment.parse().ok()?;
        Some((site.to_string(), id))
    }
}

impl Default for StackOverflowFetcher {
    /// Equivalent to [`StackOverflowFetcher::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Envelope of an API list response.
///
/// Only the `items` field is declared here; `fetch` decodes it with `T` set to
/// `Question` or `Answer`.
#[derive(Debug, Deserialize)]
struct ApiResponse<T> {
/// The returned objects, in the order the API sent them.
items: Vec<T>,
}
/// A question object as decoded from the questions endpoint.
///
/// Fields typed as `Option` may be absent from the payload; all others are
/// required for deserialization to succeed.
#[derive(Debug, Deserialize)]
struct Question {
/// Question title; rendered as the top-level heading.
title: String,
/// Question body; appended after the metadata list when present.
body_markdown: Option<String>,
/// Vote score (signed).
score: i64,
/// View counter; the "Views" line is omitted when absent.
view_count: Option<u64>,
/// Number of answers; `fetch` only requests answers when this is non-zero.
answer_count: u64,
/// Tags, joined with ", " in the output; the line is omitted when empty.
tags: Vec<String>,
/// Asker details, if provided.
owner: Option<Owner>,
/// Link printed on the "URL" line.
link: String,
/// Rendered as "yes"/"no" on the "Answered" line.
is_answered: bool,
}
/// An answer object as decoded from the answers endpoint.
#[derive(Debug, Deserialize)]
struct Answer {
/// Answer body; nothing is printed under the heading when absent.
body_markdown: Option<String>,
/// Vote score (signed); shown in the answer heading.
score: i64,
/// Adds the " ✓ Accepted" marker to the answer heading when true.
is_accepted: bool,
/// Author details; the author is shown as "anonymous" without a display name.
owner: Option<Owner>,
}
/// Author information attached to a question or an answer.
#[derive(Debug, Deserialize)]
struct Owner {
/// Name shown in the output, if provided.
display_name: Option<String>,
/// Reputation; printed in parentheses after the asker's name when present.
reputation: Option<u64>,
}
#[async_trait]
impl Fetcher for StackOverflowFetcher {
    /// Identifier reported for this fetcher.
    fn name(&self) -> &'static str {
        "stackoverflow"
    }

    /// Returns `true` when `url` is a question URL that `parse_url` accepts.
    fn matches(&self, url: &Url) -> bool {
        Self::parse_url(url).is_some()
    }

    /// Fetches a question (and up to `MAX_ANSWERS` answers) through the Stack
    /// Exchange API and renders them as Markdown.
    ///
    /// # Errors
    ///
    /// * `FetchError::InvalidUrlScheme` when `request.url` does not parse.
    /// * `FetchError::FetcherError` when the URL is not a question URL, the
    ///   question payload cannot be decoded, or it contains no items.
    /// * `FetchError::ClientBuildError` when the HTTP client cannot be built.
    /// * Whatever `FetchError::from_reqwest` yields when the question request
    ///   fails to send.
    ///
    /// A non-success HTTP status on the question request is not an `Err`: it
    /// is reported as an `Ok` response carrying the status code and an error
    /// message. Any failure while loading answers is ignored and the question
    /// is returned without them.
    async fn fetch(
        &self,
        request: &FetchRequest,
        options: &FetchOptions,
    ) -> Result<FetchResponse, FetchError> {
        let parsed = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;
        let (site, question_id) = match Self::parse_url(&parsed) {
            Some(target) => target,
            None => {
                return Err(FetchError::FetcherError(
                    "Not a valid Stack Exchange question URL".to_string(),
                ));
            }
        };

        // HTTP client: fixed timeouts, at most three redirects, proxy optional.
        let builder = reqwest::Client::builder()
            .connect_timeout(API_TIMEOUT)
            .timeout(API_TIMEOUT)
            .redirect(reqwest::redirect::Policy::limited(3));
        let builder = if options.respect_proxy_env {
            builder
        } else {
            builder.no_proxy()
        };
        let client = builder.build().map_err(FetchError::ClientBuildError)?;

        // Fall back to the default agent if the configured one is not a valid header value.
        let requested_agent = options.user_agent.as_deref().unwrap_or(DEFAULT_USER_AGENT);
        let ua_header = match HeaderValue::from_str(requested_agent) {
            Ok(value) => value,
            Err(_) => HeaderValue::from_static(DEFAULT_USER_AGENT),
        };

        // --- Question ---------------------------------------------------
        let question_endpoint = format!(
            "https://api.stackexchange.com/2.3/questions/{}?site={}&filter=withbody_markdown",
            question_id, site
        );
        let q_response = client
            .get(&question_endpoint)
            .header(USER_AGENT, ua_header.clone())
            .send()
            .await
            .map_err(FetchError::from_reqwest)?;

        let status = q_response.status();
        if !status.is_success() {
            let status_code = status.as_u16();
            let error_msg = match status_code {
                404 => format!("Question {} not found on {}", question_id, site),
                _ => format!("Stack Exchange API error: HTTP {}", status_code),
            };
            return Ok(FetchResponse {
                url: request.url.clone(),
                status_code,
                error: Some(error_msg),
                ..Default::default()
            });
        }

        let q_data: ApiResponse<Question> = q_response.json().await.map_err(|e| {
            FetchError::FetcherError(format!("Failed to parse question data: {}", e))
        })?;
        let question = match q_data.items.into_iter().next() {
            Some(first) => first,
            None => {
                return Err(FetchError::FetcherError(format!(
                    "Question {} not found",
                    question_id
                )));
            }
        };

        // --- Answers (best effort) --------------------------------------
        let answers: Option<Vec<Answer>> = if question.answer_count == 0 {
            None
        } else {
            let answers_endpoint = format!(
                "https://api.stackexchange.com/2.3/questions/{}/answers?site={}&sort=votes&order=desc&pagesize={}&filter=withbody_markdown",
                question_id, site, MAX_ANSWERS
            );
            let sent = client
                .get(&answers_endpoint)
                .header(USER_AGENT, ua_header)
                .send()
                .await;
            match sent {
                Ok(resp) if resp.status().is_success() => {
                    match resp.json::<ApiResponse<Answer>>().await {
                        Ok(page) => Some(page.items),
                        Err(_) => None,
                    }
                }
                _ => None,
            }
        };

        Ok(FetchResponse {
            url: request.url.clone(),
            status_code: 200,
            content_type: Some("text/markdown".to_string()),
            format: Some("stackoverflow_qa".to_string()),
            content: Some(format_qa_response(&question, answers.as_deref())),
            ..Default::default()
        })
    }
}
/// Renders a question and, optionally, its answers as a Markdown document.
///
/// Layout: an `#` heading with the title, a `## Question` section holding a
/// bullet list of metadata followed by the body, and — when `answers` holds at
/// least one entry — a `## Answers (n)` section with one `###` heading per
/// answer, in the order given.
///
/// Optional pieces (views, tags, asker, bodies) are left out when missing. An
/// answer without an author display name is attributed to "anonymous".
///
/// NOTE(review): all text is inserted verbatim. If the API delivers
/// HTML-escaped strings they will appear escaped in the output — confirm
/// whether decoding is wanted upstream.
fn format_qa_response(question: &Question, answers: Option<&[Answer]>) -> String {
    // Heading plus the first, unconditional metadata bullet.
    let mut doc = format!(
        "# {}\n\n## Question\n\n- **Score:** {}\n",
        question.title, question.score
    );

    if let Some(views) = question.view_count {
        doc += &format!("- **Views:** {}\n", views);
    }

    let answered = if question.is_answered { "yes" } else { "no" };
    doc += &format!(
        "- **Answers:** {}\n- **Answered:** {}\n",
        question.answer_count, answered
    );

    if !question.tags.is_empty() {
        doc += &format!("- **Tags:** {}\n", question.tags.join(", "));
    }

    // The asker line needs a display name; reputation is an optional suffix.
    let asker = question
        .owner
        .as_ref()
        .and_then(|o| o.display_name.as_ref().map(|name| (name, o.reputation)));
    if let Some((name, reputation)) = asker {
        let suffix = match reputation {
            Some(points) => format!(" ({})", points),
            None => String::new(),
        };
        doc += &format!("- **Asked by:** {}{}\n", name, suffix);
    }

    doc += &format!("- **URL:** {}\n", question.link);

    if let Some(body) = question.body_markdown.as_deref() {
        doc.push('\n');
        doc += body;
        doc.push('\n');
    }

    // No answers supplied (or an empty list): the question stands alone.
    let answers = answers.unwrap_or_default();
    if answers.is_empty() {
        return doc;
    }

    doc += &format!("\n---\n\n## Answers ({})\n", answers.len());
    for answer in answers {
        let badge = if answer.is_accepted {
            " ✓ Accepted"
        } else {
            ""
        };
        let author = match answer.owner.as_ref().map(|o| o.display_name.as_deref()) {
            Some(Some(name)) => name,
            _ => "anonymous",
        };
        doc += &format!(
            "\n### Score: {}{} — by {}\n\n",
            answer.score, badge, author
        );
        if let Some(body) = answer.body_markdown.as_deref() {
            doc += body;
            doc += "\n";
        }
    }
    doc
}
// Unit tests for URL recognition (`parse_url` / `matches`) and Markdown
// rendering (`format_qa_response`). None of them touch the network.
#[cfg(test)]
mod tests {
use super::*;
// A full question URL with a title slug yields the site and numeric id.
#[test]
fn test_parse_stackoverflow_url() {
let url = Url::parse("https://stackoverflow.com/questions/12345/how-to-do-x").unwrap();
assert_eq!(
StackOverflowFetcher::parse_url(&url),
Some(("stackoverflow".to_string(), 12345))
);
}
// The title slug is optional.
#[test]
fn test_parse_stackoverflow_url_no_slug() {
let url = Url::parse("https://stackoverflow.com/questions/12345").unwrap();
assert_eq!(
StackOverflowFetcher::parse_url(&url),
Some(("stackoverflow".to_string(), 12345))
);
}
// For `*.stackexchange.com` hosts the subdomain becomes the site name.
#[test]
fn test_parse_stackexchange_url() {
let url =
Url::parse("https://unix.stackexchange.com/questions/999/shell-question").unwrap();
assert_eq!(
StackOverflowFetcher::parse_url(&url),
Some(("unix".to_string(), 999))
);
}
// Sites with their own domain map to their fixed site names.
#[test]
fn test_parse_other_se_sites() {
let url = Url::parse("https://serverfault.com/questions/42/title").unwrap();
assert_eq!(
StackOverflowFetcher::parse_url(&url),
Some(("serverfault".to_string(), 42))
);
let url = Url::parse("https://askubuntu.com/questions/1/title").unwrap();
assert_eq!(
StackOverflowFetcher::parse_url(&url),
Some(("askubuntu".to_string(), 1))
);
}
// Paths that do not start with `questions/<id>` are not question URLs.
#[test]
fn test_rejects_non_question_paths() {
let url = Url::parse("https://stackoverflow.com/users/12345").unwrap();
assert_eq!(StackOverflowFetcher::parse_url(&url), None);
let url = Url::parse("https://stackoverflow.com/tags").unwrap();
assert_eq!(StackOverflowFetcher::parse_url(&url), None);
}
// The id segment must parse as a number.
#[test]
fn test_rejects_non_numeric_id() {
let url = Url::parse("https://stackoverflow.com/questions/abc/title").unwrap();
assert_eq!(StackOverflowFetcher::parse_url(&url), None);
}
// A matching path on an unrelated host is still rejected.
#[test]
fn test_rejects_non_se_sites() {
let url = Url::parse("https://example.com/questions/123").unwrap();
assert_eq!(StackOverflowFetcher::parse_url(&url), None);
}
// `matches` agrees with `parse_url` for accepted and rejected hosts.
#[test]
fn test_fetcher_matches() {
let fetcher = StackOverflowFetcher::new();
let url = Url::parse("https://stackoverflow.com/questions/12345/title").unwrap();
assert!(fetcher.matches(&url));
let url = Url::parse("https://unix.stackexchange.com/questions/1/title").unwrap();
assert!(fetcher.matches(&url));
let url = Url::parse("https://example.com/questions/1").unwrap();
assert!(!fetcher.matches(&url));
}
// Rendering includes the title, metadata bullets, question body and both answers.
#[test]
fn test_format_qa_response() {
let question = Question {
title: "How to parse JSON in Rust?".to_string(),
body_markdown: Some("I need to parse JSON.".to_string()),
score: 15,
view_count: Some(1000),
answer_count: 2,
tags: vec!["rust".to_string(), "json".to_string()],
owner: Some(Owner {
display_name: Some("alice".to_string()),
reputation: Some(5000),
}),
link: "https://stackoverflow.com/questions/42".to_string(),
is_answered: true,
};
let answers = vec![
Answer {
body_markdown: Some("Use serde_json crate.".to_string()),
score: 20,
is_accepted: true,
owner: Some(Owner {
display_name: Some("bob".to_string()),
reputation: Some(10000),
}),
},
Answer {
body_markdown: Some("Try simd-json for speed.".to_string()),
score: 5,
is_accepted: false,
owner: Some(Owner {
display_name: Some("charlie".to_string()),
reputation: None,
}),
},
];
let output = format_qa_response(&question, Some(&answers));
assert!(output.contains("# How to parse JSON in Rust?"));
assert!(output.contains("**Score:** 15"));
assert!(output.contains("**Views:** 1000"));
assert!(output.contains("**Tags:** rust, json"));
assert!(output.contains("**Asked by:** alice (5000)"));
assert!(output.contains("I need to parse JSON."));
assert!(output.contains("Accepted"));
assert!(output.contains("Use serde_json crate."));
assert!(output.contains("Try simd-json for speed."));
}
}