1use crate::client::FetchOptions;
7use crate::error::FetchError;
8use crate::fetchers::Fetcher;
9use crate::types::{FetchRequest, FetchResponse};
10use crate::DEFAULT_USER_AGENT;
11use async_trait::async_trait;
12use reqwest::header::{HeaderValue, USER_AGENT};
13use serde::Deserialize;
14use std::time::Duration;
15use url::Url;
16
/// Timeout for Hacker News API calls.
///
/// `fetch` applies this value both as the connect timeout and as the overall
/// request timeout of the HTTP client it builds.
const API_TIMEOUT: Duration = Duration::from_secs(10);

/// Upper bound on how many top-level comments are requested for one item.
const MAX_COMMENTS: usize = 20;
21
22pub struct HackerNewsFetcher;
27
28impl HackerNewsFetcher {
29 pub fn new() -> Self {
30 Self
31 }
32
33 fn parse_url(url: &Url) -> Option<u64> {
34 let host = url.host_str()?;
35 if host != "news.ycombinator.com" {
36 return None;
37 }
38
39 let segments: Vec<&str> = url.path_segments().map(|s| s.collect()).unwrap_or_default();
40 if segments.first() != Some(&"item") {
41 return None;
42 }
43
44 url.query_pairs()
45 .find(|(k, _)| k == "id")
46 .and_then(|(_, v)| v.parse().ok())
47 }
48}
49
50impl Default for HackerNewsFetcher {
51 fn default() -> Self {
52 Self::new()
53 }
54}
55
56#[derive(Debug, Deserialize)]
57struct HNItem {
58 id: u64,
59 #[serde(rename = "type")]
60 item_type: Option<String>,
61 title: Option<String>,
62 text: Option<String>,
63 url: Option<String>,
64 by: Option<String>,
65 score: Option<i64>,
66 descendants: Option<u64>,
67 kids: Option<Vec<u64>>,
68}
69
70#[async_trait]
71impl Fetcher for HackerNewsFetcher {
72 fn name(&self) -> &'static str {
73 "hackernews"
74 }
75
76 fn matches(&self, url: &Url) -> bool {
77 Self::parse_url(url).is_some()
78 }
79
80 async fn fetch(
81 &self,
82 request: &FetchRequest,
83 options: &FetchOptions,
84 ) -> Result<FetchResponse, FetchError> {
85 let url = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;
86
87 let item_id = Self::parse_url(&url)
88 .ok_or_else(|| FetchError::FetcherError("Not a valid HN URL".to_string()))?;
89
90 let user_agent = options.user_agent.as_deref().unwrap_or(DEFAULT_USER_AGENT);
91 let mut client_builder = reqwest::Client::builder()
92 .connect_timeout(API_TIMEOUT)
93 .timeout(API_TIMEOUT)
94 .redirect(reqwest::redirect::Policy::limited(3));
95
96 if !options.respect_proxy_env {
97 client_builder = client_builder.no_proxy();
98 }
99
100 let client = client_builder
101 .build()
102 .map_err(FetchError::ClientBuildError)?;
103
104 let ua_header = HeaderValue::from_str(user_agent)
105 .unwrap_or_else(|_| HeaderValue::from_static(DEFAULT_USER_AGENT));
106
107 let item = fetch_item(&client, &ua_header, item_id).await?;
109
110 let comments = if let Some(kids) = &item.kids {
112 let mut comments = Vec::new();
113 for &kid_id in kids.iter().take(MAX_COMMENTS) {
114 if let Ok(comment) = fetch_item(&client, &ua_header, kid_id).await {
115 let replies = if let Some(reply_ids) = &comment.kids {
117 let mut replies = Vec::new();
118 for &reply_id in reply_ids.iter().take(5) {
119 if let Ok(reply) = fetch_item(&client, &ua_header, reply_id).await {
120 replies.push(reply);
121 }
122 }
123 replies
124 } else {
125 Vec::new()
126 };
127 comments.push((comment, replies));
128 }
129 }
130 comments
131 } else {
132 Vec::new()
133 };
134
135 let content = format_hn_response(&item, &comments);
136
137 Ok(FetchResponse {
138 url: request.url.clone(),
139 status_code: 200,
140 content_type: Some("text/markdown".to_string()),
141 format: Some("hackernews".to_string()),
142 content: Some(content),
143 ..Default::default()
144 })
145 }
146}
147
148async fn fetch_item(
149 client: &reqwest::Client,
150 ua: &HeaderValue,
151 id: u64,
152) -> Result<HNItem, FetchError> {
153 let url = format!("https://hacker-news.firebaseio.com/v0/item/{}.json", id);
154
155 let resp = client
156 .get(&url)
157 .header(USER_AGENT, ua.clone())
158 .send()
159 .await
160 .map_err(FetchError::from_reqwest)?;
161
162 if !resp.status().is_success() {
163 return Err(FetchError::FetcherError(format!(
164 "HN API error: HTTP {}",
165 resp.status()
166 )));
167 }
168
169 resp.json()
170 .await
171 .map_err(|e| FetchError::FetcherError(format!("Failed to parse HN item: {}", e)))
172}
173
174fn format_hn_response(item: &HNItem, comments: &[(HNItem, Vec<HNItem>)]) -> String {
175 let mut out = String::new();
176
177 let item_type = item.item_type.as_deref().unwrap_or("story");
178
179 let title = item.title.as_deref().unwrap_or("Hacker News Item");
181 out.push_str(&format!("# {}\n\n", title));
182
183 out.push_str("## Info\n\n");
185 out.push_str(&format!("- **Type:** {}\n", item_type));
186
187 if let Some(by) = &item.by {
188 out.push_str(&format!("- **By:** {}\n", by));
189 }
190 if let Some(score) = item.score {
191 out.push_str(&format!("- **Score:** {}\n", score));
192 }
193 if let Some(descendants) = item.descendants {
194 out.push_str(&format!("- **Comments:** {}\n", descendants));
195 }
196 if let Some(url) = &item.url {
197 out.push_str(&format!("- **Link:** {}\n", url));
198 }
199 out.push_str(&format!(
200 "- **HN URL:** https://news.ycombinator.com/item?id={}\n",
201 item.id
202 ));
203
204 if let Some(text) = &item.text {
206 let cleaned = strip_html_tags(text);
207 out.push_str(&format!("\n{}\n", cleaned));
208 }
209
210 if !comments.is_empty() {
212 let total = item.descendants.unwrap_or(0);
213 let shown: usize = comments.len() + comments.iter().map(|(_, r)| r.len()).sum::<usize>();
214 if shown < total as usize {
215 out.push_str(&format!("\n---\n\n## Comments ({} of {})\n", shown, total));
216 } else {
217 out.push_str(&format!("\n---\n\n## Comments ({})\n", shown));
218 }
219
220 for (comment, replies) in comments {
221 format_comment(&mut out, comment, 0);
222 for reply in replies {
223 format_comment(&mut out, reply, 1);
224 }
225 }
226 }
227
228 out
229}
230
231fn format_comment(out: &mut String, comment: &HNItem, depth: usize) {
232 let indent = "> ".repeat(depth);
233 let by = comment.by.as_deref().unwrap_or("anonymous");
234
235 out.push_str(&format!("\n{}**{}**\n\n", indent, by));
236
237 if let Some(text) = &comment.text {
238 let cleaned = strip_html_tags(text);
239 for line in cleaned.lines() {
240 out.push_str(&format!("{}{}\n", indent, line));
241 }
242 out.push('\n');
243 }
244}
245
/// Converts an HTML fragment into plain text.
///
/// Everything between `<` and `>` is dropped. An opening paragraph (`p`) or
/// line-break (`br`) tag additionally emits a single newline so that block
/// structure survives the stripping. After tags are removed, a small set of
/// character entities is decoded (see [`decode_html_entities`]).
///
/// The look-ahead used to recognise `p`/`br` is taken from the byte offset of
/// the `<` in the *input*, so it never slices inside a multi-byte character
/// and always inspects the tag that is actually being opened.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;

    for (pos, c) in html.char_indices() {
        match c {
            '<' => {
                in_tag = true;
                // `<` is a single byte, so `pos + 1` is always a char boundary.
                let after = &html[pos + 1..];
                let name_len = after
                    .find(|ch: char| !ch.is_ascii_alphanumeric())
                    .unwrap_or(after.len());
                let tag_name = &after[..name_len];
                // Closing tags start with `/`, giving an empty name here, so
                // only opening `p` / `br` tags insert a line break.
                if tag_name.eq_ignore_ascii_case("p") || tag_name.eq_ignore_ascii_case("br") {
                    text.push('\n');
                }
            }
            '>' => in_tag = false,
            _ if !in_tag => text.push(c),
            _ => {}
        }
    }

    decode_html_entities(&text)
}

/// Decodes the entities `amp`, `lt`, `gt`, `quot`, `#x27`, `#39` and `#x2F`
/// in a single left-to-right pass.
///
/// Working in one pass means the output of one replacement is never decoded
/// again, so an escaped ampersand followed by `lt;` yields a literal
/// ampersand plus `lt;` rather than `<`. An ampersand that does not start a
/// known entity is copied through unchanged.
fn decode_html_entities(text: &str) -> String {
    // Entity bodies (everything after the ampersand) and their replacements.
    const ENTITIES: [(&str, char); 7] = [
        ("amp;", '&'),
        ("lt;", '<'),
        ("gt;", '>'),
        ("quot;", '"'),
        ("#x27;", '\''),
        ("#39;", '\''),
        ("#x2F;", '/'),
    ];

    let mut decoded = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(pos) = rest.find('&') {
        decoded.push_str(&rest[..pos]);
        let tail = &rest[pos + 1..];
        match ENTITIES.iter().find(|(body, _)| tail.starts_with(*body)) {
            Some((body, replacement)) => {
                decoded.push(*replacement);
                rest = &tail[body.len()..];
            }
            None => {
                decoded.push('&');
                rest = tail;
            }
        }
    }

    decoded.push_str(rest);
    decoded
}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as a URL and runs it through `HackerNewsFetcher::parse_url`.
    fn item_id_in(raw: &str) -> Option<u64> {
        let parsed = Url::parse(raw).unwrap();
        HackerNewsFetcher::parse_url(&parsed)
    }

    #[test]
    fn test_parse_hn_url() {
        assert_eq!(
            item_id_in("https://news.ycombinator.com/item?id=12345"),
            Some(12345)
        );
    }

    #[test]
    fn test_rejects_non_hn() {
        assert_eq!(item_id_in("https://example.com/item?id=123"), None);
    }

    #[test]
    fn test_rejects_non_item_path() {
        assert_eq!(item_id_in("https://news.ycombinator.com/newest"), None);
    }

    #[test]
    fn test_rejects_no_id() {
        assert_eq!(item_id_in("https://news.ycombinator.com/item"), None);
    }

    #[test]
    fn test_fetcher_matches() {
        let fetcher = HackerNewsFetcher::new();
        let hn_item = Url::parse("https://news.ycombinator.com/item?id=123").unwrap();
        let elsewhere = Url::parse("https://example.com/item?id=123").unwrap();

        assert!(fetcher.matches(&hn_item));
        assert!(!fetcher.matches(&elsewhere));
    }

    #[test]
    fn test_strip_html_tags() {
        let cases = [("Hello <b>world</b>", "Hello world"), ("a & b", "a & b")];
        for (input, expected) in cases.iter() {
            assert_eq!(strip_html_tags(input), *expected);
        }
    }

    #[test]
    fn test_format_hn_response() {
        let story = HNItem {
            id: 42,
            item_type: Some("story".to_string()),
            title: Some("Show HN: My Project".to_string()),
            text: None,
            url: Some("https://example.com".to_string()),
            by: Some("pg".to_string()),
            score: Some(100),
            descendants: Some(5),
            kids: None,
        };

        let rendered = format_hn_response(&story, &[]);

        // Each fragment must appear somewhere in the rendered markdown.
        let expected_fragments = [
            "# Show HN: My Project",
            "**By:** pg",
            "**Score:** 100",
            "https://example.com",
        ];
        for fragment in expected_fragments.iter() {
            assert!(rendered.contains(*fragment));
        }
    }
}