1use std::collections::HashMap;
2use std::sync::Arc;
3
4use tokio::sync::Mutex;
5
/// Maximum accepted feed response size (10 MiB); compared against the
/// Content-Length header in `check_content_length`.
const MAX_FEED_SIZE: u64 = 10 * 1024 * 1024;

/// Maximum accepted article response size (5 MiB); compared against the
/// Content-Length header in `check_content_length`.
const MAX_ARTICLE_SIZE: u64 = 5 * 1024 * 1024;

/// Default number of entries shown in a listing when the caller gives no limit.
pub const DEFAULT_LIST_LIMIT: usize = 20;
14
/// A feed entry as exposed to callers of the listing/formatting APIs.
#[derive(Debug, Clone)]
pub struct FeedEntry {
    /// Sequential ID assigned by `EntryStore::store_entries` (starts at 1).
    pub id: u32,
    pub title: String,
    pub url: String,
    /// Date formatted as `%Y-%m-%d`, or `-` when the feed supplied none.
    pub published: String,
    /// HTML-stripped, truncated summary; empty when the feed has none.
    pub summary: String,
}
24
/// Async-safe store mapping listing IDs to the URL/title of each entry.
///
/// `Clone` is cheap and shares state: all clones point at the same
/// `Arc<Mutex<StoreInner>>`.
#[derive(Debug, Clone, Default)]
pub struct EntryStore {
    inner: Arc<Mutex<StoreInner>>,
}
31
/// State guarded by the `EntryStore` lock.
#[derive(Debug, Default)]
struct StoreInner {
    // Next ID to hand out; reset to 1 on every `store_entries` call.
    next_id: u32,
    entries: HashMap<u32, StoredEntry>,
}
37
/// Minimal per-entry data retained for later `get_url` lookups.
#[derive(Debug, Clone)]
struct StoredEntry {
    url: String,
    title: String,
}
43
44impl EntryStore {
45 pub fn new() -> Self {
46 Self::default()
47 }
48
49 pub async fn store_entries(&self, entries: Vec<ParsedEntry>) -> anyhow::Result<Vec<FeedEntry>> {
52 let mut inner = self.inner.lock().await;
53 inner.entries.clear();
54 inner.next_id = 1;
55
56 let mut result = Vec::with_capacity(entries.len());
57 for e in entries {
58 let id = inner.next_id;
59 inner.next_id = inner
60 .next_id
61 .checked_add(1)
62 .ok_or_else(|| anyhow::anyhow!("entry ID overflow"))?;
63 inner.entries.insert(
64 id,
65 StoredEntry {
66 url: e.url.clone(),
67 title: e.title.clone(),
68 },
69 );
70 result.push(FeedEntry {
71 id,
72 title: e.title,
73 url: e.url,
74 published: e.published,
75 summary: e.summary,
76 });
77 }
78 Ok(result)
79 }
80
81 pub async fn get_url(&self, id: u32) -> Option<(String, String)> {
83 let inner = self.inner.lock().await;
84 inner
85 .entries
86 .get(&id)
87 .map(|e| (e.url.clone(), e.title.clone()))
88 }
89}
90
/// An entry extracted from a fetched feed, before listing IDs are assigned.
#[derive(Debug)]
pub struct ParsedEntry {
    pub title: String,
    pub url: String,
    /// `%Y-%m-%d` date string, or `-` when the feed supplies no date.
    pub published: String,
    /// Plain-text summary truncated to 80 characters; may be empty.
    pub summary: String,
}
99
100pub fn build_http_client() -> reqwest::Result<reqwest::Client> {
102 reqwest::Client::builder()
103 .timeout(std::time::Duration::from_secs(30))
104 .build()
105}
106
107fn validate_url(url: &str) -> anyhow::Result<reqwest::Url> {
109 let parsed = reqwest::Url::parse(url).map_err(|e| anyhow::anyhow!("invalid URL: {e}"))?;
110 match parsed.scheme() {
111 "http" | "https" => Ok(parsed),
112 scheme => anyhow::bail!("unsupported URL scheme: {scheme}"),
113 }
114}
115
116fn check_content_length(resp: &reqwest::Response, limit: u64) -> anyhow::Result<()> {
118 if let Some(len) = resp.content_length() {
119 if len > limit {
120 anyhow::bail!("response too large: {len} bytes (limit: {limit} bytes)");
121 }
122 }
123 Ok(())
124}
125
126pub async fn fetch_and_parse_feed(
129 client: &reqwest::Client,
130 url: &str,
131) -> anyhow::Result<(String, Vec<ParsedEntry>)> {
132 let validated = validate_url(url)?;
133
134 let resp = client.get(validated).send().await?;
135 check_content_length(&resp, MAX_FEED_SIZE)?;
136 let bytes = resp.bytes().await?;
137 let feed = feed_rs::parser::parse(&bytes[..])?;
138
139 let feed_title = feed
140 .title
141 .map(|t| t.content)
142 .unwrap_or_else(|| "(untitled feed)".to_string());
143
144 let entries = feed
145 .entries
146 .into_iter()
147 .map(|entry| {
148 let title = entry
149 .title
150 .map(|t| t.content)
151 .unwrap_or_else(|| "(no title)".to_string());
152
153 let url = entry
154 .links
155 .first()
156 .map(|l| l.href.clone())
157 .unwrap_or_default();
158
159 let published = entry
160 .published
161 .or(entry.updated)
162 .map(|d| d.format("%Y-%m-%d").to_string())
163 .unwrap_or_else(|| "-".to_string());
164
165 let summary = entry
166 .summary
167 .map(|s| truncate_text(&strip_html_simple(&s.content), 80))
168 .unwrap_or_default();
169
170 ParsedEntry {
171 title,
172 url,
173 published,
174 summary,
175 }
176 })
177 .collect();
178
179 Ok((feed_title, entries))
180}
181
182pub fn format_entries_as_markdown(feed_title: &str, entries: &[FeedEntry], total: usize) -> String {
187 let mut out = String::with_capacity(entries.len() * 100);
188 out.push_str(&format!("## {feed_title}\n\n"));
189 out.push_str("| # | Title | Date |\n");
190 out.push_str("|---|-------|------|\n");
191
192 for e in entries {
193 let title_display = truncate_text(&e.title, 60);
194 out.push_str(&format!(
195 "| {} | [{}]({}) | {} |\n",
196 e.id, title_display, e.url, e.published
197 ));
198 }
199
200 if total > entries.len() {
201 out.push_str(&format!(
202 "\n*Showing {} of {} articles. Use `limit` to see more. Use `get` with # to read.*",
203 entries.len(),
204 total
205 ));
206 } else {
207 out.push_str(&format!(
208 "\n*{} articles. Use `get` with # to read.*",
209 entries.len()
210 ));
211 }
212 out
213}
214
215pub async fn fetch_article_text(client: &reqwest::Client, url: &str) -> anyhow::Result<String> {
217 let validated = validate_url(url)?;
218
219 let resp = client
220 .get(validated)
221 .header("User-Agent", "rss-fetch-mcp/0.1")
222 .send()
223 .await?;
224 check_content_length(&resp, MAX_ARTICLE_SIZE)?;
225
226 let html = resp.text().await?;
227 let text = html2text::from_read(html.as_bytes(), 80)?;
228
229 Ok(text)
230}
231
/// Best-effort HTML tag stripper for feed summaries.
///
/// Removes `<...>` tags and drops the contents of `<script>`/`<style>`
/// elements entirely. Text content is otherwise preserved — including bare
/// `>` characters, which are legal in HTML text (only `<` and `&` must be
/// escaped). The previous version treated every `>` as a tag terminator and
/// silently swallowed it, turning "a > b" into "a  b".
///
/// Limitations (acceptable for short summaries): entities are not decoded,
/// and a bare `<` in text is still treated as the start of a tag.
fn strip_html_simple(html: &str) -> String {
    let mut result = String::with_capacity(html.len());
    let mut in_tag = false;
    let mut in_skip = false;
    let mut tag_buf = String::new();

    for ch in html.chars() {
        if in_tag {
            if ch == '>' {
                in_tag = false;
                // First whitespace-delimited token is the tag name
                // (e.g. "script" in `<script type="...">`).
                let tag_name = tag_buf
                    .split_whitespace()
                    .next()
                    .unwrap_or("")
                    .to_lowercase();
                if tag_name == "script" || tag_name == "style" {
                    in_skip = true;
                } else if tag_name == "/script" || tag_name == "/style" {
                    in_skip = false;
                }
            } else {
                tag_buf.push(ch);
            }
            continue;
        }
        if ch == '<' {
            in_tag = true;
            tag_buf.clear();
            continue;
        }
        // Outside tags: keep text unless inside a script/style body.
        if !in_skip {
            result.push(ch);
        }
    }
    result
}
269
/// Truncates `s` to at most `max` characters (counted as `char`s, not bytes),
/// appending `...` when truncation occurs.
///
/// When `max < 3` the ellipsis itself would not fit, so the text is simply
/// cut to `max` characters. (The previous version returned `"..."` — three
/// characters — for any `max < 3`, exceeding the requested budget.)
fn truncate_text(s: &str, max: usize) -> String {
    let char_count = s.chars().count();
    if char_count <= max {
        s.to_string()
    } else if max < 3 {
        // The ellipsis alone would blow the budget; hard-cut instead.
        s.chars().take(max).collect()
    } else {
        let truncated: String = s.chars().take(max - 3).collect();
        format!("{truncated}...")
    }
}
280
// Unit tests. Async store tests run on the tokio runtime via `#[tokio::test]`;
// the network-dependent fetch functions are not exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    // Tags are removed while text content is preserved.
    #[test]
    fn strip_html_basic() {
        assert_eq!(
            strip_html_simple("<p>Hello <b>world</b></p>"),
            "Hello world"
        );
    }

    #[test]
    fn strip_html_empty() {
        assert_eq!(strip_html_simple(""), "");
    }

    // <script> bodies must be dropped entirely, not merely untagged.
    #[test]
    fn strip_html_skips_script() {
        let html = "<p>before</p><script>alert('xss')</script><p>after</p>";
        assert_eq!(strip_html_simple(html), "beforeafter");
    }

    // <style> bodies are skipped the same way as <script>.
    #[test]
    fn strip_html_skips_style() {
        let html = "<p>text</p><style>body{color:red}</style><p>more</p>";
        assert_eq!(strip_html_simple(html), "textmore");
    }

    #[test]
    fn truncate_short_text() {
        assert_eq!(truncate_text("hello", 10), "hello");
    }

    // Truncation appends "..." and never exceeds the character budget.
    #[test]
    fn truncate_long_text() {
        let result = truncate_text("this is a long sentence", 10);
        assert!(result.ends_with("..."));
        assert!(result.chars().count() <= 10);
    }

    // The limit counts characters, not bytes: multibyte input must not panic
    // or overshoot the budget.
    #[test]
    fn truncate_multibyte() {
        let result = truncate_text("あいうえおかきくけこ", 5);
        assert!(result.ends_with("..."));
        assert!(result.chars().count() <= 5);
    }

    #[test]
    fn validate_url_accepts_https() {
        assert!(validate_url("https://example.com/feed.xml").is_ok());
    }

    #[test]
    fn validate_url_accepts_http() {
        assert!(validate_url("http://example.com/feed.xml").is_ok());
    }

    // file:// must be rejected to prevent local file disclosure.
    #[test]
    fn validate_url_rejects_file() {
        let err = validate_url("file:///etc/passwd").unwrap_err();
        assert!(err.to_string().contains("unsupported URL scheme"));
    }

    #[test]
    fn validate_url_rejects_ftp() {
        let err = validate_url("ftp://example.com/data").unwrap_err();
        assert!(err.to_string().contains("unsupported URL scheme"));
    }

    #[test]
    fn validate_url_rejects_invalid() {
        assert!(validate_url("not a url").is_err());
    }

    // Full listing (total == shown): plain count footer, no "Showing X of Y".
    #[test]
    fn format_markdown_table() {
        let entries = vec![FeedEntry {
            id: 1,
            title: "Test Article".to_string(),
            url: "https://example.com/1".to_string(),
            published: "2026-02-14".to_string(),
            summary: "A test".to_string(),
        }];
        let md = format_entries_as_markdown("Test Feed", &entries, 1);
        assert!(md.contains("| 1 |"));
        assert!(md.contains("Test Article"));
        assert!(md.contains("## Test Feed"));
        assert!(md.contains("1 articles."));
    }

    // Partial listing (total > shown): footer advertises the hidden remainder.
    #[test]
    fn format_markdown_table_truncated() {
        let entries = vec![FeedEntry {
            id: 1,
            title: "Article".to_string(),
            url: "https://example.com/1".to_string(),
            published: "2026-02-14".to_string(),
            summary: String::new(),
        }];
        let md = format_entries_as_markdown("Feed", &entries, 50);
        assert!(md.contains("Showing 1 of 50 articles"));
        assert!(md.contains("Use `limit` to see more"));
    }

    // IDs are assigned sequentially from 1 and resolve back to (url, title);
    // unknown IDs yield None.
    #[tokio::test]
    async fn entry_store_roundtrip() {
        let store = EntryStore::new();
        let parsed = vec![
            ParsedEntry {
                title: "Article A".to_string(),
                url: "https://a.com".to_string(),
                published: "2026-01-01".to_string(),
                summary: "sumA".to_string(),
            },
            ParsedEntry {
                title: "Article B".to_string(),
                url: "https://b.com".to_string(),
                published: "2026-01-02".to_string(),
                summary: "sumB".to_string(),
            },
        ];

        let entries = store.store_entries(parsed).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].id, 1);
        assert_eq!(entries[1].id, 2);

        let (url, title) = store.get_url(1).await.unwrap();
        assert_eq!(url, "https://a.com");
        assert_eq!(title, "Article A");

        assert!(store.get_url(99).await.is_none());
    }

    // Storing a second listing replaces the first: ID 1 must resolve to the
    // new entry, not the old one.
    #[tokio::test]
    async fn store_clears_on_new_list() {
        let store = EntryStore::new();
        let first = vec![ParsedEntry {
            title: "Old".to_string(),
            url: "https://old.com".to_string(),
            published: "-".to_string(),
            summary: String::new(),
        }];
        store.store_entries(first).await.unwrap();

        let second = vec![ParsedEntry {
            title: "New".to_string(),
            url: "https://new.com".to_string(),
            published: "-".to_string(),
            summary: String::new(),
        }];
        store.store_entries(second).await.unwrap();

        let (url, _) = store.get_url(1).await.unwrap();
        assert_eq!(url, "https://new.com");
    }
}