kalosm_language/context/
rss.rs1use rss::Channel;
2use url::Url;
3
4use super::document::{Document, IntoDocuments};
5
/// Errors that can occur while reading documents from an RSS feed.
#[derive(Debug, thiserror::Error)]
pub enum RssFeedError {
    /// An HTTP request failed; wraps the underlying `reqwest::Error`.
    #[error("Failed to fetch RSS feed: {0}")]
    FetchFeed(#[from] reqwest::Error),
    /// The downloaded feed could not be parsed; wraps the underlying `rss::Error`.
    #[error("Failed to parse RSS feed: {0}")]
    ParseFeed(#[from] rss::Error),
}
16
/// An RSS feed, identified by the URL it is downloaded from.
///
/// The wrapped [`Url`] is the only state; the feed is fetched when its items are read.
#[derive(Debug, Clone, PartialEq)]
pub struct RssFeed(Url);
34
35impl From<Url> for RssFeed {
36 fn from(url: Url) -> Self {
37 Self::new(url)
38 }
39}
40
41impl IntoDocuments for RssFeed {
42 type Error = RssFeedError;
43
44 async fn into_documents(self) -> Result<Vec<Document>, Self::Error> {
45 self.read_top_n(usize::MAX).await
46 }
47}
48
49impl RssFeed {
50 pub fn new(url: Url) -> Self {
52 Self(url)
53 }
54
55 pub fn url(&self) -> &Url {
57 &self.0
58 }
59
60 pub async fn read_top_n(&self, top_n: usize) -> Result<Vec<Document>, RssFeedError> {
62 let xml = reqwest::get(self.0.clone()).await?.text().await?;
63 let channel = Channel::read_from(xml.as_bytes())?;
64 let mut documents = Vec::new();
65 for item in channel.items().iter().take(top_n) {
66 let mut message = String::new();
67 if let Some(title) = item.title() {
68 message.push_str(&format!("### {}\n", title));
69 }
70 let (source_url, content) = if let Some(content) = item.content() {
71 (None, content.to_string())
72 } else if let Some(source_url) = item.link() {
73 (
74 Some(source_url),
75 reqwest::get(source_url).await?.text().await?,
76 )
77 } else {
78 (None, String::new())
79 };
80
81 let url = match source_url {
82 Some(url) => Url::parse(url).unwrap(),
83 None => self.0.clone(),
84 };
85
86 if let Ok(article) =
87 readability::extractor::extract(&mut std::io::Cursor::new(&content), &url)
88 {
89 documents.push(Document::from_parts(article.title, article.text));
90 }
91 }
92 Ok(documents)
93 }
94}