rssume 0.3.3

RSS middleware with AI-powered translation and summarization
use chrono::Utc;
use feed_rs::parser;

/// A single feed entry flattened into plain, owned fields.
///
/// Values are produced by `fetch_feed`, which drops entries whose `title`
/// or `link` would be empty.
#[derive(Debug, Clone)]
pub struct RawArticle {
    /// Text content of the entry's title.
    pub title: String,
    /// URL of the article, taken from the entry's links.
    pub link: String,
    /// Entry content body; falls back to the entry summary when the content
    /// body is missing or empty (may itself be empty).
    pub content: String,
    /// Publication timestamp formatted as an RFC 2822 string. Uses the
    /// entry's published date, then its updated date, then the time of the
    /// fetch when neither is present.
    pub published_at: String,
    /// Name of the entry's first listed author, if any.
    pub author: Option<String>,
    /// The `term` of each category attached to the entry.
    pub categories: Vec<String>,
    /// The entry's id as reported by the feed parser.
    pub guid: Option<String>,
    /// Media attachments that carry a URL (despite the name, these are full
    /// `MediaItem`s rather than bare URLs).
    pub media_urls: Vec<MediaItem>,
}

/// One media attachment of a feed entry.
#[derive(Debug, Clone)]
pub struct MediaItem {
    /// Location of the media resource.
    pub url: String,
    /// Content type of the resource as a string, when the feed declares one.
    pub content_type: Option<String>,
    /// Size of the resource as declared by the feed, when present
    /// (presumably in bytes — TODO confirm against the feed parser).
    pub length: Option<u64>,
}

/// Downloads the feed at `url`, parses it, and converts its entries into
/// [`RawArticle`]s.
///
/// Per entry:
/// * `content` is the entry's content body, or its summary when the body is
///   missing or empty.
/// * `link` is the first link whose `rel` is absent or `"alternate"` (the
///   article page); if no such link exists the first link of any kind is used.
/// * `published_at` is the published date, else the updated date, else the
///   current time, formatted as RFC 2822.
///
/// Entries that end up with an empty title or an empty link are dropped.
///
/// # Errors
///
/// * `AppError::Fetch` if the HTTP client cannot be built, the request fails,
///   the response status is not a success, or the body cannot be read.
/// * `AppError::Parse` if the body is not a feed the parser understands.
pub async fn fetch_feed(url: &str) -> Result<Vec<RawArticle>, crate::error::AppError> {
    let client = reqwest::Client::builder()
        .user_agent("rssume/0.1 (RSS middleware; +https://github.com/rssume/rssume)")
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .map_err(|e| crate::error::AppError::Fetch(format!("client: {}", e)))?;

    let resp = client
        .get(url)
        .send()
        .await
        .map_err(|e| crate::error::AppError::Fetch(format!("fetch {}: {}", url, e)))?;
    if !resp.status().is_success() {
        return Err(crate::error::AppError::Fetch(format!(
            "HTTP {} for {}",
            resp.status(),
            url
        )));
    }

    let body = resp
        .bytes()
        .await
        .map_err(|e| crate::error::AppError::Fetch(format!("body: {}", e)))?;
    let feed = parser::parse(&body[..])
        .map_err(|e| crate::error::AppError::Parse(format!("parse: {}", e)))?;

    let articles: Vec<_> = feed
        .entries
        .into_iter()
        .map(|entry| {
            // Pick the article URL before any field of `entry` is moved out.
            // Atom entries may list several links (replies, edit, self,
            // enclosure, ...) in any order; a missing `rel` means "alternate",
            // which is the human-readable page we want. Fall back to the first
            // link so feeds without an alternate link keep working as before.
            let links = &entry.links;
            let link = links
                .iter()
                .find(|l| matches!(l.rel.as_deref(), None | Some("alternate")))
                .or_else(|| links.first())
                .map(|l| l.href.clone())
                .unwrap_or_default();

            let media_urls = entry
                .media
                .iter()
                .flat_map(|m| &m.content)
                .filter_map(|mc| {
                    // Media entries without a URL are useless downstream.
                    mc.url.as_ref().map(|u| MediaItem {
                        url: u.to_string(),
                        content_type: mc.content_type.as_ref().map(|t| t.to_string()),
                        length: mc.size,
                    })
                })
                .collect();

            let author = entry.authors.first().map(|p| p.name.clone());

            // `entry` is owned here, so text fields are moved out, not cloned.
            let content = entry.content.and_then(|c| c.body).unwrap_or_default();
            let body = if content.is_empty() {
                entry.summary.map(|s| s.content).unwrap_or_default()
            } else {
                content
            };

            RawArticle {
                title: entry.title.map(|t| t.content).unwrap_or_default(),
                link,
                content: body,
                published_at: entry
                    .published
                    .or(entry.updated)
                    .map(|d| d.to_rfc2822())
                    .unwrap_or_else(|| Utc::now().to_rfc2822()),
                author,
                categories: entry.categories.into_iter().map(|c| c.term).collect(),
                guid: Some(entry.id),
                media_urls,
            }
        })
        // An article without a title or a link cannot be presented or deduplicated.
        .filter(|a| !a.link.is_empty() && !a.title.is_empty())
        .collect();

    Ok(articles)
}