use crate::client::AkShareClient;
use crate::error::Result;
use crate::types::NewsItem;
/// Extracts the trimmed text content of the first `<tag>…</tag>` element in `xml`.
///
/// Only an exact, attribute-free opening tag (`<tag>`) is recognised. If the
/// content is wrapped in a single `<![CDATA[ … ]]>` section, the wrapper is
/// removed. XML entities (for example `&amp;`) are returned as-is; no decoding
/// is performed here.
///
/// Returns `None` when the opening tag is absent, when no closing tag follows
/// the opening tag, or when the content is empty after trimming.
fn extract_rss_tag(xml: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    // Look for the closing tag only in the text *after* the opening tag.
    // Searching the whole input could match an earlier stray `</tag>` and
    // yield an inverted byte range, which panics when used to slice.
    let (_, after_open) = xml.split_once(open.as_str())?;
    let (content, _) = after_open.split_once(close.as_str())?;

    let content = content.trim();
    let content = content
        .strip_prefix("<![CDATA[")
        .and_then(|inner| inner.strip_suffix("]]>"))
        .unwrap_or(content)
        .trim();

    (!content.is_empty()).then(|| content.to_string())
}
/// Normalizes an RSS date string to `YYYY-MM-DD`.
///
/// Two input shapes are accepted:
///
/// 1. An RFC 2822 timestamp, parsed with `chrono` and formatted with
///    `%Y-%m-%d`.
/// 2. Any string whose first ten bytes have the shape `dddd-dd-dd` (ASCII
///    digits and hyphens), in which case that prefix is returned verbatim.
///    Only the shape is checked; the month and day ranges are not validated.
///
/// Every other input yields an empty string, which callers treat as
/// "date unknown".
fn normalize_rss_date(raw: &str) -> String {
    if let Ok(parsed) = chrono::DateTime::parse_from_rfc2822(raw) {
        return parsed.format("%Y-%m-%d").to_string();
    }

    // Validate the whole prefix instead of just the two hyphens: this rejects
    // junk such as "abcd-ef-ghij" and guarantees that all ten bytes are ASCII.
    let has_iso_date_prefix = match raw.as_bytes().get(..10) {
        Some(prefix) => prefix.iter().enumerate().all(|(idx, byte)| match idx {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        }),
        None => false,
    };

    if has_iso_date_prefix {
        // Safe to slice: the first ten bytes are ASCII, so byte offset 10 is
        // always a character boundary (a multi-byte character straddling it
        // would otherwise make this slice panic).
        raw[..10].to_string()
    } else {
        String::new()
    }
}
/// Builds [`NewsItem`]s from the `<item>` elements found in an RSS `body`.
///
/// The body is cut at every literal `<item>` marker; the text before the first
/// marker is ignored and each fragment is truncated at its `</item>` (or runs
/// to the end of the fragment when no closing tag is present).
///
/// An item is emitted only when it has both a title and a link, and its title
/// contains none of `exclude_title_substrings`. A missing description becomes
/// an empty summary. When `pubDate` is missing or cannot be normalized, the
/// current UTC date is used as `published_at`. Every item is labelled with
/// `source`.
fn parse_rss_items(body: &str, source: &str, exclude_title_substrings: &[&str]) -> Vec<NewsItem> {
    body.split("<item>")
        .skip(1)
        .filter_map(|chunk| {
            let fragment = match chunk.find("</item>") {
                Some(close_at) => &chunk[..close_at],
                None => chunk,
            };

            // A title that matches any excluded substring counts as absent.
            let headline = extract_rss_tag(fragment, "title").filter(|text| {
                exclude_title_substrings
                    .iter()
                    .all(|needle| !text.contains(needle))
            });
            let address = extract_rss_tag(fragment, "link");
            let summary = extract_rss_tag(fragment, "description").unwrap_or_default();
            let normalized_date = match extract_rss_tag(fragment, "pubDate") {
                Some(raw) => normalize_rss_date(&raw),
                None => String::new(),
            };

            // Both a title and a link are required; otherwise skip the item.
            let (title, url) = headline.zip(address)?;

            let published_at = if normalized_date.is_empty() {
                chrono::Utc::now().format("%Y-%m-%d").to_string()
            } else {
                normalized_date
            };

            Some(NewsItem {
                published_at,
                title,
                summary,
                source: source.to_string(),
                url: Some(url),
            })
        })
        .collect()
}
impl AkShareClient {
pub async fn bing_news_rss(&self, query: &str, timeout_secs: u64) -> Result<Vec<NewsItem>> {
let rss_url = format!(
"https://cn.bing.com/search?q={}&format=rss",
query.replace(' ', "+")
);
let body = tokio::time::timeout(
std::time::Duration::from_secs(timeout_secs),
self.get(&rss_url).send(),
)
.await
.map_err(|_| {
crate::Error::upstream(format!("Bing RSS request timed out after {timeout_secs}s"))
})?
.map_err(crate::Error::from)?
.text()
.await
.map_err(crate::Error::from)?;
Ok(parse_rss_items(&body, "bing_rss", &["必应", "Bing"]))
}
pub async fn google_news_rss(&self, query: &str, timeout_secs: u64) -> Result<Vec<NewsItem>> {
let gnews_url = format!(
"https://news.google.com/rss/search?q={}&hl=en-US&gl=US&ceid=US:en",
query.replace(' ', "+")
);
let body = tokio::time::timeout(
std::time::Duration::from_secs(timeout_secs),
self.get(&gnews_url).send(),
)
.await
.map_err(|_| {
crate::Error::upstream(format!(
"Google News RSS request timed out after {timeout_secs}s"
))
})?
.map_err(crate::Error::from)?
.text()
.await
.map_err(crate::Error::from)?;
Ok(parse_rss_items(&body, "google_news_rss", &[]))
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain elements are extracted; an absent tag yields `None`.
    #[test]
    fn test_extract_rss_tag() {
        let fragment = "<item><title>Hello World</title><link>https://example.com</link></item>";

        assert_eq!(
            extract_rss_tag(fragment, "title").as_deref(),
            Some("Hello World")
        );
        assert_eq!(
            extract_rss_tag(fragment, "link").as_deref(),
            Some("https://example.com")
        );
        assert!(extract_rss_tag(fragment, "missing").is_none());
    }

    /// A CDATA wrapper around the content is stripped.
    #[test]
    fn test_extract_rss_tag_cdata() {
        let extracted = extract_rss_tag("<title><![CDATA[CDATA Title]]></title>", "title");
        assert_eq!(extracted.as_deref(), Some("CDATA Title"));
    }

    /// RFC 2822 timestamps collapse to their calendar date.
    #[test]
    fn test_normalize_rss_date_rfc2822() {
        let normalized = normalize_rss_date("Wed, 03 Jun 2026 00:36:00 GMT");
        assert_eq!(normalized, "2026-06-03");
    }

    /// An ISO-style date passes through unchanged.
    #[test]
    fn test_normalize_rss_date_iso() {
        let normalized = normalize_rss_date("2026-06-03");
        assert_eq!(normalized, "2026-06-03");
    }

    /// Unrecognised input maps to the empty string.
    #[test]
    fn test_normalize_rss_date_unknown() {
        let normalized = normalize_rss_date("unknown format");
        assert_eq!(normalized, "");
    }

    /// Items are returned in feed order, with or without a `pubDate`.
    #[test]
    fn test_parse_rss_items_basic() {
        let feed = r#"
<channel>
<item><title>T1</title><link>http://a.com</link><description>D1</description><pubDate>Wed, 03 Jun 2026 00:00:00 GMT</pubDate></item>
<item><title>T2</title><link>http://b.com</link><description>D2</description></item>
</channel>"#;

        let parsed = parse_rss_items(feed, "test", &[]);
        let titles: Vec<&str> = parsed.iter().map(|item| item.title.as_str()).collect();

        assert_eq!(titles, ["T1", "T2"]);
        assert_eq!(parsed[0].source, "test");
    }

    /// Items whose title contains an excluded substring are skipped.
    #[test]
    fn test_parse_rss_items_excludes_filtered_titles() {
        let feed = r#"
<channel>
<item><title>必应 News</title><link>http://a.com</link></item>
<item><title>Real News</title><link>http://b.com</link></item>
</channel>"#;

        let parsed = parse_rss_items(feed, "test", &["必应", "Bing"]);
        let titles: Vec<&str> = parsed.iter().map(|item| item.title.as_str()).collect();

        assert_eq!(titles, ["Real News"]);
    }
}