use microformats::types::{Class, KnownClass};
use url::Url;
fn find_first_hfeed(document: µformats::types::Document) -> Option<microformats::types::Item> {
document
.items
.iter()
.find(|item| {
item.r#type
.iter()
.any(|t| matches!(t, Class::Known(KnownClass::Feed)))
})
.cloned()
}
fn collect_feed_entries(feed: µformats::types::Item) -> Vec<microformats::types::Item> {
feed.children
.iter()
.filter(|child| {
child
.r#type
.iter()
.any(|t| matches!(t, Class::Known(KnownClass::Entry)))
})
.cloned()
.collect()
}
fn get_feed_title(feed: µformats::types::Item) -> Option<String> {
feed.properties
.get("name")
.and_then(|values| values.first())
.and_then(|value| match value {
microformats::types::PropertyValue::Plain(s) => Some(s.to_string()),
_ => None,
})
}
/// Parses an embedded sample page and prints what it finds.
///
/// When an `h-feed` is present, prints its title (if any), the number of
/// entries, and for each entry the first `name`, `published` and `url` value
/// that exists. Otherwise reports that no feed was found and, if there are
/// top-level `h-entry` items, how many.
///
/// Errors from URL parsing or from `microformats::from_html` are propagated.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Sample markup: one h-feed wrapping three h-entry articles, plus an h-card.
    let html = r#"
<!DOCTYPE html>
<html>
<body>
<main class="h-feed">
<h1 class="p-name">My Blog</h1>
<article class="h-entry">
<h2 class="p-name">First Post</h2>
<time class="dt-published">2024-01-15</time>
<div class="e-content">
<p>This is my first blog post.</p>
</div>
</article>
<article class="h-entry">
<h2 class="p-name">Second Post</h2>
<time class="dt-published">2024-01-20</time>
<div class="e-content">
<p>This is my second blog post.</p>
</div>
</article>
<article class="h-entry">
<h2 class="p-name">Third Post</h2>
<time class="dt-published">2024-01-25</time>
<div class="e-content">
<p>This is my third blog post.</p>
</div>
</article>
</main>
<aside class="h-card">
<span class="p-name">Sidebar Author</span>
</aside>
</body>
</html>
"#;

    let base: Url = "https://example.com/blog/".parse()?;
    let parsed = microformats::from_html(html, &base)?;

    if let Some(feed) = find_first_hfeed(&parsed) {
        println!("Found h-feed!");

        let maybe_title = get_feed_title(&feed);
        if let Some(title) = maybe_title {
            println!(" Feed title: {}", title);
        }

        let entries = collect_feed_entries(&feed);
        println!(" Found {} entries:", entries.len());

        for (idx, entry) in entries.iter().enumerate() {
            // Entries are numbered from 1 in the output.
            let number = idx + 1;
            println!("\n Entry #{}", number);

            let props = &entry.properties;
            if let Some(name) = props.get("name").and_then(|vals| vals.first()) {
                println!(" Title: {:?}", name);
            }
            if let Some(published) = props.get("published").and_then(|vals| vals.first()) {
                println!(" Published: {:?}", published);
            }
            if let Some(url) = props.get("url").and_then(|vals| vals.first()) {
                println!(" URL: {:?}", url);
            }
        }
    } else {
        println!("No h-feed found on this page.");

        // Count top-level h-entry items that are not wrapped in a feed.
        let standalone_count = parsed
            .items
            .iter()
            .filter(|item| {
                item.r#type
                    .iter()
                    .any(|class| matches!(class, Class::Known(KnownClass::Entry)))
            })
            .count();

        if standalone_count > 0 {
            println!(
                "Found {} standalone h-entry items (not in an h-feed)",
                standalone_count
            );
        }
    }

    Ok(())
}