microformats 0.18.2

A union library of the Microformats types and associated parser.
Documentation
//! Example: Collecting Items from the First h-feed on a Page
//!
//! This example demonstrates how to find and collect items from the first
//! h-feed on a page. h-feed is a microformat for representing feeds of
//! content items (like blog posts).
//!
//! # Usage
//! ```bash
//! cargo run --example hfeed_items
//! ```

use microformats::types::{Class, KnownClass};
use url::Url;

/// Locate the first h-feed among the document's items.
///
/// Walks `document.items` in order and returns a clone of the first item
/// whose type list contains the known `Feed` class. Returns `None` when no
/// such item is present.
fn find_first_hfeed(document: &microformats::types::Document) -> Option<microformats::types::Item> {
    for candidate in document.items.iter() {
        let is_feed = candidate
            .r#type
            .iter()
            .any(|class| matches!(class, Class::Known(KnownClass::Feed)));

        if is_feed {
            // Hand back an owned copy so the caller is not tied to the document borrow.
            return Some(candidate.clone());
        }
    }

    None
}

/// Gather the h-entry children of a feed item.
///
/// Iterates over `feed.children` in order and returns clones of every child
/// whose type list contains the known `Entry` class. Children of any other
/// type are skipped; the result is empty when there are no matching children.
fn collect_feed_entries(feed: &microformats::types::Item) -> Vec<microformats::types::Item> {
    let mut entries = Vec::new();

    for child in feed.children.iter() {
        let is_entry = child
            .r#type
            .iter()
            .any(|class| matches!(class, Class::Known(KnownClass::Entry)));

        if is_entry {
            entries.push(child.clone());
        }
    }

    entries
}

/// Extract the feed's title from its `name` property.
///
/// Only the first value of the `name` property is considered. Returns
/// `Some` with its string form when that value is a `PropertyValue::Plain`,
/// and `None` when the property is missing, empty, or holds any other
/// kind of value.
fn get_feed_title(feed: &microformats::types::Item) -> Option<String> {
    let names = feed.properties.get("name")?;
    let first = names.first()?;

    if let microformats::types::PropertyValue::Plain(text) = first {
        Some(text.to_string())
    } else {
        None
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let html = r#"
    <!DOCTYPE html>
    <html>
    <body>
        <main class="h-feed">
            <h1 class="p-name">My Blog</h1>

            <article class="h-entry">
                <h2 class="p-name">First Post</h2>
                <time class="dt-published">2024-01-15</time>
                <div class="e-content">
                    <p>This is my first blog post.</p>
                </div>
            </article>

            <article class="h-entry">
                <h2 class="p-name">Second Post</h2>
                <time class="dt-published">2024-01-20</time>
                <div class="e-content">
                    <p>This is my second blog post.</p>
                </div>
            </article>

            <article class="h-entry">
                <h2 class="p-name">Third Post</h2>
                <time class="dt-published">2024-01-25</time>
                <div class="e-content">
                    <p>This is my third blog post.</p>
                </div>
            </article>
        </main>

        <aside class="h-card">
            <span class="p-name">Sidebar Author</span>
        </aside>
    </body>
    </html>
    "#;

    let page_url: Url = "https://example.com/blog/".parse()?;
    let document = microformats::from_html(html, &page_url)?;

    match find_first_hfeed(&document) {
        Some(feed) => {
            println!("Found h-feed!");

            if let Some(title) = get_feed_title(&feed) {
                println!("  Feed title: {}", title);
            }

            let entries = collect_feed_entries(&feed);
            println!("  Found {} entries:", entries.len());

            for (i, entry) in entries.iter().enumerate() {
                println!("\n  Entry #{}", i + 1);

                if let Some(name) = entry.properties.get("name").and_then(|v| v.first()) {
                    println!("    Title: {:?}", name);
                }

                if let Some(published) = entry.properties.get("published").and_then(|v| v.first()) {
                    println!("    Published: {:?}", published);
                }

                if let Some(url) = entry.properties.get("url").and_then(|v| v.first()) {
                    println!("    URL: {:?}", url);
                }
            }
        }
        None => {
            println!("No h-feed found on this page.");

            // Check for any h-entries that might not be in a feed
            let standalone_entries: Vec<_> = document
                .items
                .iter()
                .filter(|item| {
                    item.r#type
                        .iter()
                        .any(|t| matches!(t, Class::Known(KnownClass::Entry)))
                })
                .collect();

            if !standalone_entries.is_empty() {
                println!(
                    "Found {} standalone h-entry items (not in an h-feed)",
                    standalone_entries.len()
                );
            }
        }
    }

    Ok(())
}