// h2m 0.7.1
//
// HTML to Markdown converter.
// Documentation
#![cfg(test)]
//! HTML utility function tests (`extract_links`, `readable_content`, `strip_noise`).

use ego_tree as _;
use pretty_assertions::assert_eq;
#[cfg(feature = "scrape")]
use reqwest as _;
use scraper as _;
#[cfg(feature = "scrape")]
use serde as _;
use thiserror as _;
#[cfg(feature = "scrape")]
use tokio as _;
use url as _;

/// With a base URL supplied, a root-relative `href` is expected to come back
/// joined onto the base's origin, while an already-absolute `href` is expected
/// to pass through unchanged. Document order is expected to be preserved.
#[test]
fn extract_links_with_base_resolves_relative() {
    // Arrange: one relative and one absolute anchor, plus the page they live on.
    let base_url = "https://example.com/page";
    let markup = r#"<a href="/about">About</a><a href="https://other.com">Other</a>"#;

    // Act.
    let resolved = h2m::html::extract_links_with_base(markup, base_url);

    // Assert.
    let expected = vec!["https://example.com/about", "https://other.com"];
    assert_eq!(resolved, expected);
}

/// Without a base URL, the `href` value is expected to be returned exactly as
/// written in the markup (here, still root-relative).
#[test]
fn extract_links_without_base_returns_raw() {
    let markup = r#"<a href="/about">About</a>"#;

    let hrefs = h2m::html::extract_links(markup);

    assert_eq!(hrefs, vec!["/about"]);
}

/// When the page contains an `<article>`, `readable_content` is expected to
/// return exactly that element's inner HTML, leaving the sibling `<nav>` and
/// `<footer>` out of the result.
#[test]
fn readable_content_phase1_semantic_selector() {
    // Fixture split per top-level element; `concat!` yields the same single string.
    let page = concat!(
        "<nav>menu</nav>",
        "<article><p>Main content</p></article>",
        "<footer>ft</footer>",
    );

    let extracted = h2m::html::readable_content(page);

    assert_eq!(extracted, "<p>Main content</p>");
}

/// With no semantic container in the page (only a plain `<div>`), the output
/// of `readable_content` is expected to lose the `<nav>` and `<footer>` text
/// while keeping the paragraph text.
#[test]
fn readable_content_phase2_noise_stripping() {
    let page = "<nav>menu</nav><div><p>Hello world</p></div><footer>ft</footer>";
    let extracted = h2m::html::readable_content(page);

    // Small helper so each expectation reads as a single line.
    let has = |needle: &str| extracted.contains(needle);

    assert!(!has("menu"), "nav should be stripped");
    assert!(!has("ft"), "footer should be stripped");
    assert!(has("Hello world"), "content should be preserved");
}

/// `strip_noise` is expected to drop the text of `<header>`, `<nav>`,
/// `<aside>` and `<footer>` elements while keeping the `<p>` text.
///
/// Each noise element carries a distinct two-letter marker so a failing
/// assertion identifies which element leaked into the output.
#[test]
fn strip_noise_removes_nav_footer_aside_header() {
    let html = "<header>hd</header><nav>nv</nav><p>content</p><aside>sd</aside><footer>ft</footer>";
    let result = h2m::html::strip_noise(html);
    assert!(!result.contains("hd"), "header should be stripped");
    assert!(!result.contains("nv"), "nav should be stripped");
    assert!(!result.contains("sd"), "aside should be stripped");
    assert!(!result.contains("ft"), "footer should be stripped");
    assert!(result.contains("content"), "content should be preserved");
}

/// `strip_noise` is expected to drop elements marked with the ARIA roles
/// `navigation` and `contentinfo`, even when they are plain `<div>`s, while
/// keeping the `<p>` text.
#[test]
fn strip_noise_removes_aria_roles() {
    let html =
        r#"<div role="navigation">nav</div><p>content</p><div role="contentinfo">info</div>"#;
    let result = h2m::html::strip_noise(html);
    // The needle includes the closing tag so it targets the element's text
    // rather than the substring `nav` inside the `navigation` attribute value.
    assert!(
        !result.contains("nav</div>"),
        "role=navigation element should be stripped"
    );
    assert!(
        !result.contains("info"),
        "role=contentinfo element should be stripped"
    );
    assert!(result.contains("content"), "content should be preserved");
}

/// Input that contains no noise elements: the paragraph text must survive
/// `strip_noise`.
///
/// Only text preservation is asserted here; the output is not compared
/// verbatim against the input.
#[test]
fn strip_noise_no_noise_returns_original() {
    let cleaned = h2m::html::strip_noise("<div><p>plain content</p></div>");

    assert!(cleaned.contains("plain content"));
}

/// When both an `<article>` and an ordinary `<div>` with content are present,
/// `readable_content` is expected to return only the article's inner HTML
/// rather than the whole page minus its noise elements.
#[test]
fn readable_content_prefers_article_over_noise_strip() {
    // Fixture split per top-level element; `concat!` yields the same single string.
    let page = concat!(
        "<nav>menu</nav>",
        "<article><p>article</p></article>",
        "<div><p>other</p></div>",
        "<footer>ft</footer>",
    );

    let extracted = h2m::html::readable_content(page);

    assert_eq!(extracted, "<p>article</p>");
    // Implied by the equality above; kept as an explicit statement of intent
    // that the sibling `<div>` content must not leak into the result.
    assert!(!extracted.contains("other"));
}