use web_capture::html::{
convert_relative_urls, convert_to_utf8, decode_html_entities, has_javascript, is_html,
normalize_url, pretty_print_html,
};
#[test]
fn test_convert_relative_urls_href() {
let html = r#"<a href="/about">About</a>"#;
let result = convert_relative_urls(html, "https://example.com");
assert!(result.contains("https://example.com/about"));
}
#[test]
fn test_convert_relative_urls_src() {
let html = r#"<img src="/image.png">"#;
let result = convert_relative_urls(html, "https://example.com");
assert!(result.contains("https://example.com/image.png"));
}
#[test]
fn test_convert_relative_urls_absolute() {
let html = r#"<a href="https://other.com/page">Link</a>"#;
let result = convert_relative_urls(html, "https://example.com");
assert!(result.contains("https://other.com/page"));
}
#[test]
fn test_convert_relative_urls_data_url() {
let html = r#"<img src="data:image/png;base64,abc123">"#;
let result = convert_relative_urls(html, "https://example.com");
assert!(result.contains("data:image/png;base64,abc123"));
}
#[test]
fn test_convert_to_utf8_already_utf8() {
let html = r#"<html><head><meta charset="utf-8"></head></html>"#;
let result = convert_to_utf8(html);
assert!(result.contains("utf-8"));
}
#[test]
fn test_convert_to_utf8_other_charset() {
let html = r#"<html><head><meta charset="iso-8859-1"></head></html>"#;
let result = convert_to_utf8(html);
assert!(result.contains("utf-8"));
}
#[test]
fn test_has_javascript_with_script() {
let html = r"<html><script>console.log('test');</script></html>";
assert!(has_javascript(html));
}
#[test]
fn test_has_javascript_without_script() {
let html = r"<html><body>Hello</body></html>";
assert!(!has_javascript(html));
}
#[test]
fn test_is_html_valid() {
let html = r"<html><body>Hello</body></html>";
assert!(is_html(html));
}
#[test]
fn test_is_html_invalid() {
let html = "Just plain text";
assert!(!is_html(html));
}
#[test]
fn test_normalize_url_already_absolute() {
assert_eq!(
normalize_url("https://example.com").unwrap(),
"https://example.com"
);
}
#[test]
fn test_normalize_url_relative() {
assert_eq!(normalize_url("example.com").unwrap(), "https://example.com");
}
#[test]
fn test_normalize_url_empty() {
assert!(normalize_url("").is_err());
}
#[test]
fn test_decode_html_entities_basic() {
assert_eq!(decode_html_entities("&"), "&");
assert_eq!(decode_html_entities("<div>"), "<div>");
assert_eq!(decode_html_entities("'hello'"), "'hello'");
assert_eq!(decode_html_entities(""test""), "\"test\"");
}
#[test]
fn test_decode_html_entities_unicode() {
assert_eq!(decode_html_entities("—"), "\u{2014}"); assert_eq!(decode_html_entities("—"), "\u{2014}");
assert_eq!(decode_html_entities(" "), "\u{00A0}");
}
#[test]
fn test_decode_html_entities_no_entities() {
assert_eq!(decode_html_entities("plain text"), "plain text");
assert_eq!(decode_html_entities(""), "");
}
#[test]
fn test_pretty_print_html_basic() {
let html = "<html><head><title>Test</title></head><body><p>Hello</p></body></html>";
let result = pretty_print_html(html);
assert!(result.contains(" <head>"));
assert!(result.contains(" <title>"));
assert!(result.contains(" </head>"));
assert!(result.contains(" <body>"));
assert!(result.contains(" <p>"));
}
#[test]
fn test_pretty_print_html_void_elements() {
let html =
r#"<html><head><meta charset="utf-8"></head><body><img src="test.png"><br></body></html>"#;
let result = pretty_print_html(html);
assert!(result.contains("<img"));
assert!(result.contains("<br>"));
}
#[test]
fn test_pretty_print_html_empty() {
let result = pretty_print_html("");
assert!(result.is_empty());
}