#![cfg(all(test, feature = "metaformats"))]
use crate::parse::Parser;
use microformats_types::{Document, PropertyValue};
// Table-driven check of the whole parsed document for pages whose <head> carries
// social-metadata tags. Every case pairs an HTML fixture with the JSON form of the
// `Document` the parser is expected to produce, including its `meta-item`.
//
// The HTML fixtures are raw strings and are kept flush-left on purpose: any
// leading whitespace would become part of the parsed input.
#[yare::parameterized(
    // Case: Open Graph (`og:*`) title, description, type and image.
    og_tags = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="OG Title">
<meta property="og:description" content="OG Description">
<meta property="og:type" content="article">
<meta property="og:image" content="https://example.com/image.jpg">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Main Entry</span>
</div>
</body>
</html>
"#,
        serde_json::from_value(serde_json::json!({
            "items": [
                {
                    "id": "entry1",
                    "type": ["h-entry"],
                    "properties": {
                        "name": ["Main Entry"]
                    }
                }
            ],
            "url": "http://example.com/page.html",
            "rels": {},
            "rel-urls": {},
            "meta-item": {
                "type": ["h-entry"],
                "properties": {
                    "name": ["OG Title"],
                    "summary": ["OG Description"],
                    "photo": ["https://example.com/image.jpg"]
                }
            }
        })).unwrap()
    },
    // Case: Twitter Card (`twitter:*`) title, description and image.
    twitter_tags = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Twitter Title">
<meta name="twitter:description" content="Twitter Description">
<meta name="twitter:image" content="https://example.com/twitter-image.jpg">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Main Entry</span>
</div>
</body>
</html>
"#,
        serde_json::from_value(serde_json::json!({
            "items": [
                {
                    "id": "entry1",
                    "type": ["h-entry"],
                    "properties": {
                        "name": ["Main Entry"]
                    }
                }
            ],
            "url": "http://example.com/page.html",
            "rels": {},
            "rel-urls": {},
            "meta-item": {
                "type": ["h-entry"],
                "properties": {
                    "name": ["Twitter Title"],
                    "summary": ["Twitter Description"],
                    "photo": ["https://example.com/twitter-image.jpg"]
                }
            }
        })).unwrap()
    },
    // Case: `article:*` published/modified timestamps and author.
    article_datetime = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="Article Title">
<meta property="article:published_time" content="2024-01-15T10:30:00Z">
<meta property="article:modified_time" content="2024-01-16T15:45:00Z">
<meta property="article:author" content="Jane Doe">
<meta property="og:type" content="article">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Main Entry</span>
</div>
</body>
</html>
"#,
        serde_json::from_value(serde_json::json!({
            "items": [
                {
                    "id": "entry1",
                    "type": ["h-entry"],
                    "properties": {
                        "name": ["Main Entry"]
                    }
                }
            ],
            "url": "http://example.com/page.html",
            "rels": {},
            "rel-urls": {},
            "meta-item": {
                "type": ["h-entry"],
                "properties": {
                    "name": ["Article Title"],
                    "published": ["2024-01-15T10:30:00Z"],
                    "updated": ["2024-01-16T15:45:00Z"],
                    "author": ["Jane Doe"]
                }
            }
        })).unwrap()
    },
)]
fn parse_metaformats_scenarios(html: &str, expected: Document) -> Result<(), crate::Error> {
    let mut parser = Parser::from_html(html.to_owned())?;

    // The error side is stringified so that both operands of the comparison
    // share the type `Result<Document, String>`.
    let outcome = parser
        .into_document(Some("http://example.com/page.html".parse()?))
        .map_err(|err| err.to_string());

    similar_asserts::assert_serde_eq!(
        actual: outcome,
        expected: Ok(expected),
        "meta-item should be created from meta tags"
    );
    Ok(())
}
// Verifies which root type the `meta-item` receives depending on the URL the
// document is parsed against. Each case supplies the HTML fixture, the document
// URL, and the single type string expected on the resulting `meta-item`.
//
// The HTML fixtures are raw strings and are kept flush-left on purpose: any
// leading whitespace would become part of the parsed input.
#[yare::parameterized(
    // Site root URL with Open Graph tags: expects h-card.
    home_page_og = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="My Site">
<meta property="og:type" content="article">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "http://example.com/",
        "h-card"
    },
    // `/index.html` URL with Open Graph tags: expects h-card as well.
    home_page_og_index = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="My Site">
<meta property="og:type" content="article">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "http://example.com/index.html",
        "h-card"
    },
    // Non-root page URL with Open Graph tags: expects h-entry.
    regular_page_og = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta property="og:title" content="Article">
<meta property="og:type" content="article">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "http://example.com/page.html",
        "h-entry"
    },
    // Site root URL with a Twitter card: expects h-card.
    home_page_twitter = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="My Site">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "http://example.com/",
        "h-card"
    },
    // Non-root page URL with a Twitter card: expects h-entry.
    regular_page_twitter = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Article">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "http://example.com/page.html",
        "h-entry"
    },
)]
fn parse_home_page_detection(
    html: &str,
    url: &str,
    expected_item_type: &str,
) -> Result<(), crate::Error> {
    let mut parser = Parser::from_html(html.to_string())?;

    // Propagate a parse failure with `?` instead of asserting `is_ok()` first:
    // the returned `Err` carries the underlying error into the test report,
    // whereas a bare `assert!` would only print a fixed message.
    let doc = parser.into_document(Some(url.parse()?))?;

    // A single `expect` replaces the `is_some()` check followed by `unwrap()`.
    let meta_item = doc
        .meta_item
        .as_ref()
        .expect("meta-item should be created");

    // Checking the length first keeps the index below from panicking with an
    // uninformative out-of-bounds message.
    assert_eq!(meta_item.r#type.len(), 1, "meta-item should have one type");
    assert_eq!(
        meta_item.r#type[0].to_string(),
        expected_item_type,
        "meta-item type should be {} for {} URL",
        expected_item_type,
        url
    );
    Ok(())
}
// Verifies the fallback sources used to fill `meta-item` properties when no
// dedicated social-metadata tag supplies them. Each case gives the HTML fixture,
// the property name to inspect, and the single string value expected for it.
// Every document is parsed against the base URL `http://example.com`, so the
// root-relative `href`s in the icon cases are expected back as absolute URLs.
//
// The HTML fixtures are raw strings and are kept flush-left on purpose: any
// leading whitespace would become part of the parsed input.
#[yare::parameterized(
    // `name` taken from the <title> element.
    title_fallback = {
        r#"
<!DOCTYPE html>
<html>
<head>
<title>Page Title from Title Tag</title>
<meta name="twitter:card" content="summary">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "name",
        "Page Title from Title Tag"
    },
    // `summary` taken from <meta name="description">.
    description_fallback = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="description" content="Meta Description">
<meta name="twitter:card" content="summary">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "summary",
        "Meta Description"
    },
    // `photo` taken from <link rel="icon">.
    icon_fallback = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Test">
<link rel="icon" href="/favicon.ico">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "photo",
        "http://example.com/favicon.ico"
    },
    // `photo` taken from <link rel="shortcut icon"> (the Microsoft-style rel value).
    ms_shortcut_icon = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Test">
<link rel="shortcut icon" href="/tile.png">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "photo",
        "http://example.com/tile.png"
    },
    // `photo` taken from <link rel="apple-touch-icon">.
    apple_touch_icon = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Test">
<link rel="apple-touch-icon" href="/apple-touch-icon.png">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "photo",
        "http://example.com/apple-touch-icon.png"
    },
    // `photo` taken from <link rel="icon"> that also declares type="image/png".
    ms_tile_icon = {
        r#"
<!DOCTYPE html>
<html>
<head>
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Test">
<link rel="icon" type="image/png" href="/tile.png">
</head>
<body>
<div class="h-entry" id="entry1">
<span class="p-name">Content</span>
</div>
</body>
</html>
"#,
        "photo",
        "http://example.com/tile.png"
    },
)]
fn parse_metaformats_fallbacks(
    html: &str,
    expected_property: &str,
    expected_value: &str,
) -> Result<(), crate::Error> {
    let mut parser = Parser::from_html(html.to_string())?;

    // Propagate a parse failure with `?` instead of asserting `is_ok()` first:
    // the returned `Err` carries the underlying error into the test report,
    // whereas a bare `assert!` would only print a fixed message.
    let doc = parser.into_document(Some("http://example.com".parse()?))?;

    // A single `expect` replaces the `is_some()` check followed by `unwrap()`.
    let meta_item = doc
        .meta_item
        .as_ref()
        .expect("meta-item should be created");

    let values = meta_item
        .properties
        .get(expected_property)
        .unwrap_or_else(|| panic!("{} property should be present", expected_property));
    assert_eq!(
        values.len(),
        1,
        "{} should have one value",
        expected_property
    );

    // Reduce the value to a plain string so every case can state its
    // expectation as `&str`, whichever of these variants the parser produced.
    let actual_value = match &values[0] {
        PropertyValue::Plain(s) => s.to_string(),
        PropertyValue::Url(u) => u.to_string(),
        PropertyValue::Temporal(t) => t.to_string(),
        // Name the offending property so the failure can be traced to a case.
        _ => panic!(
            "Unexpected property value type for {}",
            expected_property
        ),
    };
    assert_eq!(
        actual_value, expected_value,
        "{} should match expected value",
        expected_property
    );
    Ok(())
}