use super::*;
use crate::parse::{
element::test::{from_html_str, grab_element_from_document},
ElementRef,
};
use microformats_types::{temporal, TextValue, UrlValue};
use tracing_test::traced_test;
use tracing_unwrap::OptionExt;
/// Expands a `Linked("url")` declaration on `<a href='/foo' class='u-url'>`
/// against the base `http://example.com` and expects the absolute URL
/// `http://example.com/foo` as a `PropertyValue::Url`.
#[traced_test]
#[test]
fn linked() -> Result<(), crate::Error> {
    let document = from_html_str(r#"<a href='/foo' class='u-url'>a place</a>"#);
    let anchor = grab_element_from_document(&document, "a").unwrap_or_log();
    let element = Arc::new(ElementRef {
        index: 0,
        node: Node { elem: anchor },
    });

    let parser = explicit::PropertyParser::new(
        element,
        DeclKind::Linked("url".to_string()),
        "http://example.com".parse()?,
        None,
    );

    let expected = Some((
        "url".to_string(),
        PropertyValue::Url(UrlValue::new("http://example.com/foo".parse().unwrap())),
    ));
    assert_eq!(
        parser.expand()?,
        expected,
        "expanded a direct linked property"
    );
    Ok(())
}
// Expands a `Temporal("timing")` declaration for each parameterized case
// (a `<time datetime=...>` element and an `<abbr title=...>` element) and
// expects the supplied `temporal::Value` wrapped in `PropertyValue::Temporal`.
#[yare::parameterized(
    direct = {
        r#"<time class="dt-timing" datetime="2014-01-01"></time>"#,
        "time", temporal::Value::Timestamp(temporal::Stamp::parse("2014-01-01")?)
    },
    abbr_title = {
        r#"<abbr class="dt-timing" title="2013-06-29">June 29</abbr>"#,
        "abbr", temporal::Value::Timestamp(temporal::Stamp::parse("2013-06-29")?)
    }
)]
fn temporal_value(html: &str, tag: &str, value: temporal::Value) -> Result<(), crate::Error> {
    let document = from_html_str(html);
    let target = grab_element_from_document(&document, tag).unwrap_or_log();
    let element = Arc::new(ElementRef {
        index: 0,
        node: Node { elem: target },
    });

    let parser = explicit::PropertyParser::new(
        element,
        DeclKind::Temporal("timing".to_string()),
        "http://example.com".parse()?,
        None,
    );

    let expected = Some(("timing".to_string(), PropertyValue::Temporal(value)));
    assert_eq!(
        parser.expand()?,
        expected,
        "expanded a direct temporal property"
    );
    Ok(())
}
/// Expands a `Hypertext("content")` declaration on an `e-content` span and
/// expects a `Fragment` whose `html` keeps the inner markup and whose
/// `value` holds the same text without tags.
#[traced_test]
#[test]
fn html() -> Result<(), crate::Error> {
    let document =
        from_html_str(r#"<span class="e-content">The name is in <strong>bold</strong>.</span>"#);
    let span = grab_element_from_document(&document, "span").unwrap_or_log();
    let element = Arc::new(ElementRef {
        index: 0,
        node: Node { elem: span },
    });

    let parser = explicit::PropertyParser::new(
        element,
        DeclKind::Hypertext("content".to_string()),
        "http://example.com".parse()?,
        None,
    );

    // Remaining `Fragment` fields are left at their defaults.
    let expected_fragment = Fragment {
        html: "The name is in <strong>bold</strong>.".to_string(),
        value: "The name is in bold.".to_string(),
        ..Default::default()
    };
    let expected = Some((
        "content".to_string(),
        PropertyValue::Fragment(expected_fragment),
    ));
    assert_eq!(
        parser.expand()?,
        expected,
        "expanded a direct hypertext property"
    );
    Ok(())
}
// Expands a `Plain("name")` declaration for each parameterized markup
// variant; every case is expected to produce the plain text "The name.".
#[yare::parameterized(
    bare = { "span", r#"<span class="p-name">The name.</span>"# },
    nested = { "div", r#"<div class="p-name">
<span class="p-nickname">The</span> <span class="p-place">name.</span>
</div>"# },
    tag_abbr = { "abbr", r#"<abbr class="p-name value" title="The name.">Wow.</abbr>"# },
    tag_link = { "link", r#"<link class="p-name value" href='/place' title="The name." />"# },
)]
fn plain(tag_name: &str, html: &str) -> Result<(), crate::Error> {
    let document = from_html_str(html);
    let target = grab_element_from_document(&document, tag_name).unwrap_or_log();
    let element = Arc::new(ElementRef {
        index: 0,
        node: Node { elem: target },
    });

    let parser = PropertyParser::new(
        element,
        DeclKind::Plain("name".to_string()),
        "http://example.com".parse()?,
        None,
    );

    let expected = Some((
        "name".to_string(),
        PropertyValue::Plain(TextValue::new("The name.".to_string())),
    ));
    assert_eq!(parser.expand()?, expected);
    Ok(())
}
/// Expands a `Plain("honorific-suffix")` declaration on a `<br>` element and
/// expects the property to be present with an empty text value.
#[test]
fn plain_allow_empty() -> Result<(), crate::Error> {
    let document = from_html_str(r#"<br class="p-honorific-suffix" />BSc<br />"#);
    let line_break = grab_element_from_document(&document, "br").unwrap_or_log();
    let element = Arc::new(ElementRef {
        index: 0,
        node: Node { elem: line_break },
    });

    let parser = PropertyParser::new(
        element,
        DeclKind::Plain("honorific-suffix".to_string()),
        "http://example.com".parse()?,
        None,
    );

    let expected = Some((
        "honorific-suffix".to_string(),
        PropertyValue::Plain(TextValue::new("".to_string())),
    ));
    assert_eq!(parser.expand()?, expected);
    Ok(())
}
/// Parses an `h-review` containing an `e-description` child and checks that
/// the single resulting item has a `content` property and no `description`
/// property.
#[traced_test]
#[test]
fn h_review_description_to_content_mapping() -> Result<(), crate::Error> {
    let markup =
        r#"<div class="h-review"><div class="e-description">Test description content</div></div>"#;
    let base = "http://example.com/".parse::<url::Url>().unwrap();

    let mut parser = crate::parse::Parser::from_html(markup.to_string())?;
    let document = parser.into_document(Some(base))?;

    assert_eq!(
        document.items.len(),
        1,
        "Should parse exactly one h-review item"
    );

    let review_properties = &document.items[0].properties;
    assert!(
        review_properties.contains_key("content"),
        "e-description in h-review should map to 'content' property"
    );
    assert!(
        !review_properties.contains_key("description"),
        "Should not have 'description' property after normalization"
    );
    Ok(())
}
/// Parses an `h-review` containing an `e-content` child and checks that the
/// single resulting item exposes a `content` property.
#[traced_test]
#[test]
fn h_review_e_content_mapping() -> Result<(), crate::Error> {
    let markup = r#"<div class="h-review"><div class="e-content">Test new style content</div></div>"#;
    let base = "http://example.com/".parse::<url::Url>().unwrap();

    let mut parser = crate::parse::Parser::from_html(markup.to_string())?;
    let document = parser.into_document(Some(base))?;

    assert_eq!(
        document.items.len(),
        1,
        "Should parse exactly one h-review item"
    );

    let review_properties = &document.items[0].properties;
    assert!(
        review_properties.contains_key("content"),
        "e-content in h-review should map to 'content' property"
    );
    Ok(())
}
/// Expands a `Linked("photo")` declaration on an `<a>` that wraps a
/// `<picture>` and checks that a property named `photo` is produced.
///
/// The produced value is only printed for inspection, not asserted on.
#[traced_test]
#[test]
fn picture_element_detection() -> Result<(), crate::Error> {
    let html = r#"<a href='/foo' class='u-url'><picture>
<source srcset='/images/hero-800w.jpg 800w, /images/hero-400w.jpg 400w' media='(min-width: 800px)' type='image/jpeg'>
<img src='/images/fallback.jpg' alt='Hero image'>
</picture></a>"#;
    let elem = grab_element_from_document(&from_html_str(html), "a").unwrap_or_log();
    let parser = explicit::PropertyParser::new(
        Arc::new(ElementRef {
            index: 0,
            node: Node { elem },
        }),
        DeclKind::Linked("photo".to_string()),
        "http://example.com".parse()?,
        None,
    );

    // A single `expect` replaces the `is_some()` assertion followed by
    // `unwrap()`; the failure message is unchanged.
    let (property_name, property_value) = parser
        .expand()?
        .expect("Picture elements should be detected and processed by the main parser");
    assert_eq!(property_name, "photo");
    println!("Picture property value: {:?}", property_value);
    Ok(())
}
/// Expands a `Linked("photo")` declaration placed directly on a `<picture>`
/// element and checks that the result is a `PropertyValue::Image` whose
/// value is the fallback `<img>` source resolved against the base URL and
/// whose `alt` is the `<img>` alt text.
#[traced_test]
#[test]
fn picture_element_direct_detection() -> Result<(), crate::Error> {
    let html = r#"<picture class="u-photo">
<source srcset='/images/hero-800w.jpg 800w, /images/hero-400w.jpg 400w' media='(min-width: 800px)' type='image/jpeg'>
<img src='/images/fallback.jpg' alt='Hero image'>
</picture>"#;
    let elem = grab_element_from_document(&from_html_str(html), "picture").unwrap_or_log();
    let parser = explicit::PropertyParser::new(
        Arc::new(ElementRef {
            index: 0,
            node: Node { elem },
        }),
        DeclKind::Linked("photo".to_string()),
        "http://example.com".parse()?,
        None,
    );

    // A single `expect` replaces the `is_some()` assertion followed by
    // `unwrap()`; the failure message is unchanged.
    let (property_name, property_value) = parser
        .expand()?
        .expect("Picture elements should be directly processed by the property parser");
    assert_eq!(property_name, "photo");
    println!("Direct picture property value: {:?}", property_value);

    match &property_value {
        PropertyValue::Image(image) => {
            assert_eq!(
                image.value.as_str(),
                "http://example.com/images/fallback.jpg"
            );
            assert_eq!(image.alt, Some("Hero image".to_string()));
            println!("✓ Picture element properly processed with fallback image");
        }
        // Called out separately so the failure explains the specific regression.
        PropertyValue::Url(_) => {
            panic!(
                "Picture elements should return Image data, not generic URLs. Got: {:?}",
                property_value
            );
        }
        _ => {
            panic!(
                "Unexpected property value type for picture element: {:?}",
                property_value
            );
        }
    }
    Ok(())
}
/// Expands a `Plain("name")` declaration placed on a `<picture>` element and
/// checks that the resulting plain text equals the fallback `<img>` alt text.
#[traced_test]
#[test]
fn picture_element_plain_property() -> Result<(), crate::Error> {
    let html = r#"<picture class="p-name">
<source srcset='/images/hero-800w.jpg 800w, /images/hero-400w.jpg 400w' media='(min-width: 800px)' type='image/jpeg'>
<img src='/images/fallback.jpg' alt='Hero image'>
</picture>"#;
    let elem = grab_element_from_document(&from_html_str(html), "picture").unwrap_or_log();
    let parser = explicit::PropertyParser::new(
        Arc::new(ElementRef {
            index: 0,
            node: Node { elem },
        }),
        DeclKind::Plain("name".to_string()),
        "http://example.com".parse()?,
        None,
    );

    // `expect` with a message replaces the bare `is_some()` assertion plus
    // `unwrap()`, so a failure says what was missing.
    let (property_name, property_value) = parser
        .expand()?
        .expect("Picture element with a plain property declaration should yield a value");
    assert_eq!(property_name, "name");

    match &property_value {
        PropertyValue::Plain(plain_text_value) => {
            assert_eq!(plain_text_value.to_string(), "Hero image");
            println!("✓ Picture element plain property correctly extracted alt text");
        }
        _ => {
            panic!(
                "Expected plain text value for picture element in plain property context, got: {:?}",
                property_value
            );
        }
    }
    Ok(())
}