use std::str::FromStr;
use crate::parse::{element::{Extraction, HtmlUrlExpander}, remove_surrounding_whitespace};
use super::*;
use html_escape::decode_html_entities;
/// Turns one property declaration found on an element into a named
/// `PropertyValue` (see `PropertyParser::expand`).
pub(crate) struct PropertyParser {
/// The element the property is declared on; its `node` is what the
/// expansion methods read attributes, tag name and content from.
pub(crate) elem: ElementPtr,
/// The kind of declaration being parsed; selects the expansion strategy
/// and, for non-root kinds, carries the property name.
kind: DeclKind,
/// Base URL handed to the extraction helpers and used to resolve linked
/// values via `Url::join`.
base_url: Url,
}
impl std::fmt::Debug for PropertyParser {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> swc_html_codegen::Result {
f.debug_struct("PropertyParser")
.field("forKind", &self.kind)
.finish()
}
}
impl PropertyParser {
#[tracing::instrument(level = "trace", ret)]
pub(crate) fn new(elem: ElementPtr, kind: DeclKind, base_url: Url) -> Self {
Self {
elem,
kind,
base_url,
}
}
/// Resolves the value of a plain-text property declared on this element.
///
/// Candidates are tried in order and the first hit wins:
/// 1. a value-class-pattern result accepted by `non_empty_property_value`,
/// 2. a non-empty `title` attribute on `abbr` / `link`,
/// 3. a non-empty `value` attribute on `data` / `input`,
/// 4. a non-empty `alt` attribute on `img` / `area`,
/// 5. the element's text content (with image links), if it can be read.
///
/// A resulting `PropertyValue::Plain` has its surrounding whitespace removed;
/// any other variant is returned untouched.
///
/// # Errors
///
/// Propagates a failure of the value-class-pattern extraction. A failure to
/// read the text content in the last step is deliberately not an error: it
/// results in `Ok(None)`.
#[tracing::instrument(level = "trace", skip(self), ret)]
fn expand_plain_property_value(&self) -> Result<Option<PropertyValue>, crate::parse::Error> {
    let elem = &self.elem.node;

    let candidate = value_class::ValueClassPropertyExtractor {
        element: Arc::clone(&self.elem),
        hint: value_class::TypeHint::Plain,
    }
    .extract_value_class(&self.base_url)?
    .filter(non_empty_property_value)
    .or_else(|| {
        elem.attr("title")
            .filter(|_| ["abbr", "link"].contains(&elem.tag()))
            .filter(non_empty_string)
            .map(PropertyValue::Plain)
    })
    .or_else(|| {
        // `input` is accepted alongside `data`, matching the linked and
        // temporal expansions and the microformats2 rules for p-* values.
        elem.attr("value")
            .filter(|_| ["data", "input"].contains(&elem.tag()))
            .filter(non_empty_string)
            .map(PropertyValue::Plain)
    })
    .or_else(|| {
        elem.attr("alt")
            .filter(|_| ["img", "area"].contains(&elem.tag()))
            .filter(non_empty_string)
            .map(PropertyValue::Plain)
    })
    .or_else(|| {
        // Best effort: an unreadable text content means "no value", not a
        // parse failure.
        elem.text_content_with_img_links(&self.base_url)
            .ok()
            .map(|Extraction { text, .. }| PropertyValue::Plain(text))
    });

    Ok(candidate.map(|value| match value {
        PropertyValue::Plain(text) => PropertyValue::Plain(remove_surrounding_whitespace(text)),
        other => other,
    }))
}
/// Resolves the value of a linked (URL-like) property declared on this element.
///
/// `img` elements are delegated entirely to `extract_img_element`. For every
/// other element the first available candidate below is taken:
/// 1. `href` on `a` / `area` / `link`,
/// 2. `src` on `audio` / `video` / `source` / `iframe`,
/// 3. a non-empty `poster` on `video`,
/// 4. a non-empty `data` on `object`,
/// 5. a plain value-class-pattern result accepted by
///    `non_empty_property_value` (extraction errors are ignored),
/// 6. a non-empty `title` on `abbr`,
/// 7. a non-empty `value` on `input` / `data`,
/// 8. the element's text content, if it can be read.
///
/// When nothing matches, the empty string is used. The chosen text is joined
/// onto the base URL; on success a `PropertyValue::Url` is returned,
/// otherwise the text itself as `PropertyValue::Plain`.
fn expand_linked_property_value(&self) -> Option<PropertyValue> {
    let node = &self.elem.node;

    // The image helper never consults the generic candidates, so decide this
    // up front instead of computing a fallback chain that would be discarded.
    if node.tag() == "img" {
        return extract_img_element(node, &self.base_url);
    }

    let linked_text = node
        .attr("href")
        .filter(|_| ["a", "area", "link"].contains(&node.tag()))
        .or_else(|| {
            node.attr("src")
                .filter(|_| ["audio", "video", "source", "iframe"].contains(&node.tag()))
        })
        .or_else(|| {
            node.attr("poster")
                .filter(non_empty_string)
                .filter(|_| ["video"].contains(&node.tag()))
        })
        .or_else(|| {
            node.attr("data")
                .filter(non_empty_string)
                .filter(|_| ["object"].contains(&node.tag()))
        })
        .or_else(|| {
            let extractor = value_class::ValueClassPropertyExtractor {
                element: Arc::clone(&self.elem),
                hint: value_class::TypeHint::Plain,
            };
            extractor
                .extract_value_class(&self.base_url)
                .ok()
                .flatten()
                .filter(non_empty_property_value)
                // Only a plain-text result can serve as link text.
                .and_then(|value| match value {
                    PropertyValue::Plain(text) => Some(text),
                    _ => None,
                })
        })
        .or_else(|| {
            node.attr("title")
                .filter(non_empty_string)
                .filter(|_| ["abbr"].contains(&node.tag()))
        })
        .or_else(|| {
            node.attr("value")
                .filter(non_empty_string)
                .filter(|_| ["input", "data"].contains(&node.tag()))
        })
        .or_else(|| node.text_content(&self.base_url).ok().map(Into::into))
        .unwrap_or_default();

    Some(match self.base_url.join(&linked_text) {
        Ok(url) => PropertyValue::Url(url),
        Err(_) => PropertyValue::Plain(linked_text),
    })
}
/// Resolves the value of a temporal (date/time) property declared on this
/// element.
///
/// A value-class-pattern result, when present, is returned unchanged.
/// Otherwise the source text is the first of: a non-empty `datetime` on
/// `time` / `ins` / `del`, a non-empty `title` on `abbr`, a non-empty `value`
/// on `data` / `input`, or the element's text content. That text is parsed
/// as a `microformats_types::temporal::Value`; if parsing fails the text is
/// kept as `PropertyValue::Plain`. Either way the result is returned only if
/// `non_empty_property_value` accepts it.
///
/// # Errors
///
/// Propagates failures of the value-class-pattern extraction and of reading
/// the element's text content.
#[tracing::instrument(level = "trace", skip(self), ret)]
fn expand_temporal_property_value(&self) -> Result<Option<PropertyValue>, crate::parse::Error> {
    let node = &self.elem.node;

    let extractor = value_class::ValueClassPropertyExtractor {
        element: Arc::clone(&self.elem),
        hint: value_class::TypeHint::Temporal,
    };
    // A value-class-pattern hit short-circuits every other source.
    if let Some(found) = extractor.extract_value_class(&self.base_url)? {
        return Ok(Some(found));
    }

    let from_attribute = node
        .attr("datetime")
        .filter(|_| ["time", "ins", "del"].contains(&node.tag()))
        .filter(non_empty_string)
        .or_else(|| {
            node.attr("title")
                .filter(|_| ["abbr"].contains(&node.tag()))
                .filter(non_empty_string)
        })
        .or_else(|| {
            node.attr("value")
                .filter(|_| ["data", "input"].contains(&node.tag()))
                .filter(non_empty_string)
        });

    let raw = match from_attribute {
        Some(text) => text,
        None => node.text_content(&self.base_url).map(Into::into)?,
    };

    // Keep the raw text when it is not a recognisable temporal value.
    let parsed = match microformats_types::temporal::Value::from_str(&raw) {
        Ok(stamp) => PropertyValue::Temporal(stamp),
        Err(_) => PropertyValue::Plain(raw),
    };
    Ok(Some(parsed).filter(non_empty_property_value))
}
/// Builds the `PropertyValue::Fragment` for an embedded-markup property.
///
/// The fragment carries the element's HTML content after it has been passed
/// through `HtmlUrlExpander::expand_urls_in_html_string` with the base URL,
/// the element's text content (surrounding whitespace removed, HTML entities
/// decoded), the links reported by the text extraction, and the element's
/// `lang` attribute if any.
///
/// # Errors
///
/// Propagates failures from reading the HTML content, expanding its URLs, or
/// extracting the text content.
#[tracing::instrument(level = "trace", skip(self), ret)]
fn get_embedded_html(&self) -> Result<PropertyValue, crate::parse::Error> {
    let node = &self.elem.node;

    let markup = node.html_content()?;
    let html = HtmlUrlExpander::expand_urls_in_html_string(&markup, &self.base_url)?;

    let Extraction { text, links } = node.text_content_with_img_links(&self.base_url)?;
    let trimmed = remove_surrounding_whitespace(text);
    let value = decode_html_entities(&trimmed).into_owned();

    let fragment = Fragment {
        html,
        value,
        links,
        lang: node.attr("lang"),
    };
    Ok(PropertyValue::Fragment(fragment))
}
/// Expands this declaration into a `(property name, value)` pair.
///
/// The declaration kind selects the strategy: plain, linked, temporal or
/// embedded-markup expansion. `Ok(None)` means the selected strategy produced
/// no value; the embedded-markup strategy always produces one when it
/// succeeds.
///
/// # Errors
///
/// Propagates the error of the selected expansion strategy.
///
/// # Panics
///
/// Panics if the parser was built for a `DeclKind::Root` declaration, which
/// this method treats as unreachable.
#[tracing::instrument(level = "trace", skip(self), ret)]
pub(crate) fn expand(&self) -> Result<Option<(String, PropertyValue)>, crate::parse::Error> {
    // Pick the property name and its (optional) value together, then pair
    // them up once at the end.
    let (prop_name, value) = match &self.kind {
        DeclKind::Root(_) => unreachable!(),
        DeclKind::Plain(name) => (name, self.expand_plain_property_value()?),
        DeclKind::Linked(name) => (name, self.expand_linked_property_value()),
        DeclKind::Temporal(name) => (name, self.expand_temporal_property_value()?),
        DeclKind::Hypertext(name) => (name, Some(self.get_embedded_html()?)),
    };
    Ok(value.map(|found| (prop_name.to_owned(), found)))
}
}
// Tests are kept in the sibling `test` module, which is only compiled when
// the `test` cfg is active.
#[cfg(test)]
mod test;