use crate::parse::{
element::Node, non_empty_property_value, non_empty_string, value_class, ElementPtr, Error,
};
use microformats_types::{
temporal, Fragment, Image, Item, Properties, PropertyValue, TextValue, UrlValue, ValueKind,
};
use std::sync::Arc;
use swc_html_ast::Child;
use url::Url;
pub(crate) mod explicit;
pub(crate) mod implied;
pub(crate) mod item;
use super::RE_CLASS_NAME;
/// A single microformats class declaration, split into its kind (derived
/// from the class-name prefix) and the name that followed the prefix.
///
/// The variant declaration order is significant: the derived `Ord` sorts by
/// variant first, and `DeclKind::from_str` sorts its result, so `Root`
/// declarations always come before every other kind in that output.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum DeclKind {
/// Root declaration, built from the `h` prefix; the payload is the name
/// without the prefix (`extract_root_classes` re-adds `h-` before parsing).
Root(String),
/// Declaration built from any prefix other than `h`, `u`, `dt` or `e`.
/// NOTE(review): presumably only `p` reaches here in practice; the accepted
/// prefixes are decided by `RE_CLASS_NAME`, which is defined elsewhere.
Plain(String),
/// Declaration built from the `u` prefix.
Linked(String),
/// Declaration built from the `dt` prefix.
Temporal(String),
/// Declaration built from the `e` prefix.
Hypertext(String),
}
impl DeclKind {
    /// Returns `true` when this declaration is a [`DeclKind::Root`].
    pub(crate) fn is_root(&self) -> bool {
        matches!(self, Self::Root(_))
    }

    /// Collects the root classes named by `properties`.
    ///
    /// Every [`DeclKind::Root`] entry is rendered as `h-<name>` and parsed
    /// into a `microformats_types::Class`; entries of any other kind, and
    /// roots whose rendered name fails to parse, are skipped.
    pub(crate) fn extract_root_classes(properties: &[Self]) -> Vec<microformats_types::Class> {
        let mut root_classes = Vec::new();
        for declaration in properties {
            if let Self::Root(name) = declaration {
                if let Ok(class) = format!("h-{}", name).parse::<microformats_types::Class>() {
                    root_classes.push(class);
                }
            }
        }
        root_classes
    }

    /// Parses a whitespace-separated class string into its declarations.
    ///
    /// Each token is matched against `RE_CLASS_NAME`; tokens that do not
    /// match, or whose `prefix` or `name` capture is missing or rejected by
    /// `non_empty_string`, are ignored. The result is sorted and
    /// deduplicated before being returned.
    #[tracing::instrument(level = "trace")]
    pub(crate) fn from_str(property_class_string: impl ToString + std::fmt::Debug) -> Vec<Self> {
        let class_attribute = property_class_string.to_string();
        let mut declarations: Vec<Self> = Vec::new();

        for token in class_attribute.split_ascii_whitespace() {
            if let Some(captures) = RE_CLASS_NAME.captures(token) {
                let prefix = captures
                    .name("prefix")
                    .map(|group| group.as_str().to_string())
                    .filter(non_empty_string);
                let name = captures
                    .name("name")
                    .map(|group| group.as_str().to_string())
                    .filter(non_empty_string);

                // Both halves are required; a lone prefix or name is dropped.
                if let (Some(prefix), Some(name)) = (prefix, name) {
                    declarations.push(Self::from_prefix_and_name(&prefix, &name));
                }
            }
        }

        // Sorting groups identical declarations so `dedup` removes them all.
        declarations.sort();
        declarations.dedup();
        declarations
    }

    /// Builds the declaration selected by `prefix`, carrying `name`.
    ///
    /// `u`, `dt`, `e` and `h` map to `Linked`, `Temporal`, `Hypertext` and
    /// `Root` respectively; every other prefix yields `Plain`.
    pub(crate) fn from_prefix_and_name(prefix: &str, name: &str) -> Self {
        let constructor: fn(String) -> Self = match prefix {
            "h" => Self::Root,
            "u" => Self::Linked,
            "dt" => Self::Temporal,
            "e" => Self::Hypertext,
            _ => Self::Plain,
        };
        constructor(name.to_string())
    }
}
/// Extracts a property value from the `src` attribute of `node`.
///
/// For an `img` element the non-empty `src` is resolved against `base_url`;
/// the result is an `Image` (URL plus `alt`) when an `alt` attribute is
/// present and a plain `Url` value otherwise. `None` is returned when `src`
/// is missing, empty, or cannot be joined onto `base_url`.
///
/// For any other element the raw `src` attribute is returned as plain text,
/// provided it passes `non_empty_property_value`.
#[tracing::instrument(level = "trace", skip(base_url), ret)]
pub(crate) fn extract_img_element(node: &Node, base_url: &Url) -> Option<PropertyValue> {
    if node.tag() != "img" {
        return node
            .attr("src")
            .map(|raw_src| PropertyValue::Plain(TextValue::new(raw_src)))
            .filter(non_empty_property_value);
    }

    let src = node.attr("src").filter(non_empty_string);
    let alt = node.attr("alt");

    let resolved = match &src {
        Some(raw_src) => base_url.join(raw_src).ok()?,
        None => return None,
    };

    let value = match alt {
        Some(alt_text) => PropertyValue::Image(Image {
            value: resolved,
            alt: Some(alt_text),
        }),
        None => PropertyValue::Url(UrlValue::new(resolved)),
    };
    Some(value)
}
/// Fills in missing dates on timestamp values from a previously seen one.
///
/// All property values are visited in reverse iteration order. A timestamp
/// that carries a date is remembered; the next timestamp without a date
/// receives that remembered date, which is then forgotten so it is handed
/// out at most once. Non-timestamp values are left untouched.
#[tracing::instrument(level = "trace", skip(properties))]
pub(crate) fn adjust_timestamps(properties: &mut Properties) {
    let mut last_seen_date: Option<temporal::Date> = None;

    let every_value = properties
        .values_mut()
        .flat_map(|values| values.iter_mut())
        .rev();

    for property_value in every_value {
        if let PropertyValue::Temporal(temporal::Value::Timestamp(stamp)) = property_value {
            if stamp.date.is_none() {
                tracing::trace!(
                    new_date = format!("{last_seen_date:?}"),
                    stamp = format!("{stamp:?}")
                );
                // Hand the remembered date over and clear it in one step.
                stamp.date = last_seen_date.take();
            } else {
                last_seen_date = stamp.date.clone();
            }
        }
    }
}
mod test;