use super::*;
use crate::parse::{ElementPtr, element::Node};
use element::Extraction;
use microformats_types::{TextValue, temporal};
use std::str::FromStr;
use swc_html_ast::Child;
use url::Url;
/// Tree walker that accumulates value-class-pattern strings from the elements it visits.
struct ValueClassWalker {
/// Strings collected so far, in the order the elements were visited.
values: Vec<String>,
/// Passed to `Node::text_content` when an element's text content is used as its value.
base_url: Url,
}
impl ValueClassWalker {
    /// Resolves the value-class-pattern string contributed by `node`, if any.
    ///
    /// The `class` attribute is tokenised on ASCII whitespace (space, tab, LF, FF, CR),
    /// so class names separated by tabs or newlines are recognised just like
    /// space-separated ones.
    ///
    /// * With a `value` class, the first non-empty candidate wins: `alt` on
    ///   `img`/`area`, `value` on `data`, `title` on `abbr`, and finally the
    ///   element's text content. `None` is returned when all of them are empty.
    /// * Otherwise, with a `value-title` class, the `title` attribute is returned
    ///   as-is (it may be absent or empty).
    /// * Otherwise `None`.
    #[tracing::instrument(level = "trace", skip(self), ret)]
    fn extract_value(&mut self, node: &Node) -> Option<String> {
        let class_str = node.attr("class").unwrap_or_default();
        tracing::trace!(class_str, tag = node.tag());

        // HTML class lists are ASCII-whitespace separated, not just space separated.
        let has_class =
            |wanted: &str| class_str.split_ascii_whitespace().any(|class| class == wanted);

        if has_class("value") {
            // Only one attribute can apply to a given tag, so pick it up front.
            let attribute = match node.tag() {
                "img" | "area" => node.attr("alt"),
                "data" => node.attr("value"),
                "abbr" => node.attr("title"),
                _ => None,
            };
            attribute.filter(non_empty_string).or_else(|| {
                // No usable attribute: fall back to the element's text content.
                let Extraction { text, .. } =
                    node.text_content(&self.base_url).unwrap_or_default();
                Some(text).filter(non_empty_string)
            })
        } else if has_class("value-title") {
            node.attr("title")
        } else {
            None
        }
    }
}
impl swc_html_codegen::Emit<swc_html_ast::Element> for ValueClassWalker {
    /// Visits `elem`: records its value when `extract_value` yields one, otherwise
    /// descends into its children in document order.
    fn emit(&mut self, elem: &swc_html_ast::Element) -> swc_html_codegen::Result {
        let wrapped = Node { elem: elem.clone() };
        match self.extract_value(&wrapped) {
            Some(found) => {
                // A matching element is terminal: its children are not visited.
                self.values.push(found);
                Ok(())
            }
            None => elem
                .children
                .iter()
                .try_for_each(|child| self.emit(child)),
        }
    }
}
impl swc_html_codegen::Emit<swc_html_ast::Child> for ValueClassWalker {
    /// Forwards element children to the element visitor; every other kind of child
    /// is skipped.
    fn emit(&mut self, node: &swc_html_ast::Child) -> swc_html_codegen::Result {
        if let Child::Element(element) = node {
            self.emit(element)
        } else {
            Ok(())
        }
    }
}
/// Tree walker that accumulates `datetime` attribute values from `del`, `ins` and `time` elements.
#[derive(Default)]
struct TemporalValueClassWalker {
/// Non-empty `datetime` values collected so far, in the order the elements were visited.
values: Vec<String>,
}
impl TemporalValueClassWalker {
    /// Records the `datetime` attribute of `node` when it is non-empty and the node
    /// is a `del`, `ins` or `time` element.
    ///
    /// Returns `true` when a value was recorded, `false` otherwise.
    fn expand_value(&mut self, node: &Node) -> bool {
        let carries_datetime = matches!(node.tag(), "del" | "ins" | "time");
        match node.attr("datetime").filter(non_empty_string) {
            Some(datetime) if carries_datetime => {
                self.values.push(datetime);
                true
            }
            _ => false,
        }
    }
}
impl swc_html_codegen::Emit<swc_html_ast::Element> for TemporalValueClassWalker {
    /// Visits `elem`: when `expand_value` records a value for it the walk stops
    /// there, otherwise the children are visited in document order.
    fn emit(&mut self, elem: &swc_html_ast::Element) -> swc_html_codegen::Result {
        let wrapped = Node { elem: elem.clone() };
        if self.expand_value(&wrapped) {
            return Ok(());
        }
        elem.children
            .iter()
            .try_for_each(|child| self.emit(child))
    }
}
impl swc_html_codegen::Emit<swc_html_ast::Child> for TemporalValueClassWalker {
    /// Forwards element children to the element visitor; every other kind of child
    /// is skipped.
    fn emit(&mut self, node: &swc_html_ast::Child) -> swc_html_codegen::Result {
        if let Child::Element(element) = node {
            self.emit(element)
        } else {
            Ok(())
        }
    }
}
impl ValueClassPropertyExtractor {
#[tracing::instrument(level = "trace", ret)]
fn compose_temporal_value(&self, values: &[String]) -> Option<temporal::Value> {
if let Some(iso8601_value) = values
.iter()
.find_map(|s| temporal::Stamp::from_iso8601(s).ok())
{
Some(temporal::Value::Timestamp(iso8601_value))
} else if let Ok(value) = temporal::Duration::from_str(&values.join("")) {
Some(temporal::Value::Duration(value))
} else if !values.is_empty() {
let mut date = None;
let mut time = None;
let mut offset = None;
for value in values.iter().cloned().flat_map(temporal::Stamp::parse) {
if value.is_date() && date.is_none() {
date = value.as_date()
} else if value.is_time() && time.is_none() {
time = value.as_time();
} else if value.is_offset() && offset.is_none() {
offset = value.as_offset();
}
}
time = if let Some(mut t) = time {
if t.prefix.is_none() {
t.prefix = Some(' ');
}
Some(t)
} else {
time
};
Some(temporal::Stamp::compose(date, time, offset))
.filter(|ts| !ts.is_empty())
.map(temporal::Value::Timestamp)
} else {
None
}
}
#[tracing::instrument(level = "trace", ret)]
pub fn extract_value_class(
self,
base_url: &Url,
) -> Result<Option<PropertyValue>, crate::parse::Error> {
let mut walker = ValueClassWalker {
base_url: base_url.clone(),
values: Default::default(),
};
walker.emit(&self.element.node.elem)?;
let walker_values = walker
.values
.into_iter()
.filter(non_empty_string)
.collect::<Vec<_>>();
match self.hint {
TypeHint::Plain => Ok(Some(PropertyValue::Plain(TextValue::new(
remove_surrounding_whitespace(walker_values.join("")),
)))),
TypeHint::Temporal => {
let mut temporal_walker = TemporalValueClassWalker::default();
temporal_walker.emit(&self.element.node.elem)?;
let temporal_values: Vec<_> = temporal_walker
.values
.iter()
.map(remove_surrounding_whitespace)
.filter(non_empty_string)
.collect();
let extended_values = [temporal_values, walker_values]
.concat()
.into_iter()
.collect::<Vec<_>>();
Ok(self
.compose_temporal_value(&extended_values)
.map(PropertyValue::Temporal)
.or_else(|| {
Some(PropertyValue::Plain(TextValue::new(
extended_values.join(""),
)))
})
.filter(non_empty_property_value))
}
}
}
}
/// Tells `ValueClassPropertyExtractor::extract_value_class` how to interpret the collected values.
#[derive(Eq, Debug, PartialEq)]
pub enum TypeHint {
/// Join the collected values into a single plain-text value.
Plain,
/// Try to compose a temporal value (timestamp or duration); plain text is used when that fails.
Temporal,
}
/// Extracts a property value from an element's subtree using the value-class walkers.
#[derive(Debug)]
pub(crate) struct ValueClassPropertyExtractor {
/// Element whose subtree is walked for values.
pub element: ElementPtr,
/// How the collected values should be interpreted.
pub hint: TypeHint,
}
#[cfg(test)]
mod test;