microformats 0.18.2

A union library of the Microformats types and associated parser.
Documentation
use super::*;
use crate::parse::{ElementPtr, element::Node};
use element::Extraction;
use microformats_types::{TextValue, temporal};
use std::str::FromStr;
use swc_html_ast::Child;
use url::Url;

/// Tree walker that gathers value-class-pattern strings from an element
/// subtree.
struct ValueClassWalker {
    /// Strings extracted so far, in the order the walker visited their elements.
    values: Vec<String>,
    /// URL handed to `Node::text_content` when an element's text is used as
    /// its value.
    base_url: Url,
}

impl ValueClassWalker {
    /// Returns the value carried by `node` when it takes part in the
    /// value-class pattern, or `None` when it does not.
    ///
    /// For an element with the `value` class the first non-empty candidate
    /// wins, tried in this order:
    ///
    /// 1. the `alt` attribute of `img` / `area`,
    /// 2. the `value` attribute of `data`,
    /// 3. the `title` attribute of `abbr`,
    /// 4. the element's text content (resolved against `self.base_url`).
    ///
    /// For an element with the `value-title` class the `title` attribute is
    /// returned as-is (it is not checked for emptiness here). Any other
    /// element yields `None`.
    #[tracing::instrument(level = "trace", skip(self), ret)]
    fn extract_value(&mut self, node: &Node) -> Option<String> {
        let class_str = node.attr("class").unwrap_or_default();
        // HTML class lists are separated by any ASCII whitespace (space, tab,
        // LF, FF, CR), not just U+0020, so a plain `split(' ')` would miss
        // tokens such as "h-card\nvalue".
        let classes = class_str.split_ascii_whitespace().collect::<Vec<_>>();

        tracing::trace!(class_str, tag = node.tag());

        if classes.contains(&"value") {
            node.attr("alt")
                .filter(|_| ["img", "area"].contains(&node.tag()))
                .filter(non_empty_string)
                .or_else(|| {
                    node.attr("value")
                        .filter(|_| ["data"].contains(&node.tag()))
                        .filter(non_empty_string)
                })
                .or_else(|| {
                    node.attr("title")
                        .filter(|_| ["abbr"].contains(&node.tag()))
                        .filter(non_empty_string)
                })
                .or_else(|| {
                    // Fall back to the rendered text; an extraction failure is
                    // treated the same as empty text.
                    let Extraction { text, .. } =
                        node.text_content(&self.base_url).unwrap_or_default();
                    Some(text).filter(non_empty_string)
                })
        } else if classes.contains(&"value-title") {
            node.attr("title")
        } else {
            None
        }
    }
}

impl swc_html_codegen::Emit<swc_html_ast::Element> for ValueClassWalker {
    /// Visits `elem`: records its value when it carries one, otherwise
    /// descends into its children looking for values there.
    fn emit(&mut self, elem: &swc_html_ast::Element) -> swc_html_codegen::Result {
        let wrapped = Node { elem: elem.clone() };

        match self.extract_value(&wrapped) {
            // A matching element ends the search on this branch of the tree.
            Some(found) => self.values.push(found),
            None => {
                for child in elem.children.iter() {
                    self.emit(child)?;
                }
            }
        }

        Ok(())
    }
}

impl swc_html_codegen::Emit<swc_html_ast::Child> for ValueClassWalker {
    /// Forwards element children to the element visitor; every other kind of
    /// child is ignored.
    fn emit(&mut self, node: &swc_html_ast::Child) -> swc_html_codegen::Result {
        if let Child::Element(element) = node {
            self.emit(element)
        } else {
            Ok(())
        }
    }
}

/// Tree walker that gathers `datetime` attribute values from `del`, `ins`
/// and `time` elements in an element subtree.
#[derive(Default)]
struct TemporalValueClassWalker {
    /// Non-empty `datetime` attribute values, in the order the walker visited
    /// their elements.
    values: Vec<String>,
}

impl TemporalValueClassWalker {
    /// Records the non-empty `datetime` attribute of a `del`, `ins` or `time`
    /// element.
    ///
    /// Returns `true` when a value was recorded and `false` otherwise.
    fn expand_value(&mut self, node: &Node) -> bool {
        let datetime = node
            .attr("datetime")
            .filter(non_empty_string)
            .filter(|_| ["del", "ins", "time"].contains(&node.tag()));

        match datetime {
            Some(stamp) => {
                self.values.push(stamp);
                true
            }
            None => false,
        }
    }
}

impl swc_html_codegen::Emit<swc_html_ast::Element> for TemporalValueClassWalker {
    /// Visits `elem`: records its `datetime` value when it has a usable one,
    /// otherwise descends into its children.
    fn emit(&mut self, elem: &swc_html_ast::Element) -> swc_html_codegen::Result {
        let wrapped = Node { elem: elem.clone() };

        // An element that supplied a value is not searched any deeper.
        if self.expand_value(&wrapped) {
            return Ok(());
        }

        for child in elem.children.iter() {
            self.emit(child)?;
        }

        Ok(())
    }
}

impl swc_html_codegen::Emit<swc_html_ast::Child> for TemporalValueClassWalker {
    /// Forwards element children to the element visitor; every other kind of
    /// child is ignored.
    fn emit(&mut self, node: &swc_html_ast::Child) -> swc_html_codegen::Result {
        if let Child::Element(element) = node {
            self.emit(element)
        } else {
            Ok(())
        }
    }
}

impl ValueClassPropertyExtractor {
    /// Builds a temporal value from the strings gathered by the walkers.
    ///
    /// Strategies are tried in order, and the first one that succeeds wins:
    ///
    /// 1. the first entry that parses as an ISO 8601 stamp,
    /// 2. all entries joined together and parsed as a duration,
    /// 3. a stamp composed from the first date, first time and first offset
    ///    found among the parsed entries.
    ///
    /// Returns `None` when `values` is empty or the composed stamp is empty.
    #[tracing::instrument(level = "trace", ret)]
    fn compose_temporal_value(&self, values: &[String]) -> Option<temporal::Value> {
        let iso_stamp = values
            .iter()
            .find_map(|raw| temporal::Stamp::from_iso8601(raw).ok());
        if let Some(stamp) = iso_stamp {
            return Some(temporal::Value::Timestamp(stamp));
        }

        if let Ok(duration) = temporal::Duration::from_str(&values.join("")) {
            return Some(temporal::Value::Duration(duration));
        }

        if values.is_empty() {
            return None;
        }

        let mut date = None;
        let mut time = None;
        let mut offset = None;

        // Only the first date, time and offset encountered are kept.
        for part in values.iter().cloned().flat_map(temporal::Stamp::parse) {
            if part.is_date() && date.is_none() {
                date = part.as_date();
            } else if part.is_time() && time.is_none() {
                time = part.as_time();
            } else if part.is_offset() && offset.is_none() {
                offset = part.as_offset();
            }
        }

        // A time without an explicit prefix gets a single space as its prefix.
        let time = time.map(|mut t| {
            if t.prefix.is_none() {
                t.prefix = Some(' ');
            }
            t
        });

        let stamp = temporal::Stamp::compose(date, time, offset);
        if stamp.is_empty() {
            None
        } else {
            Some(temporal::Value::Timestamp(stamp))
        }
    }

    /// Runs the value-class walkers over this extractor's element and turns
    /// the collected strings into a property value according to `self.hint`.
    ///
    /// * `TypeHint::Plain` – always yields a plain text value made of the
    ///   joined strings with surrounding whitespace removed.
    /// * `TypeHint::Temporal` – `datetime` attribute values are placed ahead
    ///   of the value-class strings; the combined list is composed into a
    ///   temporal value, or joined into plain text when that fails. A result
    ///   rejected by `non_empty_property_value` becomes `None`.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while walking the element tree.
    #[tracing::instrument(level = "trace", ret)]
    pub fn extract_value_class(
        self,
        base_url: &Url,
    ) -> Result<Option<PropertyValue>, crate::parse::Error> {
        let mut walker = ValueClassWalker {
            values: Vec::new(),
            base_url: base_url.clone(),
        };
        walker.emit(&self.element.node.elem)?;

        let mut walker_values = walker.values;
        walker_values.retain(non_empty_string);

        match self.hint {
            TypeHint::Plain => {
                let joined = remove_surrounding_whitespace(walker_values.join(""));
                Ok(Some(PropertyValue::Plain(TextValue::new(joined))))
            }
            TypeHint::Temporal => {
                let mut temporal_walker = TemporalValueClassWalker::default();
                temporal_walker.emit(&self.element.node.elem)?;

                // `datetime` attribute values first, then the value-class
                // strings.
                let mut extended_values: Vec<String> = temporal_walker
                    .values
                    .iter()
                    .map(remove_surrounding_whitespace)
                    .filter(non_empty_string)
                    .collect();
                extended_values.extend(walker_values);

                let property = match self.compose_temporal_value(&extended_values) {
                    Some(temporal_value) => PropertyValue::Temporal(temporal_value),
                    None => PropertyValue::Plain(TextValue::new(extended_values.join(""))),
                };

                Ok(Some(property).filter(non_empty_property_value))
            }
        }
    }
}

/// Selects how `ValueClassPropertyExtractor::extract_value_class` interprets
/// the strings it collects.
#[derive(Eq, Debug, PartialEq)]
pub enum TypeHint {
    /// Join the collected strings into a plain text value.
    Plain,
    /// Try to compose the collected strings into a temporal value, falling
    /// back to plain text when that is not possible.
    Temporal,
}

/// Extracts a property value from an element using the value-class walkers
/// defined in this module.
#[derive(Debug)]
pub(crate) struct ValueClassPropertyExtractor {
    /// Element whose subtree is walked for values.
    pub element: ElementPtr,
    /// How the collected strings should be interpreted.
    pub hint: TypeHint,
}

#[cfg(test)]
mod test;