graph-rdfa-processor 0.3.12

Graph RDFa processor
Documentation
use std::{
    borrow::Cow,
    collections::{HashMap, HashSet},
    fmt::{Display, Formatter},
    sync::Arc,
};

use regex::Regex;

use crate::constants::{DATETIME_TYPES, NODE_RDF_XSD_STRING};
/// Builds a `Node::Iri` that borrows the given string literal.
///
/// The expansion names `Node` and `Cow` without a path, so both must be in
/// scope wherever the macro is invoked.
#[macro_export]
macro_rules! iri {
    ($name:literal) => {
        Node::Iri(Cow::Borrowed($name))
    };
}

/// A set of statements plus the option that controls how blank nodes are printed.
///
/// The `Display` implementation writes one serialized statement per line.
#[derive(Debug)]
pub struct RdfaGraph<'a> {
    /// When set, blank nodes are serialized as `<{well_known_prefix}{id}>`
    /// instead of `_:{id}`.
    pub well_known_prefix: Option<&'a str>,
    /// The statements of the graph; being a `HashSet`, iteration order is unspecified.
    pub statements: HashSet<Statement<'a>>,
}

/// Borrowed processing state.
///
/// NOTE(review): none of these fields is read in this part of the file; the
/// per-field notes below are inferred from names and types only and should be
/// confirmed against the code that builds and consumes a `Context`.
#[derive(Debug, Default, Clone)]
pub struct Context<'a> {
    /// Presumably the base IRI of the document being processed — TODO confirm.
    pub base: &'a str,
    /// Presumably forwarded to `RdfaGraph::well_known_prefix` — TODO confirm.
    pub well_known_prefix: Option<&'a str>,
    /// Presumably the value used in place of an empty reference — TODO confirm.
    pub empty_ref_node_substitute: &'a str,
    /// Presumably the vocabulary currently in effect, if any — TODO confirm.
    pub vocab: Option<&'a str>,
    /// Presumably the language currently in effect, if any — TODO confirm.
    pub lang: Option<&'a str>,
    /// Presumably pending `rel` predicates — TODO confirm.
    pub in_rel: Option<Vec<Node<'a>>>,
    /// Presumably pending `rev` predicates — TODO confirm.
    pub in_rev: Option<Vec<Node<'a>>>,
    /// Presumably pending list predicates — TODO confirm.
    pub in_list: Option<Vec<Node<'a>>>,
    /// Presumably the node statements are currently attached to — TODO confirm.
    pub current_node: Option<Node<'a>>,
    /// Presumably a prefix-to-IRI mapping — TODO confirm.
    pub prefixes: HashMap<&'a str, &'a str>,
}

/// Associates a regular-expression pattern with the datatype node it identifies.
#[derive(Debug)]
pub struct DataTypeFromPattern<'a> {
    /// Regular-expression source; compiled by `test` each time it is called.
    pub pattern: &'a str,
    /// Node returned (cloned) by `test` when the pattern covers the whole input.
    pub datatype: Node<'a>,
}

/// A literal value with an optional datatype and an optional language tag.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Literal<'a> {
    /// Optional datatype node. When serialized it is written as a `^^` suffix,
    /// unless it equals `NODE_RDF_XSD_STRING`.
    pub datatype: Option<Box<Node<'a>>>,
    /// The lexical value, borrowed or owned.
    pub value: Cow<'a, str>,
    /// Optional language tag. When serialized it is written as an `@` suffix,
    /// but only if no datatype suffix was written.
    pub lang: Option<&'a str>,
}

/// A term that can appear in a [`Statement`].
///
/// Equality is implemented by hand (see the `PartialEq` impl): `Iri` and
/// `TermIri` holding the same text are equal, and a `Ref` is equal to the
/// node it wraps. `Hash` is therefore implemented by hand as well, so that
/// equal nodes always produce the same hash.
///
/// NOTE(review): `PartialOrd`/`Ord` are still derived and order by variant
/// first, so `cmp` can return non-`Equal` for two nodes that `==` considers
/// equal. Confirm before using `Node` as a key in ordered collections.
#[derive(Debug, Clone, Eq, PartialOrd, Ord)]
pub enum Node<'a> {
    /// An IRI; serialized as `<iri>`.
    Iri(Cow<'a, str>),
    /// An IRI that serializes exactly like `Iri` and compares equal to an
    /// `Iri` with the same text. (Presumably produced from a term — confirm.)
    TermIri(Cow<'a, str>),
    /// A literal value.
    Literal(Literal<'a>),
    /// A shared handle to another node; transparent for serialization,
    /// equality and hashing.
    Ref(Arc<Node<'a>>),
    /// A blank node with an owned identifier.
    Blank(String),
    /// A blank node with a borrowed identifier.
    RefBlank(&'a str),
}

// `#[derive(Hash)]` would hash the variant discriminant, giving different
// hashes to values the manual `PartialEq` treats as equal (`Iri` vs `TermIri`,
// `Ref(x)` vs `x`). That breaks the `k1 == k2 => hash(k1) == hash(k2)` rule
// that `HashSet`/`HashMap` rely on, so the hash is written to mirror `eq`.
impl std::hash::Hash for Node<'_> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        match self {
            // Same tag for both IRI flavours because they compare equal.
            Node::Iri(iri) | Node::TermIri(iri) => {
                std::hash::Hasher::write_u8(state, 0);
                std::hash::Hash::hash(&**iri, state);
            }
            Node::Literal(literal) => {
                std::hash::Hasher::write_u8(state, 1);
                std::hash::Hash::hash(literal, state);
            }
            // A reference hashes exactly like the node it points to.
            Node::Ref(inner) => std::hash::Hash::hash(&**inner, state),
            Node::Blank(id) => {
                std::hash::Hasher::write_u8(state, 2);
                std::hash::Hash::hash(id.as_str(), state);
            }
            Node::RefBlank(id) => {
                std::hash::Hasher::write_u8(state, 3);
                std::hash::Hash::hash(*id, state);
            }
        }
    }
}

/// A subject / predicate / object triple.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Statement<'a> {
    /// First term of the triple.
    pub subject: Node<'a>,
    /// Second term of the triple.
    pub predicate: Node<'a>,
    /// Third term of the triple.
    pub object: Node<'a>,
}

impl Statement<'_> {
    fn as_ntriple_string(&self, well_known_prefix: Option<&str>) -> String {
        let Statement {
            subject,
            predicate,
            object,
        } = self;
        format!(
            r#"{} {} {}."#,
            subject.as_ntriple_string(well_known_prefix),
            predicate.as_ntriple_string(well_known_prefix),
            object.as_ntriple_string(well_known_prefix)
        )
    }
}

impl Node<'_> {
    pub fn is_empty(&self) -> bool {
        match self {
            Node::Iri(iri) => iri.is_empty(),
            Node::TermIri(iri) => iri.is_empty(),
            Node::Literal(l) => {
                l.value.is_empty()
                    && l.datatype.as_ref().filter(|li| !li.is_empty()).is_none()
                    && l.lang.filter(|lan| lan.is_empty()).is_none()
            }
            Node::Ref(r) => r.is_empty(),
            Node::Blank(_) => false,
            Node::RefBlank(s) => s.is_empty(),
        }
    }
    fn as_ntriple_string(&self, well_known_prefix: Option<&str>) -> String {
        match self {
            Node::Iri(iri) | Node::TermIri(iri) => format!("<{}>", iri),
            Node::Ref(iri) => iri.as_ntriple_string(well_known_prefix),
            Node::Literal(Literal {
                datatype,
                lang,
                value,
            }) => {
                const DEFAULT_SEPARATOR: &str = r#"""""#;
                const FALLBACK_SEPARATOR: &str = "'''";
                let value = value.replace(FALLBACK_SEPARATOR, "\'\'\'");
                let separator = if value.ends_with("\"") || value.contains(DEFAULT_SEPARATOR) {
                    FALLBACK_SEPARATOR
                } else {
                    DEFAULT_SEPARATOR
                };
                let mut s = format!(r#"{separator}{value}{separator}"#);

                if let Some(datatype) = datatype
                    .as_ref()
                    .filter(|dt| dt.as_ref() != &*NODE_RDF_XSD_STRING)
                {
                    s.push_str(&format!(
                        r#"^^{}"#,
                        datatype.as_ntriple_string(well_known_prefix)
                    ));
                } else if let Some(lang) = lang {
                    s.push_str(&format!(r#"@{lang}"#));
                }
                s
            }
            Node::Blank(id) => {
                if let Some(well_known_prefix) = well_known_prefix {
                    format!("<{well_known_prefix}{id}>",)
                } else {
                    format!("_:{id}")
                }
            }
            Node::RefBlank(id) => {
                if let Some(well_known_prefix) = well_known_prefix {
                    format!("<{well_known_prefix}{id}>",)
                } else {
                    format!("_:{id}")
                }
            }
        }
    }

    pub fn is_blank(&self) -> bool {
        match self {
            Node::Iri(_) | Node::TermIri(_) | Node::Literal(_) => false,
            Node::Ref(r) => r.is_blank(),
            Node::RefBlank(_) | Node::Blank(_) => true,
        }
    }
}

/// Structural equality with two relaxations:
///
/// * `Iri` and `TermIri` are interchangeable — only their text is compared;
/// * a `Ref` compares as the node it wraps, on either side.
///
/// `Blank` and `RefBlank` are only ever equal to a node of the same variant
/// (or to a `Ref` wrapping one). Any `Hash` implementation for `Node` has to
/// agree with these rules.
impl PartialEq for Node<'_> {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Iri(lhs) | Self::TermIri(lhs), Self::Iri(rhs) | Self::TermIri(rhs)) => {
                lhs == rhs
            }
            (Self::Literal(lhs), Self::Literal(rhs)) => lhs == rhs,
            // Reference handling: both wrapped, then left only, then right only.
            (Self::Ref(lhs), Self::Ref(rhs)) => lhs == rhs,
            (Self::Ref(wrapped), plain) => wrapped.as_ref() == plain,
            (plain, Self::Ref(wrapped)) => plain == wrapped.as_ref(),
            (Self::Blank(lhs), Self::Blank(rhs)) => lhs == rhs,
            (Self::RefBlank(lhs), Self::RefBlank(rhs)) => lhs == rhs,
            _ => false,
        }
    }
}

/// Writes every statement on its own line, separated by `\n`, with no
/// trailing newline. Lines follow the iteration order of the statement set.
impl Display for RdfaGraph<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut rendered = String::new();
        for (position, statement) in self.statements.iter().enumerate() {
            if position > 0 {
                rendered.push('\n');
            }
            rendered.push_str(&statement.as_ntriple_string(self.well_known_prefix));
        }
        f.write_str(&rendered)
    }
}
#[allow(unused)]
impl<'a> DataTypeFromPattern<'a> {
    /// Returns a clone of `self.datatype` when the first match of
    /// `self.pattern` in `value` is as long as `value` itself.
    ///
    /// Returns `None` when the pattern fails to compile, does not match, or
    /// matches only part of `value`. The pattern is compiled on every call.
    pub fn test(&self, value: &'a str) -> Option<Node<'a>> {
        let compiled = match Regex::new(self.pattern) {
            Ok(regex) => regex,
            Err(_) => return None,
        };
        match compiled.find(value) {
            Some(found) if found.len() == value.len() => Some(self.datatype.clone()),
            _ => None,
        }
    }

    /// Tries each entry of `DATETIME_TYPES` in order and returns the datatype
    /// of the first one whose `test` accepts `value`, or `None` if none does.
    pub fn date_time_from_pattern(value: &'a str) -> Option<Node<'a>> {
        for candidate in DATETIME_TYPES {
            let detected = candidate.test(value);
            if detected.is_some() {
                return detected;
            }
        }
        None
    }
}

#[cfg(test)]
mod test {
    use super::DataTypeFromPattern;

    use crate::Cow;
    use crate::Node;

    /// Each sample value must be detected as the paired XML Schema datatype.
    #[test]
    fn test_date() {
        let cases = [
            ("2022-09-10", iri!("http://www.w3.org/2001/XMLSchema#date")),
            ("00:00:00", iri!("http://www.w3.org/2001/XMLSchema#time")),
            (
                "2012-03-18T00:00:00Z",
                iri!("http://www.w3.org/2001/XMLSchema#dateTime"),
            ),
            ("2022", iri!("http://www.w3.org/2001/XMLSchema#gYear")),
            (
                "2022-09",
                iri!("http://www.w3.org/2001/XMLSchema#gYearMonth"),
            ),
            (
                "PT2H30M45.5S",
                iri!("http://www.w3.org/2001/XMLSchema#duration"),
            ),
        ];

        for (input, expected) in cases {
            let res = DataTypeFromPattern::date_time_from_pattern(input);
            assert_eq!(Some(expected), res, "input: {input}");
        }
    }
}