graph_rdfa_processor/
structs.rs

1use std::{
2    borrow::Cow,
3    collections::{HashMap, HashSet},
4    fmt::{Display, Formatter},
5    sync::Arc,
6};
7
8use regex::Regex;
9
10use crate::constants::{DATETIME_TYPES, NODE_RDF_XSD_STRING};
/// Builds a `Node::Iri` that borrows the given string literal.
///
/// The expansion names `Node` and `Cow` without a path, so both must be in
/// scope at every call site.
#[macro_export]
macro_rules! iri {
    ($value:literal) => {
        Node::Iri(Cow::Borrowed($value))
    };
}
17
18#[derive(Debug)]
19pub struct RdfaGraph<'a> {
20    pub well_known_prefix: Option<&'a str>,
21    pub statements: HashSet<Statement<'a>>,
22}
23
24#[derive(Debug, Default, Clone)]
25pub struct Context<'a> {
26    pub base: &'a str,
27    pub well_known_prefix: Option<&'a str>,
28    pub empty_ref_node_substitute: &'a str,
29    pub vocab: Option<&'a str>,
30    pub lang: Option<&'a str>,
31    pub in_rel: Option<Vec<Node<'a>>>,
32    pub in_rev: Option<Vec<Node<'a>>>,
33    pub in_list: Option<Vec<Node<'a>>>,
34    pub current_node: Option<Node<'a>>,
35    pub prefixes: HashMap<&'a str, &'a str>,
36}
37
38#[derive(Debug)]
39pub struct DataTypeFromPattern<'a> {
40    pub pattern: &'a str,
41    pub datatype: Node<'a>,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
45pub struct Literal<'a> {
46    pub datatype: Option<Box<Node<'a>>>,
47    pub value: Cow<'a, str>,
48    pub lang: Option<&'a str>,
49}
50
51#[derive(Debug, Clone, Eq, PartialOrd, Ord, Hash)]
52pub enum Node<'a> {
53    Iri(Cow<'a, str>),
54    TermIri(Cow<'a, str>),
55    Literal(Literal<'a>),
56    Ref(Arc<Node<'a>>),
57    Blank(String),
58    RefBlank(&'a str),
59}
60
61#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
62pub struct Statement<'a> {
63    pub subject: Node<'a>,
64    pub predicate: Node<'a>,
65    pub object: Node<'a>,
66}
67
68impl Statement<'_> {
69    fn as_ntriple_string(&self, well_known_prefix: Option<&str>) -> String {
70        let Statement {
71            subject,
72            predicate,
73            object,
74        } = self;
75        format!(
76            r#"{} {} {}."#,
77            subject.as_ntriple_string(well_known_prefix),
78            predicate.as_ntriple_string(well_known_prefix),
79            object.as_ntriple_string(well_known_prefix)
80        )
81    }
82}
83
84impl Node<'_> {
85    pub fn is_empty(&self) -> bool {
86        match self {
87            Node::Iri(iri) => iri.is_empty(),
88            Node::TermIri(iri) => iri.is_empty(),
89            Node::Literal(l) => {
90                l.value.is_empty()
91                    && l.datatype.as_ref().filter(|li| !li.is_empty()).is_none()
92                    && l.lang.filter(|lan| lan.is_empty()).is_none()
93            }
94            Node::Ref(r) => r.is_empty(),
95            Node::Blank(_) => false,
96            Node::RefBlank(s) => s.is_empty(),
97        }
98    }
99    fn as_ntriple_string(&self, well_known_prefix: Option<&str>) -> String {
100        match self {
101            Node::Iri(iri) | Node::TermIri(iri) => format!("<{}>", iri),
102            Node::Ref(iri) => iri.as_ntriple_string(well_known_prefix),
103            Node::Literal(Literal {
104                datatype,
105                lang,
106                value,
107            }) => {
108                const DEFAULT_SEPARATOR: &str = r#"""""#;
109                const FALLBACK_SEPARATOR: &str = "'''";
110                let value = value.replace(FALLBACK_SEPARATOR, "\'\'\'");
111                let separator = if value.ends_with("\"") || value.contains(DEFAULT_SEPARATOR) {
112                    FALLBACK_SEPARATOR
113                } else {
114                    DEFAULT_SEPARATOR
115                };
116                let mut s = format!(r#"{separator}{value}{separator}"#);
117
118                if let Some(datatype) = datatype
119                    .as_ref()
120                    .filter(|dt| dt.as_ref() != &*NODE_RDF_XSD_STRING)
121                {
122                    s.push_str(&format!(
123                        r#"^^{}"#,
124                        datatype.as_ntriple_string(well_known_prefix)
125                    ));
126                } else if let Some(lang) = lang {
127                    s.push_str(&format!(r#"@{lang}"#));
128                }
129                s
130            }
131            Node::Blank(id) => {
132                if let Some(well_known_prefix) = well_known_prefix {
133                    format!("<{well_known_prefix}{id}>",)
134                } else {
135                    format!("_:{id}")
136                }
137            }
138            Node::RefBlank(id) => {
139                if let Some(well_known_prefix) = well_known_prefix {
140                    format!("<{well_known_prefix}{id}>",)
141                } else {
142                    format!("_:{id}")
143                }
144            }
145        }
146    }
147
148    pub fn is_blank(&self) -> bool {
149        match self {
150            Node::Iri(_) | Node::TermIri(_) | Node::Literal(_) => false,
151            Node::Ref(r) => r.is_blank(),
152            Node::RefBlank(_) | Node::Blank(_) => true,
153        }
154    }
155}
156
157impl PartialEq for Node<'_> {
158    fn eq(&self, other: &Self) -> bool {
159        match (self, other) {
160            (Self::Iri(l0), Self::Iri(r0)) => l0 == r0,
161            (Self::Iri(l0), Self::TermIri(r0)) => l0 == r0,
162            (Self::TermIri(l0), Self::TermIri(r0)) => l0 == r0,
163            (Self::TermIri(l0), Self::Iri(r0)) => l0 == r0,
164            (Self::Literal(l0), Self::Literal(r0)) => l0 == r0,
165            (Self::Ref(l0), Self::Ref(r0)) => l0 == r0,
166            (Self::Ref(l0), rhs) => l0.as_ref() == rhs,
167            (lhs, Self::Ref(r0)) => lhs == r0.as_ref(),
168            (Self::Blank(l0), Self::Blank(r0)) => l0 == r0,
169            (Self::RefBlank(l0), Self::RefBlank(r0)) => l0 == r0,
170            _ => false,
171        }
172    }
173}
174
175impl Display for RdfaGraph<'_> {
176    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
177        f.write_str(
178            &self
179                .statements
180                .iter()
181                .map(|s| s.as_ntriple_string(self.well_known_prefix))
182                .collect::<Vec<_>>()
183                .join("\n"),
184        )
185    }
186}
187#[allow(unused)]
188impl<'a> DataTypeFromPattern<'a> {
189    pub fn test(&self, value: &'a str) -> Option<Node<'a>> {
190        let re = Regex::new(self.pattern).ok()?;
191        if re.find(value).filter(|r| r.len() == value.len()).is_some() {
192            Some(self.datatype.clone())
193        } else {
194            None
195        }
196    }
197    pub fn date_time_from_pattern(value: &'a str) -> Option<Node<'a>> {
198        for dtp in DATETIME_TYPES {
199            if let v @ Some(_) = dtp.test(value) {
200                return v;
201            }
202        }
203        None
204    }
205}
206
#[cfg(test)]
mod test {
    use super::DataTypeFromPattern;

    use crate::Cow;
    use crate::Node;

    /// Each sample value must be recognised as the XSD datatype listed next
    /// to it.
    #[test]
    fn test_date() {
        let cases = [
            ("2022-09-10", iri!("http://www.w3.org/2001/XMLSchema#date")),
            ("00:00:00", iri!("http://www.w3.org/2001/XMLSchema#time")),
            (
                "2012-03-18T00:00:00Z",
                iri!("http://www.w3.org/2001/XMLSchema#dateTime"),
            ),
            ("2022", iri!("http://www.w3.org/2001/XMLSchema#gYear")),
            (
                "2022-09",
                iri!("http://www.w3.org/2001/XMLSchema#gYearMonth"),
            ),
            (
                "PT2H30M45.5S",
                iri!("http://www.w3.org/2001/XMLSchema#duration"),
            ),
        ];

        for (input, expected) in cases {
            let res = DataTypeFromPattern::date_time_from_pattern(input);
            assert_eq!(Some(expected), res);
        }
    }
}