1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
//! This module defines the API for [RDF] terms.
//!
//! Terms are the building blocks of an [RDF] graph.
//! There are four types of terms: IRIs, blank nodes (BNode for short),
//! literals and variables.
//!
//! NB: variables only exist in [generalized RDF].
//!
//! [Sophia]: https://docs.rs/sophia/latest/sophia/
//! [RDF]: https://www.w3.org/TR/rdf-primer/
//! [Linked Data]: http://linkeddata.org/
//! [generalized RDF]: crate#generalized-vs-strict-rdf-model

use mownstr::MownStr;
use std::cmp::Ordering;
use std::error::Error;
use std::hash::{Hash, Hasher};

mod _dyn_term;
mod _graph_name_matcher; // is 'pub use'd by module 'matcher'
mod _iri_wrapper;
mod _raw_value;
pub use self::_raw_value::*;
pub mod matcher;
pub mod simple_iri;
pub use simple_iri::SimpleIri;

/// Common interface implemented by every RDF term type.
///
/// Sophia distinguishes 4 kinds of terms: IRI references (absolute or relative),
/// literals, blank nodes and variables.
/// Strict RDF supports neither relative IRI references nor variables.
///
/// Any type representing terms of one or more of those kinds
/// may implement this trait, and thereby be used with the rest of the Sophia API.
///
/// # Design considerations
///
/// This trait is deliberately less "pure" than it could be:
///
/// * a single trait covers four "kinds" of terms,
///   which could arguably have been four distinct abstract types;
///
/// * it is rather opinionated about how implementations store their data,
///   and comes with a very constrained contract (see below);
///
/// * it relies on the "semi-abstract" method `value_raw`,
///   which mostly makes sense as a building block for the default
///   implementation of the other methods.
///
/// Those trade-offs were accepted to allow for efficient implementations
/// of the overall API.
///
/// # Contract
///
/// On top of the specific contract of each method,
/// every type implementing this trait must uphold the following guarantees:
///
/// * if it implements [`Hash`],
///   that implementation must be consistent with (or, even better, based on)
///   [`term_hash()`];
///
/// * if it implements [`PartialEq`],
///   that implementation must be consistent with (or, even better, based on)
///   [`term_eq()`];
///
/// * if it implements [`PartialOrd`],
///   that implementation must be consistent with (or, even better, based on)
///   [`term_cmp`];
///
/// * if it implements [`Borrow`](std::borrow::Borrow)`<dyn `[`TTerm`]` + 'a>`,
///   its hashing and equality must be equivalent to [`term_hash()`] and [`term_eq()`],
///   so that the contract of [`Borrow`](std::borrow::Borrow)
///   holds with respect to the implementations on `dyn `[`TTerm`].
///
/// # `Borrow<dyn TTerm + 'a>`
///
/// Implementing [`Borrow`](std::borrow::Borrow)`<dyn `[`TTerm`]` + 'a>`
/// for a term type makes it more ergonomic to use
/// as a key in a [`HashSet`](std::collections::HashSet) or similar collection:
/// keys can then be looked up with any implementation of the `TTerm` trait, e.g.:
///
/// ```
/// # use std::collections::HashSet;
/// use sophia_term::Term;
/// use sophia_api::term::{SimpleIri, TTerm as _};
///
/// let t: Term<String> = Term::new_iri("http://example.com/test")?;
/// let mut map = HashSet::new();
/// map.insert(t);
///
/// let iri = SimpleIri::new("http://example.com/", Some("test"))?;
/// assert!(map.get(iri.as_dyn()).is_some());
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
///
/// ## Implementation
///
/// Besides the contract [mentioned above](#contract), note that a custom term
/// should implement `Borrow<dyn TTerm + 'a>` rather than `Borrow<dyn TTerm>`.
/// The reason is that Rust assumes that a bare `dyn Trait` object has a
/// `'static` lifetime, which means that borrowing as `dyn TTerm` requires
/// borrowing the value for the `'static` lifetime.
/// To lift this assumption, implement `Borrow<dyn TTerm + 'a>` instead, e.g.:
///
/// ```
/// # use sophia_api::term::{TTerm, TermKind, RawValue};
/// # struct MyTerm;
/// # impl TTerm for MyTerm {
/// #     fn kind(&self) -> TermKind {TermKind::BlankNode}
/// #     fn value_raw(&self) -> RawValue { "".into() }
/// #     fn as_dyn(&self) -> &dyn TTerm { self }
/// # }
/// use std::borrow::Borrow;
///
/// impl<'a> Borrow<dyn TTerm + 'a> for MyTerm {
///     fn borrow(&self) -> &(dyn TTerm + 'a) {
///         self
///     }
/// }
/// ```
pub trait TTerm {
    /// Tells which kind of term this is (IRI, literal, blank node, variable).
    fn kind(&self) -> TermKind;

    /// Return the "value" of this term; its meaning depends on the kind:
    /// * for an IRI reference, the IRI reference itself;
    /// * for a literal, its lexical value;
    /// * for a blank node, its local identifier;
    /// * for a variable, its name.
    ///
    /// # Performance
    /// The returned `MownStr` is always borrowed (equivalent to a `&str`),
    /// **except** for IRI references, where this method *may* allocate a new string
    /// (depending on implementations).
    /// Use [`value_raw`] instead if that allocation is undesirable.
    ///
    /// # Note to implementors
    /// Should not be overridden; must be consistent with [`value_raw`].
    ///
    /// [`value_raw`]: #tymethod.value_raw
    fn value(&self) -> MownStr {
        let raw = self.value_raw();
        raw.into()
    }

    /// Return the datatype IRI of this term if it is a literal.
    ///
    /// NB: *all* literals have a datatype,
    /// even simple literals (whose implicit type is `xsd:string`)
    /// and language tagged strings (whose implicit type is `rdf:langString`).
    ///
    /// # Note to implementors
    /// The default implementation always returns `None`.
    /// Types that may represent literals must override it:
    /// the default [`is_absolute`](#method.is_absolute) unwraps the datatype
    /// of any literal that has no language tag.
    fn datatype(&self) -> Option<SimpleIri> {
        None
    }

    /// Return the language tag of this term if it is a language-tagged literal.
    ///
    /// # Note to implementors
    /// The default implementation always returns `None`,
    /// so there is no need to override it
    /// unless your type may represent a language-tagged literal.
    fn language(&self) -> Option<&str> {
        None
    }

    /// Return the "value" of this term, possibly split in two substrings.
    /// The second part might only be non-empty if this term is an IRI reference.
    ///
    /// See also [`value`](#method.value).
    ///
    /// # Note to implementors
    /// The second part of the raw value exists for those implementations
    /// of IRIs that store a "namespace" and a "suffix" separately.
    /// For every other kind of term, the second part must always be `None`.
    fn value_raw(&self) -> RawValue;

    /// Tells whether this term is absolute.
    ///
    /// All terms are absolute, except for:
    /// * relative IRI references,
    /// * literals whose datatype is a relative IRI reference.
    ///
    /// # Panics
    /// Panics if this term is a literal without a language tag
    /// for which [`datatype`](#method.datatype) returns `None`.
    fn is_absolute(&self) -> bool {
        match self.kind() {
            Iri => self.value_raw().is_absolute(),
            // Only literals *without* a language tag are checked through their datatype;
            // language-tagged literals fall through to the catch-all arm below.
            Literal if self.language().is_none() => {
                self.datatype().unwrap().value_raw().is_absolute()
            }
            _ => true,
        }
    }

    /// This method ensures that every implementation of `TTerm`
    /// can be turned into a trait object.
    ///
    /// # Why is this required?
    /// After all, in most cases, passing `&t` instead of `t.as_dyn()`
    /// will work just as well.
    ///
    /// The reason is that most methods of the API accept *references*
    /// to terms, as `&T` where `T: TTerm + ?Sized`,
    /// and such references can *not* be cast to `dyn TTerm`
    /// (see <https://stackoverflow.com/a/57432042/1235487> for more details).
    fn as_dyn(&self) -> &dyn TTerm;
}

/// The kind a [`TTerm`] belongs to; every term has exactly one.
///
/// The derived ordering follows the declaration order of the variants:
/// `Iri < Literal < BlankNode < Variable`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TermKind {
    /// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#section-IRIs);
    /// in Sophia this also covers
    /// [relative IRI references](https://www.ietf.org/rfc/rfc3987.html#section-6.5)
    Iri,
    /// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#section-Graph-Literal)
    Literal,
    /// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#section-blank-nodes)
    BlankNode,
    /// A [variable](https://www.w3.org/TR/sparql11-query/#QSynVariables)
    Variable,
}
use TermKind::*;

/// Implemented by types that can be built as a copy of *any* term.
pub trait CopyTerm: Sized {
    /// Build an instance of this type by copying `term`.
    fn copy<T: TTerm + ?Sized>(term: &T) -> Self;
}

/// A type that can copy some terms.
pub trait TryCopyTerm: Sized {
    /// The error type produced when failing to copy a given term
    type Error: 'static + Error;
    /// Try to copy `term` into an instance of this type.
    fn try_copy<T>(term: &T) -> Result<Self, Self::Error>
    where
        T: TTerm + ?Sized;
}

/// This trait is to [`CopyTerm`] and [`TryCopyTerm`]
/// what `Into` is to `From`.
/// It is automatically implemented by any implementation of [`TTerm`].
pub trait CopiableTerm {
    /// Copy this IRI into another type.
    fn copied<T: CopyTerm>(&self) -> T;
    /// Try to copy this IRI into another type.
    fn try_copied<T: TryCopyTerm>(&self) -> Result<T, T::Error>;
}

/// Blanket implementation: any term can be copied into any type
/// implementing [`CopyTerm`] or [`TryCopyTerm`].
impl<T: TTerm + ?Sized> CopiableTerm for T {
    fn copied<U: CopyTerm>(&self) -> U {
        <U as CopyTerm>::copy(self)
    }

    fn try_copied<U: TryCopyTerm>(&self) -> Result<U, U::Error> {
        <U as TryCopyTerm>::try_copy(self)
    }
}

/// Feed a term into a hasher, consistently with [`term_eq`].
///
/// The following are hashed, in this order:
/// * the kind of the term;
/// * for literals only: the language tag, ASCII-uppercased byte by byte,
///   if there is one, otherwise the raw value of the datatype;
/// * the raw value of the term.
///
/// # Panics
/// Panics if `term` is a literal with neither a language tag nor a datatype.
pub fn term_hash<T, H>(term: &T, state: &mut H)
where
    H: Hasher,
    T: TTerm + ?Sized,
{
    let kind = term.kind();
    let raw = term.value_raw();
    kind.hash(state);
    if matches!(kind, Literal) {
        if let Some(tag) = term.language() {
            // language tags are case-insensitive: hash a normalized form
            tag.bytes()
                .for_each(|b| state.write_u8(b.to_ascii_uppercase()));
        } else {
            term.datatype().unwrap().value_raw().hash(state);
        }
    }
    raw.hash(state);
}

/// Compare two terms for syntactic equality.
///
/// NB: this does not take into account semantics, not even for literals.
/// For example, `"42"^^xsd:integer`, `"042"^^xsd:integer` and `"42.0"^^xsd::decimal`
/// are considered all different from each other.
pub fn term_eq<T1, T2>(t1: &T1, t2: &T2) -> bool
where
    T1: TTerm + ?Sized,
    T2: TTerm + ?Sized,
{
    let k1 = t1.kind();
    let k2 = t2.kind();
    k1 == k2 && {
        let v1 = t1.value_raw();
        let v2 = t2.value_raw();
        if matches!(k1, Iri) {
            v1 == v2
        } else {
            v1.0 == v2.0 && {
                if matches!(k1, Literal) {
                    match (t1.language(), t2.language()) {
                        (Some(tag1), Some(tag2)) => tag1.eq_ignore_ascii_case(tag2),
                        (None, None) => {
                            let dt1 = t1.datatype().unwrap();
                            let dt2 = t2.datatype().unwrap();
                            dt1.value_raw() == dt2.value_raw()
                        }
                        _ => false,
                    }
                } else {
                    true
                }
            }
        }
    }
}

/// Compare two terms:
/// * IRIs < literals < blank nodes < variables
/// * IRIs, blank nodes and variables are ordered by their value
/// * Literals are ordered by their datatype, then their language (if any),
///   then their lexical value
///
/// NB: literals are ordered by their *lexical* value,
/// so for example, `"10"^^xsd:integer` come `*before* "2"^^xsd:integer`.
pub fn term_cmp<T1, T2>(t1: &T1, t2: &T2) -> Ordering
where
    T1: TTerm + ?Sized,
    T2: TTerm + ?Sized,
{
    let k1 = t1.kind();
    let k2 = t2.kind();
    k1.cmp(&k2).then_with(|| {
        let v1 = t1.value_raw();
        let v2 = t2.value_raw();
        match k1 {
            Iri => v1.bytes().cmp(v2.bytes()),
            Literal => {
                let tag1 = t1.language();
                let tag2 = t2.language();
                //if tag1.is_some() && tag2.is_some() {
                if let (Some(tag1), Some(tag2)) = (tag1, tag2) {
                    tag1.to_uppercase()
                        .cmp(&tag2.to_uppercase())
                        .then_with(|| v1.0.cmp(v2.0))
                } else {
                    let dt1 = t1.datatype().unwrap();
                    let dt2 = t2.datatype().unwrap();
                    dt1.value_raw()
                        .bytes()
                        .cmp(dt2.value_raw().bytes())
                        .then_with(|| v1.0.cmp(v2.0))
                }
            }
            _ => v1.0.cmp(v2.0),
        }
    })
}

/// Format the given term in a Turtle-like format.
///
/// * IRIs are written between angle brackets (`<...>`);
/// * literals are written as a quoted string (escaped with Rust's `Debug`
///   formatting of `str`), followed by `@` and the language tag if any,
///   or else by `^^` and the datatype IRI, unless that datatype is `xsd:string`;
/// * blank nodes are written as `_:` followed by their identifier;
/// * variables are written as `?` followed by their name.
///
/// # Errors
/// Returns any error raised by the underlying writer `w`.
///
/// # Panics
/// Panics if `term` is a literal with neither a language tag nor a datatype.
pub fn term_format<T, W>(term: &T, w: &mut W) -> std::fmt::Result
where
    T: TTerm + ?Sized,
    W: std::fmt::Write,
{
    let v = term.value_raw();
    match term.kind() {
        Iri => {
            w.write_char('<')?;
            w.write_str(v.0)?;
            // the raw value of an IRI may be split into (namespace, suffix)
            if let Some(suffix) = v.1 {
                w.write_str(suffix)?;
            }
            w.write_char('>')
        }
        Literal => {
            write!(w, "{:?}", v.0)?;
            if let Some(tag) = term.language() {
                write!(w, "@{}", tag)
            } else {
                let dt = term.datatype().unwrap();
                // xsd:string is the implicit datatype of simple literals: leave it out
                if !term_eq(&dt, &crate::ns::xsd::string) {
                    w.write_str("^^")?;
                    // reuse `dt` rather than fetching and unwrapping the datatype again
                    term_format(&dt, w)?;
                }
                Ok(())
            }
        }
        BlankNode => write!(w, "_:{}", v.0),
        // SPARQL variable syntax, `?name`
        Variable => write!(w, "?{}", v.0),
    }
}

/// Render the given term as a `String`,
/// using the same Turtle-like format as [`term_format`].
pub fn term_to_string<T>(term: &T) -> String
where
    T: TTerm + ?Sized,
{
    TermFormater(term).to_string()
}

/// Tell whether two graph names (`Option<&Term>`),
/// possibly of different types, are the same.
///
/// Two absent names (`None`) are the same;
/// two present names are the same if they are equal according to [`term_eq`];
/// an absent name and a present one are always different.
pub fn same_graph_name<T, U>(g1: Option<&T>, g2: Option<&U>) -> bool
where
    T: TTerm + ?Sized,
    U: TTerm + ?Sized,
{
    match (g1, g2) {
        (None, None) => true,
        (Some(name1), Some(name2)) => term_eq(name1, name2),
        (Some(_), None) | (None, Some(_)) => false,
    }
}

/// Private adapter wrapping a reference to a term,
/// so that it can be displayed through [`term_format`].
struct TermFormater<'a, T: ?Sized>(&'a T);

impl<T: TTerm + ?Sized> std::fmt::Display for TermFormater<'_, T> {
    /// Delegate to [`term_format`], writing the wrapped term to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let TermFormater(term) = *self;
        term_format(term, f)
    }
}

#[cfg(any(test, feature = "test_macro"))]
pub mod test;