//! This module defines the API for [RDF] terms.
//!
//! Terms are the building blocks of an [RDF] graph.
//! There are four types of terms: IRIs, blank nodes (BNode for short),
//! literals and variables.
//!
//! NB: variables only exist in [generalized RDF].
//!
//! [Sophia]: https://docs.rs/sophia/latest/sophia/
//! [RDF]: https://www.w3.org/TR/rdf-primer/
//! [Linked Data]: http://linkeddata.org/
//! [generalized RDF]: crate#generalized-vs-strict-rdf-model
use mownstr::MownStr;
use std::cmp::Ordering;
use std::error::Error;
use std::hash::{Hash, Hasher};
mod _dyn_term;
mod _graph_name_matcher; // is 'pub use'd by module 'matcher'
mod _iri_wrapper;
mod _raw_value;
pub use self::_raw_value::*;
pub mod matcher;
pub mod simple_iri;
pub use simple_iri::SimpleIri;
/// Common interface of every RDF term.
///
/// Sophia knows four kinds of terms: IRI references (absolute or relative),
/// literals, blank nodes and variables.
/// Strict RDF supports neither relative IRI references nor variables.
///
/// Any type representing terms of one or several of these kinds
/// can implement this trait, and thereby be used with the rest of the Sophia API.
///
/// # Design considerations
///
/// This trait is deliberately less "pure" than it could be:
///
/// * a single trait covers four "kinds",
///   which could arguably have been four distinct abstract types;
///
/// * it is rather opinionated about how implementations store their data,
///   and comes with a very constrained contract (see below);
///
/// * it relies on the "semi-abstract" method `value_raw`,
///   which mostly makes sense as a support for the default implementation
///   of the other methods.
///
/// These trade-offs were accepted to allow efficient implementations
/// of the overall API.
///
/// # Contract
///
/// On top of the contract of each individual method,
/// every type implementing this trait must uphold the following guarantees:
///
/// * if it implements [`Hash`],
///   that implementation must be consistent with (or, even better, based on)
///   [`term_hash()`];
///
/// * if it implements [`PartialEq`],
///   that implementation must be consistent with (or, even better, based on)
///   [`term_eq()`];
///
/// * if it implements [`PartialOrd`](std::cmp::PartialOrd),
///   that implementation must be consistent with (or, even better, based on)
///   [`term_cmp`];
///
/// * if it implements [`Borrow`](std::borrow::Borrow)`<dyn `[`TTerm`]` + 'a>`,
///   its implementations must be equivalent to [`term_hash()`] and [`term_eq()`],
///   so that the contract of [`Borrow`](std::borrow::Borrow)
///   holds with respect to the implementations on `dyn `[`TTerm`].
///
/// # `[Borrow]<dyn TTerm + 'a>`
///
/// Implementing [`Borrow`](std::borrow::Borrow)`<dyn `[`TTerm`]` + 'a>`
/// for a term type makes it more ergonomic
/// as a key in a [`HashSet`](std::collections::HashSet) or similar collection:
/// keys can then be looked up with any implementation of the `TTerm` trait, e.g.:
///
/// ```
/// # use std::collections::HashSet;
/// use sophia_term::Term;
/// use sophia_api::term::{SimpleIri, TTerm as _};
///
/// let t: Term<String> = Term::new_iri("http://example.com/test")?;
/// let mut map = HashSet::new();
/// map.insert(t);
///
/// let iri = SimpleIri::new("http://example.com/", Some("test"))?;
/// assert!(map.get(iri.as_dyn()).is_some());
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
///
/// ## Implementation
///
/// Besides the contract [mentioned above](#contract), note that a custom term
/// should implement `Borrow<dyn TTerm + 'a>` rather than `Borrow<dyn TTerm>`.
/// The reason is that Rust assumes that every `dyn Trait` object
/// has a `'static` lifetime, which means that borrowing `dyn TTerm`
/// requires borrowing the value for the `'static` lifetime.
/// Implementing `Borrow<dyn TTerm + 'a>` lifts that assumption, e.g.:
///
/// ```
/// # use sophia_api::term::{TTerm, TermKind, RawValue};
/// # struct MyTerm;
/// # impl TTerm for MyTerm {
/// #     fn kind(&self) -> TermKind {TermKind::BlankNode}
/// #     fn value_raw(&self) -> RawValue { "".into() }
/// #     fn as_dyn(&self) -> &dyn TTerm { self }
/// # }
/// use std::borrow::Borrow;
///
/// impl<'a> Borrow<dyn TTerm + 'a> for MyTerm {
///     fn borrow(&self) -> &(dyn TTerm + 'a) {
///         self
///     }
/// }
/// ```
pub trait TTerm {
    /// Tells which kind of term this is (IRI, literal, blank node, variable).
    fn kind(&self) -> TermKind;

    /// Returns the "value" of this term, whose meaning depends on its kind:
    /// * the IRI reference itself for an IRI reference;
    /// * the lexical value for a literal;
    /// * the local identifier for a blank node;
    /// * the name for a variable.
    ///
    /// # Performance
    /// The returned `MownStr` is always borrowed (equivalent to a `&str`),
    /// **except** for IRI references, where this method *may* allocate a new string
    /// (depending on implementations).
    /// Use [`value_raw`] instead if that allocation is undesirable.
    ///
    /// # Note to implementors
    /// Should not be overridden; must be consistent with [`value_raw`].
    ///
    /// [`value_raw`]: #tymethod.value_raw
    fn value(&self) -> MownStr {
        let raw = self.value_raw();
        raw.into()
    }

    /// Returns the datatype IRI of this term if it is a literal.
    ///
    /// NB: *every* literal has a datatype,
    /// including simple literals (whose implicit type is `xsd:string`)
    /// and language tagged strings (whose implicit type is `rdf:langString`).
    ///
    /// # Note to implementors
    /// The default implementation always returns `None`.
    fn datatype(&self) -> Option<SimpleIri> {
        None
    }

    /// Returns the language tag of this term if it is a language-tagged literal.
    ///
    /// # Note to implementors
    /// The default implementation always returns `None`,
    /// so this method only needs to be overridden
    /// by types that may represent a language-tagged literal.
    fn language(&self) -> Option<&str> {
        None
    }

    /// Returns the "value" of this term, possibly split in two substrings.
    /// The second part might only be non-empty if this term is an IRI reference.
    ///
    /// See also [`value`](#method.value).
    ///
    /// # Note to implementors
    /// The second part of the raw value is meant for implementations of IRIs
    /// that store both a "namespace" and a "suffix".
    /// For any other kind of term, the second part must always be None.
    fn value_raw(&self) -> RawValue;

    /// Tells whether this term is absolute.
    ///
    /// All terms are absolute, except for:
    /// * relative IRI references,
    /// * literals whose datatype is a relative IRI reference.
    ///
    /// # Panics
    /// The default implementation panics on a literal for which both
    /// `language()` and `datatype()` return `None`.
    fn is_absolute(&self) -> bool {
        let kind = self.kind();
        if matches!(kind, Iri) {
            return self.value_raw().is_absolute();
        }
        // Only literals without a language tag need their datatype inspected.
        if matches!(kind, Literal) && self.language().is_none() {
            return self.datatype().unwrap().value_raw().is_absolute();
        }
        true
    }

    /// Ensures that every implementation of `TTerm`
    /// can be turned into a trait object.
    ///
    /// # Why is this required?
    /// After all, passing `&t` instead of `t.as_dyn()`
    /// works just as well in most cases.
    ///
    /// The reason is that most methods of the API accept *references*
    /// to terms, as `&T` where `T: TTerm + ?Sized`,
    /// and such references can *not* be cast to `dyn TTerm`
    /// (see <https://stackoverflow.com/a/57432042/1235487> for more details).
    fn as_dyn(&self) -> &dyn TTerm;
}
/// The kind of a [`TTerm`]: every term belongs to exactly one of these.
///
/// The derived ordering follows the declaration order of the variants:
/// `Iri < Literal < BlankNode < Variable`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TermKind {
    /// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#section-IRIs);
    /// note that Sophia also accepts
    /// [relative IRI references](https://www.ietf.org/rfc/rfc3987.html#section-6.5)
    Iri,
    /// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#section-Graph-Literal)
    Literal,
    /// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#section-blank-nodes)
    BlankNode,
    /// A [variable](https://www.w3.org/TR/sparql11-query/#QSynVariables)
    Variable,
}
use TermKind::*;
/// Implemented by types that can be built as a copy of *any* term.
pub trait CopyTerm: Sized {
    /// Builds an instance of this type by copying `term`.
    fn copy<T: TTerm + ?Sized>(term: &T) -> Self;
}
/// A type that can copy some terms.
pub trait TryCopyTerm: Sized {
/// The error type produced when failing to copy a given term
type Error: 'static + Error;
/// Try to copy `term` into an instance of this type.
fn try_copy<T>(term: &T) -> Result<Self, Self::Error>
where
T: TTerm + ?Sized;
}
/// This trait is to [`CopyTerm`] and [`TryCopyTerm`]
/// what `Into` is to `From`.
/// It is automatically implemented by any implementation of [`TTerm`].
pub trait CopiableTerm {
/// Copy this IRI into another type.
fn copied<T: CopyTerm>(&self) -> T;
/// Try to copy this IRI into another type.
fn try_copied<T: TryCopyTerm>(&self) -> Result<T, T::Error>;
}
/// Blanket implementation: every [`TTerm`] is a [`CopiableTerm`],
/// simply delegating to the target type's [`CopyTerm`] / [`TryCopyTerm`] implementation.
impl<T: TTerm + ?Sized> CopiableTerm for T {
    fn copied<U>(&self) -> U
    where
        U: CopyTerm,
    {
        <U as CopyTerm>::copy(self)
    }

    fn try_copied<U>(&self) -> Result<U, U::Error>
    where
        U: TryCopyTerm,
    {
        <U as TryCopyTerm>::try_copy(self)
    }
}
/// Feeds `term` into the hasher `state`.
///
/// The following data is hashed, in that order:
/// * the kind of the term;
/// * for literals only: the language tag, ASCII-uppercased, if there is one,
///   otherwise the raw value of the datatype;
/// * the raw value of the term.
///
/// # Panics
/// Panics on a literal for which both `language()` and `datatype()` return `None`.
pub fn term_hash<T, H>(term: &T, state: &mut H)
where
    H: Hasher,
    T: TTerm + ?Sized,
{
    let kind = term.kind();
    let raw = term.value_raw();
    kind.hash(state);
    if matches!(kind, Literal) {
        if let Some(tag) = term.language() {
            // Language tags are hashed case-insensitively.
            tag.bytes()
                .for_each(|byte| state.write_u8(byte.to_ascii_uppercase()));
        } else {
            term.datatype().unwrap().value_raw().hash(state);
        }
    }
    // Whatever the kind, the raw value always comes last.
    raw.hash(state);
}
/// Compare two terms for syntactic equality.
///
/// NB: this does not take into account semantics, not even for literals.
/// For example, `"42"^^xsd:integer`, `"042"^^xsd:integer` and `"42.0"^^xsd::decimal`
/// are considered all different from each other.
pub fn term_eq<T1, T2>(t1: &T1, t2: &T2) -> bool
where
T1: TTerm + ?Sized,
T2: TTerm + ?Sized,
{
let k1 = t1.kind();
let k2 = t2.kind();
k1 == k2 && {
let v1 = t1.value_raw();
let v2 = t2.value_raw();
if matches!(k1, Iri) {
v1 == v2
} else {
v1.0 == v2.0 && {
if matches!(k1, Literal) {
match (t1.language(), t2.language()) {
(Some(tag1), Some(tag2)) => tag1.eq_ignore_ascii_case(tag2),
(None, None) => {
let dt1 = t1.datatype().unwrap();
let dt2 = t2.datatype().unwrap();
dt1.value_raw() == dt2.value_raw()
}
_ => false,
}
} else {
true
}
}
}
}
}
/// Compare two terms:
/// * IRIs < literals < blank nodes < variables
/// * IRIs, blank nodes and variables are ordered by their value
/// * Literals are ordered by their datatype, then their language (if any),
/// then their lexical value
///
/// NB: literals are ordered by their *lexical* value,
/// so for example, `"10"^^xsd:integer` come `*before* "2"^^xsd:integer`.
pub fn term_cmp<T1, T2>(t1: &T1, t2: &T2) -> Ordering
where
T1: TTerm + ?Sized,
T2: TTerm + ?Sized,
{
let k1 = t1.kind();
let k2 = t2.kind();
k1.cmp(&k2).then_with(|| {
let v1 = t1.value_raw();
let v2 = t2.value_raw();
match k1 {
Iri => v1.bytes().cmp(v2.bytes()),
Literal => {
let tag1 = t1.language();
let tag2 = t2.language();
//if tag1.is_some() && tag2.is_some() {
if let (Some(tag1), Some(tag2)) = (tag1, tag2) {
tag1.to_uppercase()
.cmp(&tag2.to_uppercase())
.then_with(|| v1.0.cmp(v2.0))
} else {
let dt1 = t1.datatype().unwrap();
let dt2 = t2.datatype().unwrap();
dt1.value_raw()
.bytes()
.cmp(dt2.value_raw().bytes())
.then_with(|| v1.0.cmp(v2.0))
}
}
_ => v1.0.cmp(v2.0),
}
})
}
/// Format the given term in a Turtle-like format.
///
/// * IRIs are written between angle brackets (`<...>`);
/// * literals are written as a quoted string (escaped as per [`std::fmt::Debug`]),
///   followed by `@` and the language tag if any,
///   otherwise by `^^` and the datatype IRI unless the datatype is `xsd:string`;
/// * blank nodes are written as `_:` followed by their identifier;
/// * variables are written as `?` followed by their name.
///
/// # Errors
/// Any error raised by the underlying writer `w` is returned.
///
/// # Panics
/// Panics on a literal for which both `language()` and `datatype()` return `None`.
pub fn term_format<T, W>(term: &T, w: &mut W) -> std::fmt::Result
where
    T: TTerm + ?Sized,
    W: std::fmt::Write,
{
    let v = term.value_raw();
    match term.kind() {
        Iri => {
            w.write_char('<')?;
            w.write_str(v.0)?;
            if let Some(suffix) = v.1 {
                w.write_str(suffix)?;
            }
            w.write_char('>')
        }
        Literal => {
            write!(w, "{:?}", v.0)?;
            if let Some(tag) = term.language() {
                write!(w, "@{}", tag)
            } else {
                let dt = term.datatype().unwrap();
                // `xsd:string` is the implicit datatype, so it is left out.
                if !term_eq(&dt, &crate::ns::xsd::string) {
                    w.write_str("^^")?;
                    term_format(&dt, w)?;
                }
                Ok(())
            }
        }
        BlankNode => write!(w, "_:{}", v.0),
        // SPARQL-style variable: a question mark followed by the name.
        Variable => write!(w, "?{}", v.0),
    }
}
/// Renders the given term into a freshly allocated [`String`],
/// using the same syntax as [`term_format`].
pub fn term_to_string<T>(term: &T) -> String
where
    T: TTerm + ?Sized,
{
    TermFormater(term).to_string()
}
/// Tells whether two graph names (`Option<&Term>`),
/// possibly of different types, are the same.
///
/// Two absent names (`None`) are the same;
/// two present names are the same if they are equal as per [`term_eq`];
/// an absent name is never the same as a present one.
pub fn same_graph_name<T, U>(g1: Option<&T>, g2: Option<&U>) -> bool
where
    T: TTerm + ?Sized,
    U: TTerm + ?Sized,
{
    if let (Some(name1), Some(name2)) = (g1, g2) {
        term_eq(name1, name2)
    } else {
        // At least one name is absent: they only match if both are.
        g1.is_none() && g2.is_none()
    }
}
/// Private adapter giving any borrowed term a [`Display`](std::fmt::Display)
/// implementation based on [`term_format`].
struct TermFormater<'a, T: ?Sized>(&'a T);

impl<T: TTerm + ?Sized> std::fmt::Display for TermFormater<'_, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let TermFormater(term) = self;
        term_format(*term, f)
    }
}
// Only compiled when building this crate's own tests,
// or when the `test_macro` feature is enabled.
#[cfg(any(test, feature = "test_macro"))]
pub mod test;