#![allow(rustdoc::bare_urls)]
use crate::{config::config, resource::Resource};
#[cfg(feature = "hdt")]
use hdt::HdtGraph;
use log::*;
use multimap::MultiMap;
#[cfg(feature = "rdfxml")]
use sophia::serializer::xml::RdfXmlSerializer;
use sophia::{
graph::{inmem::sync::FastGraph, *},
iri::{error::InvalidIri, AsIri, Iri, IriBox},
ns::Namespace,
parser::{nt, turtle},
prefix::{PrefixBox, PrefixMap},
serializer::{
nt::NtSerializer,
turtle::{TurtleConfig, TurtleSerializer},
Stringifier, TripleSerializer,
},
term,
term::{RefTerm, TTerm, Term, Term::*},
triple::{stream::TripleSource, Triple},
};
use std::{
collections::BTreeMap, collections::BTreeSet, collections::HashMap, fmt, fs::File, io::BufReader, path::Path, sync::Arc, sync::OnceLock, time::Instant,
};
#[cfg(feature = "hdt")]
use zstd::stream::read::Decoder;
/// Turtle text of the bundled example knowledge base, embedded at compile time from `../data/example.ttl`.
/// It is parsed by `graph()` when no `kb_file` is configured.
static EXAMPLE_KB: &str = std::include_str!("../data/example.ttl");
/// Maximum number of target values kept per property in `connections_generic`;
/// when a property has more, a single "..." entry is appended after the first `CAP` values.
static CAP: usize = 100;
/// Looks `iri` up in the prefix list returned by `prefixes()` and, on a match,
/// returns the prefix and the remaining suffix as owned strings.
///
/// Returns `None` when the prefix map has no matching entry.
fn get_prefixed_pair(iri: &Iri<'_>) -> Option<(String, String)> {
    prefixes().get_prefixed_pair(iri).map(|(prefix, suffix)| (prefix.to_string(), suffix.to_string()))
}
/// An IRI bundled with its prefixed form, if one of the configured prefixes matches it.
struct Piri {
/// The full IRI.
iri: IriBox,
/// `(prefix, suffix)` as returned by `get_prefixed_pair`, or `None` if no prefix matched.
prefixed: Option<(String, String)>,
}
/// Converts a sophia IRI term into a [`Piri`] by copying its full IRI text.
impl<TD: term::TermData> From<&term::iri::Iri<TD>> for Piri {
    /// The copied text is wrapped with `IriBox::new_unchecked`, i.e. it is not validated again here.
    fn from(tiri: &term::iri::Iri<TD>) -> Self {
        let text = tiri.value().to_string().into_boxed_str();
        Piri::new(IriBox::new_unchecked(text))
    }
}
/// Displays a [`Piri`] as its full IRI text, without angle brackets or prefix abbreviation.
impl fmt::Display for Piri {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.iri.value())
    }
}
impl Piri {
    /// Creates a `Piri` for the resource `suffix` inside the configured namespace.
    ///
    /// Namespace and suffix are concatenated and wrapped with `IriBox::new_unchecked`,
    /// so the result is not validated.
    fn from_suffix(suffix: &str) -> Self {
        let full = config().namespace.clone() + suffix;
        Piri::new(IriBox::new_unchecked(full.into_boxed_str()))
    }

    /// Wraps `iri` and pre-computes its prefixed form via `get_prefixed_pair`.
    fn new(iri: IriBox) -> Self {
        Self { prefixed: get_prefixed_pair(&iri.as_iri()), iri }
    }

    /// Returns the full IRI enclosed in angle brackets, e.g. `<http://example.org/x>`.
    fn embrace(&self) -> String {
        format!("<{self}>")
    }

    /// Returns the `prefix:suffix` form if a prefix matched, otherwise the full IRI.
    ///
    /// * `bold` - wrap the suffix in `<b>…</b>` (only applies to the prefixed form).
    /// * `embrace` - enclose the full IRI in angle brackets (only applies when no prefix matched).
    fn prefixed_string(&self, bold: bool, embrace: bool) -> String {
        match (&self.prefixed, bold, embrace) {
            (Some((p, s)), true, _) => format!("{p}:<b>{s}</b>"),
            (Some((p, s)), false, _) => format!("{p}:{s}"),
            (None, _, true) => self.embrace(),
            (None, _, false) => self.to_string(),
        }
    }

    /// Prefixed form without bold markup; falls back to the bare full IRI.
    fn short(&self) -> String {
        self.prefixed_string(false, false)
    }

    /// Returns the link target for this IRI: IRIs that start with the configured namespace are
    /// rewritten to `<base>/<suffix>`, all other IRIs are returned unchanged.
    ///
    /// Only a *leading* namespace is rewritten. A plain substring replacement would also
    /// rewrite the namespace text where it merely occurs inside another IRI (for example in
    /// the path or query of an external link) and thereby corrupt that link.
    fn root_relative(&self) -> String {
        let value = self.iri.value();
        match value.strip_prefix(config().namespace.as_str()) {
            Some(suffix) => format!("{}/{suffix}", config().base),
            None => value.to_string(),
        }
    }

    /// Renders an HTML anchor pointing to `root_relative()` and labelled with the bold prefixed form.
    fn property_anchor(&self) -> String {
        format!("<a href='{}'>{}</a>", self.root_relative(), self.prefixed_string(true, false))
    }
}
/// The loaded knowledge base in one of the supported in-memory representations.
// The variant size difference is accepted here; within this file the only instance
// is the single value stored in the `GRAPH` static.
#[allow(clippy::large_enum_variant)]
pub enum GraphEnum {
/// Graph built by parsing Turtle or N-Triples (or the bundled example) into memory.
FastGraph(FastGraph),
/// Graph backed by an HDT file; only available with the `hdt` feature.
#[cfg(feature = "hdt")]
HdtGraph(HdtGraph<Arc<str>>),
}
pub fn graph() -> &'static GraphEnum {
GRAPH.get_or_init(|| {
let t = Instant::now();
let triples = match &config().kb_file {
None => {
warn!("No knowledge base configured. Loading example knowledge base. Set kb_file in data/config.toml or env var RICKVIEW_KB_FILE.");
turtle::parse_str(EXAMPLE_KB).collect_triples()
}
Some(filename) => match File::open(filename) {
Err(e) => {
error!("Cannot open knowledge base '{}': {}. Check kb_file in data/config.toml or env var RICKVIEW_KB_FILE.", filename, e);
std::process::exit(1);
}
Ok(file) => {
let reader = BufReader::new(&file);
let triples = match Path::new(&filename).extension().and_then(std::ffi::OsStr::to_str) {
Some("ttl") => turtle::parse_bufread(reader).collect_triples(),
Some("nt") => nt::parse_bufread(reader).collect_triples(),
#[cfg(feature = "hdt")]
Some("zst") if filename.ends_with("hdt.zst") => {
let decoder = Decoder::with_buffer(BufReader::new(file)).expect("Error creating zstd decoder.");
let hdt = hdt::Hdt::new(BufReader::new(decoder)).expect("Error loading HDT.");
info!("Decompressed and loaded HDT from {filename} in {:?}", t.elapsed());
return GraphEnum::HdtGraph(hdt::HdtGraph::new(hdt));
}
#[cfg(feature = "hdt")]
Some("hdt") => {
let hdt_graph = hdt::HdtGraph::new(hdt::Hdt::new(BufReader::new(file)).unwrap());
info!("Loaded HDT from {filename} in {:?}", t.elapsed());
return GraphEnum::HdtGraph(hdt_graph);
}
x => {
error!("Unknown extension: \"{:?}\": cannot parse knowledge base. Aborting.", x);
std::process::exit(1);
}
};
triples
}
},
};
let g: FastGraph = triples.unwrap_or_else(|x| {
error!("Unable to parse knowledge base {}: {}", &config().kb_file.as_deref().unwrap_or("example"), x);
std::process::exit(1);
});
if log_enabled!(Level::Debug) {
info!(
"Loaded ~{} FastGraph triples from {} in {:?}",
g.triples().size_hint().0,
&config().kb_file.as_deref().unwrap_or("example kb"),
t.elapsed()
);
}
GraphEnum::FastGraph(g)
})
}
/// Returns the cached list of `(prefix, namespace IRI)` pairs: every entry of
/// `config().namespaces` followed by the knowledge base's own `prefix`/`namespace` pair.
///
/// All values are wrapped with the `new_unchecked` constructors, so they are not validated.
fn prefixes() -> &'static Vec<(PrefixBox, IriBox)> {
    /// Boxes one prefix/IRI pair without validation.
    fn entry(prefix: &str, iri: &str) -> (PrefixBox, IriBox) {
        (PrefixBox::new_unchecked(Box::<str>::from(prefix)), IriBox::new_unchecked(Box::<str>::from(iri)))
    }
    PREFIXES.get_or_init(|| {
        let cfg = config();
        cfg.namespaces
            .iter()
            .map(|(prefix, iri)| entry(prefix, iri))
            .chain(std::iter::once(entry(&cfg.prefix, &cfg.namespace)))
            .collect()
    })
}
/// Returns the cached map from subject IRI to title.
/// Dispatches to `titles_generic` with whichever graph representation `graph()` holds.
pub fn titles() -> &'static HashMap<String, String> {
match graph() {
GraphEnum::FastGraph(g) => titles_generic(g),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => titles_generic(g),
}
}
/// Builds, on first use, the map from subject IRI to title and caches it in `TITLES`.
///
/// For every property in `config().title_properties`, all triples of `g` with that predicate
/// and a literal object are collected, grouped by language tag (empty string for untagged
/// literals). The groups are then written into the result map such that later writes override
/// earlier ones:
/// * languages are written in reverse order of their position in `config().langs`
///   (unlisted languages first), so the first listed language wins,
/// * properties are visited in reverse order, so within one language an earlier
///   listed property wins.
///
/// When `config().large` is set the map is left empty.
///
/// # Panics
/// Panics if a configured title property is not accepted by `RefTerm::new_iri` or if the
/// graph yields an error while iterating.
fn titles_generic<G: Graph>(g: &G) -> &'static HashMap<String, String> {
    TITLES.get_or_init(|| {
        let mut titles = HashMap::<String, String>::new();
        if config().large {
            return titles;
        }
        let mut by_lang = MultiMap::<String, (String, String)>::new();
        for prop in config().title_properties.iter().rev() {
            let predicate = RefTerm::new_iri(prop.as_ref()).unwrap();
            for result in g.triples_with_p(&predicate) {
                let triple = result.unwrap();
                if let Literal(lit) = Term::<&str>::from(triple.o()) {
                    let lang = match lit.lang() {
                        Some(lang) => (*lang).to_string(),
                        None => String::new(),
                    };
                    by_lang.insert(lang, (triple.s().value().to_string(), (*lit.txt()).to_string()));
                }
            }
        }
        // Least preferred language first, so that preferred languages overwrite it below.
        let mut ordered_tags: Vec<&String> = by_lang.keys().collect();
        ordered_tags.sort_by_cached_key(|tag| config().langs.iter().position(|x| &x == tag).unwrap_or(1000));
        ordered_tags.reverse();
        for tag in ordered_tags {
            if let Some(pairs) = by_lang.get_vec(tag) {
                titles.extend(pairs.iter().cloned());
            }
        }
        titles
    })
}
/// Returns the cached map from subject (with the configured namespace removed) to the value of its type.
/// Dispatches to `types_generic` with whichever graph representation `graph()` holds.
pub fn types() -> &'static HashMap<String, String> {
match graph() {
GraphEnum::FastGraph(g) => types_generic(g),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => types_generic(g),
}
}
/// Builds, on first use, the map from resource suffix to type and caches it in `TYPES`.
///
/// For every property in `config().type_properties` (visited in reverse order, so an earlier
/// listed property overrides a later one) each triple of `g` with that predicate contributes
/// one entry: the key is the subject with a leading `config().namespace` stripped, the value
/// is the object's value. Subjects outside the namespace are keyed by their full IRI.
///
/// Only the *leading* namespace is stripped. Removing every occurrence would mangle suffixes
/// that themselves contain the namespace text, and such keys could never be found again by a
/// lookup with the real suffix.
///
/// When `config().large` is set the map is left empty.
///
/// # Panics
/// Panics if a configured type property is not accepted by `RefTerm::new_iri` or if the
/// graph yields an error while iterating.
fn types_generic<G: Graph>(g: &G) -> &'static HashMap<String, String> {
    TYPES.get_or_init(|| {
        let mut types = HashMap::<String, String>::new();
        if config().large {
            return types;
        }
        let ns = config().namespace.as_str();
        for prop in config().type_properties.iter().rev() {
            let term = RefTerm::new_iri(prop.as_ref()).unwrap();
            for tt in g.triples_with_p(&term) {
                let t = tt.unwrap();
                let subject = t.s().value();
                let key = match subject.strip_prefix(ns) {
                    Some(suffix) => suffix.to_owned(),
                    None => subject.to_string(),
                };
                types.insert(key, t.o().value().to_string());
            }
        }
        types
    })
}
/// Lazily loaded knowledge base, initialized by `graph()`.
static GRAPH: OnceLock<GraphEnum> = OnceLock::new();
/// Lazily built prefix list, initialized by `prefixes()`.
static PREFIXES: OnceLock<Vec<(PrefixBox, IriBox)>> = OnceLock::new();
/// Lazily built subject-to-title map, initialized by `titles_generic()`.
static TITLES: OnceLock<HashMap<String, String>> = OnceLock::new();
/// Lazily built subject-to-type map, initialized by `types_generic()`.
static TYPES: OnceLock<HashMap<String, String>> = OnceLock::new();
/// Lazily created namespace object, initialized by `namespace()`.
static NAMESPACE: OnceLock<Namespace<&'static str>> = OnceLock::new();
/// Returns the configured namespace as a sophia `Namespace`, created on first use and cached in `NAMESPACE`.
///
/// # Panics
/// Panics if `Namespace::new` rejects `config().namespace`.
fn namespace() -> &'static Namespace<&'static str> {
    NAMESPACE.get_or_init(|| {
        let base: &'static str = config().namespace.as_ref();
        Namespace::new(base).unwrap()
    })
}
/// Direction in which triples are collected for a resource in `connections_generic`.
enum ConnectionType {
/// The resource is the subject; the targets are the objects.
Direct,
/// The resource is the object; the targets are the subjects.
Inverse,
}
/// All rendered targets that one property connects to a resource.
#[derive(Debug)]
struct Connection {
/// The property IRI.
prop: IriBox,
/// HTML anchor for the property, produced by `Piri::property_anchor`.
prop_html: String,
/// Rendered HTML of the targets; at most `CAP` entries plus a trailing "..." if there were more.
target_htmls: Vec<String>,
}
/// Collects the direct or inverse connections of the resource `suffix`.
/// Dispatches to `connections_generic` with whichever graph representation `graph()` holds.
fn connections(conn_type: &ConnectionType, suffix: &str) -> Result<Vec<Connection>, InvalidIri> {
match graph() {
GraphEnum::FastGraph(g) => connections_generic(g, conn_type, suffix),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => connections_generic(g, conn_type, suffix),
}
}
/// Collects all triples of `g` that have the resource `suffix` as subject (`Direct`) or as
/// object (`Inverse`), grouped by property.
///
/// Each target (the object for `Direct`, the subject for `Inverse`) is rendered to HTML:
/// * language-tagged literals as `text @lang`,
/// * other literals as the text followed by a `<div class="datatype">` with the short datatype,
/// * IRIs as a link to `Piri::root_relative()`, with the title from `titles()` if one exists
///   and `target='_blank'` for IRIs outside the configured namespace,
/// * anything else as its plain value.
///
/// The result is ordered by property IRI; the targets of each property are sorted, deduplicated
/// and capped at `CAP` entries, followed by "..." when more exist. Triples whose predicate is
/// not an IRI are skipped.
///
/// # Errors
/// Returns `InvalidIri` if a predicate's value is rejected by `IriBox::new`.
///
/// # Panics
/// Panics if the graph yields an error while iterating.
fn connections_generic<G: Graph>(g: &G, conn_type: &ConnectionType, suffix: &str) -> Result<Vec<Connection>, InvalidIri> {
    let source = Piri::from_suffix(suffix);
    let triples = match conn_type {
        ConnectionType::Direct => g.triples_with_s(&source.iri),
        ConnectionType::Inverse => g.triples_with_o(&source.iri),
    };
    // The BTree containers give a deterministic order and drop duplicate targets.
    let mut map: BTreeMap<IriBox, BTreeSet<String>> = BTreeMap::new();
    for res in triples {
        let triple = res.unwrap();
        let target_term = match conn_type {
            ConnectionType::Direct => triple.o(),
            ConnectionType::Inverse => triple.s(),
        };
        // NOTE(review): literal text and IRIs are interpolated into HTML without escaping -
        // confirm that the knowledge base is trusted or escape at this point.
        let target_html = match Term::<_>::from(target_term) {
            Literal(lit) => match lit.lang() {
                Some(lang) => format!("{} @{lang}", lit.txt()),
                None => format!(r#"{}<div class="datatype">{}</div>"#, lit.txt(), Piri::from(&lit.dt()).short()),
            },
            Iri(tiri) => {
                let piri = Piri::from(&tiri);
                // Rendered once and used for both the title lookup and the namespace check.
                let full = piri.to_string();
                let title = match titles().get(&full) {
                    Some(title) => format!("<br><span>↪ {title}</span>"),
                    None => String::new(),
                };
                let target = if full.starts_with(&config().namespace) { "" } else { " target='_blank' " };
                format!("<a href='{}'{target}>{}{title}</a>", piri.root_relative(), piri.prefixed_string(false, true))
            }
            _ => target_term.value().to_string(),
        };
        if let Iri(iri) = Term::<_>::from(triple.p()) {
            let key = IriBox::new(iri.value().into())?;
            map.entry(key).or_default().insert(target_html);
        }
    }
    let connections = map
        .into_iter()
        .map(|(prop, values)| {
            let truncated = values.len() > CAP;
            let mut target_htmls: Vec<String> = values.into_iter().take(CAP).collect();
            if truncated {
                target_htmls.push("...".to_string());
            }
            Connection { prop: prop.clone(), prop_html: Piri::new(prop).property_anchor(), target_htmls }
        })
        .collect();
    Ok(connections)
}
/// Serializes the triples that have the resource `suffix` as subject as RDF/XML.
/// Dispatches to `serialize_rdfxml_generic` with whichever graph representation `graph()` holds.
#[cfg(feature = "rdfxml")]
pub fn serialize_rdfxml(suffix: &str) -> String {
match graph() {
GraphEnum::FastGraph(g) => serialize_rdfxml_generic(g, suffix),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => serialize_rdfxml_generic(g, suffix),
}
}
/// Serializes all triples of `g` whose subject is the resource `suffix` in the configured
/// namespace as RDF/XML.
///
/// # Panics
/// Panics if `suffix` does not yield a valid IRI or if serialization fails.
#[cfg(feature = "rdfxml")]
pub fn serialize_rdfxml_generic<G: Graph>(g: &G, suffix: &str) -> String {
    let subject = namespace().get(suffix).unwrap();
    let mut serializer = RdfXmlSerializer::new_stringifier();
    serializer.serialize_triples(g.triples_with_s(&subject)).unwrap().to_string()
}
/// Serializes the triples that have the resource `suffix` as subject as Turtle.
/// Dispatches to `serialize_turtle_generic` with whichever graph representation `graph()` holds.
pub fn serialize_turtle(suffix: &str) -> String {
match graph() {
GraphEnum::FastGraph(g) => serialize_turtle_generic(g, suffix),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => serialize_turtle_generic(g, suffix),
}
}
/// Serializes all triples of `g` whose subject is the resource `suffix` in the configured
/// namespace as pretty-printed Turtle, using a copy of the `prefixes()` list as prefix map.
///
/// # Panics
/// Panics if `suffix` does not yield a valid IRI or if serialization fails.
fn serialize_turtle_generic<G: Graph>(g: &G, suffix: &str) -> String {
    let subject = namespace().get(suffix).unwrap();
    let turtle_config = TurtleConfig::new().with_pretty(true).with_own_prefix_map(prefixes().clone());
    let mut serializer = TurtleSerializer::new_stringifier_with_config(turtle_config);
    serializer.serialize_triples(g.triples_with_s(&subject)).unwrap().to_string()
}
/// Serializes the triples that have the resource `suffix` as subject as N-Triples.
/// Dispatches to `serialize_nt_generic` with whichever graph representation `graph()` holds.
pub fn serialize_nt(suffix: &str) -> String {
match graph() {
GraphEnum::FastGraph(g) => serialize_nt_generic(g, suffix),
#[cfg(feature = "hdt")]
GraphEnum::HdtGraph(g) => serialize_nt_generic(g, suffix),
}
}
/// Serializes all triples of `g` whose subject is the resource `suffix` in the configured
/// namespace as N-Triples.
///
/// # Panics
/// Panics if `suffix` does not yield a valid IRI or if serialization fails.
fn serialize_nt_generic<G: Graph>(g: &G, suffix: &str) -> String {
    let subject = namespace().get(suffix).unwrap();
    let mut serializer = NtSerializer::new_stringifier();
    serializer.serialize_triples(g.triples_with_s(&subject)).unwrap().to_string()
}
/// Assembles everything that is displayed for the resource `suffix` of the configured namespace.
///
/// * `descriptions` holds the direct connections whose property is listed in
///   `config().description_properties`, `directs` holds all other direct connections.
/// * `inverses` is only populated when `config().show_inverse` is set.
/// * `title` falls back to `suffix` when `titles()` has no entry for the resource.
/// * When neither direct nor inverse connections exist, a warning is logged and appended
///   to `descriptions`.
/// * `duration` is the time spent in this function, formatted with `{:?}`.
///
/// # Errors
/// Returns `InvalidIri` if `connections` fails on an invalid property IRI.
///
/// # Panics
/// Panics if `suffix` is rejected by `Namespace::get`.
pub fn resource(suffix: &str) -> Result<Resource, InvalidIri> {
    /// Keeps the connections whose property IRI satisfies `key_predicate`, as `(property HTML, target HTMLs)` pairs.
    fn filter(cons: &[Connection], key_predicate: fn(&str) -> bool) -> Vec<(String, Vec<String>)> {
        cons.iter().filter(|c| key_predicate(&c.prop.value())).map(|c| (c.prop_html.clone(), c.target_htmls.clone())).collect()
    }
    let start = Instant::now();
    let subject = namespace().get(suffix).unwrap();
    let uri = subject.value().to_string();
    let all_directs = connections(&ConnectionType::Direct, suffix)?;
    let mut descriptions = filter(&all_directs, |key| config().description_properties.contains(key));
    let notdescriptions = filter(&all_directs, |key| !config().description_properties.contains(key));
    // Only allocate the fallback when there is no title for this resource.
    let title = titles().get(&uri).cloned().unwrap_or_else(|| suffix.to_owned());
    let main_type = types().get(suffix).cloned();
    let inverses = if config().show_inverse { filter(&connections(&ConnectionType::Inverse, suffix)?, |_| true) } else { Vec::new() };
    if all_directs.is_empty() && inverses.is_empty() {
        let warning = format!("No triples found for {uri}. Did you configure the namespace correctly?");
        warn!("{warning}");
        descriptions.push(("Warning".to_owned(), vec![warning]));
    }
    Ok(Resource {
        suffix: suffix.to_owned(),
        uri,
        duration: format!("{:?}", start.elapsed()),
        title,
        github_issue_url: config().github.as_ref().map(|g| format!("{g}/issues/new?title={suffix}")),
        main_type,
        descriptions,
        directs: notdescriptions,
        inverses,
    })
}