use crate::error::NpError;
use crate::utils::{graph_iri_to_string, subject_iri_to_string};
use base64::{alphabet, engine, Engine as _};
use oxrdf::{
Dataset, GraphNameRef, NamedNode, NamedNodeRef, NamedOrBlankNode, NamedOrBlankNodeRef, QuadRef,
TermRef,
};
use regex::Regex;
use rsa::{sha2::Digest, sha2::Sha256};
use std::cmp::Ordering;
use std::collections::HashMap;
/// Computes the trusty artifact code for `dataset`.
///
/// The dataset is serialised with [`normalize_dataset`], the resulting text is hashed with
/// SHA-256, and the digest is base64-encoded without padding using the
/// `A-Z a-z 0-9 - _` alphabet. The returned string is that encoding prefixed with `RA`.
///
/// # Errors
///
/// Propagates any error from [`normalize_dataset`] or from constructing the base64 alphabet.
pub fn make_trusty(
    dataset: &Dataset,
    base_ns: &str,
    norm_ns: &str,
    separator: &str,
) -> Result<String, NpError> {
    // Normalisation runs first so that its error, if any, is the one reported.
    let normalized = normalize_dataset(dataset, base_ns, norm_ns, separator)?;

    let url_safe_alphabet = alphabet::Alphabet::new(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
    )?;
    let unpadded = engine::GeneralPurposeConfig::new().with_encode_padding(false);
    let encoder = engine::GeneralPurpose::new(&url_safe_alphabet, unpadded);

    let digest = Sha256::digest(normalized.as_bytes());
    let encoded = encoder.encode(digest);
    Ok(format!("RA{encoded}"))
}
pub fn replace_bnodes(
dataset: &Dataset,
base_ns: &str,
base_uri: &str,
) -> Result<Dataset, NpError> {
let mut new_dataset = Dataset::new();
let mut bnode_map: HashMap<String, usize> = HashMap::new();
let mut bnode_count = 1;
let re_underscore_uri = Regex::new(&format!(r"{base_uri}.?(_+[a-zA-Z0-9^_]+)$"))?;
for quad in dataset.iter() {
let subject_string = match quad.subject {
NamedOrBlankNodeRef::BlankNode(bnode) => {
let bnode_id = bnode.as_str();
bnode_map.entry(bnode_id.to_string()).or_insert_with(|| {
let counter = bnode_count;
bnode_count += 1;
counter
});
format!("{}_{}", base_ns, bnode_map[bnode_id])
}
NamedOrBlankNodeRef::NamedNode(named) => {
let mut subject_iri_str = named.as_str().to_owned();
if let Some(caps) = re_underscore_uri.captures(&subject_iri_str) {
let matching = caps
.get(1)
.ok_or(NpError("Error with regex".to_string()))?
.as_str();
let new_ending = matching.replacen('_', "__", 1);
subject_iri_str.truncate(subject_iri_str.len() - matching.len()); subject_iri_str.push_str(&new_ending);
}
subject_iri_str
}
};
let GraphNameRef::NamedNode(graph_iri) = quad.graph_name else {
return Err(NpError("Failed to extract graph name IRI.".to_string()));
};
let graph_node = if let Some(caps) = re_underscore_uri.captures(graph_iri.as_str()) {
let mut graph_string = graph_iri.into_owned().into_string();
let matching = caps
.get(1)
.ok_or(NpError("Error with regex".to_string()))?
.as_str();
let new_ending = matching.replacen('_', "__", 1);
graph_string.truncate(graph_string.len() - matching.len()); graph_string.push_str(&new_ending);
&NamedNode::new_unchecked(graph_string)
} else {
&graph_iri.into_owned()
};
let subject_node = NamedNodeRef::new_unchecked(subject_string.as_str());
match quad.object {
TermRef::BlankNode(bnode) => {
let bnode_id = bnode.as_str();
bnode_map.entry(bnode_id.to_string()).or_insert_with(|| {
let counter = bnode_count;
bnode_count += 1;
counter
});
let object_string = format!("{}_{}", base_ns, bnode_map[bnode_id]);
let object_node = NamedNodeRef::new_unchecked(object_string.as_str());
new_dataset.insert(QuadRef::new(
subject_node,
quad.predicate,
object_node,
graph_node,
));
}
TermRef::NamedNode(named) => {
let object_iri = named.as_str();
if let Some(caps) = re_underscore_uri.captures(object_iri) {
let mut object_string = object_iri.to_string();
let matching = caps
.get(1)
.ok_or(NpError("Error with regex".to_string()))?
.as_str();
let new_ending = matching.replacen('_', "__", 1);
object_string.truncate(object_string.len() - matching.len()); object_string.push_str(&new_ending);
let object_node = NamedNodeRef::new_unchecked(object_string.as_str());
new_dataset.insert(QuadRef::new(
subject_node,
quad.predicate,
object_node,
graph_node,
));
} else {
new_dataset.insert(QuadRef::new(
subject_node,
quad.predicate,
named,
graph_node,
));
}
}
_ => {
new_dataset.insert(QuadRef::new(
subject_node,
quad.predicate,
quad.object,
graph_node,
));
}
};
}
Ok(new_dataset)
}
/// Builds a copy of `dataset` with the old namespace/URI swapped for the new one.
///
/// For subjects and named-node objects: an IRI equal to `old_ns` or `old_uri` becomes
/// `new_uri`; any other IRI has every occurrence of `old_ns` replaced by `new_ns`.
/// Graph names only get the `old_ns` -> `new_ns` replacement. Predicates and non-IRI objects
/// are copied unchanged.
///
/// # Errors
///
/// Propagates errors from `subject_iri_to_string` and `graph_iri_to_string`.
pub fn replace_ns_in_quads(
    dataset: &Dataset,
    old_ns: &str,
    old_uri: &str,
    new_ns: &str,
    new_uri: &str,
) -> Result<Dataset, NpError> {
    let mut rewritten = Dataset::new();

    for quad in dataset.iter() {
        let original_subject = subject_iri_to_string(quad.subject)?;
        let subject_iri = if original_subject == old_ns || original_subject == old_uri {
            new_uri.to_owned()
        } else {
            original_subject.replace(old_ns, new_ns)
        };

        let graph_iri = graph_iri_to_string(quad.graph_name)?.replace(old_ns, new_ns);

        // Only named-node objects are rewritten; `None` means "keep the original term".
        let replaced_object = match quad.object {
            TermRef::NamedNode(iri) => {
                let text = iri.as_str();
                let node = if text == old_ns || text == old_uri {
                    NamedNode::new_unchecked(new_uri)
                } else {
                    NamedNode::new_unchecked(text.replace(old_ns, new_ns))
                };
                Some(node)
            }
            _ => None,
        };
        let object_term = match &replaced_object {
            Some(node) => TermRef::from(node.as_ref()),
            None => quad.object,
        };

        rewritten.insert(QuadRef::new(
            NamedNodeRef::new_unchecked(subject_iri.as_str()),
            quad.predicate,
            object_term,
            NamedNodeRef::new_unchecked(graph_iri.as_str()),
        ));
    }

    Ok(rewritten)
}
/// Names one component of a [`NormQuad`].
///
/// `normalize_dataset` lists these variants in an array to define the order in which the
/// components are compared when sorting quads.
#[derive(Debug, Copy, Clone)]
enum Field {
    /// The `graph` component.
    Graph,
    /// The `subject` component.
    Subject,
    /// The `predicate` component.
    Predicate,
    /// The `object` component.
    Object,
    /// The `datatype` component.
    Datatype,
    /// The `lang` component.
    Lang,
}
/// String form of a single quad as produced by `normalize_dataset` before sorting and
/// serialisation.
struct NormQuad {
    /// Normalised graph name IRI.
    graph: String,
    /// Normalised subject IRI.
    subject: String,
    /// Predicate IRI with the base namespace replaced.
    predicate: String,
    /// Normalised object IRI, or the escaped lexical value when the object is a literal.
    object: String,
    /// Datatype IRI of a literal object; empty when the object is not a literal.
    datatype: String,
    /// Language tag of a literal object; empty when there is none.
    lang: String,
}
/// Adjusts the part of `uri` that follows its last space so that it is introduced by
/// `separator`.
///
/// * No space in `uri`: returned unchanged.
/// * `uri` ends with a space followed by `separator`, or ends with a space: a trailing
///   `separator` (if any) is removed.
/// * The text after the last space already starts with `separator`: returned unchanged.
/// * Otherwise `separator` is inserted right after the last space.
pub fn fix_normed_uri(uri: &str, separator: &str) -> String {
    // Split around the last space; without one there is nothing to adjust.
    let Some((head, tail)) = uri.rsplit_once(' ') else {
        return uri.to_string();
    };

    let without_separator = uri.strip_suffix(separator);
    // Same as checking that `uri` ends with a space followed by `separator`.
    let ends_with_bare_separator = without_separator.is_some_and(|rest| rest.ends_with(' '));

    if ends_with_bare_separator || tail.is_empty() {
        without_separator.unwrap_or(uri).to_string()
    } else if tail.starts_with(separator) {
        uri.to_string()
    } else {
        format!("{head} {separator}{tail}")
    }
}
pub fn normalize_dataset(
dataset: &Dataset,
base_ns: &str,
norm_ns: &str,
separator: &str,
) -> Result<String, NpError> {
let mut quads_vec: Vec<NormQuad> = vec![];
let norm_uri = format!("{norm_ns} ");
for quad in dataset.iter() {
let graph = fix_normed_uri(
&graph_iri_to_string(quad.graph_name)?.replace(base_ns, &norm_uri),
separator,
);
let mut datatype = "".to_string();
let mut lang = "".to_string();
let subject = if subject_iri_to_string(quad.subject)? == base_ns {
norm_uri.clone()
} else {
fix_normed_uri(
&subject_iri_to_string(quad.subject)?.replace(base_ns, &norm_uri),
separator,
)
};
let predicate = quad
.predicate
.into_owned()
.into_string()
.replace(base_ns, &norm_uri);
let object = match quad.object {
TermRef::NamedNode(iri) => {
if iri.as_str() == base_ns {
norm_uri.to_string()
} else {
fix_normed_uri(&iri.as_str().replace(base_ns, &norm_uri), separator)
}
}
TermRef::Literal(literal) => {
datatype = literal.datatype().into_owned().into_string();
lang = literal.language().unwrap_or_default().to_owned();
literal.value().replace('\\', "\\\\").replace('\n', "\\n")
}
other => {
return Err(NpError(format!(
"Failed to extract literal from object: Got {other:?}"
)));
}
};
quads_vec.push(NormQuad {
graph,
subject,
predicate,
object,
datatype,
lang,
});
}
use Field::*;
let orders = [Graph, Subject, Predicate, Lang, Datatype, Object];
quads_vec.sort_by(|a, b| {
orders.iter().fold(Ordering::Equal, |acc, &field| {
acc.then_with(|| match field {
Graph => a.graph.cmp(&b.graph),
Subject => a.subject.cmp(&b.subject),
Predicate => a.predicate.cmp(&b.predicate),
Lang => a.lang.cmp(&b.lang),
Datatype => a.datatype.cmp(&b.datatype),
Object => a.object.cmp(&b.object),
})
})
});
let mut normed_quads = String::new();
for quad in quads_vec {
normed_quads.push_str(&format!("{}\n", quad.graph));
normed_quads.push_str(&format!("{}\n", quad.subject));
normed_quads.push_str(&format!("{}\n", quad.predicate));
let formatted_object = if !quad.lang.is_empty() {
format!("@{} {}", quad.lang, quad.object)
} else if !quad.datatype.is_empty() {
format!("^{} {}", quad.datatype, quad.object)
} else {
quad.object
};
normed_quads.push_str(&formatted_object);
normed_quads.push('\n');
}
Ok(normed_quads)
}