use crate::error::CuriesError;
use crate::fetch::{ExtendedPrefixMapSource, PrefixMapSource, ShaclSource};
use ptrie::Trie;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use serde_json::{json, Value};
use sophia::api::dataset::Dataset as _;
use sophia::api::graph::MutableGraph as _;
use sophia::api::ns::{xsd, Namespace};
use sophia::api::prefix::Prefix;
use sophia::api::quad::Quad as _;
use sophia::api::serializer::{Stringifier as _, TripleSerializer as _};
use sophia::api::source::QuadSource as _;
use sophia::api::term::matcher::Any;
use sophia::api::term::BnodeId;
use sophia::api::term::Term;
use sophia::inmem::dataset::LightDataset;
use sophia::inmem::graph::LightGraph;
use sophia::iri::Iri;
use sophia::turtle::parser::trig;
use sophia::turtle::serializer::turtle::{TurtleConfig, TurtleSerializer};
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::sync::Arc;
/// A CURIE record: one prefix ↔ URI prefix mapping, with optional synonyms
/// and an optional regex pattern used to validate local unique identifiers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Record {
/// Canonical prefix (e.g. `doid`).
pub prefix: String,
/// Canonical URI prefix (e.g. `http://purl.obolibrary.org/obo/DOID_`).
pub uri_prefix: String,
/// Alternative prefixes that resolve to the same record.
#[serde(default)]
pub prefix_synonyms: HashSet<String>,
/// Alternative URI prefixes that resolve to the same record.
#[serde(default)]
pub uri_prefix_synonyms: HashSet<String>,
/// Optional regex the local id must match (checked by `Converter::validate_id`).
pub pattern: Option<String>,
}
impl Record {
    /// Create a new `Record` from a prefix and its URI prefix,
    /// with no synonyms and no validation pattern.
    pub fn new(prefix: &str, uri_prefix: &str) -> Self {
        Record {
            prefix: prefix.to_string(),
            uri_prefix: uri_prefix.to_string(),
            // `HashSet::new()` is the idiomatic empty set (was `HashSet::from([])`).
            prefix_synonyms: HashSet::new(),
            uri_prefix_synonyms: HashSet::new(),
            pattern: None,
        }
    }
}
impl fmt::Display for Record {
    /// Multi-line, human-readable rendering of the record.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Prefix: {}\nURI prefix: {}\nPrefix synonyms: {:?}\nURI prefix synonyms: {:?}\n",
            self.prefix, self.uri_prefix, self.prefix_synonyms, self.uri_prefix_synonyms
        )
    }
}
/// A `Converter` holds CURIE `Record`s and the lookup structures used to
/// compress URIs into CURIEs and expand CURIEs into URIs.
#[derive(Debug, Clone)]
pub struct Converter {
/// Records in insertion order; each is shared with the lookup maps below.
records: Vec<Arc<Record>>,
/// prefix (or prefix synonym) → record.
prefix_map: HashMap<String, Arc<Record>>,
/// Byte trie over URI prefixes and their synonyms, for longest-prefix matching.
trie: Trie<u8, Arc<Record>>,
/// Separator between prefix and local id in CURIEs (usually `:`).
delimiter: String,
}
impl Serialize for Converter {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let records: Vec<&Record> = self.records.iter().map(|r| &**r).collect();
records.serialize(serializer)
}
}
impl Converter {
pub fn new(delimiter: &str) -> Self {
Converter {
records: Vec::new(),
prefix_map: HashMap::new(),
trie: Trie::<u8, Arc<Record>>::new(),
delimiter: delimiter.to_string(),
}
}
/// Build a converter from a simple prefix map (prefix → URI prefix).
/// Entries whose value is not a JSON string are silently skipped.
pub async fn from_prefix_map<T: PrefixMapSource>(prefix_map: T) -> Result<Self, CuriesError> {
    let map: HashMap<String, Value> = prefix_map.fetch().await?;
    let mut converter = Converter::default();
    for (prefix, value) in map {
        if let Some(uri_prefix) = value.as_str() {
            converter.add_record(Record::new(&prefix, uri_prefix))?;
        }
    }
    Ok(converter)
}
/// Build a converter from a JSON-LD context document.
///
/// Accepts plain `"prefix": "uri"` entries, as well as expanded term
/// definitions carrying `"@prefix": true` with an `"@id"` string.
pub async fn from_jsonld<T: PrefixMapSource>(jsonld: T) -> Result<Self, CuriesError> {
    let doc = jsonld.fetch().await?;
    let Some(Value::Object(context)) = doc.get("@context") else {
        return Err(CuriesError::InvalidFormat("JSON-LD".to_string()));
    };
    let mut converter = Converter::default();
    for (key, value) in context {
        if let Some(uri) = value.as_str() {
            converter.add_record(Record::new(key, uri))?;
        } else if value.get("@prefix") == Some(&Value::Bool(true)) {
            // Expanded term definition: only register it when @id is a string.
            if let Some(uri) = value.get("@id").and_then(Value::as_str) {
                converter.add_record(Record::new(key, uri))?;
            }
        }
    }
    Ok(converter)
}
/// Build a converter from an extended prefix map: a list of full `Record`s
/// including synonyms and patterns.
pub async fn from_extended_prefix_map<T: ExtendedPrefixMapSource>(
    prefix_map: T,
) -> Result<Self, CuriesError> {
    let mut converter = Converter::default();
    for record in prefix_map.fetch().await? {
        converter.add_record(record)?;
    }
    Ok(converter)
}
/// Build a converter from a SHACL prefix-declaration graph serialized as TriG/Turtle.
///
/// Every subject carrying a `sh:prefix` is paired with its `sh:namespace`
/// literal and registered as a record.
pub async fn from_shacl<T: ShaclSource>(shacl: T) -> Result<Self, CuriesError> {
let rdf_str = shacl.fetch().await?;
let mut converter = Converter::default();
// Parse the serialized RDF into an in-memory quad dataset.
let graph: LightDataset = trig::parse_str(&rdf_str)
.collect_quads()
.map_err(|e| CuriesError::InvalidFormat(format!("Error parsing TriG: {e}")))?;
let shacl_ns = Namespace::new("http://www.w3.org/ns/shacl#")?;
// For each quad with predicate sh:prefix, find the sh:namespace quad on the
// same subject and register the (prefix, namespace) pair.
for q_prefix in graph.quads_matching(Any, [shacl_ns.get("prefix")?], Any, Any) {
for q_ns in
graph.quads_matching([q_prefix?.s()], [shacl_ns.get("namespace")?], Any, Any)
{
converter.add_prefix(
// Lexical value of the sh:prefix object; errors on non-literal terms.
q_prefix?
.o()
.lexical_form()
.ok_or(CuriesError::InvalidFormat(format!(
"Prefix term in SHACL graph {:?}",
q_prefix?.o()
)))?
.as_ref(),
// Lexical value of the sh:namespace object; errors on non-literal terms.
q_ns?
.o()
.lexical_form()
.ok_or(CuriesError::InvalidFormat(format!(
"Namespace term in SHACL graph {:?}",
q_ns?.o()
)))?
.as_ref(),
)?;
}
}
Ok(converter)
}
/// Register a `Record` in the converter.
///
/// Fails with `DuplicateRecord` if the prefix, the URI prefix, or any of their
/// synonyms is already registered. All duplicate checks run before any mutation,
/// so a rejected record leaves the converter unchanged.
pub fn add_record(&mut self, record: Record) -> Result<(), CuriesError> {
    let rec = Arc::new(record);
    if self.prefix_map.contains_key(&rec.prefix) {
        return Err(CuriesError::DuplicateRecord(rec.prefix.clone()));
    }
    if self.trie.contains_key(rec.uri_prefix.bytes()) {
        return Err(CuriesError::DuplicateRecord(rec.uri_prefix.clone()));
    }
    for prefix in &rec.prefix_synonyms {
        if self.prefix_map.contains_key(prefix) {
            return Err(CuriesError::DuplicateRecord(prefix.clone()));
        }
    }
    for uri_prefix in &rec.uri_prefix_synonyms {
        if self.trie.contains_key(uri_prefix.bytes()) {
            return Err(CuriesError::DuplicateRecord(uri_prefix.clone()));
        }
    }
    // All checks passed: index the record in every lookup structure.
    self.records.push(rec.clone());
    self.prefix_map.insert(rec.prefix.clone(), rec.clone());
    for prefix in &rec.prefix_synonyms {
        self.prefix_map.insert(prefix.clone(), rec.clone());
    }
    // `bytes()` borrows the String, so the previous `uri_prefix.clone().bytes()`
    // allocated a full copy for nothing — dropped the clone.
    self.trie.insert(rec.uri_prefix.bytes(), rec.clone());
    for uri_prefix in &rec.uri_prefix_synonyms {
        self.trie.insert(uri_prefix.bytes(), rec.clone());
    }
    Ok(())
}
/// Convenience wrapper: register a plain prefix → URI prefix pair as a new `Record`.
pub fn add_prefix(&mut self, prefix: &str, uri_prefix: &str) -> Result<(), CuriesError> {
    let record = Record::new(prefix, uri_prefix);
    self.add_record(record)
}

/// List all prefixes known to the converter, optionally including synonyms.
pub fn get_prefixes(&self, include_synonyms: bool) -> Vec<String> {
    if !include_synonyms {
        return self.records.iter().map(|r| r.prefix.clone()).collect();
    }
    // The prefix map is keyed by canonical prefixes and every synonym.
    self.prefix_map.keys().cloned().collect()
}
/// List all URI prefixes known to the converter, optionally including
/// URI prefix synonyms.
pub fn get_uri_prefixes(&self, include_synonyms: bool) -> Vec<String> {
    if include_synonyms {
        // For each record: the canonical URI prefix followed by its synonyms.
        self.records
            .iter()
            .flat_map(|record| {
                std::iter::once(record.uri_prefix.clone())
                    .chain(record.uri_prefix_synonyms.iter().cloned())
            })
            .collect()
    } else {
        self.records.iter().map(|r| r.uri_prefix.clone()).collect()
    }
}
/// Serialize the converter as an extended prefix map: a JSON list of records.
pub fn write_extended_prefix_map(&self) -> Result<String, CuriesError> {
    let json = serde_json::to_string(&self)?;
    Ok(json)
}

/// Export the converter as a simple prefix map (prefix → URI prefix, no synonyms).
pub fn write_prefix_map(&self) -> HashMap<String, String> {
    let mut map = HashMap::with_capacity(self.records.len());
    for record in &self.records {
        map.insert(record.prefix.clone(), record.uri_prefix.clone());
    }
    map
}
/// Serialize the converter as a SHACL prefixes graph in Turtle format.
pub fn write_shacl(&self) -> Result<String, CuriesError> {
let mut graph = LightGraph::new();
let shacl_ns = Namespace::new("http://www.w3.org/ns/shacl#")?;
// Single blank node that carries one sh:declare per record.
let declare_subject = BnodeId::new_unchecked("declareNode".to_string());
for (i, arc_record) in self.records.iter().enumerate() {
let record = Arc::clone(arc_record);
// One blank node per record, named after its index.
let subject = BnodeId::new_unchecked(format!("{}", i));
graph.insert(&declare_subject, shacl_ns.get("declare")?, &subject)?;
graph.insert(&subject, shacl_ns.get("prefix")?, record.prefix.as_str())?;
graph.insert(
&subject,
shacl_ns.get("namespace")?,
// sophia's `*` operator builds a literal typed as xsd:anyURI.
record.uri_prefix.as_str() * xsd::anyURI,
)?;
}
// Prefix map used only for pretty-printing the Turtle output.
let ttl_prefixes = [
(
Prefix::new_unchecked("xsd".to_string()),
Iri::new_unchecked("http://www.w3.org/2001/XMLSchema#".to_string()),
),
(
Prefix::new_unchecked("sh".to_string()),
Iri::new_unchecked("http://www.w3.org/ns/shacl#".to_string()),
),
];
let ttl_config = TurtleConfig::new()
.with_pretty(true)
.with_prefix_map(&ttl_prefixes[..]);
let mut ttl_stringifier = TurtleSerializer::new_stringifier_with_config(ttl_config);
Ok(ttl_stringifier.serialize_graph(&graph)?.to_string())
}
/// Export the converter as a JSON-LD `@context` object: every prefix and
/// every prefix synonym maps to the record's canonical URI prefix.
pub fn write_jsonld(&self) -> serde_json::Value {
    let mut context = serde_json::Map::new();
    for record in &self.records {
        context.insert(record.prefix.clone(), json!(record.uri_prefix));
        for synonym in &record.prefix_synonyms {
            context.insert(synonym.clone(), json!(record.uri_prefix));
        }
    }
    json!({ "@context": context })
}
/// Chain multiple converters into one, merging records that share a prefix
/// (or prefix synonym). The first converter takes priority: its canonical
/// prefixes and URI prefixes win, and conflicting values from later
/// converters are folded in as synonyms.
pub fn chain(mut converters: Vec<Converter>) -> Result<Converter, CuriesError> {
if converters.is_empty() {
return Err(CuriesError::InvalidFormat(
"The list of converters is empty".to_string(),
));
}
let mut base_converter = converters.remove(0);
for converter in converters {
for arc_record in converter.records {
// Take ownership of the record, deep-cloning only if still shared.
let record = Arc::try_unwrap(arc_record).unwrap_or_else(|arc| (*arc).clone());
// Look up an existing record by canonical prefix first, then by prefix synonyms.
let find_record = |r: &Record| -> Option<Arc<Record>> {
base_converter
.prefix_map
.get(&r.prefix)
.cloned()
.or_else(|| {
r.prefix_synonyms
.iter()
.find_map(|synonym| base_converter.prefix_map.get(synonym).cloned())
})
};
if let Some(existing_arc) = find_record(&record) {
if existing_arc.uri_prefix != record.uri_prefix {
// NOTE(review): `try_unwrap` on a fresh clone always fails (the map still
// holds another reference), so this always falls back to a deep clone.
let mut updated_record = Arc::try_unwrap(existing_arc.clone())
.unwrap_or_else(|arc| (*arc).clone());
// Demote the incoming URI prefix (and its synonyms) to synonyms of
// the existing record, and merge the prefix synonyms.
updated_record
.uri_prefix_synonyms
.insert(record.uri_prefix.clone());
updated_record
.uri_prefix_synonyms
.extend(record.uri_prefix_synonyms.clone());
updated_record
.prefix_synonyms
.extend(record.prefix_synonyms.clone());
base_converter.update_record(updated_record)?;
}
} else {
base_converter.add_record(record)?;
}
}
}
Ok(base_converter)
}
/// Replace the record that has the same canonical prefix, refreshing all
/// lookup structures. Returns `NotFound` if no such record exists.
///
/// NOTE(review): synonyms present on the old record but absent from the new
/// one are not removed from the maps here — presumably callers only ever grow
/// synonym sets (as `chain` does); verify before relying on shrinking updates.
pub fn update_record(&mut self, record: Record) -> Result<(), CuriesError> {
let rec = Arc::new(record);
// Swap the record in the ordered list, keyed by canonical prefix.
if let Some(pos) = self.records.iter().position(|r| r.prefix == rec.prefix) {
self.records[pos] = rec.clone();
} else {
return Err(CuriesError::NotFound(rec.prefix.clone()));
}
// Re-point the prefix and all its synonyms at the new record.
self.prefix_map.insert(rec.prefix.clone(), rec.clone());
for prefix in &rec.prefix_synonyms {
self.prefix_map.insert(prefix.clone(), rec.clone());
}
// Update the trie value in place; insert if the key was not present yet.
if self
.trie
.set_value(rec.uri_prefix.bytes(), rec.clone())
.is_err()
{
self.trie.insert(rec.uri_prefix.bytes(), rec.clone());
}
for uri_prefix in &rec.uri_prefix_synonyms {
if self
.trie
.set_value(uri_prefix.bytes(), rec.clone())
.is_err()
{
self.trie.insert(uri_prefix.bytes(), rec.clone());
}
}
Ok(())
}
/// Find a record by its prefix or one of its prefix synonyms.
pub fn find_by_prefix(&self, prefix: &str) -> Result<&Arc<Record>, CuriesError> {
    self.prefix_map
        .get(prefix)
        .ok_or_else(|| CuriesError::NotFound(prefix.to_string()))
}

/// Find a record by its exact URI prefix or one of its URI prefix synonyms.
pub fn find_by_uri_prefix(&self, uri_prefix: &str) -> Result<&Arc<Record>, CuriesError> {
    self.trie
        .get(uri_prefix.bytes())
        .ok_or_else(|| CuriesError::NotFound(uri_prefix.to_string()))
}

/// Find the record whose URI prefix is the longest prefix of the given URI.
pub fn find_by_uri(&self, uri: &str) -> Result<&Arc<Record>, CuriesError> {
    self.trie
        .find_longest_prefix(uri.bytes())
        .ok_or_else(|| CuriesError::NotFound(uri.to_string()))
}
/// Check `id` against the record's regex `pattern`, if one is set.
/// Records without a pattern accept any id.
fn validate_id(&self, id: &str, record: &Arc<Record>) -> Result<(), CuriesError> {
    let Some(pattern) = &record.pattern else {
        return Ok(());
    };
    let regex = Regex::new(pattern)
        .map_err(|_| CuriesError::InvalidFormat(format!("Invalid regex pattern {pattern}")))?;
    if regex.is_match(id) {
        Ok(())
    } else {
        Err(CuriesError::InvalidFormat(format!(
            "ID {id} does not match the pattern {pattern}"
        )))
    }
}
/// Compress a URI into a CURIE, e.g. `…/obo/DOID_1234` → `doid:1234`.
///
/// The record comes from a longest-prefix match on the trie; the local id is
/// whatever remains after stripping the canonical URI prefix, or — failing
/// that — the longest matching URI prefix synonym. The id is then validated
/// against the record's pattern, if any.
pub fn compress(&self, uri: &str) -> Result<String, CuriesError> {
let record = self.find_by_uri(uri)?;
let id = uri
.strip_prefix(&record.uri_prefix)
.or_else(|| {
// Fall back to the longest URI prefix synonym that matches.
record
.uri_prefix_synonyms
.iter()
.filter(|synonym| uri.starts_with(&**synonym))
.max_by_key(|synonym| synonym.len()) .and_then(|synonym| uri.strip_prefix(synonym))
})
.ok_or_else(|| CuriesError::NotFound(uri.to_string()))?;
self.validate_id(id, record)?;
Ok(format!("{}{}{}", &record.prefix, self.delimiter, id))
}
/// Expand a CURIE (`prefix<delimiter>id`) into a full URI.
/// The CURIE must contain the delimiter exactly once.
pub fn expand(&self, curie: &str) -> Result<String, CuriesError> {
    let parts: Vec<&str> = curie.split(&self.delimiter).collect();
    let (prefix, id) = match parts.as_slice() {
        [prefix, id] => (*prefix, *id),
        _ => return Err(CuriesError::InvalidCurie(curie.to_string())),
    };
    let record = self.find_by_prefix(prefix)?;
    self.validate_id(id, record)?;
    Ok(format!("{}{}", record.uri_prefix, id))
}
/// Compress every URI in the list. Failures yield the input unchanged when
/// `passthrough` is `true`, otherwise `None`.
pub fn compress_list(&self, uris: Vec<&str>, passthrough: bool) -> Vec<Option<String>> {
    uris.into_iter()
        .map(|uri| {
            self.compress(uri)
                .ok()
                .or_else(|| passthrough.then(|| uri.to_string()))
        })
        .collect()
}

/// Expand every CURIE in the list. Failures yield the input unchanged when
/// `passthrough` is `true`, otherwise `None`.
pub fn expand_list(&self, curies: Vec<&str>, passthrough: bool) -> Vec<Option<String>> {
    curies
        .into_iter()
        .map(|curie| {
            self.expand(curie)
                .ok()
                .or_else(|| passthrough.then(|| curie.to_string()))
        })
        .collect()
}
/// Returns `true` if this converter can expand the string as a CURIE.
pub fn is_curie(&self, curie: &str) -> bool {
self.expand(curie).is_ok()
}
/// Returns `true` if this converter can compress the string as a URI.
pub fn is_uri(&self, uri: &str) -> bool {
self.compress(uri).is_ok()
}
/// If the input is a CURIE, standardize it; otherwise try to compress it as a URI.
pub fn compress_or_standardize(&self, input: &str) -> Result<String, CuriesError> {
    if self.is_curie(input) {
        self.standardize_curie(input)
    } else {
        self.compress(input)
    }
}

/// If the input is a CURIE, expand it; otherwise try to standardize it as a URI.
pub fn expand_or_standardize(&self, input: &str) -> Result<String, CuriesError> {
    // Return the inner results directly instead of re-wrapping with `Ok(...?)`,
    // matching the style of `compress_or_standardize`.
    if self.is_curie(input) {
        self.expand(input)
    } else {
        self.standardize_uri(input)
    }
}
/// Return the canonical prefix for a prefix or prefix synonym.
pub fn standardize_prefix(&self, prefix: &str) -> Result<String, CuriesError> {
    Ok(self.find_by_prefix(prefix)?.prefix.to_string())
}

/// Normalize a CURIE so it uses the record's canonical prefix.
/// A string that does not contain the delimiter exactly once is returned unchanged.
pub fn standardize_curie(&self, curie: &str) -> Result<String, CuriesError> {
    // Use the converter's configured delimiter, consistent with `expand`/`compress`
    // (this was previously hard-coded to ':', which broke converters constructed
    // with a custom delimiter).
    let parts: Vec<&str> = curie.split(&self.delimiter).collect();
    if parts.len() == 2 {
        Ok(format!(
            "{}{}{}",
            self.standardize_prefix(parts[0])?,
            self.delimiter,
            parts[1]
        ))
    } else {
        Ok(curie.to_string())
    }
}
/// Normalize a URI so it uses the record's canonical URI prefix.
///
/// If the URI already starts with the canonical prefix it is returned as-is;
/// otherwise the longest matching URI prefix synonym is replaced by the
/// canonical prefix.
pub fn standardize_uri(&self, uri: &str) -> Result<String, CuriesError> {
let rec = self.find_by_uri(uri)?;
if uri.starts_with(&rec.uri_prefix) {
Ok(uri.to_string())
} else {
// Find the longest synonym that prefixes the URI and strip it off.
let (_new_prefix, id) = rec
.uri_prefix_synonyms
.iter()
.filter(|synonym| uri.starts_with(&**synonym))
.max_by_key(|synonym| synonym.len()) .and_then(|synonym| uri.strip_prefix(synonym).map(|id| (synonym, id)))
.ok_or_else(|| CuriesError::NotFound(uri.to_string()))?;
Ok(format!("{}{}", rec.uri_prefix, id))
}
}
pub fn len(&self) -> usize {
self.records.len()
}
pub fn is_empty(&self) -> bool {
self.records.is_empty()
}
}
impl Default for Converter {
fn default() -> Self {
Self::new(":")
}
}
impl fmt::Display for Converter {
    /// One-line, human-readable summary of the converter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Converter contains {} records", self.records.len())
    }
}