use std::collections::{HashMap, HashSet};
use crate::sbol2_vocab as v2;
use crate::vocab as v3;
use crate::{Iri, Literal, Resource, Term, Triple};
use sbol_rdf::Graph;
/// Lookup tables computed by [`IdentityMap::build`] and consulted while
/// rewriting triples.
pub(super) struct IdentityMap {
/// Original IRI -> replacement IRI. Only IRIs whose replacement differs from
/// the original are stored; entries can also be added via `add_rewrite`.
rewrites: HashMap<String, String>,
/// Canonical (post-rewrite) IRI -> the prefix left after removing the
/// trailing display-id segment from its persistent identity or, failing
/// that, from the canonical IRI itself.
namespaces: HashMap<String, String>,
}
impl IdentityMap {
    /// Scans every triple of `graph` once and derives the rewrite and
    /// namespace tables.
    ///
    /// Triples whose subject is not an IRI are skipped entirely. An IRI
    /// becomes a rewrite candidate when it is
    /// * the subject of an `rdf:type` triple whose object IRI starts with the
    ///   SBOL2 namespace,
    /// * the subject of a persistent-identity triple, or
    /// * the IRI object of one of the predicates accepted by
    ///   `is_sbol2_identity_reference_predicate`.
    ///
    /// Each candidate is mapped through `canonical_identity`; only candidates
    /// whose canonical form differs from the original end up in the rewrite
    /// table.
    pub(super) fn build(graph: &Graph) -> Self {
        let mut persistent_identity: HashMap<String, String> = HashMap::new();
        let mut display_id: HashMap<String, String> = HashMap::new();
        let mut rewrite_candidates: HashSet<String> = HashSet::new();

        for triple in graph.triples() {
            // Borrow the subject; it is only copied in the arms that store it.
            let Some(subject) = triple.subject.as_iri().map(|iri| iri.as_str()) else {
                continue;
            };
            let predicate = triple.predicate.as_str();

            match predicate {
                v3::RDF_TYPE => {
                    let typed_in_sbol2 = triple
                        .object
                        .as_iri()
                        .is_some_and(|ty| ty.as_str().starts_with(v2::SBOL2_NS));
                    if typed_in_sbol2 {
                        rewrite_candidates.insert(subject.to_owned());
                    }
                }
                v2::SBOL2_PERSISTENT_IDENTITY => {
                    rewrite_candidates.insert(subject.to_owned());
                    // A literal object takes precedence over an IRI object.
                    let declared = literal_value(&triple.object)
                        .or_else(|| triple.object.as_iri().map(|iri| iri.as_str()));
                    if let Some(declared) = declared {
                        persistent_identity.insert(subject.to_owned(), declared.to_owned());
                    }
                }
                v2::SBOL2_DISPLAY_ID => {
                    if let Some(value) = literal_value(&triple.object) {
                        display_id.insert(subject.to_owned(), value.to_owned());
                    }
                }
                _ => {}
            }

            // Independently of the arm above, IRIs referenced through an
            // identity-bearing predicate are candidates as well.
            if is_sbol2_identity_reference_predicate(predicate)
                && let Some(target) = triple.object.as_iri()
            {
                rewrite_candidates.insert(target.as_str().to_owned());
            }
        }

        // Keep only the candidates that actually change.
        let rewrites: HashMap<String, String> = rewrite_candidates
            .into_iter()
            .filter_map(|iri| {
                let canonical = canonical_identity(&iri, &persistent_identity);
                (canonical != iri).then_some((iri, canonical))
            })
            .collect();

        let mut namespaces: HashMap<String, String> = HashMap::new();
        for (original, did) in &display_id {
            let canonical = rewrites.get(original).unwrap_or(original);
            // Prefer the namespace implied by the persistent identity and fall
            // back to the one implied by the canonical IRI.
            let namespace = persistent_identity
                .get(original)
                .and_then(|pid| strip_suffix_segment(pid, did))
                .or_else(|| strip_suffix_segment(canonical, did));
            if let Some(namespace) = namespace {
                namespaces.insert(canonical.clone(), namespace.to_owned());
            }
        }

        Self {
            rewrites,
            namespaces,
        }
    }

    /// Returns the replacement registered for `iri`, or `iri` itself when no
    /// rewrite is registered. The table is consulted exactly once; the result
    /// is not looked up again.
    pub(super) fn rewrite_iri<'a>(&'a self, iri: &'a str) -> &'a str {
        self.rewrites.get(iri).map_or(iri, String::as_str)
    }

    /// Registers (or overwrites) the rewrite `from` -> `to`.
    pub(super) fn add_rewrite(&mut self, from: String, to: String) {
        self.rewrites.insert(from, to);
    }

    /// Returns the namespace recorded for `iri`, if any. The table built by
    /// `build` is keyed by canonical (post-rewrite) IRIs.
    pub(super) fn namespace_for(&self, iri: &str) -> Option<&str> {
        let namespace = self.namespaces.get(iri)?;
        Some(namespace.as_str())
    }

    /// Returns `resource` with its IRI rewritten. Non-IRI resources and IRIs
    /// that map to themselves are returned as plain clones.
    pub(super) fn rewrite_resource(&self, resource: &Resource) -> Resource {
        let Resource::Iri(iri) = resource else {
            return resource.clone();
        };
        let current = iri.as_str();
        let target = self.rewrite_iri(current);
        if target == current {
            resource.clone()
        } else {
            Resource::Iri(Iri::new_unchecked(target))
        }
    }

    /// Rewrites a term: resource terms go through `rewrite_resource`, every
    /// other kind of term is cloned unchanged.
    pub(super) fn rewrite_term(&self, term: &Term) -> Term {
        if let Term::Resource(resource) = term {
            Term::Resource(self.rewrite_resource(resource))
        } else {
            term.clone()
        }
    }

    /// Builds a new triple with rewritten subject and object; the predicate is
    /// cloned as-is.
    pub(super) fn rewrite_triple(&self, triple: &Triple) -> Triple {
        let subject = self.rewrite_resource(&triple.subject);
        let object = self.rewrite_term(&triple.object);
        Triple {
            subject,
            predicate: triple.predicate.clone(),
            object,
        }
    }
}
/// Picks the canonical form of `iri`.
///
/// An entry for `iri` in `persistent_identity` always wins. Otherwise the IRI
/// with its trailing version segment removed is used, and if there is no such
/// segment the IRI is returned unchanged.
fn canonical_identity(iri: &str, persistent_identity: &HashMap<String, String>) -> String {
    match persistent_identity.get(iri) {
        Some(declared) => declared.clone(),
        None => strip_trailing_version(iri).unwrap_or_else(|| iri.to_owned()),
    }
}
/// Removes a trailing version segment from `iri`.
///
/// The version is the text after the last `/` (or, for `urn:` IRIs, after the
/// last `/` or `:`, whichever comes later). It must start with an ASCII digit
/// and consist solely of ASCII digits and dots, e.g. `1` or `1.2.3`.
///
/// Returns the IRI without the separator and version, or `None` when
/// * there is no separator, or the text after it is not a version, or
/// * removing it would not leave a usable IRI: the remainder would be empty,
///   would end in a separator (the "version" is really the authority of
///   `http://127.0.0.1`), or would no longer carry the `urn:` scheme
///   (`urn:42`).
pub(super) fn strip_trailing_version(iri: &str) -> Option<String> {
    let is_urn = iri.starts_with("urn:");
    let sep_pos = if is_urn {
        iri.rfind(|c: char| matches!(c, '/' | ':'))
    } else {
        iri.rfind('/')
    }?;

    // Both separators are single-byte ASCII, so slicing around `sep_pos`
    // always lands on character boundaries.
    let prefix = &iri[..sep_pos];
    let tail = &iri[sep_pos + 1..];

    let looks_like_version = tail.starts_with(|c: char| c.is_ascii_digit())
        && tail.chars().all(|c| c.is_ascii_digit() || c == '.');
    if !looks_like_version {
        return None;
    }

    // Refuse to cut into the scheme/authority part of the IRI.
    let prefix_is_usable = !prefix.is_empty()
        && !prefix.ends_with(|c: char| matches!(c, '/' | ':'))
        && (!is_urn || prefix.starts_with("urn:"));
    if !prefix_is_usable {
        return None;
    }

    Some(prefix.to_owned())
}
/// Removes a trailing `/segment` or `:segment` from `value`.
///
/// Returns the part of `value` in front of the separator, or `None` when
/// `value` does not end with `segment` preceded by `/` or `:`.
fn strip_suffix_segment<'a>(value: &'a str, segment: &str) -> Option<&'a str> {
    // Peel the segment first, then exactly one separator; `/` is tried
    // before `:`.
    let before_segment = value.strip_suffix(segment)?;
    before_segment
        .strip_suffix('/')
        .or_else(|| before_segment.strip_suffix(':'))
}
/// Returns `true` when `predicate` is exactly one of the SBOL2 vocabulary
/// predicates listed below. `IdentityMap::build` treats the IRI objects of
/// such predicates as rewrite candidates.
fn is_sbol2_identity_reference_predicate(predicate: &str) -> bool {
    const REFERENCE_PREDICATES: &[&str] = &[
        v2::SBOL2_BUILT,
        v2::SBOL2_SEQUENCE_PROP,
        v2::SBOL2_SEQUENCE_ANNOTATION_PROP,
        v2::SBOL2_SEQUENCE_CONSTRAINT_PROP,
        v2::SBOL2_COMPONENT_PROP,
        v2::SBOL2_FUNCTIONAL_COMPONENT_PROP,
        v2::SBOL2_MODULE_PROP,
        v2::SBOL2_INTERACTION_PROP,
        v2::SBOL2_PARTICIPATION_PROP,
        v2::SBOL2_LOCATION_PROP,
        v2::SBOL2_DEFINITION,
        v2::SBOL2_VARIABLE_COMPONENT_PROP,
        v2::SBOL2_VARIABLE,
        v2::SBOL2_VARIANT,
        v2::SBOL2_VARIANT_COLLECTION,
        v2::SBOL2_VARIANT_DERIVATION,
        v2::SBOL2_MODEL_PROP,
        v2::SBOL2_ATTACHMENT_PROP,
        v2::SBOL2_SUBJECT,
        v2::SBOL2_OBJECT,
        v2::SBOL2_PARTICIPANT,
        v2::SBOL2_LOCAL,
        v2::SBOL2_REMOTE,
        v2::SBOL2_MAPS_TO_PROP,
        v2::SBOL2_TEMPLATE,
        v2::SBOL2_MEMBER,
        v2::SBOL2_EXPERIMENTAL_DATA_PROP,
    ];
    REFERENCE_PREDICATES.contains(&predicate)
}
fn literal_value(term: &Term) -> Option<&str> {
term.as_literal().map(Literal::value)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone integer after the final slash is a version.
    #[test]
    fn strips_single_digit_version() {
        assert_eq!(
            strip_trailing_version("https://synbiohub.org/public/igem/BBa_E0040/1").as_deref(),
            Some("https://synbiohub.org/public/igem/BBa_E0040"),
        );
    }

    /// Dotted digit groups after the final slash are a version.
    #[test]
    fn strips_semantic_version() {
        assert_eq!(
            strip_trailing_version("https://example.org/lab/design/1.2.3").as_deref(),
            Some("https://example.org/lab/design"),
        );
    }

    /// A non-numeric last segment is left alone.
    #[test]
    fn preserves_iri_without_version_suffix() {
        assert_eq!(
            strip_trailing_version("https://example.org/lab/design").as_deref(),
            None,
        );
    }

    /// Digits embedded in a name are not a version segment.
    #[test]
    fn preserves_non_numeric_suffix() {
        assert_eq!(
            strip_trailing_version("https://example.org/lab/promoter_v1").as_deref(),
            None,
        );
    }

    /// For `urn:` IRIs the last colon acts as the version separator.
    #[test]
    fn strips_version_after_urn_colon() {
        assert_eq!(
            strip_trailing_version("urn:example:lab:design:2").as_deref(),
            Some("urn:example:lab:design"),
        );
    }

    /// For `urn:` IRIs a slash that follows the last colon wins.
    #[test]
    fn strips_version_after_slash_inside_urn() {
        assert_eq!(
            strip_trailing_version("urn:example:lab/design/2").as_deref(),
            Some("urn:example:lab/design"),
        );
    }

    /// Nothing after the final separator means there is no version.
    #[test]
    fn preserves_iri_with_trailing_separator() {
        assert_eq!(
            strip_trailing_version("https://example.org/lab/design/").as_deref(),
            None,
        );
    }

    /// A segment that starts with a digit but contains letters is not a version.
    #[test]
    fn preserves_suffix_mixing_digits_and_letters() {
        assert_eq!(
            strip_trailing_version("https://example.org/lab/design/1a").as_deref(),
            None,
        );
    }

    /// The display-id segment is removed together with its `/` or `:` separator.
    #[test]
    fn strips_display_id_segment() {
        assert_eq!(
            strip_suffix_segment("https://example.org/lab/design", "design"),
            Some("https://example.org/lab"),
        );
        assert_eq!(
            strip_suffix_segment("urn:example:lab:design", "design"),
            Some("urn:example:lab"),
        );
    }

    /// Without a separator in front of the segment nothing is stripped.
    #[test]
    fn keeps_value_without_separated_segment() {
        assert_eq!(strip_suffix_segment("mydesign", "design"), None);
        assert_eq!(
            strip_suffix_segment("https://example.org/lab/design", "other"),
            None,
        );
    }
}