use rio_api::model::{Literal, Subject, Term, Triple};
use rio_api::parser::TriplesParser;
use rio_xml::RdfXmlParser;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
// Fully qualified IRIs that the triple handler matches predicates and rdf:type objects against.

// SKOS classes: the rdf:type values used to classify parsed subjects.
const SKOS_CONCEPT: &str = "http://www.w3.org/2004/02/skos/core#Concept";
const SKOS_CONCEPT_SCHEME: &str = "http://www.w3.org/2004/02/skos/core#ConceptScheme";
const SKOS_COLLECTION: &str = "http://www.w3.org/2004/02/skos/core#Collection";
// SKOS labelling and documentation predicates (literal objects).
const SKOS_PREF_LABEL: &str = "http://www.w3.org/2004/02/skos/core#prefLabel";
const SKOS_ALT_LABEL: &str = "http://www.w3.org/2004/02/skos/core#altLabel";
const SKOS_SCOPE_NOTE: &str = "http://www.w3.org/2004/02/skos/core#scopeNote";
// SKOS structural predicates (IRI objects): hierarchy, collection membership, scheme membership.
const SKOS_NARROWER: &str = "http://www.w3.org/2004/02/skos/core#narrower";
const SKOS_BROADER: &str = "http://www.w3.org/2004/02/skos/core#broader";
const SKOS_MEMBER: &str = "http://www.w3.org/2004/02/skos/core#member";
const SKOS_IN_SCHEME: &str = "http://www.w3.org/2004/02/skos/core#inScheme";
// RDF core predicate used to read each subject's class.
const RDF_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
// Dublin Core terms: the title of a subject and its external identifier.
const DCTERMS_TITLE: &str = "http://purl.org/dc/terms/title";
const DCTERMS_IDENTIFIER: &str = "http://purl.org/dc/terms/identifier";
/// A serializable label record: a text value together with several identifier fields.
///
/// NOTE(review): this type is not constructed or read in the visible part of this file;
/// the per-field notes below are inferred from the field names only — confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkosLabel {
// Identifier of the label record itself.
pub id: String,
// The label text.
pub value: String,
// Presumably the language of `value` — TODO confirm expected format.
pub language_id: String,
// Presumably the kind of label (e.g. preferred vs. alternative) — TODO confirm.
pub valuetype_id: String,
// Presumably the owning list item / concept — TODO confirm.
pub list_item_id: String,
}
/// A single concept, optionally carrying its narrower concepts as a nested tree.
///
/// Serialized with camelCase names for `prefLabels` and `sortOrder`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkosConcept {
// Concept identifier; the parser derives it from the concept URI.
pub id: String,
// Full concept URI; absent in serialized input defaults to `None`.
#[serde(default)]
pub uri: Option<String>,
// Preferred labels; the parser keys this map by the label's language tag.
#[serde(rename = "prefLabels")]
pub pref_labels: HashMap<String, SkosValue>,
// External identifier; the parser uses `dcterms:identifier`, falling back to the URI.
pub source: Option<String>,
// Explicit ordering among siblings, when the source data supplied one.
#[serde(rename = "sortOrder")]
pub sort_order: Option<i32>,
// Narrower concepts; the parser leaves this `None` rather than storing an empty list.
pub children: Option<Vec<SkosConcept>>,
}
/// A text value (label or note) paired with the identifier of that value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkosValue {
// Identifier of the value; defaults to an empty string when missing from serialized input.
#[serde(default)]
pub id: String,
// The text itself.
pub value: String,
}
/// Which SKOS container a [`SkosCollection`] represents; selects the XML element
/// (`skos:ConceptScheme` or `skos:Collection`) written on export.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum SkosNodeType {
// Default variant, also used when `nodeType` is missing from serialized input.
#[default]
ConceptScheme,
Collection,
}
/// A concept scheme or collection together with its labels and top-level concept trees.
///
/// Every field except `id` and `prefLabels` falls back to its default when missing from
/// serialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkosCollection {
// Identifier of the scheme/collection; the parser derives it from the URI.
pub id: String,
// Full URI of the scheme/collection, when known.
#[serde(default)]
pub uri: Option<String>,
// Preferred labels, keyed by language tag.
#[serde(rename = "prefLabels")]
pub pref_labels: HashMap<String, SkosValue>,
// Alternative labels, keyed by language tag; several are allowed per language.
#[serde(rename = "altLabels", default)]
pub alt_labels: HashMap<String, Vec<SkosValue>>,
// Scope notes, keyed by language tag (one per language).
#[serde(rename = "scopeNotes", default)]
pub scope_notes: HashMap<String, SkosValue>,
// Whether this is a concept scheme or a collection.
#[serde(rename = "nodeType", default)]
pub node_type: SkosNodeType,
// Top-level concepts keyed by concept id; narrower concepts hang off `children`.
#[serde(default)]
pub concepts: HashMap<String, SkosConcept>,
// Serialized as `__allConcepts`. The parser in this file always leaves it empty.
// NOTE(review): presumably a flat lookup populated by a consumer — confirm.
#[serde(rename = "__allConcepts", default)]
pub all_concepts: HashMap<String, SkosConcept>,
// Serialized as `__values`. The parser in this file always leaves it empty.
// NOTE(review): presumably a value-id lookup populated by a consumer — confirm.
#[serde(rename = "__values", default)]
pub values: HashMap<String, SkosValue>,
}
/// Accumulator filled one triple at a time while the RDF/XML document is streamed.
/// Every map is keyed by the subject IRI of the triples it was built from.
#[derive(Debug, Default)]
struct ParsedData {
// subject -> rdf:type IRI (a single type is kept per subject).
types: HashMap<String, String>,
// subject -> (predicate IRI, label text, language tag) for prefLabel/altLabel triples.
labels: HashMap<String, Vec<(String, String, String)>>,
// subject -> (language tag, note text) for scopeNote triples.
scope_notes: HashMap<String, Vec<(String, String)>>,
// subject -> IRIs named by its skos:narrower triples.
narrower: HashMap<String, Vec<String>>,
// subject -> IRIs named by its skos:broader triples.
broader: HashMap<String, Vec<String>>,
// subject -> IRIs named by its skos:member triples.
members: HashMap<String, Vec<String>>,
// subject -> IRIs named by its skos:inScheme triples.
in_scheme: HashMap<String, Vec<String>>,
// subject -> dcterms:title text (the last title seen wins; its language is discarded).
scheme_titles: HashMap<String, String>,
// subject -> dcterms:identifier text (the last identifier seen wins).
identifiers: HashMap<String, String>,
// subject -> integer parsed from a predicate whose IRI contains "sortorder"/"sortOrder".
sort_orders: HashMap<String, i32>,
}
/// Derives a stable identifier for the resource named by `uri`.
///
/// When the URI contains a lowercase, hyphenated UUID, the first such UUID is returned
/// verbatim. Otherwise a version-5 UUID is computed from the URI bytes in the URL
/// namespace, so the same URI always yields the same identifier.
fn extract_or_generate_id(uri: &str) -> String {
    use std::sync::OnceLock;

    // Compiled once on first use and shared by every later call.
    static UUID_RE: OnceLock<regex_lite::Regex> = OnceLock::new();
    let embedded_uuid = UUID_RE.get_or_init(|| {
        regex_lite::Regex::new(r"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})")
            .unwrap()
    });

    embedded_uuid
        .captures(uri)
        .map(|found| found.get(1).unwrap().as_str().to_string())
        .unwrap_or_else(|| {
            uuid::Uuid::new_v5(&uuid::Uuid::NAMESPACE_URL, uri.as_bytes()).to_string()
        })
}
/// Produces the identifier string for a value (label or note) belonging to `concept_id`.
///
/// Thin wrapper over `crate::rdm_namespace::generate_value_uuid`. Note the argument order
/// differs: this function takes `(concept_id, lang, value)` while the callee is invoked
/// with `(concept_id, value, lang)`.
fn generate_value_id(concept_id: &str, lang: &str, value: &str) -> String {
crate::rdm_namespace::generate_value_uuid(concept_id, value, lang).to_string()
}
/// Splits a label literal into `(value id, label text)`.
///
/// A literal may be plain text or a JSON object of the form `{"id": ..., "value": ...}`.
/// For a JSON object, a string `id` / `value` member is used when present; a missing or
/// non-string `id` is replaced by a generated id and a missing or non-string `value` by
/// the raw literal. Anything that is not a JSON object is treated as plain text with a
/// generated id.
///
/// `fallback_id` and `lang` are only used as inputs for the generated id.
fn parse_arches_label(raw_value: &str, fallback_id: &str, lang: &str) -> (String, String) {
    let document = serde_json::from_str::<serde_json::Value>(raw_value).ok();

    match document.as_ref().and_then(serde_json::Value::as_object) {
        Some(fields) => {
            // Reads a member only when it is present *and* a JSON string.
            let text_member = |key: &str| {
                fields
                    .get(key)
                    .and_then(serde_json::Value::as_str)
                    .map(str::to_owned)
            };
            let id = match text_member("id") {
                Some(explicit) => explicit,
                None => generate_value_id(fallback_id, lang, raw_value),
            };
            let value = match text_member("value") {
                Some(text) => text,
                None => raw_value.to_owned(),
            };
            (id, value)
        }
        None => (
            generate_value_id(fallback_id, lang, raw_value),
            raw_value.to_owned(),
        ),
    }
}
impl ParsedData {
fn process_triple(&mut self, triple: Triple) {
let subject_uri = match triple.subject {
Subject::NamedNode(n) => n.iri.to_string(),
_ => return,
};
let predicate = triple.predicate.iri;
match predicate {
RDF_TYPE => {
if let Term::NamedNode(obj) = triple.object {
self.types.insert(subject_uri, obj.iri.to_string());
}
}
SKOS_PREF_LABEL | SKOS_ALT_LABEL => {
if let Term::Literal(lit) = triple.object {
let (value, lang) = match lit {
Literal::Simple { value } => (value.to_string(), "en".to_string()),
Literal::LanguageTaggedString { value, language } => {
(value.to_string(), language.to_string())
}
Literal::Typed { value, .. } => (value.to_string(), "en".to_string()),
};
self.labels.entry(subject_uri).or_default().push((
predicate.to_string(),
value,
lang,
));
}
}
SKOS_NARROWER => {
if let Term::NamedNode(obj) = triple.object {
self.narrower
.entry(subject_uri)
.or_default()
.push(obj.iri.to_string());
}
}
SKOS_BROADER => {
if let Term::NamedNode(obj) = triple.object {
self.broader
.entry(subject_uri)
.or_default()
.push(obj.iri.to_string());
}
}
SKOS_MEMBER => {
if let Term::NamedNode(obj) = triple.object {
self.members
.entry(subject_uri)
.or_default()
.push(obj.iri.to_string());
}
}
SKOS_SCOPE_NOTE => {
if let Term::Literal(lit) = triple.object {
let (value, lang) = match lit {
Literal::Simple { value } => (value.to_string(), "en".to_string()),
Literal::LanguageTaggedString { value, language } => {
(value.to_string(), language.to_string())
}
Literal::Typed { value, .. } => (value.to_string(), "en".to_string()),
};
self.scope_notes
.entry(subject_uri)
.or_default()
.push((lang, value));
}
}
SKOS_IN_SCHEME => {
if let Term::NamedNode(obj) = triple.object {
self.in_scheme
.entry(subject_uri)
.or_default()
.push(obj.iri.to_string());
}
}
DCTERMS_TITLE => {
if let Term::Literal(lit) = triple.object {
let value = match lit {
Literal::Simple { value } => value.to_string(),
Literal::LanguageTaggedString { value, .. } => value.to_string(),
Literal::Typed { value, .. } => value.to_string(),
};
self.scheme_titles.insert(subject_uri, value);
}
}
DCTERMS_IDENTIFIER => {
if let Term::Literal(lit) = triple.object {
let value = match lit {
Literal::Simple { value } => value.to_string(),
Literal::LanguageTaggedString { value, .. } => value.to_string(),
Literal::Typed { value, .. } => value.to_string(),
};
self.identifiers.insert(subject_uri, value);
}
}
_ => {
if predicate.contains("sortorder") || predicate.contains("sortOrder") {
if let Term::Literal(lit) = triple.object {
let value = match lit {
Literal::Simple { value } => value,
Literal::LanguageTaggedString { value, .. } => value,
Literal::Typed { value, .. } => value,
};
if let Ok(order) = value.parse::<i32>() {
self.sort_orders.insert(subject_uri, order);
}
}
}
}
}
}
}
/// Parses an RDF/XML SKOS document into [`SkosCollection`] values.
///
/// The result contains, in this order:
/// 1. one entry per `skos:ConceptScheme`, holding the trees of the concepts that are
///    `skos:inScheme` of it and are nobody's narrower concept;
/// 2. one entry per `skos:Collection`, holding trees built from its (transitively
///    collected) `skos:member` resources, pruned to members only;
/// 3. if neither kind exists but concepts do, a single synthetic "Imported Concepts"
///    scheme identified by `base_uri`.
///
/// Within each group the entries are ordered by URI. Concept hierarchies are the union of
/// `skos:narrower` and inverted `skos:broader` statements; siblings are ordered by sort
/// order (missing = 999), then by id.
///
/// # Errors
///
/// Returns a message when `base_uri` is not a valid IRI or when the document is not
/// well-formed RDF/XML.
pub fn parse_skos_to_collections(
    xml_content: &str,
    base_uri: &str,
) -> Result<Vec<SkosCollection>, String> {
    /// Subjects recorded with rdf:type `wanted`, sorted so output order is reproducible.
    fn uris_of_type(types: &HashMap<String, String>, wanted: &str) -> Vec<String> {
        let mut uris: Vec<String> = types
            .iter()
            .filter(|(_, class)| class.as_str() == wanted)
            .map(|(uri, _)| uri.clone())
            .collect();
        uris.sort_unstable();
        uris
    }

    /// Turns a raw literal (plain text or Arches JSON) into a [`SkosValue`].
    fn arches_value(raw: &str, owner_id: &str, lang: &str) -> SkosValue {
        let (id, value) = parse_arches_label(raw, owner_id, lang);
        SkosValue { id, value }
    }

    /// Recursive worker for `build_concept_tree`. `ancestors` holds the URIs on the path
    /// from the root to the current node; meeting one of them again means the input graph
    /// is cyclic, and that edge is dropped instead of recursing forever.
    fn build_subtree<'a>(
        uri: &str,
        all_concepts: &'a HashMap<String, SkosConcept>,
        children_map: &HashMap<String, Vec<String>>,
        ancestors: &mut HashSet<&'a str>,
    ) -> Option<SkosConcept> {
        let (key, concept) = all_concepts.get_key_value(uri)?;
        if !ancestors.insert(key.as_str()) {
            return None;
        }
        let mut result = concept.clone();
        if let Some(child_uris) = children_map.get(uri) {
            let mut children: Vec<SkosConcept> = child_uris
                .iter()
                .filter_map(|child_uri| {
                    build_subtree(child_uri, all_concepts, children_map, ancestors)
                })
                .collect();
            // The id tie-break keeps the order stable when sort orders are equal/missing.
            children.sort_by(|a, b| {
                a.sort_order
                    .unwrap_or(999)
                    .cmp(&b.sort_order.unwrap_or(999))
                    .then_with(|| a.id.cmp(&b.id))
            });
            if !children.is_empty() {
                result.children = Some(children);
            }
        }
        // Only the current path is tracked, so a concept with two parents still appears
        // under both of them.
        ancestors.remove(key.as_str());
        Some(result)
    }

    /// Builds the tree rooted at `uri`; `None` when `uri` is not a known concept.
    fn build_concept_tree(
        uri: &str,
        all_concepts: &HashMap<String, SkosConcept>,
        children_map: &HashMap<String, Vec<String>>,
    ) -> Option<SkosConcept> {
        build_subtree(uri, all_concepts, children_map, &mut HashSet::new())
    }

    /// Copies `concept`, keeping only descendants whose URI (or id) is in `member_uris`.
    /// A dropped child takes its whole subtree with it.
    fn filter_concept_tree_to_members(
        concept: &SkosConcept,
        member_uris: &HashSet<&String>,
    ) -> SkosConcept {
        let mut result = concept.clone();
        if let Some(children) = &concept.children {
            let kept: Vec<SkosConcept> = children
                .iter()
                .filter(|child| {
                    let child_uri = child.uri.as_ref().unwrap_or(&child.id);
                    member_uris.contains(child_uri) || member_uris.contains(&child.id)
                })
                .map(|child| filter_concept_tree_to_members(child, member_uris))
                .collect();
            result.children = if kept.is_empty() { None } else { Some(kept) };
        }
        result
    }

    // ---- 1. Stream the document into the triple indexes -------------------------------
    let mut data = ParsedData::default();
    let base_iri =
        oxiri::Iri::parse(base_uri.to_string()).map_err(|e| format!("Invalid base URI: {}", e))?;
    let mut parser = RdfXmlParser::new(xml_content.as_bytes(), Some(base_iri));
    parser
        .parse_all(&mut |triple| {
            data.process_triple(triple);
            Ok(()) as Result<(), std::io::Error>
        })
        .map_err(|e| format!("RDF/XML parse error: {}", e))?;

    let scheme_uris = uris_of_type(&data.types, SKOS_CONCEPT_SCHEME);
    let concept_uris = uris_of_type(&data.types, SKOS_CONCEPT);
    let collection_uris = uris_of_type(&data.types, SKOS_COLLECTION);

    // ---- 2. Flat concept table, keyed by URI ------------------------------------------
    let mut all_concepts: HashMap<String, SkosConcept> =
        HashMap::with_capacity(concept_uris.len());
    for uri in &concept_uris {
        let id = extract_or_generate_id(uri);
        let labels = data
            .labels
            .get(uri)
            .map(Vec::as_slice)
            .unwrap_or_default();

        // A later prefLabel in the same language replaces an earlier one.
        let mut pref_labels: HashMap<String, SkosValue> = labels
            .iter()
            .filter(|(pred, _, _)| pred == SKOS_PREF_LABEL)
            .map(|(_, value, lang)| (lang.clone(), arches_value(value, &id, lang)))
            .collect();
        // No prefLabel at all: promote the first label of any kind so the concept is
        // not left nameless.
        if pref_labels.is_empty() {
            if let Some((_, value, lang)) = labels.first() {
                pref_labels.insert(lang.clone(), arches_value(value, &id, lang));
            }
        }

        let concept = SkosConcept {
            id,
            uri: Some(uri.clone()),
            pref_labels,
            source: data
                .identifiers
                .get(uri)
                .cloned()
                .or_else(|| Some(uri.clone())),
            sort_order: data.sort_orders.get(uri).copied(),
            children: None,
        };
        all_concepts.insert(uri.clone(), concept);
    }

    // ---- 3. Parent -> children edges from narrower plus inverted broader --------------
    let mut child_sets: HashMap<String, HashSet<String>> = HashMap::new();
    for (parent_uri, narrower_uris) in &data.narrower {
        child_sets
            .entry(parent_uri.clone())
            .or_default()
            .extend(narrower_uris.iter().cloned());
    }
    for (child_uri, broader_uris) in &data.broader {
        for parent_uri in broader_uris {
            child_sets
                .entry(parent_uri.clone())
                .or_default()
                .insert(child_uri.clone());
        }
    }
    let children_map: HashMap<String, Vec<String>> = child_sets
        .into_iter()
        .map(|(parent, kids)| (parent, kids.into_iter().collect()))
        .collect();
    // Everything that is somebody's child, i.e. everything that is *not* a root.
    let all_narrower: HashSet<&str> = children_map
        .values()
        .flatten()
        .map(String::as_str)
        .collect();

    let mut collections: Vec<SkosCollection> = Vec::new();

    // ---- 4. Concept schemes -----------------------------------------------------------
    for scheme_uri in &scheme_uris {
        let scheme_id = extract_or_generate_id(scheme_uri);
        let title = data
            .scheme_titles
            .get(scheme_uri)
            .cloned()
            .unwrap_or_else(|| scheme_id.clone());

        let concepts: HashMap<String, SkosConcept> = data
            .in_scheme
            .iter()
            .filter(|(concept_uri, schemes)| {
                schemes.contains(scheme_uri) && !all_narrower.contains(concept_uri.as_str())
            })
            .filter_map(|(concept_uri, _)| {
                build_concept_tree(concept_uri, &all_concepts, &children_map)
            })
            .map(|concept| (concept.id.clone(), concept))
            .collect();

        // The title's own language tag is not retained by the triple index, so the
        // scheme label is always filed under "en".
        let pref_labels = HashMap::from([(
            "en".to_string(),
            SkosValue {
                id: generate_value_id(&scheme_id, "en", &title),
                value: title,
            },
        )]);

        // Scheme labels and notes are taken verbatim (no Arches JSON unwrapping).
        let mut alt_labels: HashMap<String, Vec<SkosValue>> = HashMap::new();
        if let Some(labels) = data.labels.get(scheme_uri) {
            for (pred, value, lang) in labels {
                if pred == SKOS_ALT_LABEL {
                    alt_labels.entry(lang.clone()).or_default().push(SkosValue {
                        id: generate_value_id(&scheme_id, lang, value),
                        value: value.clone(),
                    });
                }
            }
        }
        let mut scope_notes: HashMap<String, SkosValue> = HashMap::new();
        if let Some(notes) = data.scope_notes.get(scheme_uri) {
            for (lang, value) in notes {
                scope_notes.insert(
                    lang.clone(),
                    SkosValue {
                        id: generate_value_id(&scheme_id, lang, value),
                        value: value.clone(),
                    },
                );
            }
        }

        collections.push(SkosCollection {
            id: scheme_id,
            uri: Some(scheme_uri.clone()),
            pref_labels,
            alt_labels,
            scope_notes,
            node_type: SkosNodeType::ConceptScheme,
            concepts,
            all_concepts: HashMap::new(),
            values: HashMap::new(),
        });
    }

    // ---- 5. Collections ---------------------------------------------------------------
    for collection_uri in &collection_uris {
        let collection_id = extract_or_generate_id(collection_uri);

        // The label index only ever holds prefLabel and altLabel entries.
        let mut pref_labels: HashMap<String, SkosValue> = HashMap::new();
        let mut alt_labels: HashMap<String, Vec<SkosValue>> = HashMap::new();
        if let Some(labels) = data.labels.get(collection_uri) {
            for (pred, value, lang) in labels {
                if pred == SKOS_PREF_LABEL {
                    pref_labels.insert(lang.clone(), arches_value(value, &collection_id, lang));
                } else if pred == SKOS_ALT_LABEL {
                    alt_labels
                        .entry(lang.clone())
                        .or_default()
                        .push(arches_value(value, &collection_id, lang));
                }
            }
        }
        let mut scope_notes: HashMap<String, SkosValue> = HashMap::new();
        if let Some(notes) = data.scope_notes.get(collection_uri) {
            for (lang, value) in notes {
                scope_notes.insert(lang.clone(), arches_value(value, &collection_id, lang));
            }
        }

        let mut concepts: HashMap<String, SkosConcept> = HashMap::new();
        if let Some(direct_members) = data.members.get(collection_uri) {
            // Breadth-first closure over skos:member: a member may itself have members.
            let mut member_order: Vec<&String> = Vec::new();
            let mut member_set: HashSet<&String> = HashSet::new();
            for uri in direct_members {
                if member_set.insert(uri) {
                    member_order.push(uri);
                }
            }
            let mut next = 0;
            while next < member_order.len() {
                if let Some(nested) = data.members.get(member_order[next]) {
                    for child in nested {
                        if member_set.insert(child) {
                            member_order.push(child);
                        }
                    }
                }
                next += 1;
            }

            // Inside a collection, member-of-member counts as a parent/child edge too.
            let mut collection_children_map = children_map.clone();
            for &uri in &member_order {
                if let Some(nested) = data.members.get(uri) {
                    let entry = collection_children_map.entry(uri.clone()).or_default();
                    for child in nested {
                        if !entry.contains(child) {
                            entry.push(child.clone());
                        }
                    }
                }
            }

            // A member is a root of this collection unless some member lists it as a child.
            let children_of_members: HashSet<&str> = member_order
                .iter()
                .filter_map(|member| collection_children_map.get(member.as_str()))
                .flatten()
                .map(String::as_str)
                .collect();

            for &member_uri in member_order
                .iter()
                .filter(|uri| !children_of_members.contains(uri.as_str()))
            {
                // NOTE(review): a root that is not a skos:Concept (e.g. a nested
                // collection) is not in `all_concepts` and is therefore skipped here.
                if let Some(concept) =
                    build_concept_tree(member_uri, &all_concepts, &collection_children_map)
                {
                    let filtered = filter_concept_tree_to_members(&concept, &member_set);
                    concepts.insert(filtered.id.clone(), filtered);
                }
            }
        }

        collections.push(SkosCollection {
            id: collection_id,
            uri: Some(collection_uri.clone()),
            pref_labels,
            alt_labels,
            scope_notes,
            node_type: SkosNodeType::Collection,
            concepts,
            all_concepts: HashMap::new(),
            values: HashMap::new(),
        });
    }

    // ---- 6. Fallback: bare concepts with no scheme or collection ----------------------
    if collections.is_empty() && !all_concepts.is_empty() {
        let concepts: HashMap<String, SkosConcept> = concept_uris
            .iter()
            .filter(|uri| !all_narrower.contains(uri.as_str()))
            .filter_map(|uri| build_concept_tree(uri, &all_concepts, &children_map))
            .map(|concept| (concept.id.clone(), concept))
            .collect();

        let default_id = extract_or_generate_id(base_uri);
        let pref_labels = HashMap::from([(
            "en".to_string(),
            SkosValue {
                id: generate_value_id(&default_id, "en", "Imported Concepts"),
                value: "Imported Concepts".to_string(),
            },
        )]);
        collections.push(SkosCollection {
            id: default_id,
            uri: Some(base_uri.to_string()),
            pref_labels,
            alt_labels: HashMap::new(),
            scope_notes: HashMap::new(),
            node_type: SkosNodeType::ConceptScheme,
            concepts,
            all_concepts: HashMap::new(),
            values: HashMap::new(),
        });
    }

    Ok(collections)
}
/// Escapes `s` for use inside a double- or single-quoted XML attribute value.
///
/// The five characters with predefined XML entities (`&`, `<`, `>`, `"`, `'`) are replaced
/// by their entity references; everything else is copied unchanged. The input is scanned
/// once, so an `&` that is already part of an entity is escaped again rather than skipped.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Escapes `s` for use as XML element text.
///
/// Only `&`, `<` and `>` are replaced by entity references; quotes are left as they are
/// because they carry no special meaning in element content.
fn xml_escape_content(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Returns the map's entries as `(key, value)` reference pairs in ascending key order,
/// giving callers a reproducible iteration order over a `HashMap`.
fn sorted_by_key<K: Ord, V>(map: &HashMap<K, V>) -> Vec<(&K, &V)> {
    let mut pairs = Vec::with_capacity(map.len());
    pairs.extend(map.iter());
    // Map keys are unique, so the comparison never has to break a tie.
    pairs.sort_by(|left, right| left.0.cmp(right.0));
    pairs
}
/// Returns references to the child concepts ordered by ascending `id`; an absent child
/// list yields an empty vector.
fn sorted_children(children: &Option<Vec<SkosConcept>>) -> Vec<&SkosConcept> {
    // `Option::iter` yields the inner Vec zero or one time; flattening walks its items.
    let mut ordered: Vec<&SkosConcept> = children.iter().flatten().collect();
    ordered.sort_by(|left, right| left.id.cmp(&right.id));
    ordered
}
/// Returns references to the map's concepts ordered by ascending concept `id`
/// (the concept's own field, not the map key).
fn sorted_concepts(concepts: &HashMap<String, SkosConcept>) -> Vec<&SkosConcept> {
    let mut ordered: Vec<&SkosConcept> = Vec::with_capacity(concepts.len());
    ordered.extend(concepts.values());
    ordered.sort_by(|left, right| left.id.cmp(&right.id));
    ordered
}
/// Appends the `skos:Concept` element for `concept` to `output`, followed by the elements
/// of all of its descendants (depth-first, children in ascending id order).
///
/// * `scheme_uri` – written as the concept's `skos:inScheme` target.
/// * `base_uri` – prefix used to mint a URI (`base_uri + id`) for any concept whose own
///   `uri` is missing or empty.
/// * `parent_uri` – when present, written as the concept's `skos:broader` target.
fn write_concept_xml(
    concept: &SkosConcept,
    scheme_uri: &str,
    base_uri: &str,
    parent_uri: Option<&str>,
    output: &mut String,
) {
    // Resolves the URI to emit for a node, minting one from the id when necessary.
    let uri_of = |node: &SkosConcept| -> String {
        node.uri
            .as_deref()
            .filter(|uri| !uri.is_empty())
            .map_or_else(|| format!("{}{}", base_uri, node.id), str::to_owned)
    };
    let concept_uri = uri_of(concept);
    let ordered_children = sorted_children(&concept.children);

    let mut element = format!(
        " <skos:Concept rdf:about=\"{}\">\n",
        xml_escape(&concept_uri)
    );
    element += &format!(
        " <skos:inScheme rdf:resource=\"{}\"/>\n",
        xml_escape(scheme_uri)
    );
    if let Some(parent) = parent_uri {
        element += &format!(
            " <skos:broader rdf:resource=\"{}\"/>\n",
            xml_escape(parent)
        );
    }
    for (lang, label) in sorted_by_key(&concept.pref_labels) {
        element += &format!(
            " <skos:prefLabel xml:lang=\"{}\">{}</skos:prefLabel>\n",
            xml_escape(lang),
            xml_escape_content(&label.value)
        );
    }
    if let Some(order) = concept.sort_order {
        element += &format!(
            " <arches:sortorder rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">{}</arches:sortorder>\n",
            order
        );
    }
    if let Some(source) = &concept.source {
        element += &format!(
            " <dcterms:identifier>{}</dcterms:identifier>\n",
            xml_escape_content(source)
        );
    }
    for child in &ordered_children {
        element += &format!(
            " <skos:narrower rdf:resource=\"{}\"/>\n",
            xml_escape(&uri_of(child))
        );
    }
    element += " </skos:Concept>\n";
    output.push_str(&element);

    // Descendants are written as sibling elements after their parent, not nested in it.
    for child in ordered_children {
        write_concept_xml(child, scheme_uri, base_uri, Some(&concept_uri), output);
    }
}
/// Appends the `skos:Concept` element for a collection member to `output`, followed by
/// the elements of all of its descendants (depth-first, children in ascending id order).
///
/// Unlike the scheme variant, no `skos:inScheme` statement is written.
///
/// * `base_uri` – prefix used to mint a URI (`base_uri + id`) for any concept whose own
///   `uri` is missing or empty.
/// * `parent_uri` – when present, written as the concept's `skos:broader` target.
fn write_collection_concept_xml(
    concept: &SkosConcept,
    base_uri: &str,
    parent_uri: Option<&str>,
    output: &mut String,
) {
    // Resolves the URI to emit for a node, minting one from the id when necessary.
    let uri_of = |node: &SkosConcept| -> String {
        match node.uri.as_deref() {
            Some(uri) if !uri.is_empty() => uri.to_owned(),
            _ => format!("{}{}", base_uri, node.id),
        }
    };
    let concept_uri = uri_of(concept);
    let ordered_children = sorted_children(&concept.children);

    let mut lines: Vec<String> = Vec::new();
    lines.push(format!(
        " <skos:Concept rdf:about=\"{}\">\n",
        xml_escape(&concept_uri)
    ));
    if let Some(parent) = parent_uri {
        lines.push(format!(
            " <skos:broader rdf:resource=\"{}\"/>\n",
            xml_escape(parent)
        ));
    }
    for (lang, label) in sorted_by_key(&concept.pref_labels) {
        lines.push(format!(
            " <skos:prefLabel xml:lang=\"{}\">{}</skos:prefLabel>\n",
            xml_escape(lang),
            xml_escape_content(&label.value)
        ));
    }
    if let Some(order) = concept.sort_order {
        lines.push(format!(
            " <arches:sortorder rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">{}</arches:sortorder>\n",
            order
        ));
    }
    if let Some(source) = &concept.source {
        lines.push(format!(
            " <dcterms:identifier>{}</dcterms:identifier>\n",
            xml_escape_content(source)
        ));
    }
    for child in &ordered_children {
        lines.push(format!(
            " <skos:narrower rdf:resource=\"{}\"/>\n",
            xml_escape(&uri_of(child))
        ));
    }
    lines.push(" </skos:Concept>\n".to_string());
    for line in &lines {
        output.push_str(line);
    }

    // Descendants are written as sibling elements after their parent, not nested in it.
    for child in ordered_children {
        write_collection_concept_xml(child, base_uri, Some(&concept_uri), output);
    }
}
fn collect_all_concept_uris(concept: &SkosConcept, base_uri: &str, uris: &mut Vec<String>) {
let concept_uri = match &concept.uri {
Some(uri) if !uri.is_empty() => uri.clone(),
_ => format!("{}{}", base_uri, concept.id),
};
uris.push(concept_uri);
if let Some(ref children) = concept.children {
for child in children {
collect_all_concept_uris(child, base_uri, uris);
}
}
}
pub fn collection_to_skos_xml(collection: &SkosCollection, base_uri: &str) -> String {
let mut output = String::new();
output.push_str(
r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:arches="http://localhost:8000/"
>
"#,
);
let entity_uri = collection
.uri
.clone()
.unwrap_or_else(|| format!("{}{}", base_uri, collection.id));
match collection.node_type {
SkosNodeType::Collection => {
output.push_str(&format!(
" <skos:Collection rdf:about=\"{}\">\n",
xml_escape(&entity_uri)
));
for (lang, value) in sorted_by_key(&collection.pref_labels) {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:prefLabel xml:lang=\"{}\">{}</skos:prefLabel>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
for (lang, values) in sorted_by_key(&collection.alt_labels) {
for value in values {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:altLabel xml:lang=\"{}\">{}</skos:altLabel>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
}
for (lang, value) in sorted_by_key(&collection.scope_notes) {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:scopeNote xml:lang=\"{}\">{}</skos:scopeNote>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
let mut all_concept_uris: Vec<String> = Vec::new();
for concept in sorted_concepts(&collection.concepts) {
collect_all_concept_uris(concept, base_uri, &mut all_concept_uris);
}
all_concept_uris.sort();
for concept_uri in &all_concept_uris {
output.push_str(&format!(
" <skos:member>\n <skos:Concept rdf:about=\"{}\"/>\n </skos:member>\n",
xml_escape(concept_uri)
));
}
output.push_str(" </skos:Collection>\n");
for concept in sorted_concepts(&collection.concepts) {
write_collection_concept_xml(concept, base_uri, None, &mut output);
}
}
SkosNodeType::ConceptScheme => {
output.push_str(&format!(
" <skos:ConceptScheme rdf:about=\"{}\">\n",
xml_escape(&entity_uri)
));
for (lang, value) in sorted_by_key(&collection.pref_labels) {
output.push_str(&format!(
" <dcterms:title xml:lang=\"{}\">{}</dcterms:title>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
for (lang, values) in sorted_by_key(&collection.alt_labels) {
for value in values {
output.push_str(&format!(
" <skos:altLabel xml:lang=\"{}\">{}</skos:altLabel>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
}
for (lang, value) in sorted_by_key(&collection.scope_notes) {
output.push_str(&format!(
" <skos:scopeNote xml:lang=\"{}\">{}</skos:scopeNote>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
for concept in sorted_concepts(&collection.concepts) {
let concept_uri = match &concept.uri {
Some(uri) if !uri.is_empty() => uri.clone(),
_ => format!("{}{}", base_uri, concept.id),
};
output.push_str(&format!(
" <skos:hasTopConcept rdf:resource=\"{}\"/>\n",
xml_escape(&concept_uri)
));
}
output.push_str(" </skos:ConceptScheme>\n");
for concept in sorted_concepts(&collection.concepts) {
write_concept_xml(concept, &entity_uri, base_uri, None, &mut output);
}
}
}
output.push_str("</rdf:RDF>\n");
output
}
pub fn collections_to_skos_xml(collections: &[SkosCollection], base_uri: &str) -> String {
let mut output = String::new();
output.push_str(
r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:arches="http://localhost:8000/"
>
"#,
);
for collection in collections {
let entity_uri = collection
.uri
.clone()
.unwrap_or_else(|| format!("{}{}", base_uri, collection.id));
match collection.node_type {
SkosNodeType::Collection => {
output.push_str(&format!(
" <skos:Collection rdf:about=\"{}\">\n",
xml_escape(&entity_uri)
));
for (lang, value) in sorted_by_key(&collection.pref_labels) {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:prefLabel xml:lang=\"{}\">{}</skos:prefLabel>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
for (lang, values) in sorted_by_key(&collection.alt_labels) {
for value in values {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:altLabel xml:lang=\"{}\">{}</skos:altLabel>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
}
for (lang, value) in sorted_by_key(&collection.scope_notes) {
let json_value = serde_json::json!({
"id": value.id,
"value": value.value
});
output.push_str(&format!(
" <skos:scopeNote xml:lang=\"{}\">{}</skos:scopeNote>\n",
xml_escape(lang),
xml_escape_content(&json_value.to_string())
));
}
let mut all_concept_uris: Vec<String> = Vec::new();
for concept in sorted_concepts(&collection.concepts) {
collect_all_concept_uris(concept, base_uri, &mut all_concept_uris);
}
all_concept_uris.sort();
for concept_uri in &all_concept_uris {
output.push_str(&format!(
" <skos:member>\n <skos:Concept rdf:about=\"{}\"/>\n </skos:member>\n",
xml_escape(concept_uri)
));
}
output.push_str(" </skos:Collection>\n");
for concept in sorted_concepts(&collection.concepts) {
write_collection_concept_xml(concept, base_uri, None, &mut output);
}
}
SkosNodeType::ConceptScheme => {
output.push_str(&format!(
" <skos:ConceptScheme rdf:about=\"{}\">\n",
xml_escape(&entity_uri)
));
for (lang, value) in sorted_by_key(&collection.pref_labels) {
output.push_str(&format!(
" <dcterms:title xml:lang=\"{}\">{}</dcterms:title>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
for (lang, values) in sorted_by_key(&collection.alt_labels) {
for value in values {
output.push_str(&format!(
" <skos:altLabel xml:lang=\"{}\">{}</skos:altLabel>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
}
for (lang, value) in sorted_by_key(&collection.scope_notes) {
output.push_str(&format!(
" <skos:scopeNote xml:lang=\"{}\">{}</skos:scopeNote>\n",
xml_escape(lang),
xml_escape_content(&value.value)
));
}
for concept in sorted_concepts(&collection.concepts) {
let concept_uri = match &concept.uri {
Some(uri) if !uri.is_empty() => uri.clone(),
_ => format!("{}{}", base_uri, concept.id),
};
output.push_str(&format!(
" <skos:hasTopConcept rdf:resource=\"{}\"/>\n",
xml_escape(&concept_uri)
));
}
output.push_str(" </skos:ConceptScheme>\n");
for concept in sorted_concepts(&collection.concepts) {
write_concept_xml(concept, &entity_uri, base_uri, None, &mut output);
}
}
}
}
output.push_str("</rdf:RDF>\n");
output
}
#[cfg(test)]
mod tests {
use super::*;
const TEST_SKOS: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/">
<skos:ConceptScheme rdf:about="http://example.org/scheme/1">
<dcterms:title>Test Scheme</dcterms:title>
</skos:ConceptScheme>
<skos:Concept rdf:about="http://example.org/concept/1">
<skos:inScheme rdf:resource="http://example.org/scheme/1"/>
<skos:prefLabel xml:lang="en">Concept One</skos:prefLabel>
</skos:Concept>
</rdf:RDF>"#;
const TEST_SKOS_WITH_HIERARCHY: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:arches="http://localhost:8000/">
<skos:ConceptScheme rdf:about="http://example.org/scheme/test-scheme">
<dcterms:title>Hierarchical Test</dcterms:title>
</skos:ConceptScheme>
<skos:Concept rdf:about="http://example.org/concept/parent">
<skos:inScheme rdf:resource="http://example.org/scheme/test-scheme"/>
<skos:prefLabel xml:lang="en">Parent Concept</skos:prefLabel>
<skos:prefLabel xml:lang="de">Elternkonzept</skos:prefLabel>
<arches:sortorder rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">0</arches:sortorder>
<skos:narrower rdf:resource="http://example.org/concept/child1"/>
<skos:narrower rdf:resource="http://example.org/concept/child2"/>
</skos:Concept>
<skos:Concept rdf:about="http://example.org/concept/child1">
<skos:inScheme rdf:resource="http://example.org/scheme/test-scheme"/>
<skos:broader rdf:resource="http://example.org/concept/parent"/>
<skos:prefLabel xml:lang="en">Child One</skos:prefLabel>
<arches:sortorder rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1</arches:sortorder>
</skos:Concept>
<skos:Concept rdf:about="http://example.org/concept/child2">
<skos:inScheme rdf:resource="http://example.org/scheme/test-scheme"/>
<skos:broader rdf:resource="http://example.org/concept/parent"/>
<skos:prefLabel xml:lang="en">Child Two</skos:prefLabel>
<arches:sortorder rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2</arches:sortorder>
</skos:Concept>
</rdf:RDF>"#;
#[test]
fn test_parse_skos() {
let result = parse_skos_to_collections(TEST_SKOS, "http://example.org/");
assert!(result.is_ok());
let collections = result.unwrap();
assert_eq!(collections.len(), 1);
assert!(!collections[0].concepts.is_empty());
}
#[test]
fn test_parse_hierarchical_skos() {
let result = parse_skos_to_collections(TEST_SKOS_WITH_HIERARCHY, "http://example.org/");
assert!(result.is_ok());
let collections = result.unwrap();
assert_eq!(collections.len(), 1);
let collection = &collections[0];
assert_eq!(collection.concepts.len(), 1);
let parent = collection.concepts.values().next().unwrap();
assert!(parent.pref_labels.contains_key("en"));
assert_eq!(parent.pref_labels["en"].value, "Parent Concept");
assert!(parent.children.is_some());
let children = parent.children.as_ref().unwrap();
assert_eq!(children.len(), 2);
assert_eq!(children[0].pref_labels["en"].value, "Child One");
assert_eq!(children[1].pref_labels["en"].value, "Child Two");
}
#[test]
fn test_serialize_collection_to_xml() {
let mut pref_labels = HashMap::new();
pref_labels.insert(
"en".to_string(),
SkosValue {
id: "label-1".to_string(),
value: "Test Collection".to_string(),
},
);
let mut concept_labels = HashMap::new();
concept_labels.insert(
"en".to_string(),
SkosValue {
id: "concept-label-1".to_string(),
value: "Test Concept".to_string(),
},
);
let concept = SkosConcept {
id: "concept-1".to_string(),
uri: Some("http://example.org/concept/1".to_string()),
pref_labels: concept_labels,
source: Some("http://example.org/source/1".to_string()),
sort_order: Some(0),
children: None,
};
let mut concepts = HashMap::new();
concepts.insert("concept-1".to_string(), concept);
let collection = SkosCollection {
id: "collection-1".to_string(),
uri: None,
pref_labels,
alt_labels: HashMap::new(),
scope_notes: HashMap::new(),
node_type: SkosNodeType::ConceptScheme,
concepts,
all_concepts: HashMap::new(),
values: HashMap::new(),
};
let xml = collection_to_skos_xml(&collection, "http://example.org/");
assert!(xml.contains("<?xml version=\"1.0\""));
assert!(xml.contains("xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\""));
assert!(xml.contains("skos:ConceptScheme"));
assert!(xml.contains("skos:Concept"));
assert!(xml.contains("Test Collection"));
assert!(xml.contains("Test Concept"));
assert!(xml.contains("skos:prefLabel"));
assert!(xml.contains("xml:lang=\"en\""));
}
/// Serializes a concept scheme whose only top-level concept has one child and
/// checks that both hierarchy predicates and both labels appear in the output.
#[test]
fn test_serialize_hierarchical_collection() {
    // Builds a label map with a single English entry.
    let english = |label_id: &str, text: &str| {
        HashMap::from([(
            "en".to_string(),
            SkosValue {
                id: label_id.to_string(),
                value: text.to_string(),
            },
        )])
    };

    let leaf = SkosConcept {
        id: "child-1".to_string(),
        uri: Some("http://example.org/concept/child".to_string()),
        pref_labels: english("child-label-1", "Child Concept"),
        source: None,
        sort_order: Some(1),
        children: None,
    };
    let root = SkosConcept {
        id: "parent-1".to_string(),
        uri: Some("http://example.org/concept/parent".to_string()),
        pref_labels: english("parent-label-1", "Parent Concept"),
        source: None,
        sort_order: Some(0),
        children: Some(vec![leaf]),
    };
    let scheme = SkosCollection {
        id: "hier-collection".to_string(),
        uri: None,
        pref_labels: english("label-1", "Hierarchical Collection"),
        alt_labels: HashMap::new(),
        scope_notes: HashMap::new(),
        node_type: SkosNodeType::ConceptScheme,
        concepts: HashMap::from([("parent-1".to_string(), root)]),
        all_concepts: HashMap::new(),
        values: HashMap::new(),
    };

    let xml = collection_to_skos_xml(&scheme, "http://example.org/");

    for fragment in [
        "skos:narrower",
        "skos:broader",
        "Parent Concept",
        "Child Concept",
    ] {
        assert!(xml.contains(fragment), "missing fragment: {}", fragment);
    }
}
/// Checks that `xml_escape` leaves plain text untouched and rewrites the XML
/// special characters as entity references.
///
/// The expected values had lost their entity references, which made the
/// escaping cases assert that input equals output and left the double-quote
/// case as invalid Rust. The entities are restored here.
#[test]
fn test_xml_escape() {
    // Text without special characters passes through unchanged.
    assert_eq!(xml_escape("test"), "test");

    assert_eq!(xml_escape("a < b"), "a &lt; b");
    assert_eq!(xml_escape("a > b"), "a &gt; b");
    assert_eq!(xml_escape("a & b"), "a &amp; b");
    assert_eq!(xml_escape("\"quoted\""), "&quot;quoted&quot;");

    // NOTE(review): assumes the apostrophe is emitted as the predefined XML
    // entity `&apos;` (not `&#39;`) — confirm against `xml_escape`.
    assert_eq!(xml_escape("it's"), "it&apos;s");

    // Markup-like input must come out fully neutralised.
    assert_eq!(
        xml_escape("<script>alert('xss')</script>"),
        "&lt;script&gt;alert(&apos;xss&apos;)&lt;/script&gt;"
    );
}
/// Parses `TEST_SKOS`, serializes the single resulting collection, parses the
/// output again and checks that the concept count is unchanged and that every
/// original concept shares at least one preferred-label value with some
/// concept in the re-parsed collection.
#[test]
fn test_round_trip_simple() {
    let base = "http://example.org/";

    let first_pass = parse_skos_to_collections(TEST_SKOS, base);
    assert!(first_pass.is_ok());
    let first_pass = first_pass.unwrap();
    assert_eq!(first_pass.len(), 1);
    let before = &first_pass[0];

    let xml = collection_to_skos_xml(before, base);

    let second_pass = parse_skos_to_collections(&xml, base);
    assert!(second_pass.is_ok());
    let second_pass = second_pass.unwrap();
    assert_eq!(second_pass.len(), 1);
    let after = &second_pass[0];

    assert_eq!(before.concepts.len(), after.concepts.len());

    for (id, concept) in before.concepts.iter() {
        // Ids are not compared; concepts are matched purely on label text.
        let survived = after.concepts.values().any(|candidate| {
            candidate.pref_labels.values().any(|theirs| {
                concept
                    .pref_labels
                    .values()
                    .any(|ours| theirs.value == ours.value)
            })
        });
        assert!(
            survived,
            "Concept with id {} not found after round-trip",
            id
        );
    }
}
/// Round-trips `TEST_SKOS_WITH_HIERARCHY` through the serializer and checks
/// that the re-parsed collection still has one top-level concept with the two
/// English-labelled children "Child One" and "Child Two".
#[test]
fn test_round_trip_hierarchical() {
    let base = "http://example.org/";

    let first_pass = parse_skos_to_collections(TEST_SKOS_WITH_HIERARCHY, base);
    assert!(first_pass.is_ok());
    let first_pass = first_pass.unwrap();

    let xml = collection_to_skos_xml(&first_pass[0], base);

    let second_pass = parse_skos_to_collections(&xml, base);
    assert!(second_pass.is_ok());
    let second_pass = second_pass.unwrap();
    let reparsed = &second_pass[0];
    assert_eq!(reparsed.concepts.len(), 1);

    let parent = reparsed.concepts.values().next().unwrap();
    assert!(parent.children.is_some());
    let children = parent.children.as_ref().unwrap();
    assert_eq!(children.len(), 2);

    // True when some child carries `wanted` as its English preferred label;
    // children without an English label are simply ignored.
    let has_child = |wanted: &str| {
        children
            .iter()
            .any(|child| child.pref_labels.get("en").map(|v| v.value.as_str()) == Some(wanted))
    };
    assert!(has_child("Child One"));
    assert!(has_child("Child Two"));
}
/// Checks that the English and German preferred labels of the top-level
/// concept in `TEST_SKOS_WITH_HIERARCHY` are present after parsing and are
/// still present after a serialize/parse round-trip.
#[test]
fn test_round_trip_multilingual() {
    let base = "http://example.org/";

    let first_pass = parse_skos_to_collections(TEST_SKOS_WITH_HIERARCHY, base);
    assert!(first_pass.is_ok());
    let first_pass = first_pass.unwrap();

    let before = first_pass[0].concepts.values().next().unwrap();
    for lang in ["en", "de"] {
        assert!(before.pref_labels.contains_key(lang));
    }
    assert_eq!(before.pref_labels["de"].value, "Elternkonzept");

    let xml = collection_to_skos_xml(&first_pass[0], base);

    let second_pass = parse_skos_to_collections(&xml, base);
    assert!(second_pass.is_ok());
    let second_pass = second_pass.unwrap();

    let after = second_pass[0].concepts.values().next().unwrap();
    for lang in ["en", "de"] {
        assert!(after.pref_labels.contains_key(lang));
    }
    assert_eq!(after.pref_labels["de"].value, "Elternkonzept");
}
// RDF/XML fixture: a single `skos:Collection` whose en-us prefLabel, altLabel
// and scopeNote each carry a JSON object with "id" and "value" keys, and which
// lists two member concepts. Both members are defined at the top level of the
// document with plain-text English prefLabels and no hierarchy predicates.
const TEST_ARCHES_COLLECTION: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/">
<skos:Collection rdf:about="http://localhost:8000/7dde2f92-9f8a-44cf-817f-ec8c5c736f69">
<skos:prefLabel xml:lang="en-us">{"id": "956f8913-f728-4f82-b3ae-3aaf4ce7891a", "value": "Test Collection"}</skos:prefLabel>
<skos:altLabel xml:lang="en-us">{"id": "5e328859-7a75-494f-948d-730169def957", "value": "Test Alt"}</skos:altLabel>
<skos:scopeNote xml:lang="en-us">{"id": "d91df30b-3c8b-4455-93de-77ff1096cb9d", "value": "Testing collection"}</skos:scopeNote>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/86be632e-0dad-4d88-b5da-3d65875d6239"/>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/54c5c8ac-890d-4f8e-b19a-dfa2401eaea3"/>
</skos:member>
</skos:Collection>
<skos:Concept rdf:about="http://localhost:8000/86be632e-0dad-4d88-b5da-3d65875d6239">
<skos:prefLabel xml:lang="en">Concept One</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/54c5c8ac-890d-4f8e-b19a-dfa2401eaea3">
<skos:prefLabel xml:lang="en">Concept Two</skos:prefLabel>
</skos:Concept>
</rdf:RDF>"#;
/// Parses `TEST_ARCHES_COLLECTION` and checks the node type, the en-us
/// preferred/alternative labels and scope note (including the label id taken
/// from the JSON payload), and that the two member concepts have no children.
#[test]
fn test_parse_arches_collection() {
    let parsed = parse_skos_to_collections(TEST_ARCHES_COLLECTION, "http://localhost:8000/");
    assert!(parsed.is_ok());
    let parsed = parsed.unwrap();
    assert_eq!(parsed.len(), 1);

    let arches = &parsed[0];
    assert_eq!(arches.node_type, SkosNodeType::Collection);

    assert!(arches.pref_labels.contains_key("en-us"));
    let pref = &arches.pref_labels["en-us"];
    assert_eq!(pref.value, "Test Collection");
    assert_eq!(pref.id, "956f8913-f728-4f82-b3ae-3aaf4ce7891a");

    assert!(arches.alt_labels.contains_key("en-us"));
    assert_eq!(arches.alt_labels["en-us"][0].value, "Test Alt");

    assert!(arches.scope_notes.contains_key("en-us"));
    assert_eq!(arches.scope_notes["en-us"].value, "Testing collection");

    assert_eq!(arches.concepts.len(), 2);
    assert!(arches
        .concepts
        .values()
        .all(|member| member.children.is_none()));
}
/// Serializes a `Collection`-typed node with preferred, alternative and scope
/// labels plus one member concept, and checks that the output uses
/// `skos:Collection`/`skos:member`, contains no scheme or hierarchy
/// predicates, and includes the `"id":`/`"value":` keys and the label texts.
#[test]
fn test_serialize_arches_collection() {
    // Shorthand for an owned label value.
    let value = |label_id: &str, text: &str| SkosValue {
        id: label_id.to_string(),
        value: text.to_string(),
    };

    let member = SkosConcept {
        id: "concept-1".to_string(),
        uri: Some("http://localhost:8000/concept-1".to_string()),
        pref_labels: HashMap::from([(
            "en".to_string(),
            value("concept-label-1", "Member Concept"),
        )]),
        source: None,
        sort_order: None,
        children: None,
    };

    let arches = SkosCollection {
        id: "collection-1".to_string(),
        uri: Some("http://localhost:8000/collection-1".to_string()),
        pref_labels: HashMap::from([(
            "en-us".to_string(),
            value("label-uuid-1", "Test Collection"),
        )]),
        alt_labels: HashMap::from([(
            "en-us".to_string(),
            vec![value("alt-uuid-1", "Alt Label")],
        )]),
        scope_notes: HashMap::from([(
            "en-us".to_string(),
            value("note-uuid-1", "A scope note"),
        )]),
        node_type: SkosNodeType::Collection,
        concepts: HashMap::from([("concept-1".to_string(), member)]),
        all_concepts: HashMap::new(),
        values: HashMap::new(),
    };

    let xml = collection_to_skos_xml(&arches, "http://localhost:8000/");

    for fragment in [
        "skos:Collection",
        "skos:member",
        "\"id\":",
        "\"value\":",
        "Test Collection",
        "Alt Label",
        "A scope note",
    ] {
        assert!(xml.contains(fragment), "missing fragment: {}", fragment);
    }
    for fragment in ["skos:ConceptScheme", "skos:narrower", "skos:broader"] {
        assert!(!xml.contains(fragment), "unexpected fragment: {}", fragment);
    }
}
/// Round-trips `TEST_ARCHES_COLLECTION` and checks that the node type, the
/// en-us preferred label (value and id) and the member count are preserved,
/// and that no member gains children.
#[test]
fn test_round_trip_arches_collection() {
    let base = "http://localhost:8000/";

    let first_pass = parse_skos_to_collections(TEST_ARCHES_COLLECTION, base);
    assert!(first_pass.is_ok());
    let first_pass = first_pass.unwrap();
    assert_eq!(first_pass.len(), 1);
    let original = &first_pass[0];
    assert_eq!(original.node_type, SkosNodeType::Collection);

    let xml = collection_to_skos_xml(original, base);

    let second_pass = parse_skos_to_collections(&xml, base);
    assert!(second_pass.is_ok());
    let second_pass = second_pass.unwrap();
    assert_eq!(second_pass.len(), 1);
    let round_tripped = &second_pass[0];
    assert_eq!(round_tripped.node_type, SkosNodeType::Collection);

    assert!(round_tripped.pref_labels.contains_key("en-us"));
    let pref = &round_tripped.pref_labels["en-us"];
    assert_eq!(pref.value, "Test Collection");
    assert_eq!(pref.id, "956f8913-f728-4f82-b3ae-3aaf4ce7891a");

    assert_eq!(round_tripped.concepts.len(), original.concepts.len());
    assert!(round_tripped
        .concepts
        .values()
        .all(|member| member.children.is_none()));
}
/// A label given as a JSON object yields the id and value stored in it.
#[test]
fn test_parse_arches_label_json() {
    let raw_label = r#"{"id": "uuid-123", "value": "Label Text"}"#;

    let (label_id, label_text) = parse_arches_label(raw_label, "fallback", "en");

    assert_eq!(label_id, "uuid-123");
    assert_eq!(label_text, "Label Text");
}
/// A plain-text label is returned verbatim as the value, and the returned id
/// is not the literal "fallback" string passed as the second argument.
#[test]
fn test_parse_arches_label_plain() {
    let (label_id, label_text) = parse_arches_label("Plain Label", "fallback", "en");

    assert_ne!(label_id, "fallback");
    assert_eq!(label_text, "Plain Label");
}
// RDF/XML fixture: a single `skos:Collection` with a JSON-encoded English
// prefLabel that lists three member concepts. At the top level, the parent
// concept points to the two children via `skos:narrower`, and each child
// points back to the parent via `skos:broader`.
const TEST_HIERARCHICAL_COLLECTION: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/">
<skos:Collection rdf:about="http://localhost:8000/hierarchical-collection">
<skos:prefLabel xml:lang="en">{"id": "coll-label-1", "value": "Hierarchical Collection"}</skos:prefLabel>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/parent-concept"/>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/child-concept-1"/>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/child-concept-2"/>
</skos:member>
</skos:Collection>
<skos:Concept rdf:about="http://localhost:8000/parent-concept">
<skos:prefLabel xml:lang="en">Parent</skos:prefLabel>
<skos:narrower rdf:resource="http://localhost:8000/child-concept-1"/>
<skos:narrower rdf:resource="http://localhost:8000/child-concept-2"/>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/child-concept-1">
<skos:prefLabel xml:lang="en">Child One</skos:prefLabel>
<skos:broader rdf:resource="http://localhost:8000/parent-concept"/>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/child-concept-2">
<skos:prefLabel xml:lang="en">Child Two</skos:prefLabel>
<skos:broader rdf:resource="http://localhost:8000/parent-concept"/>
</skos:Concept>
</rdf:RDF>"#;
/// Parses `TEST_HIERARCHICAL_COLLECTION` and checks that the collection holds
/// a single top-level concept "Parent" whose children are "Child One" and
/// "Child Two".
#[test]
fn test_parse_hierarchical_collection() {
    let parsed =
        parse_skos_to_collections(TEST_HIERARCHICAL_COLLECTION, "http://localhost:8000/");
    assert!(parsed.is_ok());
    let parsed = parsed.unwrap();
    assert_eq!(parsed.len(), 1);

    let hierarchy = &parsed[0];
    assert_eq!(hierarchy.node_type, SkosNodeType::Collection);
    assert_eq!(hierarchy.concepts.len(), 1);

    let root = hierarchy.concepts.values().next().unwrap();
    assert_eq!(root.pref_labels["en"].value, "Parent");
    assert!(root.children.is_some());

    let kids = root.children.as_ref().unwrap();
    assert_eq!(kids.len(), 2);

    // Every child is expected to carry an English label; indexing panics otherwise.
    let mut kid_labels: Vec<&str> = Vec::new();
    for kid in kids.iter() {
        kid_labels.push(kid.pref_labels["en"].value.as_str());
    }
    for wanted in ["Child One", "Child Two"] {
        assert!(kid_labels.contains(&wanted));
    }
}
/// Serializes a `Collection`-typed node whose single top-level concept has two
/// children and checks that all three concepts are listed as members and that
/// the narrower/broader relationships and all labels are written out.
#[test]
fn test_serialize_hierarchical_collection_with_children() {
    // Builds a label map with a single English entry.
    let english = |label_id: &str, text: &str| {
        HashMap::from([(
            "en".to_string(),
            SkosValue {
                id: label_id.to_string(),
                value: text.to_string(),
            },
        )])
    };
    // Builds a concept without uri, source or sort order.
    let concept = |concept_id: &str,
                   label_id: &str,
                   text: &str,
                   children: Option<Vec<SkosConcept>>| SkosConcept {
        id: concept_id.to_string(),
        uri: None,
        pref_labels: english(label_id, text),
        source: None,
        sort_order: None,
        children,
    };

    let first_child = concept("child-1", "child1-label", "Child One", None);
    let second_child = concept("child-2", "child2-label", "Child Two", None);
    let root = concept(
        "parent",
        "parent-label",
        "Parent",
        Some(vec![first_child, second_child]),
    );

    let hierarchy = SkosCollection {
        id: "coll-1".to_string(),
        uri: None,
        pref_labels: english("label-1", "Hierarchical Collection"),
        alt_labels: HashMap::new(),
        scope_notes: HashMap::new(),
        node_type: SkosNodeType::Collection,
        concepts: HashMap::from([("parent".to_string(), root)]),
        all_concepts: HashMap::new(),
        values: HashMap::new(),
    };

    let xml = collection_to_skos_xml(&hierarchy, "http://localhost:8000/");

    assert!(xml.contains("skos:Collection"));
    assert_eq!(
        xml.matches("<skos:member>").count(),
        3,
        "Should list all concepts including children as members"
    );
    assert!(
        xml.contains("skos:narrower"),
        "Parent should have narrower relationships"
    );
    assert!(
        xml.contains("skos:broader"),
        "Children should have broader relationships"
    );
    for label in ["Parent", "Child One", "Child Two"] {
        assert!(xml.contains(label), "missing fragment: {}", label);
    }
}
/// Round-trips `TEST_HIERARCHICAL_COLLECTION` and checks that the single
/// top-level concept still has its two children, with their English labels,
/// after serialization and re-parsing.
#[test]
fn test_round_trip_hierarchical_collection() {
    let base = "http://localhost:8000/";

    let first_pass = parse_skos_to_collections(TEST_HIERARCHICAL_COLLECTION, base);
    assert!(first_pass.is_ok());
    let first_pass = first_pass.unwrap();
    assert_eq!(first_pass.len(), 1);

    let original = &first_pass[0];
    assert_eq!(original.node_type, SkosNodeType::Collection);
    assert_eq!(original.concepts.len(), 1);
    let original_root = original.concepts.values().next().unwrap();
    assert!(original_root.children.is_some());
    assert_eq!(original_root.children.as_ref().unwrap().len(), 2);

    let xml = collection_to_skos_xml(original, base);

    let second_pass = parse_skos_to_collections(&xml, base);
    assert!(second_pass.is_ok());
    let second_pass = second_pass.unwrap();
    assert_eq!(second_pass.len(), 1);

    let round_tripped = &second_pass[0];
    assert_eq!(round_tripped.node_type, SkosNodeType::Collection);
    assert_eq!(round_tripped.concepts.len(), 1);

    let rt_root = round_tripped.concepts.values().next().unwrap();
    assert!(
        rt_root.children.is_some(),
        "Hierarchy should be preserved after round-trip"
    );
    let rt_kids = rt_root.children.as_ref().unwrap();
    assert_eq!(rt_kids.len(), 2);

    // Every child is expected to carry an English label; indexing panics otherwise.
    let mut kid_labels: Vec<&str> = Vec::new();
    for kid in rt_kids.iter() {
        kid_labels.push(kid.pref_labels["en"].value.as_str());
    }
    for wanted in ["Child One", "Child Two"] {
        assert!(kid_labels.contains(&wanted));
    }
}
/// A collection document that only references its members by URI is parsed
/// into two concepts without English labels. After that document and a second
/// one defining the concepts are loaded into an `RdmCache` (collection first),
/// both labels must resolve through the collection id.
#[test]
fn test_cross_file_collection_concept_resolution() {
    use crate::rdm_cache::RdmCache;

    const COLLECTIONS_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Collection rdf:about="http://localhost:8000/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">
<skos:prefLabel xml:lang="en">My Test Collection</skos:prefLabel>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/11111111-1111-1111-1111-111111111111"/>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/22222222-2222-2222-2222-222222222222"/>
</skos:member>
</skos:Collection>
</rdf:RDF>"#;
    const CONCEPTS_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Concept rdf:about="http://localhost:8000/11111111-1111-1111-1111-111111111111">
<skos:prefLabel xml:lang="en">Alpha Concept</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/22222222-2222-2222-2222-222222222222">
<skos:prefLabel xml:lang="en">Beta Concept</skos:prefLabel>
</skos:Concept>
</rdf:RDF>"#;

    let base_uri = "http://localhost:8000/";

    // Part 1: the collection document on its own.
    let parsed_alone = parse_skos_to_collections(COLLECTIONS_XML, base_uri).unwrap();
    assert_eq!(parsed_alone.len(), 1, "Should find one collection");
    let stub_collection = &parsed_alone[0];
    assert_eq!(stub_collection.node_type, SkosNodeType::Collection);
    assert_eq!(
        stub_collection.concepts.len(),
        2,
        "Collection should have 2 member concepts (bare stubs)"
    );
    let any_english_label = stub_collection
        .concepts
        .values()
        .any(|member| member.pref_labels.contains_key("en"));
    assert!(
        !any_english_label,
        "Parser should produce bare concepts when definitions are in a separate file"
    );

    // Part 2: both documents loaded into the cache, collection first.
    let mut cache = RdmCache::default();
    for document in [COLLECTIONS_XML, CONCEPTS_XML] {
        let parsed = parse_skos_to_collections(document, base_uri).unwrap();
        cache.add_from_skos_collections(&parsed);
    }

    let collection_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
    let expectations = [
        (
            "11111111-1111-1111-1111-111111111111",
            "Alpha Concept",
            "Cache should resolve Alpha Concept label after loading concepts.xml",
        ),
        (
            "22222222-2222-2222-2222-222222222222",
            "Beta Concept",
            "Cache should resolve Beta Concept label after loading concepts.xml",
        ),
    ];
    for (concept_id, label, failure) in expectations {
        assert_eq!(
            cache.lookup_label(collection_id, concept_id, "en"),
            Some(label.to_string()),
            "{}",
            failure
        );
    }
}
/// Same cross-file scenario as the forward-order test, but the concept
/// definitions are loaded into the `RdmCache` before the collection document;
/// both labels must still resolve through the collection id.
#[test]
fn test_cross_file_collection_concept_resolution_reverse_order() {
    use crate::rdm_cache::RdmCache;

    const COLLECTIONS_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Collection rdf:about="http://localhost:8000/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">
<skos:prefLabel xml:lang="en">My Test Collection</skos:prefLabel>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/11111111-1111-1111-1111-111111111111"/>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/22222222-2222-2222-2222-222222222222"/>
</skos:member>
</skos:Collection>
</rdf:RDF>"#;
    const CONCEPTS_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Concept rdf:about="http://localhost:8000/11111111-1111-1111-1111-111111111111">
<skos:prefLabel xml:lang="en">Alpha Concept</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/22222222-2222-2222-2222-222222222222">
<skos:prefLabel xml:lang="en">Beta Concept</skos:prefLabel>
</skos:Concept>
</rdf:RDF>"#;

    let base_uri = "http://localhost:8000/";

    // Concepts go in first, the collection that references them second.
    let mut cache = RdmCache::default();
    for document in [CONCEPTS_XML, COLLECTIONS_XML] {
        let parsed = parse_skos_to_collections(document, base_uri).unwrap();
        cache.add_from_skos_collections(&parsed);
    }

    let collection_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
    let expectations = [
        (
            "11111111-1111-1111-1111-111111111111",
            "Alpha Concept",
            "Cache should resolve Alpha Concept label when concepts loaded first",
        ),
        (
            "22222222-2222-2222-2222-222222222222",
            "Beta Concept",
            "Cache should resolve Beta Concept label when concepts loaded first",
        ),
    ];
    for (concept_id, label, failure) in expectations {
        assert_eq!(
            cache.lookup_label(collection_id, concept_id, "en"),
            Some(label.to_string()),
            "{}",
            failure
        );
    }
}
/// Parses a collection whose member concepts themselves list `skos:member`
/// references and checks that those references become children: "Parent A"
/// gets "Child A1"/"Child A2" and "Parent B" gets "Child B1".
#[test]
fn test_collection_with_nested_member_hierarchy() {
    const NESTED_MEMBER_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Collection rdf:about="http://localhost:8000/11111111-aaaa-bbbb-cccc-000000000000">
<skos:prefLabel xml:lang="en">Nested Member Collection</skos:prefLabel>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/22222222-aaaa-bbbb-cccc-000000000001">
<skos:member rdf:resource="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000001"/>
<skos:member rdf:resource="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000002"/>
</skos:Concept>
</skos:member>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/22222222-aaaa-bbbb-cccc-000000000002">
<skos:member rdf:resource="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000003"/>
</skos:Concept>
</skos:member>
</skos:Collection>
<skos:Concept rdf:about="http://localhost:8000/22222222-aaaa-bbbb-cccc-000000000001">
<skos:prefLabel xml:lang="en">Parent A</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/22222222-aaaa-bbbb-cccc-000000000002">
<skos:prefLabel xml:lang="en">Parent B</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000001">
<skos:prefLabel xml:lang="en">Child A1</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000002">
<skos:prefLabel xml:lang="en">Child A2</skos:prefLabel>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/33333333-aaaa-bbbb-cccc-000000000003">
<skos:prefLabel xml:lang="en">Child B1</skos:prefLabel>
</skos:Concept>
</rdf:RDF>"#;

    let parsed = parse_skos_to_collections(NESTED_MEMBER_XML, "http://localhost:8000/");
    assert!(parsed.is_ok());
    let parsed = parsed.unwrap();
    assert_eq!(parsed.len(), 1);

    let nested = &parsed[0];
    assert_eq!(
        nested.concepts.len(),
        2,
        "Collection should have 2 top-level concepts"
    );

    // Parent A and its two children.
    let parent_a = nested
        .concepts
        .values()
        .find(|c| matches!(c.pref_labels.get("en"), Some(label) if label.value == "Parent A"))
        .expect("Parent A should be in the collection");
    assert!(parent_a.children.is_some(), "Parent A should have children");
    let kids_a = parent_a.children.as_ref().unwrap();
    assert_eq!(kids_a.len(), 2, "Parent A should have 2 children");
    let mut kid_labels_a: Vec<&str> = Vec::new();
    for kid in kids_a.iter() {
        kid_labels_a.push(kid.pref_labels["en"].value.as_str());
    }
    assert!(kid_labels_a.contains(&"Child A1"));
    assert!(kid_labels_a.contains(&"Child A2"));

    // Parent B and its single child.
    let parent_b = nested
        .concepts
        .values()
        .find(|c| matches!(c.pref_labels.get("en"), Some(label) if label.value == "Parent B"))
        .expect("Parent B should be in the collection");
    assert!(parent_b.children.is_some(), "Parent B should have children");
    let kids_b = parent_b.children.as_ref().unwrap();
    assert_eq!(kids_b.len(), 1);
    assert_eq!(kids_b[0].pref_labels["en"].value, "Child B1");
}
/// Loads a concept-scheme document (which holds the labels) and then a
/// collection document (which nests members by reference) into an `RdmCache`,
/// and checks that the top-level member and both nested members can be looked
/// up by label through the collection id.
#[test]
fn test_nested_member_concepts_findable_by_label() {
    use crate::rdm_cache::RdmCache;

    const CONCEPT_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dcterms="http://purl.org/dc/terms/">
<skos:ConceptScheme rdf:about="http://localhost:8000/99999999-0000-0000-0000-000000000001">
<skos:hasTopConcept>
<skos:Concept rdf:about="http://localhost:8000/aaaaaaaa-0000-0000-0000-000000000001">
<skos:prefLabel xml:lang="en">Category A</skos:prefLabel>
<skos:narrower rdf:resource="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000001"/>
<skos:narrower rdf:resource="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000002"/>
<skos:inScheme rdf:resource="http://localhost:8000/99999999-0000-0000-0000-000000000001"/>
</skos:Concept>
</skos:hasTopConcept>
</skos:ConceptScheme>
<skos:Concept rdf:about="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000001">
<skos:prefLabel xml:lang="en">Item One</skos:prefLabel>
<skos:inScheme rdf:resource="http://localhost:8000/99999999-0000-0000-0000-000000000001"/>
</skos:Concept>
<skos:Concept rdf:about="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000002">
<skos:prefLabel xml:lang="en">Item Two</skos:prefLabel>
<skos:inScheme rdf:resource="http://localhost:8000/99999999-0000-0000-0000-000000000001"/>
</skos:Concept>
</rdf:RDF>"#;
    const COLLECTION_XML: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:skos="http://www.w3.org/2004/02/skos/core#">
<skos:Collection rdf:about="http://localhost:8000/cccccccc-0000-0000-0000-000000000001">
<skos:prefLabel xml:lang="en">My Collection</skos:prefLabel>
<skos:member>
<skos:Concept rdf:about="http://localhost:8000/aaaaaaaa-0000-0000-0000-000000000001">
<skos:member rdf:resource="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000001"/>
<skos:member rdf:resource="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000002"/>
</skos:Concept>
</skos:member>
</skos:Collection>
<skos:Concept rdf:about="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000001"/>
<skos:Concept rdf:about="http://localhost:8000/bbbbbbbb-0000-0000-0000-000000000002"/>
</rdf:RDF>"#;

    let base_uri = "http://localhost:8000/";

    // The scheme document goes in first, the collection second.
    let mut cache = RdmCache::default();
    for document in [CONCEPT_XML, COLLECTION_XML] {
        let parsed = parse_skos_to_collections(document, base_uri).unwrap();
        cache.add_from_skos_collections(&parsed);
    }

    let collection_id = "cccccccc-0000-0000-0000-000000000001";
    let expectations = [
        (
            "Category A",
            "Category A should be findable by label in the collection",
        ),
        (
            "Item One",
            "Item One (nested member) should be findable by label in the collection",
        ),
        (
            "Item Two",
            "Item Two (nested member) should be findable by label in the collection",
        ),
    ];
    for (label, failure) in expectations {
        let hit = cache.lookup_by_label(collection_id, label);
        assert!(hit.is_some(), "{}", failure);
    }
}
}