1use crate::graph::Graph;
2use crate::parser::ast::TargetFormat;
3use crate::projection::{find_projection_override, ProjectionRegistry};
4use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
5use serde::{Deserialize, Serialize};
6use std::str::FromStr;
7
/// Errors reported by the knowledge-graph conversion and validation routines.
#[derive(Debug, Clone)]
pub enum KgError {
    /// A conversion or validation step failed; the payload is a human-readable message.
    SerializationError(String),
    /// The requested format is not supported; the payload is displayed after
    /// "Unsupported format: ".
    UnsupportedFormat(String),
}
13
14impl std::fmt::Display for KgError {
15 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16 match self {
17 KgError::SerializationError(msg) => {
18 write!(f, "Knowledge graph serialization error: {}", msg)
19 }
20 KgError::UnsupportedFormat(fmt) => write!(f, "Unsupported format: {}", fmt),
21 }
22 }
23}
24
// Marks `KgError` as a standard error type; the trait's default methods are used unchanged.
impl std::error::Error for KgError {}
26
/// One subject–predicate–object statement, each term kept in its textual form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Triple {
    /// Subject term, e.g. a prefixed name such as `sea:...`.
    pub subject: String,
    /// Predicate term, e.g. `rdf:type` or `rdfs:label`.
    pub predicate: String,
    /// Object term: either a resource name or a quoted literal
    /// (optionally followed by a `^^datatype` suffix).
    pub object: String,
}
33
/// A SHACL node shape: a target class plus the property constraints applied to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShaclShape {
    /// Class whose instances the shape constrains (written as `sh:targetClass`).
    pub target_class: String,
    /// Property constraints, each written as one `sh:property` entry.
    pub properties: Vec<ShaclProperty>,
}
39
/// A single SHACL property constraint; every field except `path` is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShaclProperty {
    /// Constrained predicate (written as `sh:path`).
    pub path: String,
    /// Expected literal datatype (written as `sh:datatype`), if any.
    pub datatype: Option<String>,
    /// Minimum number of values (written as `sh:minCount`), if any.
    pub min_count: Option<u32>,
    /// Maximum number of values (written as `sh:maxCount`), if any.
    pub max_count: Option<u32>,
    /// Exclusive lower bound, kept as text (written as `sh:minExclusive`), if any.
    pub min_exclusive: Option<String>,
}
48
/// An in-memory knowledge graph: a list of triples plus the SHACL shapes that constrain them.
#[derive(Debug, Clone)]
pub struct KnowledgeGraph {
    /// Instance statements, in insertion order.
    pub triples: Vec<Triple>,
    /// SHACL node shapes attached to the graph.
    pub shapes: Vec<ShaclShape>,
}
54
55const URI_ENCODE_SET: &AsciiSet = &CONTROLS
56 .add(b' ')
57 .add(b':')
58 .add(b'/')
59 .add(b'#')
60 .add(b'?')
61 .add(b'&')
62 .add(b'=')
63 .add(b'+')
64 .add(b'$')
65 .add(b',')
66 .add(b'@')
67 .add(b';');
68
/// Splits one statement line into whitespace-separated tokens.
///
/// Whitespace inside a double-quoted literal does not split the token, and a
/// backslash inside a literal protects the following character (so `\"` does not
/// close the literal). Text directly following the closing quote (for example a
/// `^^datatype` suffix) stays attached to the same token.
fn tokenize_triple_line(line: &str) -> Vec<String> {
    #[derive(Clone, Copy)]
    enum Mode {
        /// Outside any quoted literal.
        Plain,
        /// Inside a quoted literal.
        Literal,
        /// Inside a quoted literal, right after a backslash.
        LiteralEscaped,
    }

    let mut out: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut mode = Mode::Plain;

    for ch in line.chars() {
        mode = match mode {
            Mode::LiteralEscaped => {
                current.push(ch);
                Mode::Literal
            }
            Mode::Literal => {
                current.push(ch);
                match ch {
                    '\\' => Mode::LiteralEscaped,
                    '"' => Mode::Plain,
                    _ => Mode::Literal,
                }
            }
            Mode::Plain if ch == '"' => {
                current.push(ch);
                Mode::Literal
            }
            Mode::Plain if ch.is_whitespace() => {
                if !current.is_empty() {
                    out.push(std::mem::take(&mut current));
                }
                Mode::Plain
            }
            Mode::Plain => {
                current.push(ch);
                Mode::Plain
            }
        };
    }

    // Flush the trailing token, if the line did not end in whitespace.
    if !current.is_empty() {
        out.push(current);
    }

    out
}
111
/// Returns the part of `token` after its last `#` or `:`.
///
/// Surrounding whitespace and any leading/trailing `<` / `>` characters are removed
/// first. When the token contains neither separator, the stripped token itself is
/// returned.
fn extract_local_name(token: &str) -> String {
    let bare = token.trim().trim_matches(|c: char| matches!(c, '<' | '>'));
    match bare.rfind(|c: char| c == '#' || c == ':') {
        // Both separators are single-byte ASCII, so `pos + 1` is a valid boundary.
        Some(pos) => bare[pos + 1..].to_string(),
        None => bare.to_string(),
    }
}
121
/// Extracts the lexical form from a literal token.
///
/// * For a quoted literal (`"..."`, optionally followed by `^^type` or `@lang`), returns
///   the text between the opening quote and the first *unescaped* closing quote. Escape
///   sequences are returned as written (they are not decoded).
/// * For an unquoted token containing `^^`, returns the trimmed text before the `^^`.
/// * Otherwise returns the trimmed token.
///
/// If a quoted literal has no unescaped closing quote, everything after the opening
/// quote is returned.
fn extract_literal_value(token: &str) -> String {
    let trimmed = token.trim();

    if let Some(body) = trimmed.strip_prefix('"') {
        // Scan for the closing quote, skipping any character protected by a backslash
        // so that `\"` inside the literal does not end it prematurely.
        let mut escaped = false;
        for (idx, ch) in body.char_indices() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                return body[..idx].to_string();
            }
        }
        return body.to_string();
    }

    match trimmed.find("^^") {
        Some(idx) => trimmed[..idx].trim().to_string(),
        None => trimmed.to_string(),
    }
}
136
137impl KnowledgeGraph {
138 pub fn new() -> Self {
139 Self {
140 triples: Vec::new(),
141 shapes: Vec::new(),
142 }
143 }
144
    /// Builds a knowledge graph from a domain [`Graph`].
    ///
    /// Triples are emitted, in this order, for every entity, role, resource, pattern,
    /// relation and flow of `graph`. Names are percent-encoded (via `uri_encode`) when
    /// used in `sea:` identifiers and escaped (via `escape_turtle_literal`) when used
    /// as quoted literals. Two fixed SHACL shapes (for `sea:Flow` and `sea:Entity`) are
    /// appended at the end.
    ///
    /// # Errors
    ///
    /// Returns [`KgError::SerializationError`] when the textual form of a flow quantity
    /// is rejected by `validate_turtle_decimal`.
    pub fn from_graph(graph: &Graph) -> Result<Self, KgError> {
        let mut kg = Self::new();

        // Look up projections registered for the KG target; only the first one (if any)
        // is consulted below.
        let registry = ProjectionRegistry::new(graph);
        let projections = registry.find_projections_for_target(&TargetFormat::Kg);
        let projection = projections.first().copied();

        for entity in graph.all_entities() {
            // Defaults, possibly replaced by a projection override for this entity.
            let mut rdf_class = "sea:Entity".to_string();
            let mut prop_map = std::collections::HashMap::new();

            if let Some(proj) = projection {
                if let Some(rule) = find_projection_override(proj, "Entity", entity.name()) {
                    // Optional class override; values failing `is_valid_rdf_term` are
                    // ignored with a warning on stderr.
                    if let Some(cls) = rule.fields.get("rdf_class").and_then(|v| v.as_str()) {
                        if Self::is_valid_rdf_term(cls) {
                            rdf_class = cls.to_string();
                        } else {
                            eprintln!("Warning: Invalid RDF term for rdf_class, skipping: {}", cls);
                        }
                    }
                    // Optional predicate overrides, keyed by property name; non-string
                    // values are silently skipped, invalid terms produce a warning.
                    if let Some(props) = rule.fields.get("properties").and_then(|v| v.as_object()) {
                        for (k, v) in props {
                            if let Some(v_str) = v.as_str() {
                                if Self::is_valid_rdf_term(v_str) {
                                    prop_map.insert(k.clone(), v_str.to_string());
                                } else {
                                    eprintln!(
                                        "Warning: Invalid RDF term for property '{}', skipping: {}",
                                        k, v_str
                                    );
                                }
                            }
                        }
                    }
                }
            }

            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(entity.name())),
                predicate: "rdf:type".to_string(),
                object: rdf_class,
            });

            // Label predicate: overridden by the "name" property mapping when present.
            let label_pred = prop_map
                .get("name")
                .cloned()
                .unwrap_or_else(|| "rdfs:label".to_string());
            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(entity.name())),
                predicate: label_pred,
                object: format!("\"{}\"", Self::escape_turtle_literal(entity.name())),
            });

            // Namespace predicate: overridden by the "namespace" property mapping when present.
            let ns_pred = prop_map
                .get("namespace")
                .cloned()
                .unwrap_or_else(|| "sea:namespace".to_string());
            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(entity.name())),
                predicate: ns_pred,
                object: format!("\"{}\"", Self::escape_turtle_literal(entity.namespace())),
            });
        }

        // Roles: type, label and namespace.
        for role in graph.all_roles() {
            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(role.name())),
                predicate: "rdf:type".to_string(),
                object: "sea:Role".to_string(),
            });

            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(role.name())),
                predicate: "rdfs:label".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(role.name())),
            });

            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(role.name())),
                predicate: "sea:namespace".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(role.namespace())),
            });
        }

        // Resources: type, label and unit (the unit is written in its string form).
        for resource in graph.all_resources() {
            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(resource.name())),
                predicate: "rdf:type".to_string(),
                object: "sea:Resource".to_string(),
            });

            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(resource.name())),
                predicate: "rdfs:label".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(resource.name())),
            });

            kg.triples.push(Triple {
                subject: format!("sea:{}", Self::uri_encode(resource.name())),
                predicate: "sea:unit".to_string(),
                object: format!(
                    "\"{}\"",
                    Self::escape_turtle_literal(&resource.unit().to_string())
                ),
            });
        }

        // Patterns: subjects carry a `pattern_` prefix; type, label, namespace and regex.
        for pattern in graph.all_patterns() {
            let subject = format!("sea:pattern_{}", Self::uri_encode(pattern.name()));

            kg.triples.push(Triple {
                subject: subject.clone(),
                predicate: "rdf:type".to_string(),
                object: "sea:Pattern".to_string(),
            });

            kg.triples.push(Triple {
                subject: subject.clone(),
                predicate: "rdfs:label".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(pattern.name())),
            });

            kg.triples.push(Triple {
                subject: subject.clone(),
                predicate: "sea:namespace".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(pattern.namespace())),
            });

            kg.triples.push(Triple {
                subject,
                predicate: "sea:regex".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(pattern.regex())),
            });
        }

        // Relations: type, label, optional subject/object role links, predicate text,
        // and an optional `sea:via` link.
        for relation in graph.all_relations() {
            let relation_subject = format!("sea:{}", Self::uri_encode(relation.name()));

            kg.triples.push(Triple {
                subject: relation_subject.clone(),
                predicate: "rdf:type".to_string(),
                object: "sea:Relation".to_string(),
            });

            kg.triples.push(Triple {
                subject: relation_subject.clone(),
                predicate: "rdfs:label".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(relation.name())),
            });

            // Role links are emitted only when the role can be resolved in the graph.
            if let Some(subject_role) = graph.get_role(relation.subject_role()) {
                kg.triples.push(Triple {
                    subject: relation_subject.clone(),
                    predicate: "sea:subjectRole".to_string(),
                    object: format!("sea:{}", Self::uri_encode(subject_role.name())),
                });
            }

            if let Some(object_role) = graph.get_role(relation.object_role()) {
                kg.triples.push(Triple {
                    subject: relation_subject.clone(),
                    predicate: "sea:objectRole".to_string(),
                    object: format!("sea:{}", Self::uri_encode(object_role.name())),
                });
            }

            kg.triples.push(Triple {
                subject: relation_subject.clone(),
                predicate: "sea:predicate".to_string(),
                object: format!("\"{}\"", Self::escape_turtle_literal(relation.predicate())),
            });

            if let Some(flow_id) = relation.via_flow() {
                if let Some(resource) = graph.get_resource(flow_id) {
                    kg.triples.push(Triple {
                        subject: relation_subject.clone(),
                        predicate: "sea:via".to_string(),
                        object: format!("sea:{}", Self::uri_encode(resource.name())),
                    });
                } else {
                    // The id does not resolve to a resource: fall back to the raw id,
                    // written as a quoted literal (not passed through the literal escaper).
                    kg.triples.push(Triple {
                        subject: relation_subject.clone(),
                        predicate: "sea:via".to_string(),
                        object: format!("\"{}\"", flow_id),
                    });
                }
            }
        }

        // Flows: subjects are `sea:flow_<id>`; endpoint and resource links are emitted
        // only when they resolve in the graph.
        for flow in graph.all_flows() {
            let flow_id = format!("sea:flow_{}", Self::uri_encode(&flow.id().to_string()));

            kg.triples.push(Triple {
                subject: flow_id.clone(),
                predicate: "rdf:type".to_string(),
                object: "sea:Flow".to_string(),
            });

            if let Some(from_entity) = graph.get_entity(flow.from_id()) {
                kg.triples.push(Triple {
                    subject: flow_id.clone(),
                    predicate: "sea:from".to_string(),
                    object: format!("sea:{}", Self::uri_encode(from_entity.name())),
                });
            }

            if let Some(to_entity) = graph.get_entity(flow.to_id()) {
                kg.triples.push(Triple {
                    subject: flow_id.clone(),
                    predicate: "sea:to".to_string(),
                    object: format!("sea:{}", Self::uri_encode(to_entity.name())),
                });
            }

            if let Some(resource) = graph.get_resource(flow.resource_id()) {
                kg.triples.push(Triple {
                    subject: flow_id.clone(),
                    predicate: "sea:hasResource".to_string(),
                    object: format!("sea:{}", Self::uri_encode(resource.name())),
                });
            }

            // The quantity is embedded verbatim inside a quoted literal, so its text is
            // validated first; a rejected value aborts the whole conversion.
            let quantity_str = flow.quantity().to_string();
            Self::validate_turtle_decimal(&quantity_str).map_err(|e| {
                KgError::SerializationError(format!("Invalid quantity format: {}", e))
            })?;

            kg.triples.push(Triple {
                subject: flow_id.clone(),
                predicate: "sea:quantity".to_string(),
                object: format!("\"{}\"^^xsd:decimal", quantity_str),
            });
        }

        // Fixed shape for flows: quantity is an xsd:decimal greater than 0, and
        // hasResource / from / to each occur exactly once.
        kg.shapes.push(ShaclShape {
            target_class: "sea:Flow".to_string(),
            properties: vec![
                ShaclProperty {
                    path: "sea:quantity".to_string(),
                    datatype: Some("xsd:decimal".to_string()),
                    min_count: None,
                    max_count: None,
                    min_exclusive: Some("0".to_string()),
                },
                ShaclProperty {
                    path: "sea:hasResource".to_string(),
                    datatype: None,
                    min_count: Some(1),
                    max_count: Some(1),
                    min_exclusive: None,
                },
                ShaclProperty {
                    path: "sea:from".to_string(),
                    datatype: None,
                    min_count: Some(1),
                    max_count: Some(1),
                    min_exclusive: None,
                },
                ShaclProperty {
                    path: "sea:to".to_string(),
                    datatype: None,
                    min_count: Some(1),
                    max_count: Some(1),
                    min_exclusive: None,
                },
            ],
        });

        // Fixed shape for entities: exactly one xsd:string rdfs:label.
        kg.shapes.push(ShaclShape {
            target_class: "sea:Entity".to_string(),
            properties: vec![ShaclProperty {
                path: "rdfs:label".to_string(),
                datatype: Some("xsd:string".to_string()),
                min_count: Some(1),
                max_count: Some(1),
                min_exclusive: None,
            }],
        });

        Ok(kg)
    }
427
428 pub fn to_turtle(&self) -> String {
429 let mut turtle = String::new();
430
431 turtle.push_str("@prefix sea: <http://domainforge.ai/sea#> .\n");
432 turtle.push_str("@prefix owl: <http://www.w3.org/2002/07/owl#> .\n");
433 turtle.push_str("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n");
434 turtle.push_str("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n");
435 turtle.push_str("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n");
436 turtle.push_str("@prefix sh: <http://www.w3.org/ns/shacl#> .\n");
437 turtle.push('\n');
438
439 turtle.push_str("# Ontology\n");
440 turtle.push_str("sea:Entity a owl:Class ;\n");
441 turtle.push_str(" rdfs:label \"Entity\" ;\n");
442 turtle.push_str(
443 " rdfs:comment \"Business actor, location, or organizational unit\" .\n\n",
444 );
445
446 turtle.push_str("sea:Resource a owl:Class ;\n");
447 turtle.push_str(" rdfs:label \"Resource\" ;\n");
448 turtle.push_str(" rdfs:comment \"Quantifiable subject of value\" .\n\n");
449
450 turtle.push_str("sea:Flow a owl:Class ;\n");
451 turtle.push_str(" rdfs:label \"Flow\" ;\n");
452 turtle.push_str(" rdfs:comment \"Transfer of resource between entities\" .\n\n");
453
454 turtle.push_str("sea:hasResource a owl:ObjectProperty ;\n");
455 turtle.push_str(" rdfs:domain sea:Flow ;\n");
456 turtle.push_str(" rdfs:range sea:Resource .\n\n");
457
458 turtle.push_str("sea:from a owl:ObjectProperty ;\n");
459 turtle.push_str(" rdfs:domain sea:Flow ;\n");
460 turtle.push_str(" rdfs:range sea:Entity .\n\n");
461
462 turtle.push_str("sea:to a owl:ObjectProperty ;\n");
463 turtle.push_str(" rdfs:domain sea:Flow ;\n");
464 turtle.push_str(" rdfs:range sea:Entity .\n\n");
465
466 turtle.push_str("# Instances\n");
467 for triple in &self.triples {
468 turtle.push_str(&format!(
469 "{} {} {} .\n",
470 triple.subject, triple.predicate, triple.object
471 ));
472 }
473
474 turtle.push_str("\n# SHACL Shapes\n");
475 for shape in &self.shapes {
476 turtle.push_str(&format!(
477 "sea:{}Shape a sh:NodeShape ;\n",
478 shape.target_class.replace("sea:", "")
479 ));
480 turtle.push_str(&format!(" sh:targetClass {} ;\n", shape.target_class));
481
482 for (i, prop) in shape.properties.iter().enumerate() {
483 turtle.push_str(" sh:property [\n");
484 turtle.push_str(&format!(" sh:path {} ;\n", prop.path));
485
486 if let Some(dt) = &prop.datatype {
487 turtle.push_str(&format!(" sh:datatype {} ;\n", dt));
488 }
489 if let Some(min) = prop.min_count {
490 turtle.push_str(&format!(" sh:minCount {} ;\n", min));
491 }
492 if let Some(max) = prop.max_count {
493 turtle.push_str(&format!(" sh:maxCount {} ;\n", max));
494 }
495 if let Some(min_ex) = &prop.min_exclusive {
496 turtle.push_str(&format!(" sh:minExclusive {} ;\n", min_ex));
497 }
498
499 if i < shape.properties.len() - 1 {
500 turtle.push_str(" ] ;\n");
501 } else {
502 turtle.push_str(" ] .\n");
503 }
504 }
505 turtle.push('\n');
506 }
507
508 turtle
509 }
510
511 #[allow(clippy::while_let_on_iterator)]
514 pub fn from_turtle(turtle: &str) -> Result<Self, KgError> {
515 let mut kg = Self::new();
516 for line in turtle.lines() {
517 let trimmed = line.trim();
518 if trimmed.is_empty() || trimmed.starts_with('@') || trimmed.starts_with('#') {
519 continue;
520 }
521 let triple_line = if let Some(stripped) = trimmed.strip_suffix('.') {
522 stripped.trim_end()
523 } else {
524 trimmed
525 };
526 let tokens = tokenize_triple_line(triple_line);
527 if tokens.len() != 3 {
528 continue;
529 }
530 let subject = &tokens[0];
531 let predicate = &tokens[1];
532 let object = &tokens[2];
533 let norm_s = Self::shorten_token(subject);
534 let norm_p = Self::shorten_token(predicate);
535 let norm_o = Self::shorten_token(object);
536 kg.triples.push(Triple {
537 subject: norm_s,
538 predicate: norm_p,
539 object: norm_o,
540 });
541 }
542 let mut lines_iter = turtle.lines();
545 while let Some(line) = lines_iter.next() {
546 let l = line.trim();
547 if l.contains("a sh:NodeShape") {
548 let mut block = l.to_string();
550 if !l.ends_with('.') {
551 while let Some(next_line) = lines_iter.next() {
552 block.push(' ');
553 block.push_str(next_line.trim());
554 if next_line.trim().ends_with('.') {
555 break;
556 }
557 }
558 }
559
560 let normalized_block = block
562 .replace("http://www.w3.org/ns/shacl#", "sh:")
563 .replace("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:")
564 .replace("http://www.w3.org/2000/01/rdf-schema#", "rdfs:")
565 .replace("http://www.w3.org/2001/XMLSchema#", "xsd:")
566 .replace("http://domainforge.ai/sea#", "sea:");
567
568 let target_class = if let Some(pos) = normalized_block.find("sh:targetClass") {
570 let rest = &normalized_block[pos + "sh:targetClass".len()..];
571 let tok = rest
572 .split_whitespace()
573 .next()
574 .unwrap_or("")
575 .trim()
576 .trim_end_matches(';')
577 .to_string();
578 tok
579 } else {
580 continue; };
582
583 let mut shape = ShaclShape {
584 target_class,
585 properties: Vec::new(),
586 };
587
588 let mut start_idx = 0;
590 while let Some(idx) = normalized_block[start_idx..].find("sh:property") {
591 let local_idx = start_idx + idx;
592 if let Some(open_br) = normalized_block[local_idx..].find('[') {
594 let open_idx = local_idx + open_br + 1;
595 if let Some(close_br) = normalized_block[open_idx..].find(']') {
596 let close_idx = open_idx + close_br;
597 let prop_block = &normalized_block[open_idx..close_idx];
598 let mut path = String::new();
600 let mut datatype: Option<String> = None;
601 let mut min_count: Option<u32> = None;
602 let mut max_count: Option<u32> = None;
603 let mut min_exclusive: Option<String> = None;
604
605 for tok in prop_block.split(';') {
606 let tok = tok.trim();
607 if tok.is_empty() {
608 continue;
609 }
610 if let Some(s) = tok.strip_prefix("sh:path") {
611 path = s.trim().to_string();
612 } else if let Some(s) = tok.strip_prefix("sh:datatype") {
613 datatype = Some(s.trim().to_string());
614 } else if let Some(s) = tok.strip_prefix("sh:minCount") {
615 let val = s.trim();
616 if let Ok(n) = val.parse::<u32>() {
617 min_count = Some(n);
618 }
619 } else if let Some(s) = tok.strip_prefix("sh:maxCount") {
620 let val = s.trim();
621 if let Ok(n) = val.parse::<u32>() {
622 max_count = Some(n);
623 }
624 } else if let Some(s) = tok.strip_prefix("sh:minExclusive") {
625 let val = s.trim();
626 min_exclusive = Some(val.to_string());
627 }
628 }
629
630 if !path.is_empty() {
631 shape.properties.push(ShaclProperty {
632 path,
633 datatype,
634 min_count,
635 max_count,
636 min_exclusive,
637 });
638 }
639 start_idx = close_idx + 1;
640 continue;
641 }
642 }
643 start_idx = local_idx + 1;
644 }
645
646 if !shape.properties.is_empty() {
647 kg.shapes.push(shape);
648 }
649 }
650 }
651 Ok(kg)
652 }
653
    /// Rebuilds a domain graph from the stored triples.
    ///
    /// Only entities, resources and flows are reconstructed:
    ///
    /// * a subject typed `sea:Entity` becomes an entity in the `"default"` namespace;
    /// * a subject typed `sea:Resource` becomes a resource in the `"default"` namespace
    ///   whose unit is `unit_from_string("units")`;
    /// * a subject typed `sea:Flow` becomes a flow when all of `sea:from`, `sea:to`,
    ///   `sea:hasResource` and `sea:quantity` are present; otherwise it is skipped.
    ///
    /// # Errors
    ///
    /// Returns [`KgError::SerializationError`] when adding an item to the graph fails,
    /// when a flow quantity does not parse as a decimal, or when a flow refers to an
    /// entity or resource name that is not in the graph.
    pub fn to_graph(&self) -> Result<crate::graph::Graph, KgError> {
        use crate::graph::Graph;
        use crate::primitives::{Entity, Flow, Resource};
        use crate::units::unit_from_string;
        use rust_decimal::Decimal;

        let mut graph = Graph::new();

        // First pass: entities and resources, so that flows can refer to them by name.
        for t in &self.triples {
            if t.predicate == "rdf:type" && t.object == "sea:Entity" {
                // The name is the segment after the first ':' of the subject; a subject
                // without ':' is used as-is.
                let name = t
                    .subject
                    .split(':')
                    .nth(1)
                    .unwrap_or(&t.subject)
                    .to_string();
                let entity = Entity::new_with_namespace(name.clone(), "default".to_string());
                graph
                    .add_entity(entity)
                    .map_err(|e| KgError::SerializationError(e.to_string()))?;
            }
            if t.predicate == "rdf:type" && t.object == "sea:Resource" {
                let name = t
                    .subject
                    .split(':')
                    .nth(1)
                    .unwrap_or(&t.subject)
                    .to_string();
                let resource = Resource::new_with_namespace(
                    name.clone(),
                    unit_from_string("units"),
                    "default".to_string(),
                );
                graph
                    .add_resource(resource)
                    .map_err(|e| KgError::SerializationError(e.to_string()))?;
            }
        }

        // Second pass: flows.
        for t in &self.triples {
            if t.predicate == "rdf:type" && t.object == "sea:Flow" {
                let flow_subject = t.subject.clone();
                let mut from: Option<String> = None;
                let mut to: Option<String> = None;
                let mut resource_name: Option<String> = None;
                let mut quantity: Option<Decimal> = None;

                // Gather this flow's attributes; when a predicate occurs more than once,
                // the last occurrence wins.
                for p in &self.triples {
                    if p.subject != flow_subject {
                        continue;
                    }
                    match p.predicate.as_str() {
                        "sea:from" => {
                            from = Some(extract_local_name(&p.object));
                        }
                        "sea:to" => {
                            to = Some(extract_local_name(&p.object));
                        }
                        "sea:hasResource" => {
                            resource_name = Some(extract_local_name(&p.object));
                        }
                        "sea:quantity" => {
                            let lexical = extract_literal_value(&p.object);
                            let parsed = Decimal::from_str(&lexical).map_err(|e| {
                                KgError::SerializationError(format!(
                                    "Invalid quantity literal '{}': {}",
                                    p.object, e
                                ))
                            })?;
                            quantity = Some(parsed);
                        }
                        _ => {}
                    }
                }

                // Flows missing any of the four attributes are silently skipped.
                if let (Some(from_name), Some(to_name), Some(resource_name), Some(quantity_val)) =
                    (from, to, resource_name, quantity)
                {
                    let from_id = graph.find_entity_by_name(&from_name).ok_or_else(|| {
                        KgError::SerializationError(format!("Unknown entity: {}", from_name))
                    })?;
                    let to_id = graph.find_entity_by_name(&to_name).ok_or_else(|| {
                        KgError::SerializationError(format!("Unknown entity: {}", to_name))
                    })?;
                    let res_id = graph.find_resource_by_name(&resource_name).ok_or_else(|| {
                        KgError::SerializationError(format!("Unknown resource: {}", resource_name))
                    })?;

                    let flow = Flow::new(res_id, from_id, to_id, quantity_val);
                    graph
                        .add_flow(flow)
                        .map_err(|e| KgError::SerializationError(e.to_string()))?;
                }
            }
        }

        Ok(graph)
    }
757
    /// Checks the stored triples against the stored SHACL shapes.
    ///
    /// For every subject typed with a shape's target class and every property of that
    /// shape, the following constraints are evaluated when present: `min_count`,
    /// `max_count`, `datatype`, and `min_exclusive` (the latter only when the property's
    /// datatype is `xsd:decimal`). Each failed constraint yields one error-severity
    /// violation. With no shapes stored, an empty list is returned.
    ///
    /// # Errors
    ///
    /// Returns [`KgError::SerializationError`] when a shape targets a class other than
    /// `sea:Flow`, `sea:Entity` or `sea:Resource`, or when a `min_exclusive` threshold
    /// does not parse as a decimal.
    pub fn validate_shacl(&self) -> Result<Vec<crate::policy::Violation>, KgError> {
        use crate::policy::{Severity, Violation};

        if self.shapes.is_empty() {
            return Ok(Vec::new());
        }

        let mut violations: Vec<Violation> = Vec::new();

        // Splits an object term into (lexical form, optional `^^` datatype).
        // Terms not starting with a quote are returned whole, with no datatype.
        fn parse_literal_and_datatype(obj: &str) -> (String, Option<String>) {
            let s = obj.trim();
            if !s.starts_with('"') {
                return (s.to_string(), None);
            }

            // Find the closing quote, stepping over backslash-escaped characters.
            let bytes = s.as_bytes();
            let mut end_quote = None;
            let mut i = 1;
            while i < bytes.len() {
                if bytes[i] == b'\\' {
                    // Skip the backslash and the byte it protects.
                    i += 2;
                    continue;
                }
                if bytes[i] == b'"' {
                    end_quote = Some(i);
                    break;
                }
                i += 1;
            }

            if let Some(end) = end_quote {
                let lex = &s[1..end];
                let rest = s[end + 1..].trim();
                let dtype = rest.strip_prefix("^^").map(|s| s.trim().to_string());
                (lex.to_string(), dtype)
            } else {
                // Unterminated literal: strip whatever quotes surround it.
                (s.trim_matches('"').to_string(), None)
            }
        }

        for shape in &self.shapes {
            // Only these target classes are supported; anything else aborts validation.
            match shape.target_class.as_str() {
                "sea:Flow" | "sea:Entity" | "sea:Resource" => {}
                other => {
                    return Err(KgError::SerializationError(format!(
                        "Unsupported SHACL target class: {}",
                        other
                    )))
                }
            }

            // Subjects declared as instances of the target class.
            let mut subjects: Vec<String> = Vec::new();
            for t in &self.triples {
                if t.predicate == "rdf:type" && t.object == shape.target_class {
                    subjects.push(t.subject.clone());
                }
            }

            for subject in subjects {
                for prop in &shape.properties {
                    // Number of values this subject has for the constrained predicate.
                    let count = self
                        .triples
                        .iter()
                        .filter(|tr| tr.subject == subject && tr.predicate == prop.path)
                        .count() as u32;

                    if let Some(min) = prop.min_count {
                        if count < min {
                            let msg = format!(
                                "SHACL violation: subject {} missing required property {} (min_count={} found={})",
                                subject, prop.path, min, count
                            );
                            violations.push(Violation::new(format!("SHACL:{}", shape.target_class), msg, Severity::Error).with_context(serde_json::json!({"subject": subject, "predicate": prop.path, "expected_min": min, "found": count})));
                        }
                    }

                    if let Some(max) = prop.max_count {
                        if count > max {
                            let msg = format!(
                                "SHACL violation: subject {} has {} occurrences of {} (max_count={} found={})",
                                subject, count, prop.path, max, count
                            );
                            violations.push(Violation::new(format!("SHACL:{}", shape.target_class), msg, Severity::Error).with_context(serde_json::json!({"subject": subject, "predicate": prop.path, "expected_max": max, "found": count})));
                        }
                    }

                    if let Some(dt) = &prop.datatype {
                        for tr in self
                            .triples
                            .iter()
                            .filter(|tr| tr.subject == subject && tr.predicate == prop.path)
                        {
                            let obj = tr.object.trim();
                            let (_lex, dtype_opt) = parse_literal_and_datatype(obj);
                            if let Some(dtype) = dtype_opt {
                                // Explicitly typed value: the datatype must match exactly.
                                if &dtype != dt {
                                    let msg = format!(
                                        "SHACL violation: subject {} property {} expected datatype {} but found {}",
                                        subject, prop.path, dt, dtype
                                    );
                                    violations.push(Violation::new(format!("SHACL:{}", shape.target_class), msg, Severity::Error).with_context(serde_json::json!({"subject": subject, "predicate": prop.path, "expected_type": dt, "found_type": dtype})));
                                }
                            } else if dt != "xsd:string" {
                                // A value without a datatype is accepted only where
                                // xsd:string is expected.
                                let msg = format!(
                                    "SHACL violation: subject {} property {} expected datatype {} but found untyped literal {}",
                                    subject, prop.path, dt, obj
                                );
                                violations.push(Violation::new(format!("SHACL:{}", shape.target_class), msg, Severity::Error).with_context(serde_json::json!({"subject": subject, "predicate": prop.path, "expected_type": dt, "found": obj})));
                            }
                        }
                    }

                    if let Some(min_ex) = &prop.min_exclusive {
                        // The exclusive lower bound is only evaluated for decimal properties.
                        if prop.datatype.as_deref() == Some("xsd:decimal") {
                            let threshold =
                                rust_decimal::Decimal::from_str(min_ex).map_err(|e| {
                                    KgError::SerializationError(format!(
                                        "Invalid minExclusive threshold '{}': {}",
                                        min_ex, e
                                    ))
                                })?;
                            for tr in self
                                .triples
                                .iter()
                                .filter(|tr| tr.subject == subject && tr.predicate == prop.path)
                            {
                                let obj = tr.object.trim();
                                let lex = extract_literal_value(obj);
                                // Values that do not parse as a decimal are skipped here.
                                if let Ok(val) = rust_decimal::Decimal::from_str(&lex) {
                                    if val <= threshold {
                                        let msg = format!(
                                            "SHACL violation: subject {} property {} must be > {} but found {}",
                                            subject, prop.path, threshold, val
                                        );
                                        violations.push(Violation::new(format!("SHACL:{}", shape.target_class), msg, Severity::Error).with_context(serde_json::json!({"subject": subject, "predicate": prop.path, "threshold": threshold.to_string(), "found": val.to_string()})));
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        Ok(violations)
    }
917
918 pub fn to_rdf_xml(&self) -> String {
919 let mut xml = String::new();
920
921 xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
922 xml.push_str("<rdf:RDF\n");
923 xml.push_str(" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
924 xml.push_str(" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n");
927 xml.push_str(" xmlns:owl=\"http://www.w3.org/2002/07/owl#\"\n");
928 xml.push_str(" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n");
929 xml.push_str(" xmlns:xml=\"http://www.w3.org/XML/1998/namespace\"\n");
932 xml.push_str(" xmlns:sea=\"http://domainforge.ai/sea#\"\n");
933 xml.push_str(" xmlns:sh=\"http://www.w3.org/ns/shacl#\">\n\n");
934
935 for triple in &self.triples {
936 let subject = Self::clean_uri(&triple.subject);
937 let predicate_name = triple.predicate.clone();
941 let object = &triple.object;
942
943 if object.starts_with('"') {
944 let (literal_value, suffix) = Self::parse_typed_literal(object);
945 let escaped_value = Self::escape_xml(&literal_value);
946
947 xml.push_str(&format!(" <rdf:Description rdf:about=\"{}\">\n", subject));
948
949 match suffix {
950 Some(TypedLiteralSuffix::Datatype(datatype)) => {
951 let datatype_uri = Self::clean_uri(&datatype);
952 xml.push_str(&format!(
953 " <{} rdf:datatype=\"{}\">{}</{}>\n",
954 predicate_name, datatype_uri, escaped_value, predicate_name
955 ));
956 }
957 Some(TypedLiteralSuffix::Language(lang)) => {
958 xml.push_str(&format!(
959 " <{} xml:lang=\"{}\">{}</{}>\n",
960 predicate_name, lang, escaped_value, predicate_name
961 ));
962 }
963 None => {
964 xml.push_str(&format!(
965 " <{}>{}</{}>\n",
966 predicate_name, escaped_value, predicate_name
967 ));
968 }
969 }
970
971 xml.push_str(" </rdf:Description>\n\n");
972 } else {
973 let cleaned_object = Self::clean_uri(object);
974 xml.push_str(&format!(" <rdf:Description rdf:about=\"{}\">\n", subject));
975 xml.push_str(&format!(
976 " <{} rdf:resource=\"{}\"/>\n",
977 predicate_name, cleaned_object
978 ));
979 xml.push_str(" </rdf:Description>\n\n");
980 }
981 }
982 xml.push('\n');
983 for shape in &self.shapes {
985 xml.push_str(&Self::write_shacl_shapes_xml(shape));
986 }
987
988 xml.push_str("</rdf:RDF>\n");
989 xml
990 }
991
992 fn write_shacl_shapes_xml(shape: &ShaclShape) -> String {
993 let mut xml = String::new();
994 let shape_name = shape.target_class.replace("sea:", "") + "Shape";
995 xml.push_str(&format!(
996 " <sh:NodeShape rdf:about=\"http://domainforge.ai/sea#{}\">\n",
997 shape_name
998 ));
999 xml.push_str(&format!(
1000 " <sh:targetClass rdf:resource=\"http://domainforge.ai/sea#{}\"/>\n",
1001 shape.target_class.replace("sea:", "")
1002 ));
1003 for prop in &shape.properties {
1004 xml.push_str(" <sh:property>\n");
1005 xml.push_str(" <rdf:Description>\n");
1006 let (ns, local) = if let Some(rest) = prop.path.strip_prefix("sea:") {
1008 ("http://domainforge.ai/sea#", rest)
1009 } else if let Some(rest) = prop.path.strip_prefix("rdfs:") {
1010 ("http://www.w3.org/2000/01/rdf-schema#", rest)
1011 } else {
1012 ("http://domainforge.ai/sea#", prop.path.as_str())
1013 };
1014 xml.push_str(&format!(
1015 " <sh:path rdf:resource=\"{}{}\"/>\n",
1016 ns, local
1017 ));
1018 if let Some(dt) = &prop.datatype {
1019 let dt_uri = if dt.starts_with("xsd:") {
1020 dt.replace("xsd:", "http://www.w3.org/2001/XMLSchema#")
1021 } else {
1022 dt.clone()
1023 };
1024 xml.push_str(&format!(
1025 " <sh:datatype rdf:resource=\"{}\"/>\n",
1026 dt_uri
1027 ));
1028 }
1029 if let Some(min) = prop.min_count {
1030 xml.push_str(&format!(" <sh:minCount>{}</sh:minCount>\n", min));
1031 }
1032 if let Some(max) = prop.max_count {
1033 xml.push_str(&format!(" <sh:maxCount>{}</sh:maxCount>\n", max));
1034 }
1035 if let Some(min_ex) = &prop.min_exclusive {
1036 xml.push_str(&format!(" <sh:minExclusive rdf:datatype=\"http://www.w3.org/2001/XMLSchema#decimal\">{}</sh:minExclusive>\n", min_ex));
1037 }
1038 xml.push_str(" </rdf:Description>\n");
1039 xml.push_str(" </sh:property>\n");
1040 }
1041 xml.push_str(" </sh:NodeShape>\n\n");
1042 xml
1043 }
1044
1045 pub fn escape_turtle_literal(input: &str) -> String {
1046 let mut escaped = String::with_capacity(input.len());
1047 for ch in input.chars() {
1048 match ch {
1049 '\\' => escaped.push_str("\\\\"),
1050 '"' => escaped.push_str("\\\""),
1051 '\n' => escaped.push_str("\\n"),
1052 '\r' => escaped.push_str("\\r"),
1053 '\t' => escaped.push_str("\\t"),
1054 '\x08' => escaped.push_str("\\b"), '\x0C' => escaped.push_str("\\f"), other => escaped.push(other),
1057 }
1058 }
1059 escaped
1060 }
1061
1062 fn uri_encode(s: &str) -> String {
1063 utf8_percent_encode(s, URI_ENCODE_SET).to_string()
1064 }
1065
1066 fn validate_turtle_decimal(decimal_str: &str) -> Result<(), String> {
1067 let trimmed = decimal_str.trim();
1069
1070 if trimmed
1072 .chars()
1073 .any(|ch| matches!(ch, '"' | '\'' | '\\' | '\n' | '\r' | '\t'))
1074 {
1075 return Err("Decimal contains invalid characters".to_string());
1076 }
1077
1078 if trimmed.is_empty() {
1080 return Err("Decimal is empty".to_string());
1081 }
1082
1083 let mut has_digit = false;
1085 let mut chars = trimmed.chars().peekable();
1086
1087 if matches!(chars.peek(), Some('+') | Some('-')) {
1089 chars.next();
1090 }
1091
1092 while let Some(ch) = chars.next() {
1094 if ch.is_ascii_digit() {
1095 has_digit = true;
1096 } else if ch == '.' {
1097 if !chars.next().is_some_and(|c| c.is_ascii_digit()) {
1099 return Err("Invalid decimal format".to_string());
1100 }
1101 for c in chars.by_ref() {
1102 if !c.is_ascii_digit() {
1103 return Err("Invalid decimal format".to_string());
1104 }
1105 }
1106 break;
1107 } else {
1108 return Err("Invalid decimal format".to_string());
1109 }
1110 }
1111
1112 if !has_digit {
1113 return Err("Invalid decimal format".to_string());
1114 }
1115
1116 Ok(())
1117 }
1118
1119 fn clean_uri(uri: &str) -> String {
1120 if uri.contains(':') {
1121 let parts: Vec<&str> = uri.splitn(2, ':').collect();
1122 if parts.len() == 2 {
1123 let (prefix, name) = (parts[0], parts[1]);
1124
1125 let standard_prefixes = [
1127 ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
1128 ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
1129 ("xsd", "http://www.w3.org/2001/XMLSchema#"),
1130 ("owl", "http://www.w3.org/2002/07/owl#"),
1131 ("sh", "http://www.w3.org/ns/shacl#"),
1132 ("sea", "http://domainforge.ai/sea#"),
1133 ];
1134
1135 for (std_prefix, namespace) in &standard_prefixes {
1136 if prefix == *std_prefix {
1137 return format!("{}{}", namespace, name);
1138 }
1139 }
1140
1141 return format!("http://domainforge.ai/{}#{}", prefix, name);
1143 }
1144 }
1145 uri.to_string()
1146 }
1147
1148 fn shorten_token(token: &str) -> String {
1149 let t = token.trim();
1150 let value = if t.starts_with('<') && t.ends_with('>') {
1152 &t[1..t.len() - 1]
1153 } else {
1154 t
1155 };
1156
1157 if value.contains("^^<http://www.w3.org/2001/XMLSchema#") {
1159 if let Some(pos) = value.find("^^<http://www.w3.org/2001/XMLSchema#") {
1161 let (lit, rest) = value.split_at(pos);
1162 if rest.contains("decimal") {
1163 return format!("{}^^xsd:decimal", lit.trim());
1164 } else if rest.contains("string") {
1165 return format!("{}^^xsd:string", lit.trim());
1166 }
1167 }
1168 }
1169
1170 let mappings = [
1172 ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:"),
1173 ("http://www.w3.org/2000/01/rdf-schema#", "rdfs:"),
1174 ("http://www.w3.org/2001/XMLSchema#", "xsd:"),
1175 ("http://www.w3.org/2002/07/owl#", "owl:"),
1176 ("http://www.w3.org/ns/shacl#", "sh:"),
1177 ("http://domainforge.ai/sea#", "sea:"),
1178 ("http://domainforge.ai/rdfs#", "rdfs:"),
1179 ];
1180
1181 for (ns, prefix) in &mappings {
1182 if let Some(stripped) = value.strip_prefix(ns) {
1183 return format!("{}{}", prefix, stripped);
1184 }
1185 }
1186
1187 t.to_string()
1189 }
1190
1191 pub fn escape_xml(input: &str) -> String {
1192 let mut escaped = String::with_capacity(input.len());
1193 for ch in input.chars() {
1194 match ch {
1195 '&' => escaped.push_str("&"),
1196 '<' => escaped.push_str("<"),
1197 '>' => escaped.push_str(">"),
1198 '"' => escaped.push_str("""),
1199 '\'' => escaped.push_str("'"),
1200 other => escaped.push(other),
1201 }
1202 }
1203 escaped
1204 }
1205
1206 fn parse_escaped_value<I>(chars: &mut I) -> String
1207 where
1208 I: Iterator<Item = char>,
1209 {
1210 let mut value = String::new();
1211 let mut escaped = false;
1212
1213 for ch in chars.by_ref() {
1214 if escaped {
1215 let resolved = match ch {
1216 'n' => '\n',
1217 't' => '\t',
1218 'r' => '\r',
1219 '"' => '"',
1220 '\\' => '\\',
1221 other => {
1222 value.push('\\');
1223 other
1224 }
1225 };
1226 value.push(resolved);
1227 escaped = false;
1228 continue;
1229 }
1230
1231 match ch {
1232 '\\' => escaped = true,
1233 '"' => break,
1234 other => value.push(other),
1235 }
1236 }
1237
1238 value
1239 }
1240
1241 fn parse_typed_literal(literal: &str) -> (String, Option<TypedLiteralSuffix>) {
1242 if !literal.starts_with('"') {
1243 return (literal.to_string(), None);
1244 }
1245
1246 let mut chars = literal.chars();
1247 chars.next();
1248 let value = Self::parse_escaped_value(&mut chars);
1249
1250 let remainder: String = chars.collect();
1251 let trimmed = remainder.trim();
1252
1253 let suffix = if let Some(rest) = trimmed.strip_prefix("^^") {
1254 let datatype = rest.trim();
1255 if datatype.is_empty() {
1256 None
1257 } else {
1258 Some(TypedLiteralSuffix::Datatype(datatype.to_string()))
1259 }
1260 } else if let Some(rest) = trimmed.strip_prefix('@') {
1261 let language = rest.trim();
1262 if language.is_empty() {
1263 None
1264 } else {
1265 Some(TypedLiteralSuffix::Language(language.to_string()))
1266 }
1267 } else {
1268 None
1269 };
1270
1271 (value, suffix)
1272 }
1273
1274 fn is_valid_rdf_term(term: &str) -> bool {
1277 if term.contains('"') || term.contains('<') || term.contains('>') || term.contains('\\') {
1279 return false;
1280 }
1281
1282 if term.chars().any(|c| c.is_control()) {
1284 return false;
1285 }
1286
1287 let colon_count = term.matches(':').count();
1290 if colon_count > 1 {
1291 return false;
1292 }
1293 if colon_count == 1 && (term.starts_with(':') || term.ends_with(':')) {
1294 return false;
1295 }
1296
1297 true
1298 }
1299}
1300
/// Annotation that may follow the closing quote of a quoted literal.
enum TypedLiteralSuffix {
    /// Trimmed text found after a `^^` marker.
    Datatype(String),
    /// Trimmed text found after an `@` marker.
    Language(String),
}
1305
1306impl Default for KnowledgeGraph {
1307 fn default() -> Self {
1308 Self::new()
1309 }
1310}
1311
1312impl Graph {
1313 pub fn export_rdf(&self, format: &str) -> Result<String, KgError> {
1314 let kg = KnowledgeGraph::from_graph(self)?;
1315 match format {
1316 "turtle" => Ok(kg.to_turtle()),
1317 "rdf-xml" => Ok(kg.to_rdf_xml()),
1318 _ => Err(KgError::UnsupportedFormat(format.to_string())),
1319 }
1320 }
1321}
1322
#[cfg(test)]
mod tests {
    use super::*;
    use crate::primitives::{Entity, Flow, Resource};
    use rust_decimal::Decimal;

    /// A small supply-chain graph exports to Turtle with prefix declarations
    /// and the expected `sea:` vocabulary terms.
    #[test]
    fn test_export_to_rdf_turtle() {
        let mut graph = Graph::new();

        let entity1 = Entity::new_with_namespace("Supplier", "supply_chain");
        let entity2 = Entity::new_with_namespace("Manufacturer", "supply_chain");
        let resource = Resource::new_with_namespace(
            "Parts",
            crate::units::unit_from_string("kg"),
            "supply_chain",
        );

        let entity1_id = entity1.id().clone();
        let entity2_id = entity2.id().clone();
        let resource_id = resource.id().clone();

        graph.add_entity(entity1).unwrap();
        graph.add_entity(entity2).unwrap();
        graph.add_resource(resource).unwrap();

        #[allow(deprecated)]
        let flow = Flow::new(resource_id, entity1_id, entity2_id, Decimal::new(100, 0));
        graph.add_flow(flow).unwrap();

        let rdf_turtle = graph.export_rdf("turtle").unwrap();

        assert!(rdf_turtle.contains("sea:Entity"));
        assert!(rdf_turtle.contains("sea:hasResource"));
        assert!(rdf_turtle.contains("@prefix"));
    }

    /// A single-entity graph exports to RDF/XML with an XML declaration and
    /// an `rdf:RDF` root.
    #[test]
    fn test_export_to_rdf_xml() {
        let mut graph = Graph::new();

        let entity = Entity::new_with_namespace("TestEntity", "default".to_string());
        graph.add_entity(entity).unwrap();

        let rdf_xml = graph.export_rdf("rdf-xml").unwrap();

        assert!(rdf_xml.contains("<?xml"));
        assert!(rdf_xml.contains("rdf:RDF"));
    }

    /// Unknown format names are reported as `UnsupportedFormat`.
    #[test]
    fn test_unsupported_format() {
        let graph = Graph::new();
        let result = graph.export_rdf("json-ld");

        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), KgError::UnsupportedFormat(_)));
    }

    /// Names containing URI delimiters are percent-encoded in Turtle output,
    /// and flow quantities are emitted as typed decimal literals.
    #[test]
    fn test_export_rdf_turtle_encodes_special_characters_and_literals() {
        let mut graph = Graph::new();

        let entity_space = Entity::new_with_namespace("Entity With Space", "default".to_string());
        let entity_colon = Entity::new_with_namespace("Entity:Colon", "default".to_string());
        let entity_slash = Entity::new_with_namespace("Entity/Slash", "default".to_string());
        let entity_hash = Entity::new_with_namespace("Entity#Hash", "default".to_string());

        // Only the first two entities are needed again (for the flow
        // endpoints), so only those are cloned.
        graph.add_entity(entity_space.clone()).unwrap();
        graph.add_entity(entity_colon.clone()).unwrap();
        graph.add_entity(entity_slash).unwrap();
        graph.add_entity(entity_hash).unwrap();

        let resource = Resource::new_with_namespace(
            "Resource:Name/Hash",
            crate::units::unit_from_string("units"),
            "default".to_string(),
        );
        let resource_id = resource.id().clone();
        graph.add_resource(resource).unwrap();

        // Same constructor as in `test_export_to_rdf_turtle`, so the same
        // lint allowance applies.
        #[allow(deprecated)]
        let flow = Flow::new(
            resource_id,
            entity_space.id().clone(),
            entity_colon.id().clone(),
            Decimal::new(42, 0),
        );
        graph.add_flow(flow).unwrap();

        let turtle = graph.export_rdf("turtle").unwrap();
        assert!(turtle.contains("sea:Entity%20With%20Space"));
        assert!(turtle.contains("sea:Entity%3AColon"));
        assert!(turtle.contains("sea:Entity%2FSlash"));
        assert!(turtle.contains("sea:Entity%23Hash"));
        assert!(turtle.contains("sea:Resource%3AName%2FHash"));
        assert!(turtle.contains("\"42\"^^xsd:decimal"));
    }

    /// RDF/XML output carries datatype and language attributes and escapes
    /// reserved XML characters inside literal text.
    #[test]
    fn test_rdf_xml_escapes_special_literals_and_language_tags() {
        let mut kg = KnowledgeGraph::new();

        kg.triples.push(Triple {
            subject: "sea:testEntity".to_string(),
            predicate: "sea:hasNumericValue".to_string(),
            object: "\"100\"^^xsd:decimal".to_string(),
        });
        kg.triples.push(Triple {
            subject: "sea:testEntity".to_string(),
            predicate: "sea:description".to_string(),
            object: "\"Hello & <World>\"@en".to_string(),
        });

        let xml = kg.to_rdf_xml();
        assert!(xml.contains("rdf:datatype=\"http://www.w3.org/2001/XMLSchema#decimal\""));
        assert!(xml.contains(">100<"));
        assert!(xml.contains("xml:lang=\"en\""));
        // Check for the entity references themselves: the bare characters
        // `&`, `<` and `>` occur in any XML document and would prove nothing.
        assert!(xml.contains("&amp;"));
        assert!(xml.contains("&lt;"));
        assert!(xml.contains("&gt;"));
    }
}