use anyhow::Result;
use oxrdf::{Literal, NamedNode, NamedOrBlankNode, Term, Triple};
use std::collections::HashMap;
/// A single RDF statement with an optional graph label, stored as plain strings.
///
/// The fields follow the conventions used by the parser and serializer in this
/// file: resources are stored without angle brackets, blank nodes keep their
/// `_:` prefix, and the object keeps its N-Quads term syntax.
#[derive(Debug, Clone, PartialEq)]
pub struct Quad {
/// Subject IRI without the surrounding `<` `>`, or a blank node written `_:label`.
pub subject: String,
/// Predicate IRI without the surrounding `<` `>`.
pub predicate: String,
/// Object term in N-Quads syntax (`<iri>`, `_:label` or a quoted literal).
/// Text that matches none of these forms is treated as a plain literal value
/// when the quad is serialized or converted to a triple.
pub object: String,
/// Graph label stored like `subject`; `None` means the default graph.
pub graph: Option<String>,
}
impl Quad {
    /// Builds a quad from its four components.
    ///
    /// `subject`, `predicate` and `graph` are expected without angle brackets
    /// (blank nodes as `_:label`); `object` is expected in N-Quads term syntax
    /// or as bare text for a plain literal. `graph == None` places the quad in
    /// the default graph.
    pub fn new(subject: String, predicate: String, object: String, graph: Option<String>) -> Self {
        Quad {
            subject,
            predicate,
            object,
            graph,
        }
    }

    /// Builds a quad that belongs to the default graph (no graph label).
    pub fn default_graph(subject: String, predicate: String, object: String) -> Self {
        Self::new(subject, predicate, object, None)
    }

    /// Serializes the quad as one N-Quads statement terminated by ` .\n`.
    ///
    /// IRIs are wrapped in `<` `>`. Blank nodes (`_:label`) in the subject or
    /// graph position are written verbatim, because wrapping them in angle
    /// brackets would turn them into (invalid) IRIs. The object is rendered by
    /// `format_object`.
    pub fn to_nquads(&self) -> String {
        let mut statement = format!(
            "{} <{}> {}",
            Self::format_resource(&self.subject),
            self.predicate,
            format_object(&self.object)
        );
        if let Some(graph) = &self.graph {
            statement.push(' ');
            statement.push_str(&Self::format_resource(graph));
        }
        statement.push_str(" .\n");
        statement
    }

    /// Renders a subject or graph label: blank nodes as-is, anything else as `<iri>`.
    fn format_resource(term: &str) -> String {
        if term.starts_with("_:") {
            term.to_string()
        } else {
            format!("<{}>", term)
        }
    }
}
/// In-memory collection of [`Quad`]s grouped by the graph they belong to.
pub struct NQuadsProcessor {
/// Quads keyed by graph label; the `None` key holds the default graph.
graphs: HashMap<Option<String>, Vec<Quad>>,
}
impl NQuadsProcessor {
pub fn new() -> Self {
NQuadsProcessor {
graphs: HashMap::new(),
}
}
pub fn load_nquads(&mut self, data: &str) -> Result<()> {
for line in data.lines() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
let quad = parse_nquad_line(line)?;
self.graphs
.entry(quad.graph.clone())
.or_default()
.push(quad);
}
Ok(())
}
pub fn total_quads(&self) -> usize {
self.graphs.values().map(|v| v.len()).sum()
}
pub fn graph_count(&self) -> usize {
self.graphs.len()
}
pub fn get_graph(&self, graph_iri: Option<&str>) -> Option<&Vec<Quad>> {
let key = graph_iri.map(|s| s.to_string());
self.graphs.get(&key)
}
pub fn graph_iris(&self) -> Vec<&str> {
self.graphs.keys().filter_map(|k| k.as_deref()).collect()
}
pub fn to_nquads(&self) -> String {
let mut output = String::new();
if let Some(quads) = self.graphs.get(&None) {
for quad in quads {
output.push_str(&quad.to_nquads());
}
}
for (graph, quads) in &self.graphs {
if graph.is_some() {
for quad in quads {
output.push_str(&quad.to_nquads());
}
}
}
output
}
pub fn all_quads(&self) -> impl Iterator<Item = &Quad> {
self.graphs.values().flat_map(|v| v.iter())
}
pub fn to_triples(&self) -> Result<Vec<Triple>> {
let mut triples = Vec::new();
for quad in self.all_quads() {
let triple = quad_to_triple(quad)?;
triples.push(triple);
}
Ok(triples)
}
pub fn clear(&mut self) {
self.graphs.clear();
}
pub fn add_quad(&mut self, quad: Quad) {
self.graphs
.entry(quad.graph.clone())
.or_default()
.push(quad);
}
pub fn merge(&mut self, other: NQuadsProcessor) {
for (graph, quads) in other.graphs {
self.graphs.entry(graph).or_default().extend(quads);
}
}
pub fn into_quad_store(self) -> crate::quad_store::QuadStore {
let mut qs = crate::quad_store::QuadStore::new();
for (_graph, quads) in self.graphs {
for quad in quads {
qs.insert_quad(quad);
}
}
qs
}
}
impl Default for NQuadsProcessor {
fn default() -> Self {
Self::new()
}
}
/// Parses one N-Quads statement (`subject predicate object [graph] .`) into a [`Quad`].
///
/// Subject, predicate and graph label are stored without angle brackets
/// (blank nodes keep their `_:` prefix); the object token is stored verbatim.
/// A missing terminating `.` is tolerated.
///
/// # Errors
///
/// Fails when the line has fewer than three or more than four components, or
/// when the subject, predicate or graph label is neither `<iri>` nor `_:label`.
fn parse_nquad_line(line: &str) -> Result<Quad> {
    // Remove at most ONE statement terminator; stripping every trailing dot
    // would silently accept malformed input such as `<s> <p> <o> ..`.
    let line = line.trim();
    let line = line.strip_suffix('.').unwrap_or(line).trim_end();

    let parts = split_nquad_components(line);
    if parts.len() < 3 {
        anyhow::bail!("Invalid N-Quad line (need at least 3 components): {}", line);
    }
    // Anything beyond the graph label is not part of a statement; reject it
    // rather than dropping data silently.
    if parts.len() > 4 {
        anyhow::bail!(
            "Invalid N-Quad line (expected at most 4 components, found {}): {}",
            parts.len(),
            line
        );
    }

    let subject = parse_iri(&parts[0])?;
    let predicate = parse_iri(&parts[1])?;
    // The object keeps its N-Quads syntax so IRIs, blank nodes and literals
    // can still be told apart later.
    let object = parts[2].to_string();
    let graph = match parts.get(3) {
        Some(label) => Some(parse_iri(label)?),
        None => None,
    };

    Ok(Quad {
        subject,
        predicate,
        object,
        graph,
    })
}
/// Splits a statement into whitespace-separated tokens.
///
/// Spaces and tabs separate tokens unless they occur inside a double-quoted
/// section. Inside quotes a backslash protects the following character, so an
/// escaped `"` does not end the quoted section. Quotes and backslashes are
/// kept in the returned tokens; empty tokens are never produced.
fn split_nquad_components(line: &str) -> Vec<String> {
    let mut tokens: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut quoted = false;
    let mut escaped = false;

    for ch in line.chars() {
        let is_separator = !escaped && !quoted && (ch == ' ' || ch == '\t');
        if is_separator {
            // Runs of whitespace collapse: only flush when something was collected.
            if !buffer.is_empty() {
                tokens.push(std::mem::take(&mut buffer));
            }
            continue;
        }

        buffer.push(ch);
        if escaped {
            // The protected character has been copied; leave escape mode.
            escaped = false;
        } else if ch == '"' {
            quoted = !quoted;
        } else if ch == '\\' && quoted {
            escaped = true;
        }
    }

    if !buffer.is_empty() {
        tokens.push(buffer);
    }
    tokens
}
/// Normalizes a subject, predicate or graph token.
///
/// `<iri>` is returned without its angle brackets and a token starting with
/// `_:` is returned unchanged.
///
/// # Errors
///
/// Fails with `Invalid IRI: …` for any other token.
fn parse_iri(s: &str) -> Result<String> {
    let bracketed = s
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    match bracketed {
        Some(inner) => Ok(inner.to_owned()),
        None if s.starts_with("_:") => Ok(s.to_owned()),
        None => anyhow::bail!("Invalid IRI: {}", s),
    }
}
/// Renders an object for N-Quads output.
///
/// Values that already look like a term (`<iri>`, `_:label` or a quoted
/// literal) are returned unchanged. Any other text is emitted as a plain
/// literal: wrapped in double quotes with `\`, `"`, line feed and carriage
/// return escaped, since a raw line break would split the statement across
/// lines and make the output unparseable.
fn format_object(object: &str) -> String {
    if object.starts_with('<') || object.starts_with("_:") || object.starts_with('"') {
        return object.to_string();
    }

    let mut literal = String::with_capacity(object.len() + 2);
    literal.push('"');
    for ch in object.chars() {
        match ch {
            '\\' => literal.push_str("\\\\"),
            '"' => literal.push_str("\\\""),
            '\n' => literal.push_str("\\n"),
            '\r' => literal.push_str("\\r"),
            other => literal.push(other),
        }
    }
    literal.push('"');
    literal
}
fn quad_to_triple(quad: &Quad) -> Result<Triple> {
let subject = if quad.subject.starts_with("_:") {
NamedOrBlankNode::BlankNode(oxrdf::BlankNode::new(&quad.subject[2..])?)
} else {
NamedOrBlankNode::NamedNode(NamedNode::new(&quad.subject)?)
};
let predicate = NamedNode::new(&quad.predicate)?;
let object = if quad.object.starts_with('<') && quad.object.ends_with('>') {
Term::NamedNode(NamedNode::new(&quad.object[1..quad.object.len() - 1])?)
} else if quad.object.starts_with("_:") {
Term::BlankNode(oxrdf::BlankNode::new(&quad.object[2..])?)
} else if quad.object.starts_with('"') {
let value = extract_literal_value(&quad.object);
Term::Literal(Literal::new_simple_literal(&value))
} else {
Term::Literal(Literal::new_simple_literal(&quad.object))
};
Ok(Triple::new(subject, predicate, object))
}
/// Extracts the unescaped lexical value from a quoted literal token.
///
/// Reading stops at the first unescaped `"`, so a trailing `@lang` or
/// `^^<datatype>` suffix is ignored. Recognised escapes are `\n`, `\r`, `\t`,
/// `\b`, `\f`, `\"`, `\\`, `\uXXXX` and `\UXXXXXXXX`; any other escaped
/// character (including a malformed unicode escape marker) is copied
/// literally. Input that does not start with `"` is returned unchanged.
fn extract_literal_value(s: &str) -> String {
    let rest = match s.strip_prefix('"') {
        Some(rest) => rest,
        None => return s.to_string(),
    };

    let mut value = String::with_capacity(rest.len());
    let mut chars = rest.chars();
    while let Some(c) = chars.next() {
        match c {
            '"' => break,
            '\\' => {
                let escaped = match chars.next() {
                    Some(escaped) => escaped,
                    // A lone trailing backslash has nothing to escape.
                    None => break,
                };
                match escaped {
                    'n' => value.push('\n'),
                    'r' => value.push('\r'),
                    't' => value.push('\t'),
                    'b' => value.push('\u{0008}'),
                    'f' => value.push('\u{000C}'),
                    'u' | 'U' => {
                        let width = if escaped == 'u' { 4 } else { 8 };
                        match decode_unicode_escape(&mut chars, width) {
                            Some(decoded) => value.push(decoded),
                            // Malformed escape: stay lenient and keep the marker.
                            None => value.push(escaped),
                        }
                    }
                    other => value.push(other),
                }
            }
            other => value.push(other),
        }
    }
    value
}

/// Decodes the `width` hex digits of a `\u` / `\U` escape.
///
/// On success the digits are consumed from `chars`; when they are missing or
/// do not form a valid code point, `chars` is left untouched and `None` is returned.
fn decode_unicode_escape(chars: &mut std::str::Chars<'_>, width: usize) -> Option<char> {
    let digits: String = chars.clone().take(width).collect();
    // `from_str_radix` would accept a leading `+`, so validate the digits first.
    if digits.len() != width || !digits.chars().all(|d| d.is_ascii_hexdigit()) {
        return None;
    }
    let decoded = u32::from_str_radix(&digits, 16)
        .ok()
        .and_then(char::from_u32)?;
    for _ in 0..width {
        chars.next();
    }
    Some(decoded)
}
// Unit tests for the N-Quads parsing helpers and `NQuadsProcessor`.
#[cfg(test)]
mod tests {
use super::*;
// A three-term statement parses into the default graph; the subject and
// predicate lose their angle brackets while the object keeps them.
#[test]
fn test_parse_nquad_default_graph() {
let line =
r#"<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> ."#;
let quad = parse_nquad_line(line).expect("unwrap");
assert_eq!(quad.subject, "http://example.org/Alice");
assert_eq!(quad.predicate, "http://example.org/knows");
assert_eq!(quad.object, "<http://example.org/Bob>");
assert_eq!(quad.graph, None);
}
// A fourth term is stored as the graph label, without angle brackets.
#[test]
fn test_parse_nquad_named_graph() {
let line = r#"<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> <http://example.org/graph1> ."#;
let quad = parse_nquad_line(line).expect("unwrap");
assert_eq!(quad.subject, "http://example.org/Alice");
assert_eq!(quad.predicate, "http://example.org/knows");
assert_eq!(quad.object, "<http://example.org/Bob>");
assert_eq!(quad.graph, Some("http://example.org/graph1".to_string()));
}
// A literal object is stored verbatim, including its surrounding quotes.
#[test]
fn test_parse_nquad_with_literal() {
let line =
r#"<http://example.org/Alice> <http://www.w3.org/2000/01/rdf-schema#label> "Alice" ."#;
let quad = parse_nquad_line(line).expect("unwrap");
assert_eq!(quad.subject, "http://example.org/Alice");
assert_eq!(quad.object, r#""Alice""#);
}
// Loading three statements spread over graph1, the default graph and graph2
// yields three quads in three graphs (the default graph counts as one).
#[test]
fn test_processor_load() {
let nquads = r#"
<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> <http://example.org/graph1> .
<http://example.org/Bob> <http://example.org/knows> <http://example.org/Charlie> .
<http://example.org/Charlie> <http://example.org/knows> <http://example.org/Alice> <http://example.org/graph2> .
"#;
let mut processor = NQuadsProcessor::new();
processor.load_nquads(nquads).expect("unwrap");
assert_eq!(processor.total_quads(), 3);
assert_eq!(processor.graph_count(), 3); }
// `get_graph(None)` returns the default graph; a label returns that named graph.
#[test]
fn test_processor_get_graph() {
let nquads = r#"
<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> <http://example.org/graph1> .
<http://example.org/Bob> <http://example.org/knows> <http://example.org/Charlie> .
"#;
let mut processor = NQuadsProcessor::new();
processor.load_nquads(nquads).expect("unwrap");
let default = processor.get_graph(None).expect("unwrap");
assert_eq!(default.len(), 1);
let graph1 = processor
.get_graph(Some("http://example.org/graph1"))
.expect("unwrap");
assert_eq!(graph1.len(), 1);
}
// Serialized output mentions both the subject and the graph label of an added quad.
#[test]
fn test_processor_to_nquads() {
let mut processor = NQuadsProcessor::new();
processor.add_quad(Quad::new(
"http://example.org/Alice".to_string(),
"http://example.org/knows".to_string(),
"<http://example.org/Bob>".to_string(),
Some("http://example.org/graph1".to_string()),
));
let output = processor.to_nquads();
assert!(output.contains("http://example.org/Alice"));
assert!(output.contains("http://example.org/graph1"));
}
// Converting a quad keeps the predicate IRI intact.
#[test]
fn test_quad_to_triple() {
let quad = Quad::new(
"http://example.org/Alice".to_string(),
"http://example.org/knows".to_string(),
"<http://example.org/Bob>".to_string(),
None,
);
let triple = quad_to_triple(&quad).expect("unwrap");
assert_eq!(triple.predicate.as_str(), "http://example.org/knows");
}
// `graph_iris` lists the named graphs; only membership is checked here, not order.
#[test]
fn test_graph_iris() {
let nquads = r#"
<http://example.org/a> <http://example.org/p> <http://example.org/b> <http://example.org/g1> .
<http://example.org/a> <http://example.org/p> <http://example.org/c> <http://example.org/g2> .
<http://example.org/a> <http://example.org/p> <http://example.org/d> .
"#;
let mut processor = NQuadsProcessor::new();
processor.load_nquads(nquads).expect("unwrap");
let iris = processor.graph_iris();
assert!(iris.contains(&"http://example.org/g1"));
assert!(iris.contains(&"http://example.org/g2"));
}
// Serializing and re-loading preserves the number of quads.
#[test]
fn test_roundtrip() {
let original = r#"<http://example.org/Alice> <http://example.org/knows> <http://example.org/Bob> <http://example.org/graph1> .
<http://example.org/Bob> <http://example.org/knows> <http://example.org/Charlie> .
"#;
let mut processor = NQuadsProcessor::new();
processor.load_nquads(original).expect("unwrap");
let output = processor.to_nquads();
let mut processor2 = NQuadsProcessor::new();
processor2.load_nquads(&output).expect("unwrap");
assert_eq!(processor.total_quads(), processor2.total_quads());
}
// The two-character escape `\n` inside a literal becomes a real newline in
// the converted triple's literal value.
#[test]
fn test_literal_with_escape() {
let line = r#"<http://example.org/s> <http://example.org/p> "line1\nline2" ."#;
let quad = parse_nquad_line(line).expect("unwrap");
let triple = quad_to_triple(&quad).expect("unwrap");
if let Term::Literal(lit) = triple.object {
assert!(lit.value().contains('\n'));
} else {
panic!("Expected literal");
}
}
}