use anyhow::Result;
use std::io::{Read, Seek};
use std::path::Path;
use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
use oxigraph::model::graph::Graph as OxigraphGraph;
use oxigraph::model::Dataset;
use oxigraph::model::{GraphNameRef, Quad, Triple, TripleRef};
use std::io::BufReader;
use log::{debug, info};
/// Maps a file extension (without the leading dot) to the RDF format used to parse it.
///
/// Matching is ASCII case-insensitive, so `"TTL"` and `"ttl"` resolve to the same format.
/// Returns `None` when the extension is not one of the entries listed below.
fn ext_to_rdf_format(ext: &str) -> Option<RdfFormat> {
    use oxigraph::io::JsonLdProfileSet;

    // Extensions frequently arrive upper- or mixed-case (e.g. `ONTOLOGY.TTL`); normalise
    // first so such files are not silently treated as "unknown format".
    match ext.to_ascii_lowercase().as_str() {
        // `.n3` files are handed to the Turtle parser.
        "ttl" | "n3" => Some(RdfFormat::Turtle),
        "xml" | "rdf" | "owl" => Some(RdfFormat::RdfXml),
        "nt" => Some(RdfFormat::NTriples),
        "nq" => Some(RdfFormat::NQuads),
        "jsonld" | "json" => Some(RdfFormat::JsonLd {
            profile: JsonLdProfileSet::default(),
        }),
        _ => None,
    }
}
pub fn get_file_contents(path: &Path) -> Result<(Vec<u8>, Option<RdfFormat>)> {
let b = std::fs::read(path)?;
let format = path
.extension()
.and_then(|ext| ext.to_str())
.and_then(ext_to_rdf_format);
Ok((b, format))
}
/// Fetches `url` with the default fetch options and returns the response body together
/// with the RDF format reported by the fetch layer.
///
/// # Errors
/// Propagates any error returned by `crate::fetch::fetch_rdf`.
pub fn get_url_contents(url: &str) -> Result<(Vec<u8>, Option<RdfFormat>)> {
    let fetched = crate::fetch::fetch_rdf(url, &crate::fetch::FetchOptions::default())?;
    Ok((fetched.bytes, fetched.format))
}
/// Serialises `dataset` as Turtle into the file at path `file`, creating or truncating it.
///
/// Only the subject, predicate and object of each quad are written; the graph name of
/// every quad is dropped.
///
/// # Errors
/// Returns an error if the file cannot be created or if any write (including the final
/// flush) fails.
pub fn write_dataset_to_file(dataset: &Dataset, file: &str) -> Result<()> {
    info!(
        "Writing dataset to file: {} with length {}",
        file,
        dataset.len()
    );
    // Buffer the output: the serializer emits many small writes, and an unbuffered
    // `File` would turn each of them into a separate syscall.
    let mut writer = std::io::BufWriter::new(std::fs::File::create(file)?);
    let mut serializer = RdfSerializer::from_format(RdfFormat::Turtle).for_writer(&mut writer);
    for quad in dataset.iter() {
        serializer.serialize_triple(TripleRef {
            subject: quad.subject,
            predicate: quad.predicate,
            object: quad.object,
        })?;
    }
    serializer.finish()?;
    // Flush explicitly so that write errors surface here; dropping a `BufWriter`
    // would swallow them.
    std::io::Write::flush(&mut writer)?;
    Ok(())
}
/// Parses the RDF file at `file` into an in-memory graph.
///
/// The format is chosen from the file extension via `ext_to_rdf_format`; Turtle is
/// assumed when the extension is missing or unrecognised. Each parsed quad is stored
/// as a plain triple, i.e. its graph name is discarded.
///
/// # Errors
/// Returns an error if the file cannot be opened or if the parser reports any error.
pub fn read_file(file: &Path) -> Result<OxigraphGraph> {
    // `display()` instead of `to_str().unwrap()`: a non-UTF-8 path must not panic
    // merely because of a log statement.
    debug!("Reading file: {}", file.display());
    let reader = BufReader::new(std::fs::File::open(file)?);
    let format = file
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(ext_to_rdf_format)
        .unwrap_or(RdfFormat::Turtle);

    let mut graph = OxigraphGraph::new();
    for quad in RdfParser::from_format(format).for_reader(reader) {
        let quad = quad?;
        graph.insert(&Triple::new(quad.subject, quad.predicate, quad.object));
    }
    Ok(graph)
}
pub fn read_format<T: Read + Seek>(
mut original_content: BufReader<T>,
format: Option<RdfFormat>,
) -> Result<OxigraphGraph> {
let format = format.unwrap_or(RdfFormat::Turtle);
for format in [
format,
RdfFormat::Turtle,
RdfFormat::RdfXml,
RdfFormat::NTriples,
] {
let content = original_content.get_mut();
content.rewind()?;
let parser = RdfParser::from_format(format);
let mut graph = OxigraphGraph::new();
let parser = parser.for_reader(content);
for quad in parser {
match quad {
Ok(q) => {
let triple = Triple::new(q.subject, q.predicate, q.object);
graph.insert(&triple);
}
Err(_) => {
break;
}
}
}
if !graph.is_empty() {
return Ok(graph);
}
}
Err(anyhow::anyhow!("Failed to parse graph"))
}
/// Fetches the resource at `file` (a URL) with the default fetch options and parses the
/// response body into a graph via `read_format`, using the format reported by the fetch
/// layer as the preferred one.
///
/// # Errors
/// Propagates errors from `crate::fetch::fetch_rdf` and from `read_format`.
pub fn read_url(file: &str) -> Result<OxigraphGraph> {
    debug!("Reading url: {file}");
    let fetched = crate::fetch::fetch_rdf(file, &crate::fetch::FetchOptions::default())?;
    let preferred_format = fetched.format;
    // `read_format` needs a seekable reader; an in-memory cursor over the body provides one.
    let body = std::io::Cursor::new(fetched.bytes);
    read_format(BufReader::new(body), preferred_format)
}
pub fn read_prefixes_from_location(
location: &crate::ontology::OntologyLocation,
) -> Result<std::collections::HashMap<String, String>> {
let (bytes, format) = match location {
crate::ontology::OntologyLocation::File(p) => get_file_contents(p)?,
crate::ontology::OntologyLocation::Url(u) => get_url_contents(u)?,
crate::ontology::OntologyLocation::InMemory { .. } => {
return Ok(std::collections::HashMap::new())
}
};
let format = format.unwrap_or(RdfFormat::Turtle);
let parser = RdfParser::from_format(format);
let mut reader = parser.for_reader(std::io::Cursor::new(bytes));
for quad in &mut reader {
let _ = quad;
}
let prefixes: std::collections::HashMap<String, String> = reader
.prefixes()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
Ok(prefixes)
}
/// Pairs every triple of `graph` with `graph_name`, producing items that can be
/// converted into `Quad`s.
///
/// The result borrows from `graph` and is evaluated lazily as it is iterated.
pub fn graph_to_quads<'a>(
    graph: &'a OxigraphGraph,
    graph_name: GraphNameRef<'a>,
) -> impl IntoIterator<Item = impl Into<Quad> + use<'a>> {
    let triples = graph.iter();
    triples.map(move |t| t.in_graph(graph_name))
}