use crate::error::TurtleParseError;
use crate::formats::nquads::{NQuadsParser, NQuadsSerializer};
use crate::formats::ntriples::{NTriplesParser, NTriplesSerializer};
use crate::formats::trig::TriGParser;
use crate::formats::turtle::{TurtleParser, TurtleSerializer};
use crate::toolkit::{Parser, RdfFormat, SerializationConfig, Serializer};
use oxirs_core::model::{Quad, Triple};
use std::io::{BufRead, BufReader, Write};
/// Result alias used by the conversion API in this module; the error type is
/// always [`ConversionError`].
pub type ConversionResult<T> = Result<T, ConversionError>;
/// Errors that can be returned while converting between RDF formats.
///
/// `Display` and `std::error::Error` are derived through `thiserror`; the
/// `#[from]` variants can be produced directly by the `?` operator.
#[derive(Debug, thiserror::Error)]
pub enum ConversionError {
/// The input could not be parsed; wraps the underlying [`TurtleParseError`].
#[error("Parse error: {0}")]
ParseError(#[from] TurtleParseError),
/// Reading the input or writing the output failed at the I/O level.
#[error("I/O error: {0}")]
IoError(#[from] std::io::Error),
/// The requested conversion is not supported. The fields are the source
/// format and the target format, in that order (see the message template).
#[error("Unsupported conversion from {0:?} to {1:?}")]
UnsupportedConversion(RdfFormat, RdfFormat),
/// Producing the output failed. In this file it carries the stringified
/// serializer error, or a note that the produced bytes were not valid UTF-8.
#[error("Serialization error: {0}")]
SerializationError(String),
/// The input was rejected as invalid.
// NOTE(review): this variant is not constructed anywhere in the visible part
// of this file — confirm whether other modules rely on it.
#[error("Invalid input: {0}")]
InvalidInput(String),
}
/// Options controlling how a [`FormatConverter`] parses and serializes data.
#[derive(Debug, Clone)]
pub struct ConversionConfig {
/// Serializer settings; the converter hands a clone of this to the Turtle
/// serializer when the target format is Turtle.
pub serialization: SerializationConfig,
/// Whether prefixes should be preserved (defaults to `true`).
// NOTE(review): not read by any code in the visible part of this file —
// confirm where (or whether) it takes effect.
pub preserve_prefixes: bool,
/// When `true`, Turtle input is parsed with `TurtleParser::new_lenient()`
/// instead of `TurtleParser::new()` (defaults to `false`). The other parsers
/// used in this file do not consult this flag.
pub lenient_parsing: bool,
/// Batch size hint (defaults to `10_000`).
// NOTE(review): not read by any code in the visible part of this file —
// the conversion currently collects all statements at once.
pub batch_size: usize,
}
impl Default for ConversionConfig {
fn default() -> Self {
Self {
serialization: SerializationConfig::default(),
preserve_prefixes: true,
lenient_parsing: false,
batch_size: 10_000,
}
}
}
impl ConversionConfig {
    /// Returns the default configuration; equivalent to
    /// `ConversionConfig::default()`.
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Returns the configuration with `lenient_parsing` set to `lenient`.
    pub fn with_lenient(self, lenient: bool) -> Self {
        Self {
            lenient_parsing: lenient,
            ..self
        }
    }

    /// Returns the configuration with `preserve_prefixes` set to `preserve`.
    pub fn with_preserve_prefixes(self, preserve: bool) -> Self {
        Self {
            preserve_prefixes: preserve,
            ..self
        }
    }

    /// Returns the configuration with `batch_size` set to `size`.
    pub fn with_batch_size(self, size: usize) -> Self {
        Self {
            batch_size: size,
            ..self
        }
    }

    /// Returns the configuration with its serializer settings replaced by
    /// `config`.
    pub fn with_serialization(self, config: SerializationConfig) -> Self {
        Self {
            serialization: config,
            ..self
        }
    }
}
/// Converts RDF data between the Turtle, N-Triples, N-Quads and TriG formats.
///
/// The converter itself only stores its [`ConversionConfig`]; all work happens
/// in the `convert_*` methods.
#[derive(Debug)]
pub struct FormatConverter {
// Settings consulted while parsing and serializing.
config: ConversionConfig,
}
impl FormatConverter {
    /// Creates a converter that uses [`ConversionConfig::default`].
    pub fn new() -> Self {
        Self::with_config(ConversionConfig::default())
    }

    /// Creates a converter that uses the supplied configuration.
    pub fn with_config(config: ConversionConfig) -> Self {
        Self { config }
    }

    /// Converts an in-memory document from format `from` to format `to`.
    ///
    /// # Errors
    ///
    /// Returns every error [`FormatConverter::convert_stream`] can return, plus
    /// [`ConversionError::SerializationError`] if the serializer produced bytes
    /// that are not valid UTF-8.
    pub fn convert_string(
        &self,
        input: &str,
        from: RdfFormat,
        to: RdfFormat,
    ) -> ConversionResult<String> {
        // `convert_stream` requires a `'static` reader, so the borrowed input
        // has to be copied into an owned buffer before it can be wrapped.
        let reader = std::io::Cursor::new(input.as_bytes().to_vec());
        let mut output = Vec::new();
        self.convert_stream(reader, &mut output, from, to)?;
        String::from_utf8(output).map_err(|e| {
            ConversionError::SerializationError(format!("Invalid UTF-8 output: {}", e))
        })
    }

    /// Reads statements in format `from` from `input` and writes them to
    /// `output` in format `to`.
    ///
    /// The whole input is parsed into memory before anything is written.
    /// When converting from a quad format (N-Quads, TriG) to a triple format
    /// (Turtle, N-Triples), only quads in the default graph are kept; quads in
    /// any other graph are dropped. In the opposite direction every triple is
    /// placed in the default graph.
    ///
    /// # Errors
    ///
    /// * [`ConversionError::UnsupportedConversion`] (carrying `from` and `to`)
    ///   if either format is not one of Turtle, N-Triples, N-Quads or TriG.
    ///   This is checked before any input is read.
    /// * Whatever the parser reports, converted through
    ///   `ConversionError::from`.
    /// * [`ConversionError::SerializationError`] if writing the output fails.
    pub fn convert_stream<R: BufRead + 'static, W: Write>(
        &self,
        input: R,
        output: &mut W,
        from: RdfFormat,
        to: RdfFormat,
    ) -> ConversionResult<()> {
        self.convert_stream_counted(input, output, from, to)
            .map(|_| ())
    }

    /// Returns `true` for the formats that carry a graph name per statement.
    ///
    /// Takes a reference so the caller keeps ownership of the format value.
    fn has_named_graphs(format: &RdfFormat) -> bool {
        matches!(format, RdfFormat::NQuads | RdfFormat::TriG)
    }

    /// Returns `true` for the formats this converter can both read and write.
    fn is_supported(format: &RdfFormat) -> bool {
        matches!(
            format,
            RdfFormat::Turtle | RdfFormat::NTriples | RdfFormat::NQuads | RdfFormat::TriG
        )
    }

    /// Does the work of [`FormatConverter::convert_stream`] and additionally
    /// returns the number of statements handed to the serializer.
    fn convert_stream_counted<R: BufRead + 'static, W: Write>(
        &self,
        input: R,
        output: &mut W,
        from: RdfFormat,
        to: RdfFormat,
    ) -> ConversionResult<usize> {
        // Fail fast, and report the pair the caller actually asked for, instead
        // of parsing the whole input only to discover the target is unsupported.
        if !(Self::is_supported(&from) && Self::is_supported(&to)) {
            return Err(ConversionError::UnsupportedConversion(from, to));
        }

        let source_has_graphs = Self::has_named_graphs(&from);
        let target_has_graphs = Self::has_named_graphs(&to);

        let written = match (source_has_graphs, target_has_graphs) {
            // Quads -> triples: lossy, only the default graph survives.
            (true, false) => {
                let quads = self.parse_quads(input, from)?;
                let triples: Vec<Triple> = quads
                    .into_iter()
                    .filter_map(|q| match q.graph_name() {
                        oxirs_core::model::GraphName::DefaultGraph => Some(Triple::new(
                            q.subject().clone(),
                            q.predicate().clone(),
                            q.object().clone(),
                        )),
                        _ => None,
                    })
                    .collect();
                self.serialize_triples(&triples, output, to)?;
                triples.len()
            }
            // Triples -> quads: everything goes into the default graph.
            (false, true) => {
                let triples = self.parse_triples(input, from)?;
                let quads: Vec<Quad> = triples
                    .into_iter()
                    .map(|t| {
                        Quad::new(
                            t.subject().clone(),
                            t.predicate().clone(),
                            t.object().clone(),
                            oxirs_core::model::GraphName::DefaultGraph,
                        )
                    })
                    .collect();
                self.serialize_quads(&quads, output, to)?;
                quads.len()
            }
            (true, true) => {
                let quads = self.parse_quads(input, from)?;
                self.serialize_quads(&quads, output, to)?;
                quads.len()
            }
            (false, false) => {
                let triples = self.parse_triples(input, from)?;
                self.serialize_triples(&triples, output, to)?;
                triples.len()
            }
        };
        Ok(written)
    }

    /// Parses all triples from `input`, which must be Turtle or N-Triples.
    ///
    /// The lenient Turtle parser is used when `lenient_parsing` is set; the
    /// N-Triples parser does not consult that flag.
    fn parse_triples<R: BufRead + 'static>(
        &self,
        input: R,
        format: RdfFormat,
    ) -> ConversionResult<Vec<Triple>> {
        match format {
            RdfFormat::Turtle => {
                let parser = if self.config.lenient_parsing {
                    TurtleParser::new_lenient()
                } else {
                    TurtleParser::new()
                };
                parser
                    .for_reader(input)
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(ConversionError::from)
            }
            RdfFormat::NTriples => {
                let parser = NTriplesParser::new();
                parser
                    .for_reader(input)
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(ConversionError::from)
            }
            // Callers validate the format first; this arm is a defensive fallback.
            _ => Err(ConversionError::UnsupportedConversion(
                format,
                RdfFormat::Turtle,
            )),
        }
    }

    /// Parses all quads from `input`, which must be N-Quads or TriG.
    fn parse_quads<R: BufRead + 'static>(
        &self,
        input: R,
        format: RdfFormat,
    ) -> ConversionResult<Vec<Quad>> {
        match format {
            RdfFormat::NQuads => {
                let parser = NQuadsParser::new();
                parser
                    .for_reader(input)
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(ConversionError::from)
            }
            RdfFormat::TriG => {
                let parser = TriGParser::new();
                parser
                    .for_reader(input)
                    .collect::<Result<Vec<_>, _>>()
                    .map_err(ConversionError::from)
            }
            // Callers validate the format first; this arm is a defensive fallback.
            _ => Err(ConversionError::UnsupportedConversion(
                format,
                RdfFormat::TriG,
            )),
        }
    }

    /// Writes `triples` to `output` as Turtle or N-Triples.
    ///
    /// Turtle output uses the serializer settings from the converter's
    /// configuration. Serializer failures are reported as
    /// [`ConversionError::SerializationError`] with the stringified cause.
    fn serialize_triples<W: Write>(
        &self,
        triples: &[Triple],
        output: &mut W,
        format: RdfFormat,
    ) -> ConversionResult<()> {
        match format {
            RdfFormat::Turtle => {
                let serializer = TurtleSerializer::with_config(self.config.serialization.clone());
                serializer
                    .serialize(triples, output)
                    .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
            }
            RdfFormat::NTriples => {
                let serializer = NTriplesSerializer::new();
                serializer
                    .serialize(triples, output)
                    .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
            }
            // Callers validate the format first; this arm is a defensive fallback.
            _ => {
                return Err(ConversionError::UnsupportedConversion(
                    RdfFormat::Turtle,
                    format,
                ))
            }
        }
        Ok(())
    }

    /// Writes `quads` to `output` for an N-Quads or TriG target.
    ///
    /// Serializer failures are reported as
    /// [`ConversionError::SerializationError`] with the stringified cause.
    fn serialize_quads<W: Write>(
        &self,
        quads: &[Quad],
        output: &mut W,
        format: RdfFormat,
    ) -> ConversionResult<()> {
        match format {
            // NOTE(review): a TriG target is written with the N-Quads
            // serializer, exactly as before. Quads outside the default graph
            // therefore come out in N-Quads line syntax, which is presumably
            // not what a TriG consumer expects — confirm, and switch to a
            // dedicated TriG serializer if the crate provides one.
            RdfFormat::NQuads | RdfFormat::TriG => {
                let serializer = NQuadsSerializer::new();
                serializer
                    .serialize(quads, output)
                    .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
            }
            // Callers validate the format first; this arm is a defensive fallback.
            _ => {
                return Err(ConversionError::UnsupportedConversion(
                    RdfFormat::TriG,
                    format,
                ))
            }
        }
        Ok(())
    }

    /// Converts the file at `input_path` (format `from`) into a new file at
    /// `output_path` (format `to`) and reports timing statistics.
    ///
    /// The returned [`ConversionStats`] holds the elapsed time of the
    /// conversion (including the final flush) and the number of statements
    /// written to the output. An existing file at `output_path` is truncated.
    ///
    /// # Errors
    ///
    /// * [`ConversionError::UnsupportedConversion`] if either format is
    ///   unsupported; this is checked before any file is opened or created, so
    ///   an existing output file is left untouched in that case.
    /// * [`ConversionError::IoError`] if a file cannot be opened or created,
    ///   or if flushing the output fails.
    /// * Any error returned by [`FormatConverter::convert_stream`].
    pub fn convert_file(
        &self,
        input_path: &str,
        output_path: &str,
        from: RdfFormat,
        to: RdfFormat,
    ) -> ConversionResult<ConversionStats> {
        if !(Self::is_supported(&from) && Self::is_supported(&to)) {
            return Err(ConversionError::UnsupportedConversion(from, to));
        }

        let reader = BufReader::new(std::fs::File::open(input_path)?);
        // Buffer the output so the serializer does not issue one syscall per
        // small write.
        let mut writer = std::io::BufWriter::new(std::fs::File::create(output_path)?);

        let start = std::time::Instant::now();
        let items_processed = self.convert_stream_counted(reader, &mut writer, from, to)?;
        // Flush explicitly: dropping a `BufWriter` would swallow write errors.
        writer.flush()?;
        let duration = start.elapsed();

        Ok(ConversionStats {
            duration,
            items_processed,
        })
    }
}
impl Default for FormatConverter {
fn default() -> Self {
Self::new()
}
}
/// Timing information describing one conversion run.
#[derive(Debug, Clone)]
pub struct ConversionStats {
    /// Wall-clock time the conversion took.
    pub duration: std::time::Duration,
    /// Number of items counted for the run, as reported by whoever built
    /// these stats.
    pub items_processed: usize,
}

impl ConversionStats {
    /// Returns the processing rate in items per second.
    ///
    /// A zero-length duration yields `0.0` rather than dividing by zero.
    pub fn throughput(&self) -> f64 {
        let seconds = self.duration.as_secs_f64();
        // `as_secs_f64` is never negative or NaN, so this guard only catches
        // an exactly-zero duration.
        if seconds <= 0.0 {
            return 0.0;
        }
        self.items_processed as f64 / seconds
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `input` with a default [`FormatConverter`], panicking if the
    /// conversion fails.
    fn convert(input: &str, from: RdfFormat, to: RdfFormat) -> String {
        FormatConverter::new()
            .convert_string(input, from, to)
            .expect("conversion should succeed")
    }

    /// Prefixed names in Turtle must come out as full IRIs in N-Triples.
    #[test]
    fn test_turtle_to_ntriples() {
        let source = r#"
@prefix ex: <http://example.org/> .
ex:subject ex:predicate "object" .
"#;
        let converted = convert(source, RdfFormat::Turtle, RdfFormat::NTriples);
        assert!(converted.contains("<http://example.org/subject>"));
        assert!(converted.contains("<http://example.org/predicate>"));
        assert!(converted.contains("\"object\""));
    }

    /// An N-Triples statement must survive the round trip into Turtle.
    #[test]
    fn test_ntriples_to_turtle() {
        let source = "<http://example.org/s> <http://example.org/p> \"o\" .";
        let converted = convert(source, RdfFormat::NTriples, RdfFormat::Turtle);
        assert!(converted.contains("<http://example.org/s>"));
    }

    /// The reader/writer based API must work on a plain byte slice.
    #[test]
    fn test_streaming_conversion() {
        let source: &'static [u8] = b"<http://s> <http://p> <http://o> .";
        let mut sink = Vec::new();
        FormatConverter::new()
            .convert_stream(source, &mut sink, RdfFormat::Turtle, RdfFormat::NTriples)
            .expect("conversion should succeed");
        let converted = String::from_utf8(sink).expect("valid UTF-8");
        assert!(converted.contains("<http://s>"));
    }

    /// Each builder method must set exactly the field it is named after.
    #[test]
    fn test_config_builder() {
        let ConversionConfig {
            lenient_parsing,
            preserve_prefixes,
            batch_size,
            ..
        } = ConversionConfig::new()
            .with_lenient(true)
            .with_preserve_prefixes(false)
            .with_batch_size(5000);
        assert!(lenient_parsing);
        assert!(!preserve_prefixes);
        assert_eq!(batch_size, 5000);
    }
}