1use crate::error::TurtleParseError;
55use crate::formats::nquads::{NQuadsParser, NQuadsSerializer};
56use crate::formats::ntriples::{NTriplesParser, NTriplesSerializer};
57use crate::formats::trig::TriGParser;
58use crate::formats::turtle::{TurtleParser, TurtleSerializer};
59use crate::toolkit::{Parser, RdfFormat, SerializationConfig, Serializer};
60use oxirs_core::model::{Quad, Triple};
61use std::io::{BufRead, BufReader, Write};
62
63pub type ConversionResult<T> = Result<T, ConversionError>;
65
66#[derive(Debug, thiserror::Error)]
68pub enum ConversionError {
69 #[error("Parse error: {0}")]
71 ParseError(#[from] TurtleParseError),
72
73 #[error("I/O error: {0}")]
75 IoError(#[from] std::io::Error),
76
77 #[error("Unsupported conversion from {0:?} to {1:?}")]
79 UnsupportedConversion(RdfFormat, RdfFormat),
80
81 #[error("Serialization error: {0}")]
83 SerializationError(String),
84
85 #[error("Invalid input: {0}")]
87 InvalidInput(String),
88}
89
90#[derive(Debug, Clone)]
92pub struct ConversionConfig {
93 pub serialization: SerializationConfig,
95 pub preserve_prefixes: bool,
97 pub lenient_parsing: bool,
99 pub batch_size: usize,
101}
102
103impl Default for ConversionConfig {
104 fn default() -> Self {
105 Self {
106 serialization: SerializationConfig::default(),
107 preserve_prefixes: true,
108 lenient_parsing: false,
109 batch_size: 10_000,
110 }
111 }
112}
113
114impl ConversionConfig {
115 pub fn new() -> Self {
117 Self::default()
118 }
119
120 pub fn with_lenient(mut self, lenient: bool) -> Self {
122 self.lenient_parsing = lenient;
123 self
124 }
125
126 pub fn with_preserve_prefixes(mut self, preserve: bool) -> Self {
128 self.preserve_prefixes = preserve;
129 self
130 }
131
132 pub fn with_batch_size(mut self, size: usize) -> Self {
134 self.batch_size = size;
135 self
136 }
137
138 pub fn with_serialization(mut self, config: SerializationConfig) -> Self {
140 self.serialization = config;
141 self
142 }
143}
144
145#[derive(Debug)]
149pub struct FormatConverter {
150 config: ConversionConfig,
151}
152
153impl FormatConverter {
154 pub fn new() -> Self {
156 Self {
157 config: ConversionConfig::default(),
158 }
159 }
160
161 pub fn with_config(config: ConversionConfig) -> Self {
163 Self { config }
164 }
165
166 pub fn convert_string(
184 &self,
185 input: &str,
186 from: RdfFormat,
187 to: RdfFormat,
188 ) -> ConversionResult<String> {
189 let mut output = Vec::new();
190 let input_bytes = input.as_bytes().to_vec();
191 let cursor = std::io::Cursor::new(input_bytes);
192 self.convert_stream(cursor, &mut output, from, to)?;
193 String::from_utf8(output).map_err(|e| {
194 ConversionError::SerializationError(format!("Invalid UTF-8 output: {}", e))
195 })
196 }
197
198 pub fn convert_stream<R: BufRead + 'static, W: Write>(
202 &self,
203 input: R,
204 output: &mut W,
205 from: RdfFormat,
206 to: RdfFormat,
207 ) -> ConversionResult<()> {
208 let from_has_quads = matches!(from, RdfFormat::NQuads | RdfFormat::TriG);
210 let to_has_quads = matches!(to, RdfFormat::NQuads | RdfFormat::TriG);
211
212 if from_has_quads && !to_has_quads {
213 let quads = self.parse_quads(input, from)?;
215 let triples: Vec<Triple> = quads
216 .into_iter()
217 .filter_map(|q| match q.graph_name() {
218 oxirs_core::model::GraphName::DefaultGraph => Some(Triple::new(
219 q.subject().clone(),
220 q.predicate().clone(),
221 q.object().clone(),
222 )),
223 _ => None,
224 })
225 .collect();
226 self.serialize_triples(&triples, output, to)?;
227 } else if !from_has_quads && to_has_quads {
228 let triples = self.parse_triples(input, from)?;
230 let quads: Vec<Quad> = triples
231 .into_iter()
232 .map(|t| {
233 Quad::new(
234 t.subject().clone(),
235 t.predicate().clone(),
236 t.object().clone(),
237 oxirs_core::model::GraphName::DefaultGraph,
238 )
239 })
240 .collect();
241 self.serialize_quads(&quads, output, to)?;
242 } else if from_has_quads && to_has_quads {
243 let quads = self.parse_quads(input, from)?;
245 self.serialize_quads(&quads, output, to)?;
246 } else {
247 let triples = self.parse_triples(input, from)?;
249 self.serialize_triples(&triples, output, to)?;
250 }
251
252 Ok(())
253 }
254
255 fn parse_triples<R: BufRead + 'static>(
257 &self,
258 input: R,
259 format: RdfFormat,
260 ) -> ConversionResult<Vec<Triple>> {
261 match format {
262 RdfFormat::Turtle => {
263 let parser = if self.config.lenient_parsing {
264 TurtleParser::new_lenient()
265 } else {
266 TurtleParser::new()
267 };
268 parser
269 .for_reader(input)
270 .collect::<Result<Vec<_>, _>>()
271 .map_err(ConversionError::from)
272 }
273 RdfFormat::NTriples => {
274 let parser = NTriplesParser::new();
275 parser
276 .for_reader(input)
277 .collect::<Result<Vec<_>, _>>()
278 .map_err(ConversionError::from)
279 }
280 _ => Err(ConversionError::UnsupportedConversion(
281 format,
282 RdfFormat::Turtle,
283 )),
284 }
285 }
286
287 fn parse_quads<R: BufRead + 'static>(
289 &self,
290 input: R,
291 format: RdfFormat,
292 ) -> ConversionResult<Vec<Quad>> {
293 match format {
294 RdfFormat::NQuads => {
295 let parser = NQuadsParser::new();
296 parser
297 .for_reader(input)
298 .collect::<Result<Vec<_>, _>>()
299 .map_err(ConversionError::from)
300 }
301 RdfFormat::TriG => {
302 let parser = TriGParser::new();
303 parser
304 .for_reader(input)
305 .collect::<Result<Vec<_>, _>>()
306 .map_err(ConversionError::from)
307 }
308 _ => Err(ConversionError::UnsupportedConversion(
309 format,
310 RdfFormat::TriG,
311 )),
312 }
313 }
314
315 fn serialize_triples<W: Write>(
317 &self,
318 triples: &[Triple],
319 output: &mut W,
320 format: RdfFormat,
321 ) -> ConversionResult<()> {
322 match format {
323 RdfFormat::Turtle => {
324 let serializer = TurtleSerializer::with_config(self.config.serialization.clone());
325 serializer
326 .serialize(triples, output)
327 .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
328 }
329 RdfFormat::NTriples => {
330 let serializer = NTriplesSerializer::new();
331 serializer
332 .serialize(triples, output)
333 .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
334 }
335 _ => {
336 return Err(ConversionError::UnsupportedConversion(
337 RdfFormat::Turtle,
338 format,
339 ))
340 }
341 }
342 Ok(())
343 }
344
345 fn serialize_quads<W: Write>(
347 &self,
348 quads: &[Quad],
349 output: &mut W,
350 format: RdfFormat,
351 ) -> ConversionResult<()> {
352 match format {
353 RdfFormat::NQuads => {
354 let serializer = NQuadsSerializer::new();
355 serializer
356 .serialize(quads, output)
357 .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
358 }
359 RdfFormat::TriG => {
360 let serializer = NQuadsSerializer::new();
363 serializer
364 .serialize(quads, output)
365 .map_err(|e| ConversionError::SerializationError(e.to_string()))?;
366 }
367 _ => {
368 return Err(ConversionError::UnsupportedConversion(
369 RdfFormat::TriG,
370 format,
371 ))
372 }
373 }
374 Ok(())
375 }
376
377 pub fn convert_file(
379 &self,
380 input_path: &str,
381 output_path: &str,
382 from: RdfFormat,
383 to: RdfFormat,
384 ) -> ConversionResult<ConversionStats> {
385 let input = std::fs::File::open(input_path)?;
386 let reader = BufReader::new(input);
387
388 let mut output = std::fs::File::create(output_path)?;
389
390 let start = std::time::Instant::now();
391 self.convert_stream(reader, &mut output, from, to)?;
392 let duration = start.elapsed();
393
394 Ok(ConversionStats {
395 duration,
396 items_processed: 0, })
398 }
399}
400
401impl Default for FormatConverter {
402 fn default() -> Self {
403 Self::new()
404 }
405}
406
/// Timing figures describing one conversion run.
#[derive(Debug, Clone)]
pub struct ConversionStats {
    /// Elapsed time recorded for the conversion.
    pub duration: std::time::Duration,
    /// Item count recorded for the conversion; the numerator of
    /// [`ConversionStats::throughput`].
    pub items_processed: usize,
}

impl ConversionStats {
    /// Returns items processed per second.
    ///
    /// Yields `0.0` when the recorded duration is zero, so the division is
    /// never performed with a zero denominator.
    pub fn throughput(&self) -> f64 {
        let seconds = self.duration.as_secs_f64();
        if seconds <= 0.0 {
            return 0.0;
        }
        self.items_processed as f64 / seconds
    }
}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `input` with a default converter, panicking if it fails.
    fn convert(input: &str, from: RdfFormat, to: RdfFormat) -> String {
        FormatConverter::new()
            .convert_string(input, from, to)
            .unwrap()
    }

    /// Prefixed Turtle names come out as full IRIs in N-Triples.
    #[test]
    fn test_turtle_to_ntriples() {
        let turtle = r#"
@prefix ex: <http://example.org/> .
ex:subject ex:predicate "object" .
        "#;

        let result = convert(turtle, RdfFormat::Turtle, RdfFormat::NTriples);

        let expected_fragments = [
            "<http://example.org/subject>",
            "<http://example.org/predicate>",
            "\"object\"",
        ];
        for fragment in expected_fragments.iter().copied() {
            assert!(result.contains(fragment));
        }
    }

    /// An N-Triples statement keeps its subject IRI when written as Turtle.
    #[test]
    fn test_ntriples_to_turtle() {
        let ntriples = "<http://example.org/s> <http://example.org/p> \"o\" .";

        let result = convert(ntriples, RdfFormat::NTriples, RdfFormat::Turtle);

        assert!(result.contains("<http://example.org/s>"));
    }

    /// The reader/writer API accepts a byte slice and fills a `Vec<u8>`.
    #[test]
    fn test_streaming_conversion() {
        let input: &'static [u8] = b"<http://s> <http://p> <http://o> .";
        let mut sink = Vec::new();

        FormatConverter::new()
            .convert_stream(input, &mut sink, RdfFormat::Turtle, RdfFormat::NTriples)
            .unwrap();

        let text = String::from_utf8(sink).unwrap();
        assert!(text.contains("<http://s>"));
    }

    /// Each builder method overrides exactly the field it names.
    #[test]
    fn test_config_builder() {
        let config = ConversionConfig::new()
            .with_lenient(true)
            .with_preserve_prefixes(false)
            .with_batch_size(5000);

        assert!(config.lenient_parsing);
        assert!(!config.preserve_prefixes);
        assert_eq!(config.batch_size, 5000);
    }
}