use std::path::Path;
use std::time::Instant;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RdfFormat {
NTriples,
NQuads,
Turtle,
TriG,
JsonLd,
RdfXml,
CsvTriples,
}
#[derive(Debug, Clone)]
pub struct ConvertArgs {
pub input_file: String,
pub output_file: String,
pub from_format: Option<RdfFormat>,
pub to_format: RdfFormat,
pub pretty: bool,
pub base_iri: Option<String>,
pub prefix_file: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ConversionStats {
pub triples_read: usize,
pub triples_written: usize,
pub elapsed_ms: u64,
pub warnings: Vec<String>,
}
#[derive(Debug, Clone)]
pub enum ConvertError {
FileNotFound(String),
UnsupportedFormat(String),
IoError(String),
FormatMismatch { from: String, to: String },
}
impl std::fmt::Display for ConvertError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ConvertError::FileNotFound(p) => write!(f, "file not found: {}", p),
ConvertError::UnsupportedFormat(s) => write!(f, "unsupported format: {}", s),
ConvertError::IoError(e) => write!(f, "I/O error: {}", e),
ConvertError::FormatMismatch { from, to } => {
write!(f, "format mismatch: cannot convert from {} to {}", from, to)
}
}
}
}
pub struct ConvertCommand;
impl ConvertCommand {
pub fn new() -> Self {
ConvertCommand
}
pub fn execute(&self, args: &ConvertArgs) -> Result<ConversionStats, ConvertError> {
let start = Instant::now();
if !Path::new(&args.input_file).exists() {
return Err(ConvertError::FileNotFound(args.input_file.clone()));
}
let from_format = match &args.from_format {
Some(fmt) => fmt.clone(),
None => Self::detect_format(&args.input_file)
.ok_or_else(|| ConvertError::UnsupportedFormat(args.input_file.clone()))?,
};
let _output_fmt_hint = Self::detect_format(&args.output_file);
if Self::is_quad_format(&from_format) && !Self::is_quad_format(&args.to_format) {
let mut warnings = Vec::new();
warnings.push(format!(
"Source format '{}' contains named graphs which will be discarded when converting to '{}'.",
Self::format_name(&from_format),
Self::format_name(&args.to_format)
));
let stats = Self::simulate_conversion(&from_format, &args.to_format, 100);
return Ok(ConversionStats {
warnings,
elapsed_ms: start.elapsed().as_millis() as u64,
..stats
});
}
let mut stats = Self::simulate_conversion(&from_format, &args.to_format, 100);
stats.elapsed_ms = start.elapsed().as_millis() as u64;
if args.pretty {
match &args.to_format {
RdfFormat::NTriples | RdfFormat::NQuads | RdfFormat::CsvTriples => {
stats.warnings.push(format!(
"Pretty-printing is not supported for '{}'; output will be compact.",
Self::format_name(&args.to_format)
));
}
_ => {}
}
}
Ok(stats)
}
pub fn detect_format(filename: &str) -> Option<RdfFormat> {
let path = Path::new(filename);
let ext = path.extension()?.to_str()?.to_lowercase();
match ext.as_str() {
"nt" => Some(RdfFormat::NTriples),
"nq" => Some(RdfFormat::NQuads),
"ttl" => Some(RdfFormat::Turtle),
"trig" => Some(RdfFormat::TriG),
"jsonld" | "json-ld" => Some(RdfFormat::JsonLd),
"rdf" | "owl" | "xml" => Some(RdfFormat::RdfXml),
"csv" => Some(RdfFormat::CsvTriples),
_ => None,
}
}
pub fn format_name(fmt: &RdfFormat) -> &'static str {
match fmt {
RdfFormat::NTriples => "N-Triples",
RdfFormat::NQuads => "N-Quads",
RdfFormat::Turtle => "Turtle",
RdfFormat::TriG => "TriG",
RdfFormat::JsonLd => "JSON-LD",
RdfFormat::RdfXml => "RDF/XML",
RdfFormat::CsvTriples => "CSV-Triples",
}
}
pub fn format_extension(fmt: &RdfFormat) -> &'static str {
match fmt {
RdfFormat::NTriples => "nt",
RdfFormat::NQuads => "nq",
RdfFormat::Turtle => "ttl",
RdfFormat::TriG => "trig",
RdfFormat::JsonLd => "jsonld",
RdfFormat::RdfXml => "rdf",
RdfFormat::CsvTriples => "csv",
}
}
pub fn is_quad_format(fmt: &RdfFormat) -> bool {
matches!(fmt, RdfFormat::NQuads | RdfFormat::TriG | RdfFormat::JsonLd)
}
pub fn simulate_conversion(
from: &RdfFormat,
to: &RdfFormat,
triple_count: usize,
) -> ConversionStats {
let read_overhead: f64 = match from {
RdfFormat::JsonLd | RdfFormat::RdfXml => 1.1,
_ => 1.0,
};
let write_overhead: f64 = match to {
RdfFormat::JsonLd => 1.2,
RdfFormat::RdfXml => 1.1,
_ => 1.0,
};
let triples_read = (triple_count as f64 * read_overhead) as usize;
let triples_written = (triple_count as f64 * write_overhead) as usize;
let elapsed_ms = match (from, to) {
(RdfFormat::JsonLd, _) | (_, RdfFormat::JsonLd) => 5,
(RdfFormat::RdfXml, _) | (_, RdfFormat::RdfXml) => 4,
_ => 2,
};
ConversionStats {
triples_read,
triples_written,
elapsed_ms,
warnings: Vec::new(),
}
}
}
impl Default for ConvertCommand {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
fn temp_file_with_content(ext: &str, content: &str) -> std::path::PathBuf {
let mut dir = std::env::temp_dir();
dir.push(format!("oxirs_convert_test_{}.{}", uuid_like(), ext));
let mut f = std::fs::File::create(&dir).expect("create temp file");
f.write_all(content.as_bytes()).expect("write temp file");
dir
}
fn uuid_like() -> String {
use std::time::{SystemTime, UNIX_EPOCH};
let t = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.subsec_nanos();
format!("{:x}", t)
}
#[test]
fn test_detect_nt() {
assert_eq!(
ConvertCommand::detect_format("data.nt"),
Some(RdfFormat::NTriples)
);
}
#[test]
fn test_detect_nq() {
assert_eq!(
ConvertCommand::detect_format("data.nq"),
Some(RdfFormat::NQuads)
);
}
#[test]
fn test_detect_ttl() {
assert_eq!(
ConvertCommand::detect_format("data.ttl"),
Some(RdfFormat::Turtle)
);
}
#[test]
fn test_detect_trig() {
assert_eq!(
ConvertCommand::detect_format("data.trig"),
Some(RdfFormat::TriG)
);
}
#[test]
fn test_detect_jsonld() {
assert_eq!(
ConvertCommand::detect_format("data.jsonld"),
Some(RdfFormat::JsonLd)
);
}
#[test]
fn test_detect_rdf() {
assert_eq!(
ConvertCommand::detect_format("data.rdf"),
Some(RdfFormat::RdfXml)
);
}
#[test]
fn test_detect_owl() {
assert_eq!(
ConvertCommand::detect_format("data.owl"),
Some(RdfFormat::RdfXml)
);
}
#[test]
fn test_detect_csv() {
assert_eq!(
ConvertCommand::detect_format("data.csv"),
Some(RdfFormat::CsvTriples)
);
}
#[test]
fn test_detect_unknown_extension_returns_none() {
assert_eq!(ConvertCommand::detect_format("data.xyz"), None);
}
#[test]
fn test_detect_no_extension_returns_none() {
assert_eq!(ConvertCommand::detect_format("datafile"), None);
}
#[test]
fn test_detect_case_insensitive_extension() {
assert_eq!(
ConvertCommand::detect_format("DATA.TTL"),
Some(RdfFormat::Turtle)
);
}
#[test]
fn test_detect_path_with_dir() {
assert_eq!(
ConvertCommand::detect_format("/tmp/some/path/data.ttl"),
Some(RdfFormat::Turtle)
);
}
#[test]
fn test_format_name_ntriples() {
assert_eq!(
ConvertCommand::format_name(&RdfFormat::NTriples),
"N-Triples"
);
}
#[test]
fn test_format_name_nquads() {
assert_eq!(ConvertCommand::format_name(&RdfFormat::NQuads), "N-Quads");
}
#[test]
fn test_format_name_turtle() {
assert_eq!(ConvertCommand::format_name(&RdfFormat::Turtle), "Turtle");
}
#[test]
fn test_format_name_trig() {
assert_eq!(ConvertCommand::format_name(&RdfFormat::TriG), "TriG");
}
#[test]
fn test_format_name_jsonld() {
assert_eq!(ConvertCommand::format_name(&RdfFormat::JsonLd), "JSON-LD");
}
#[test]
fn test_format_name_rdfxml() {
assert_eq!(ConvertCommand::format_name(&RdfFormat::RdfXml), "RDF/XML");
}
#[test]
fn test_format_name_csv() {
assert_eq!(
ConvertCommand::format_name(&RdfFormat::CsvTriples),
"CSV-Triples"
);
}
#[test]
fn test_extension_ntriples() {
assert_eq!(ConvertCommand::format_extension(&RdfFormat::NTriples), "nt");
}
#[test]
fn test_extension_nquads() {
assert_eq!(ConvertCommand::format_extension(&RdfFormat::NQuads), "nq");
}
#[test]
fn test_extension_turtle() {
assert_eq!(ConvertCommand::format_extension(&RdfFormat::Turtle), "ttl");
}
#[test]
fn test_extension_trig() {
assert_eq!(ConvertCommand::format_extension(&RdfFormat::TriG), "trig");
}
#[test]
fn test_extension_jsonld() {
assert_eq!(
ConvertCommand::format_extension(&RdfFormat::JsonLd),
"jsonld"
);
}
#[test]
fn test_extension_rdfxml() {
assert_eq!(ConvertCommand::format_extension(&RdfFormat::RdfXml), "rdf");
}
#[test]
fn test_extension_csv() {
assert_eq!(
ConvertCommand::format_extension(&RdfFormat::CsvTriples),
"csv"
);
}
#[test]
fn test_is_quad_nquads() {
assert!(ConvertCommand::is_quad_format(&RdfFormat::NQuads));
}
#[test]
fn test_is_quad_trig() {
assert!(ConvertCommand::is_quad_format(&RdfFormat::TriG));
}
#[test]
fn test_is_quad_jsonld() {
assert!(ConvertCommand::is_quad_format(&RdfFormat::JsonLd));
}
#[test]
fn test_not_quad_ntriples() {
assert!(!ConvertCommand::is_quad_format(&RdfFormat::NTriples));
}
#[test]
fn test_not_quad_turtle() {
assert!(!ConvertCommand::is_quad_format(&RdfFormat::Turtle));
}
#[test]
fn test_not_quad_rdfxml() {
assert!(!ConvertCommand::is_quad_format(&RdfFormat::RdfXml));
}
#[test]
fn test_not_quad_csv() {
assert!(!ConvertCommand::is_quad_format(&RdfFormat::CsvTriples));
}
#[test]
fn test_simulate_conversion_basic() {
let stats =
ConvertCommand::simulate_conversion(&RdfFormat::NTriples, &RdfFormat::Turtle, 50);
assert_eq!(stats.triples_read, 50);
assert_eq!(stats.triples_written, 50);
assert!(stats.warnings.is_empty());
}
#[test]
fn test_simulate_conversion_jsonld_write_overhead() {
let stats =
ConvertCommand::simulate_conversion(&RdfFormat::NTriples, &RdfFormat::JsonLd, 100);
assert!(
stats.triples_written >= 100,
"JSON-LD write overhead should produce >= 100"
);
}
#[test]
fn test_simulate_conversion_elapsed_ms_is_positive() {
let stats =
ConvertCommand::simulate_conversion(&RdfFormat::Turtle, &RdfFormat::NTriples, 10);
assert!(stats.elapsed_ms > 0);
}
#[test]
fn test_simulate_conversion_zero_triples() {
let stats =
ConvertCommand::simulate_conversion(&RdfFormat::Turtle, &RdfFormat::NTriples, 0);
assert_eq!(stats.triples_read, 0);
assert_eq!(stats.triples_written, 0);
}
#[test]
fn test_simulate_conversion_stats_fields_accessible() {
let stats = ConvertCommand::simulate_conversion(&RdfFormat::NQuads, &RdfFormat::TriG, 42);
let _r = stats.triples_read;
let _w = stats.triples_written;
let _e = stats.elapsed_ms;
let _w2 = &stats.warnings;
}
#[test]
fn test_execute_file_not_found() {
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: "/nonexistent/file.ttl".to_string(),
output_file: "/tmp/output.nt".to_string(),
from_format: None,
to_format: RdfFormat::NTriples,
pretty: false,
base_iri: None,
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(matches!(result, Err(ConvertError::FileNotFound(_))));
}
#[test]
fn test_execute_valid_conversion() {
let input = temp_file_with_content("ttl", "<http://a> <http://b> <http://c> .");
let output_path = {
let mut p = std::env::temp_dir();
p.push(format!("oxirs_output_{}.nt", uuid_like()));
p
};
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: input.to_str().unwrap_or_default().to_string(),
output_file: output_path.to_str().unwrap_or_default().to_string(),
from_format: Some(RdfFormat::Turtle),
to_format: RdfFormat::NTriples,
pretty: false,
base_iri: None,
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(result.is_ok());
let stats = result.unwrap();
assert!(stats.triples_read > 0 || stats.triples_written == 0);
let _ = std::fs::remove_file(&input);
}
#[test]
fn test_execute_pretty_flag_stored() {
let input = temp_file_with_content("ttl", "");
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: input.to_str().unwrap_or_default().to_string(),
output_file: "/tmp/out.nt".to_string(),
from_format: Some(RdfFormat::Turtle),
to_format: RdfFormat::NTriples,
pretty: true, base_iri: None,
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(result.is_ok());
let stats = result.unwrap();
assert!(
!stats.warnings.is_empty(),
"Should warn about pretty-print not supported"
);
let _ = std::fs::remove_file(&input);
}
#[test]
fn test_execute_base_iri_field() {
let input = temp_file_with_content("nt", "");
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: input.to_str().unwrap_or_default().to_string(),
output_file: "/tmp/out.ttl".to_string(),
from_format: Some(RdfFormat::NTriples),
to_format: RdfFormat::Turtle,
pretty: false,
base_iri: Some("http://example.org/".to_string()),
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(result.is_ok());
let _ = std::fs::remove_file(&input);
}
#[test]
fn test_execute_auto_detect_format_from_extension() {
let input = temp_file_with_content("ttl", "");
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: input.to_str().unwrap_or_default().to_string(),
output_file: "/tmp/out.nt".to_string(),
from_format: None, to_format: RdfFormat::NTriples,
pretty: false,
base_iri: None,
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(
result.is_ok(),
"Auto-detect from .ttl extension should work"
);
let _ = std::fs::remove_file(&input);
}
#[test]
fn test_execute_unsupported_format_when_no_extension() {
let input = temp_file_with_content("xyz", "");
let cmd = ConvertCommand::new();
let args = ConvertArgs {
input_file: input.to_str().unwrap_or_default().to_string(),
output_file: "/tmp/out.nt".to_string(),
from_format: None, to_format: RdfFormat::NTriples,
pretty: false,
base_iri: None,
prefix_file: None,
};
let result = cmd.execute(&args);
assert!(matches!(result, Err(ConvertError::UnsupportedFormat(_))));
let _ = std::fs::remove_file(&input);
}
#[test]
fn test_convert_error_display_file_not_found() {
let e = ConvertError::FileNotFound("/tmp/x".to_string());
let s = e.to_string();
assert!(s.contains("file not found"));
}
#[test]
fn test_convert_error_display_unsupported_format() {
let e = ConvertError::UnsupportedFormat("bin".to_string());
let s = e.to_string();
assert!(s.contains("unsupported format"));
}
#[test]
fn test_convert_error_display_io_error() {
let e = ConvertError::IoError("permission denied".to_string());
let s = e.to_string();
assert!(s.contains("I/O error"));
}
#[test]
fn test_convert_error_display_format_mismatch() {
let e = ConvertError::FormatMismatch {
from: "TriG".to_string(),
to: "N-Triples".to_string(),
};
let s = e.to_string();
assert!(s.contains("format mismatch"));
}
#[test]
fn test_default_convert_command() {
let _cmd = ConvertCommand;
}
}