use std::fs::File;
use std::io::Read;
use std::path::Path;
use super::ToolResult;
/// RDF serialization formats this module knows about.
///
/// Each variant has associated file extensions, MIME types and a display
/// name, exposed through the inherent methods on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RdfFormat {
/// Turtle (display name "Turtle", default extension `ttl`).
Turtle,
/// N-Triples (display name "N-Triples", default extension `nt`).
NTriples,
/// RDF/XML (display name "RDF/XML", default extension `rdf`).
RdfXml,
/// JSON-LD (display name "JSON-LD", default extension `jsonld`).
JsonLd,
/// TriG (display name "TriG", default extension `trig`).
TriG,
/// N-Quads (display name "N-Quads", default extension `nq`).
NQuads,
/// RDF/JSON (display name "RDF/JSON", default extension `rj`).
RdfJson,
/// RDFa (display name "RDFa", default extension `html`).
RdFa,
/// Notation3 (display name "Notation3", default extension `n3`).
Notation3,
}
impl RdfFormat {
pub fn extensions(&self) -> &'static [&'static str] {
match self {
RdfFormat::Turtle => &["ttl", "turtle"],
RdfFormat::NTriples => &["nt", "ntriples"],
RdfFormat::RdfXml => &["rdf", "xml", "owl"],
RdfFormat::JsonLd => &["jsonld", "json-ld"],
RdfFormat::TriG => &["trig"],
RdfFormat::NQuads => &["nq", "nquads"],
RdfFormat::RdfJson => &["rj", "rdf-json"],
RdfFormat::RdFa => &["html", "xhtml"],
RdfFormat::Notation3 => &["n3"],
}
}
pub fn mime_types(&self) -> &'static [&'static str] {
match self {
RdfFormat::Turtle => &["text/turtle", "application/x-turtle"],
RdfFormat::NTriples => &["application/n-triples", "text/plain"],
RdfFormat::RdfXml => &["application/rdf+xml", "application/xml", "text/xml"],
RdfFormat::JsonLd => &["application/ld+json", "application/json"],
RdfFormat::TriG => &["application/trig", "application/x-trig"],
RdfFormat::NQuads => &["application/n-quads", "text/x-nquads"],
RdfFormat::RdfJson => &["application/rdf+json"],
RdfFormat::RdFa => &["text/html", "application/xhtml+xml"],
RdfFormat::Notation3 => &["text/n3", "text/rdf+n3"],
}
}
pub fn name(&self) -> &'static str {
match self {
RdfFormat::Turtle => "Turtle",
RdfFormat::NTriples => "N-Triples",
RdfFormat::RdfXml => "RDF/XML",
RdfFormat::JsonLd => "JSON-LD",
RdfFormat::TriG => "TriG",
RdfFormat::NQuads => "N-Quads",
RdfFormat::RdfJson => "RDF/JSON",
RdfFormat::RdFa => "RDFa",
RdfFormat::Notation3 => "Notation3",
}
}
pub fn supports_graphs(&self) -> bool {
matches!(
self,
RdfFormat::TriG | RdfFormat::NQuads | RdfFormat::JsonLd
)
}
pub fn is_line_based(&self) -> bool {
matches!(self, RdfFormat::NTriples | RdfFormat::NQuads)
}
pub fn default_extension(&self) -> &'static str {
self.extensions()[0]
}
}
impl std::fmt::Display for RdfFormat {
    /// Writes the human-readable format name returned by `name()`.
    ///
    /// The name is emitted through `Formatter::pad` so that width, fill,
    /// alignment and precision flags supplied by the caller (for example
    /// `{:<12}` when printing a table) are honoured. Without any flags the
    /// output is exactly the name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.name())
    }
}
/// One candidate produced by format detection.
///
/// * `format` - the format this candidate proposes.
/// * `confidence` - score used to rank candidates; `FormatDetector::detect`
///   sorts by it in descending order. The detectors in this file assign
///   values between 0.6 and 0.95.
/// * `reasoning` - human-readable explanation of why the format was proposed.
#[derive(Debug, Clone)]
pub struct FormatDetection {
pub format: RdfFormat,
pub confidence: f32, pub reasoning: String,
}
/// Stateless namespace for the format-detection functions (file extension,
/// MIME type and content heuristics).
pub struct FormatDetector;
impl FormatDetector {
/// Combines every available hint into a ranked list of candidate formats.
///
/// Hints are evaluated in the order extension, MIME type, content. The
/// candidates are then sorted by descending confidence (stable, so ties
/// keep that evaluation order) and only the best-ranked candidate of each
/// format is kept.
///
/// Returns an empty vector when no hint matches or none is supplied.
pub fn detect(
    path: Option<&Path>,
    content: Option<&str>,
    mime_type: Option<&str>,
) -> Vec<FormatDetection> {
    let mut candidates: Vec<FormatDetection> = Vec::new();
    candidates.extend(path.and_then(|p| Self::detect_by_extension(p)));
    candidates.extend(mime_type.and_then(|m| Self::detect_by_mime_type(m)));
    if let Some(text) = content {
        candidates.extend(Self::detect_by_content(text));
    }

    // Highest confidence first; incomparable scores are treated as equal.
    candidates.sort_by(|lhs, rhs| {
        rhs.confidence
            .partial_cmp(&lhs.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // `insert` returns false for a format that was already seen, which
    // drops the lower-ranked duplicates.
    let mut formats_seen = std::collections::HashSet::new();
    candidates.retain(|candidate| formats_seen.insert(candidate.format));
    candidates
}
fn detect_by_extension(path: &Path) -> Option<FormatDetection> {
let ext = path.extension()?.to_str()?.to_lowercase();
for format in &[
RdfFormat::Turtle,
RdfFormat::NTriples,
RdfFormat::RdfXml,
RdfFormat::JsonLd,
RdfFormat::TriG,
RdfFormat::NQuads,
RdfFormat::RdfJson,
RdfFormat::RdFa,
RdfFormat::Notation3,
] {
if format.extensions().contains(&ext.as_str()) {
return Some(FormatDetection {
format: *format,
confidence: 0.8,
reasoning: format!("File extension '.{}' matches {}", ext, format.name()),
});
}
}
None
}
fn detect_by_mime_type(mime: &str) -> Option<FormatDetection> {
let mime_lower = mime.to_lowercase();
for format in &[
RdfFormat::Turtle,
RdfFormat::NTriples,
RdfFormat::RdfXml,
RdfFormat::JsonLd,
RdfFormat::TriG,
RdfFormat::NQuads,
RdfFormat::RdfJson,
RdfFormat::RdFa,
RdfFormat::Notation3,
] {
if format.mime_types().iter().any(|&m| m == mime_lower) {
return Some(FormatDetection {
format: *format,
confidence: 0.9,
reasoning: format!("MIME type '{}' indicates {}", mime, format.name()),
});
}
}
None
}
/// Proposes formats by looking at the first 20 lines of `content`.
///
/// Several heuristics run independently, so more than one candidate may be
/// returned (the caller ranks them by confidence):
///
/// * Turtle (0.95) - a line starts with `@prefix` or `@base`.
/// * N-Triples (0.85, or 0.6 when no ` .` occurs at all) - every sampled
///   line is blank, a `#` comment, or a statement starting with `<` / `_`
///   that contains ` .` and carries no graph label.
/// * RDF/XML (0.95) - content starts with `<?xml` or `<rdf:RDF`.
/// * JSON-LD (0.95) - content starts with `{` and mentions `@context`.
/// * TriG (0.8) - a line starts with `GRAPH` or contains ` {`.
/// * N-Quads (0.7) - a line is a statement with a graph label.
///
/// Empty or whitespace-only content yields no candidates.
fn detect_by_content(content: &str) -> Vec<FormatDetection> {
    // A statement with a graph label has at least five whitespace-separated
    // tokens (subject, predicate, object, graph, terminating '.') and the
    // token in front of the terminator is an IRI (`<...>`) or a blank node
    // (`_:...`). A plain triple only has four tokens, so it never matches.
    fn has_graph_label(line: &str) -> bool {
        let line = line.trim_end();
        if !line.ends_with(" .") {
            return false;
        }
        let tokens: Vec<&str> = line.split_whitespace().collect();
        if tokens.len() < 5 {
            return false;
        }
        let graph = tokens[tokens.len() - 2];
        (graph.starts_with('<') && graph.ends_with('>')) || graph.starts_with("_:")
    }

    let mut detections = Vec::new();
    let trimmed = content.trim();
    let first_lines: Vec<&str> = trimmed.lines().take(20).collect();

    if first_lines
        .iter()
        .any(|line| line.starts_with("@prefix") || line.starts_with("@base"))
    {
        detections.push(FormatDetection {
            format: RdfFormat::Turtle,
            confidence: 0.95,
            reasoning: "Contains @prefix or @base directives".to_string(),
        });
    }

    // `all` is vacuously true on an empty sample, so empty content must be
    // excluded explicitly. Lines with a graph label belong to N-Quads and
    // therefore disqualify the N-Triples candidate.
    let looks_like_ntriples = !first_lines.is_empty()
        && first_lines.iter().all(|line| {
            line.is_empty()
                || line.starts_with('#')
                || (line.contains(" .")
                    && (line.starts_with('<') || line.starts_with('_'))
                    && !has_graph_label(line))
        });
    if looks_like_ntriples {
        // Comment-only samples contain no statement terminator and get the
        // lower score.
        let confidence = if trimmed.contains(" .") { 0.85 } else { 0.6 };
        detections.push(FormatDetection {
            format: RdfFormat::NTriples,
            confidence,
            reasoning: "Line-based format with N-Triples patterns".to_string(),
        });
    }

    if trimmed.starts_with("<?xml") || trimmed.starts_with("<rdf:RDF") {
        detections.push(FormatDetection {
            format: RdfFormat::RdfXml,
            confidence: 0.95,
            reasoning: "XML declaration or rdf:RDF root element".to_string(),
        });
    }

    if trimmed.starts_with('{')
        && (trimmed.contains("\"@context\"") || trimmed.contains("'@context'"))
    {
        detections.push(FormatDetection {
            format: RdfFormat::JsonLd,
            confidence: 0.95,
            reasoning: "JSON object with @context".to_string(),
        });
    }

    if first_lines
        .iter()
        .any(|line| line.trim().starts_with("GRAPH") || line.contains(" {"))
    {
        detections.push(FormatDetection {
            format: RdfFormat::TriG,
            confidence: 0.8,
            reasoning: "Contains GRAPH keyword or graph brackets".to_string(),
        });
    }

    if first_lines.iter().any(|line| has_graph_label(line)) {
        detections.push(FormatDetection {
            format: RdfFormat::NQuads,
            confidence: 0.7,
            reasoning: "Line format with four or more components".to_string(),
        });
    }

    detections
}
/// Detects the format of the file at `path` from its extension and from a
/// sample of at most 4096 bytes taken from the start of the file.
///
/// The sample is decoded as UTF-8 with invalid sequences replaced, so
/// binary or differently-encoded files do not cause an error.
///
/// # Errors
///
/// Propagates the I/O error if the file cannot be opened or read.
///
/// NOTE(review): the file is read with blocking `std::fs` calls although
/// the function is `async`; callers on an async runtime may want to run it
/// on a blocking-capable thread.
pub async fn detect_file(path: &Path) -> ToolResult<Vec<FormatDetection>> {
    const SAMPLE_BYTES: u64 = 4096;

    // A single `read` call is allowed to return fewer bytes than are
    // available; `take` + `read_to_end` keeps reading until the sample is
    // full or the file ends.
    let mut buffer = Vec::new();
    File::open(path)?
        .take(SAMPLE_BYTES)
        .read_to_end(&mut buffer)?;

    let content = String::from_utf8_lossy(&buffer);
    Ok(Self::detect(Some(path), Some(&content), None))
}
}
/// Interface for converting RDF text between serialization formats.
pub trait FormatConverter {
/// Converts `input` from the `from` format to the `to` format and returns
/// the converted text, or an error when the conversion cannot be done.
/// (Contract inferred from the signature; the only implementation visible
/// in this file always returns an error.)
fn convert(&self, input: &str, from: RdfFormat, to: RdfFormat) -> ToolResult<String>;
/// Reports whether this converter can convert from `from` to `to`.
fn supports_conversion(&self, from: RdfFormat, to: RdfFormat) -> bool;
}
/// Placeholder converter: its `FormatConverter` implementation supports no
/// conversion and `convert` always returns a "not yet implemented" error.
pub struct BasicFormatConverter;
impl FormatConverter for BasicFormatConverter {
    /// Always fails: no conversion is implemented yet. The error message
    /// names both the source and the target format.
    fn convert(&self, _input: &str, from: RdfFormat, to: RdfFormat) -> ToolResult<String> {
        let (source, target) = (from.name(), to.name());
        let message = format!(
            "Conversion from {} to {} not yet implemented",
            source, target
        );
        Err(message.into())
    }

    /// Always `false`, matching `convert`, which rejects every pair.
    fn supports_conversion(&self, _from: RdfFormat, _to: RdfFormat) -> bool {
        false
    }
}
/// Stateless namespace for the per-format validation functions.
pub struct FormatValidator;
impl FormatValidator {
/// Validates `content` as a document in the given `format`.
///
/// N-Triples, Turtle, RDF/XML and JSON-LD are dispatched to their dedicated
/// validators; for every other format a warning stating that validation is
/// not implemented is recorded and the result stays valid.
///
/// Always returns `Ok`; problems found in the content are reported through
/// the `errors`, `warnings` and `valid` fields of the result.
pub fn validate(content: &str, format: RdfFormat) -> ToolResult<ValidationResult> {
    let mut report = ValidationResult {
        valid: true,
        errors: Vec::new(),
        warnings: Vec::new(),
        stats: ValidationStats::default(),
    };

    match format {
        RdfFormat::NTriples => Self::validate_ntriples(content, &mut report),
        RdfFormat::Turtle => Self::validate_turtle(content, &mut report),
        RdfFormat::RdfXml => Self::validate_rdf_xml(content, &mut report),
        RdfFormat::JsonLd => Self::validate_json_ld(content, &mut report),
        RdfFormat::TriG
        | RdfFormat::NQuads
        | RdfFormat::RdfJson
        | RdfFormat::RdFa
        | RdfFormat::Notation3 => {
            let notice = format!(
                "Validation for {} format not yet implemented",
                format.name()
            );
            report.warnings.push(notice);
        }
    }

    Ok(report)
}
/// Performs a shallow, line-by-line check of N-Triples content.
///
/// Blank lines and lines starting with `#` are skipped. Every other line is
/// counted in `result.stats.triple_count` (whether or not it turns out to be
/// well-formed) and must
///
/// * end with the statement terminator `.` - whitespace before the dot is
///   optional, so both `<o> .` and `<o>.` are accepted, and
/// * contain at least three whitespace-separated components in front of the
///   terminator (subject, predicate, object).
///
/// Each violation adds a `ValidationError` carrying the 1-based line number
/// and marks the result invalid. Terms themselves are not parsed.
fn validate_ntriples(content: &str, result: &mut ValidationResult) {
    for (index, raw_line) in content.lines().enumerate() {
        let line = raw_line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        let line_number = index + 1;
        result.stats.triple_count += 1;

        // Separate the terminator from the terms so that it is not counted
        // as a component below.
        let statement = match line.strip_suffix('.') {
            Some(terms) => terms,
            None => {
                result.errors.push(ValidationError {
                    line: Some(line_number),
                    column: None,
                    message: "N-Triples line must end with '.'".to_string(),
                });
                result.valid = false;
                line
            }
        };

        if statement.split_whitespace().count() < 3 {
            result.errors.push(ValidationError {
                line: Some(line_number),
                column: None,
                message: "N-Triples line must have at least 3 components".to_string(),
            });
            result.valid = false;
        }
    }
}
/// Placeholder for Turtle validation: the content is not inspected, a
/// warning saying so is recorded and the result's validity is untouched.
fn validate_turtle(_content: &str, result: &mut ValidationResult) {
    let notice = String::from("Turtle validation not fully implemented");
    result.warnings.push(notice);
}
/// Placeholder for RDF/XML validation: the content is not inspected, a
/// warning saying so is recorded and the result's validity is untouched.
fn validate_rdf_xml(_content: &str, result: &mut ValidationResult) {
    let notice = String::from("RDF/XML validation not fully implemented");
    result.warnings.push(notice);
}
/// Validates JSON-LD content: first as JSON, then structurally.
///
/// If `content` is not well-formed JSON, a single "Invalid JSON" error with
/// the parser's message is recorded and the result is marked invalid.
/// Otherwise the parsed document is handed to the JSON-LD semantic checks.
fn validate_json_ld(content: &str, result: &mut ValidationResult) {
    let document = match serde_json::from_str::<serde_json::Value>(content) {
        Ok(document) => document,
        Err(parse_error) => {
            result.errors.push(ValidationError {
                line: None,
                column: None,
                message: format!("Invalid JSON: {}", parse_error),
            });
            result.valid = false;
            return;
        }
    };

    Self::validate_json_ld_semantics(&document, result);
}
/// Runs the structural JSON-LD checks on a parsed document.
///
/// Entry point for the recursive walk; `value` is treated as the top level
/// of the document.
fn validate_json_ld_semantics(value: &serde_json::Value, result: &mut ValidationResult) {
    Self::validate_json_ld_node(value, result, true);
}

/// Recursive worker behind `validate_json_ld_semantics`.
///
/// For every JSON object it validates `@context`, `@type`, `@id` and value
/// objects (`@value`), then descends into
///
/// * all property values (keys not starting with `@`), and
/// * the keywords that hold further node or value objects: `@graph`,
///   `@included`, `@list` and `@set`.
///
/// Arrays are walked element by element; scalars are ignored.
///
/// `top_level` is `true` only for the document root (and the elements of a
/// root-level array). The "plain JSON" warning is a statement about the
/// whole document, so it is emitted only there and not for nested objects
/// that happen to carry no keywords.
fn validate_json_ld_node(
    value: &serde_json::Value,
    result: &mut ValidationResult,
    top_level: bool,
) {
    // Keywords whose values contain nested node or value objects.
    const NESTING_KEYWORDS: [&str; 4] = ["@graph", "@included", "@list", "@set"];

    match value {
        serde_json::Value::Object(obj) => {
            let has_id = obj.contains_key("@id");
            let has_type = obj.contains_key("@type");
            let has_value = obj.contains_key("@value");

            if top_level && !obj.keys().any(|key| key.starts_with('@')) {
                result.warnings.push(
                    "Document appears to be plain JSON rather than JSON-LD (no @context, @graph, @id, @type, or other keywords found)".to_string()
                );
            }

            if let Some(context) = obj.get("@context") {
                Self::validate_context(context, result);
            }
            if let Some(type_value) = obj.get("@type") {
                Self::validate_type_value(type_value, result);
            }
            if let Some(id_value) = obj.get("@id") {
                Self::validate_id_value(id_value, result);
            }
            if has_value {
                Self::validate_value_object(obj, result);
            }

            // NOTE(review): coarse summary heuristic kept as-is; the per-key
            // findings come from `validate_value_object`.
            if has_value && (has_id || has_type) && (!has_type || obj.len() > 3) {
                result.warnings.push(
                    "@value objects should only contain @value, @type (for datatype), and @language".to_string()
                );
            }

            for (key, nested_value) in obj {
                if !key.starts_with('@') || NESTING_KEYWORDS.contains(&key.as_str()) {
                    Self::validate_json_ld_node(nested_value, result, false);
                }
            }
        }
        serde_json::Value::Array(items) => {
            for item in items {
                Self::validate_json_ld_node(item, result, top_level);
            }
        }
        _ => {}
    }
}
/// Validates the value of an `@context` entry.
///
/// Accepted shapes:
///
/// * `null` - resets the active context; nothing to check.
/// * a string - a context reference; a warning is recorded when it neither
///   starts with `http://`, `https://` or `file://` nor contains a `:`.
/// * an object - a context definition. Keys starting with `@` must be one of
///   the JSON-LD 1.1 context keywords, otherwise a warning is recorded. Every
///   other key is a term whose definition must be a string, `null` (which
///   removes the term) or an object; objects are checked as expanded term
///   definitions, anything else is an error.
/// * an array - each element is validated as a context in turn.
///
/// Any other JSON type is an error and marks the result invalid.
fn validate_context(context: &serde_json::Value, result: &mut ValidationResult) {
    // Keywords permitted as keys of a context definition (JSON-LD 1.1).
    const CONTEXT_KEYWORDS: [&str; 9] = [
        "@base",
        "@vocab",
        "@language",
        "@version",
        "@direction",
        "@import",
        "@propagate",
        "@protected",
        "@type",
    ];

    match context {
        serde_json::Value::Null => {}
        serde_json::Value::String(s) => {
            let has_known_scheme = s.starts_with("http://")
                || s.starts_with("https://")
                || s.starts_with("file://");
            if !has_known_scheme && !s.contains(':') {
                result.warnings.push(format!(
                    "@context IRI '{}' may not be valid (no protocol specified)",
                    s
                ));
            }
        }
        serde_json::Value::Object(obj) => {
            for (key, value) in obj {
                if key.starts_with('@') {
                    if !CONTEXT_KEYWORDS.contains(&key.as_str()) {
                        result
                            .warnings
                            .push(format!("Unknown keyword '{}' in @context", key));
                    }
                    continue;
                }
                match value {
                    // Simple term definition, or `null` to undefine the term.
                    serde_json::Value::String(_) | serde_json::Value::Null => {}
                    serde_json::Value::Object(term_def) => {
                        Self::validate_term_definition(key, term_def, result);
                    }
                    _ => {
                        result.errors.push(ValidationError {
                            line: None,
                            column: None,
                            message: format!(
                                "Invalid term definition for '{}': must be string or object",
                                key
                            ),
                        });
                        result.valid = false;
                    }
                }
            }
        }
        serde_json::Value::Array(arr) => {
            for item in arr {
                Self::validate_context(item, result);
            }
        }
        _ => {
            result.errors.push(ValidationError {
                line: None,
                column: None,
                message: "@context must be a string, object, or array".to_string(),
            });
            result.valid = false;
        }
    }
}
/// Validates an expanded term definition (the object form of a term inside
/// `@context`).
///
/// * Keys outside the JSON-LD 1.1 set of term-definition keywords produce a
///   warning.
/// * `@reverse` together with `@id` produces a warning: a reverse property is
///   identified by the `@reverse` value itself, and the specification does
///   not allow both in one definition.
/// * `@container` may be `null`, a string or an array; each string is checked
///   against the known container keywords (warning when unknown). Any other
///   JSON type is an error and marks the result invalid. Non-string array
///   elements are ignored.
fn validate_term_definition(term: &str, def: &serde_json::Map<String, serde_json::Value>, result: &mut ValidationResult) {
    // Keys permitted in an expanded term definition (JSON-LD 1.1).
    const TERM_KEYWORDS: [&str; 12] = [
        "@id",
        "@type",
        "@container",
        "@context",
        "@language",
        "@reverse",
        "@nest",
        "@direction",
        "@index",
        "@prefix",
        "@propagate",
        "@protected",
    ];
    // Values permitted for `@container`.
    const CONTAINER_KEYWORDS: [&str; 7] = [
        "@list",
        "@set",
        "@index",
        "@language",
        "@id",
        "@type",
        "@graph",
    ];

    for key in def.keys() {
        if !TERM_KEYWORDS.contains(&key.as_str()) {
            result.warnings.push(format!(
                "Unknown key '{}' in term definition for '{}'",
                key, term
            ));
        }
    }

    if def.contains_key("@reverse") && def.contains_key("@id") {
        result
            .warnings
            .push(format!("Term '{}' has both @reverse and @id", term));
    }

    if let Some(container) = def.get("@container") {
        let declared: Vec<&str> = match container {
            serde_json::Value::Null => Vec::new(),
            serde_json::Value::String(name) => vec![name.as_str()],
            serde_json::Value::Array(items) => {
                items.iter().filter_map(|item| item.as_str()).collect()
            }
            _ => {
                result.errors.push(ValidationError {
                    line: None,
                    column: None,
                    message: format!("@container for term '{}' must be string or array", term),
                });
                result.valid = false;
                Vec::new()
            }
        };
        for name in declared {
            if !CONTAINER_KEYWORDS.contains(&name) {
                result.warnings.push(format!(
                    "Unknown @container value '{}' for term '{}'",
                    name, term
                ));
            }
        }
    }
}
/// Validates the value of an `@type` entry.
///
/// A string is accepted as-is. An array is accepted when all of its
/// elements are strings: an empty array produces a warning and every
/// non-string element produces one error. Any other JSON type is an error.
/// Errors mark the result invalid.
fn validate_type_value(type_value: &serde_json::Value, result: &mut ValidationResult) {
    let entries = match type_value {
        serde_json::Value::String(_) => return,
        serde_json::Value::Array(entries) => entries,
        _ => {
            result.errors.push(ValidationError {
                line: None,
                column: None,
                message: "@type must be a string or array of strings".to_string(),
            });
            result.valid = false;
            return;
        }
    };

    if entries.is_empty() {
        result
            .warnings
            .push("@type array should not be empty".to_string());
    }

    // One error per offending element.
    for _ in entries.iter().filter(|entry| !entry.is_string()) {
        result.errors.push(ValidationError {
            line: None,
            column: None,
            message: "All @type values must be strings".to_string(),
        });
        result.valid = false;
    }
}
/// Validates the value of an `@id` entry.
///
/// The value must be a string; an empty string only produces a warning.
/// Any other JSON type is an error and marks the result invalid.
fn validate_id_value(id_value: &serde_json::Value, result: &mut ValidationResult) {
    if let Some(identifier) = id_value.as_str() {
        if identifier.is_empty() {
            result.warnings.push("@id should not be empty".to_string());
        }
        return;
    }

    result.errors.push(ValidationError {
        line: None,
        column: None,
        message: "@id must be a string".to_string(),
    });
    result.valid = false;
}
/// Validates a value object, i.e. a JSON object that contains `@value`.
///
/// Does nothing when `obj` has no `@value` key. Otherwise:
///
/// * `@language` together with `@type` is an error.
/// * Keys other than `@value`, `@type`, `@language`, `@direction`, `@index`
///   and `@context` produce a warning each.
/// * `@language` must be a string (error otherwise); an empty string
///   produces a warning.
///
/// Errors mark the result invalid.
fn validate_value_object(obj: &serde_json::Map<String, serde_json::Value>, result: &mut ValidationResult) {
    // Keys a value object may carry (JSON-LD 1.1).
    const VALUE_OBJECT_KEYS: [&str; 6] = [
        "@value",
        "@type",
        "@language",
        "@direction",
        "@index",
        "@context",
    ];

    if !obj.contains_key("@value") {
        return;
    }

    if obj.contains_key("@language") && obj.contains_key("@type") {
        result.errors.push(ValidationError {
            line: None,
            column: None,
            message: "@language and @type cannot both be present in a value object".to_string(),
        });
        result.valid = false;
    }

    for key in obj.keys() {
        if !VALUE_OBJECT_KEYS.contains(&key.as_str()) {
            result
                .warnings
                .push(format!("Unexpected key '{}' in @value object", key));
        }
    }

    if let Some(lang) = obj.get("@language") {
        match lang.as_str() {
            Some(tag) => {
                if tag.is_empty() {
                    result
                        .warnings
                        .push("@language should not be empty".to_string());
                }
            }
            None => {
                result.errors.push(ValidationError {
                    line: None,
                    column: None,
                    message: "@language must be a string".to_string(),
                });
                result.valid = false;
            }
        }
    }
}
}
/// Outcome of validating a document with `FormatValidator::validate`.
#[derive(Debug)]
pub struct ValidationResult {
/// Starts as `true`; set to `false` whenever a validator records an error.
pub valid: bool,
/// Problems that make the document invalid.
pub errors: Vec<ValidationError>,
/// Non-fatal findings, including notices that a format's validation is
/// not (fully) implemented.
pub warnings: Vec<String>,
/// Counters collected while validating.
pub stats: ValidationStats,
}
/// A single validation error with an optional source position.
#[derive(Debug)]
pub struct ValidationError {
/// 1-based line number when known; the N-Triples validator sets it, the
/// JSON-LD checks leave it as `None`.
pub line: Option<usize>,
/// Column of the problem; always `None` in the validators in this file.
pub column: Option<usize>,
/// Human-readable description of the problem.
pub message: String,
}
/// Counters gathered during validation; all start at zero.
#[derive(Debug, Default)]
pub struct ValidationStats {
/// Incremented by the N-Triples validator for every line that is neither
/// blank nor a `#` comment.
pub triple_count: usize,
/// Not updated by any validator visible in this file.
pub prefix_count: usize,
/// Not updated by any validator visible in this file.
pub blank_node_count: usize,
}
// Unit tests for the format tables, the detector and the N-Triples validator.
#[cfg(test)]
mod tests {
use super::*;
// The extension tables contain the canonical extension of each format.
#[test]
fn test_format_extensions() {
assert!(RdfFormat::Turtle.extensions().contains(&"ttl"));
assert!(RdfFormat::NTriples.extensions().contains(&"nt"));
assert!(RdfFormat::RdfXml.extensions().contains(&"rdf"));
}
// With only a path given, the extension decides the top-ranked format.
#[test]
fn test_format_detection_by_extension() {
let detections = FormatDetector::detect(Some(Path::new("test.ttl")), None, None);
assert!(!detections.is_empty());
assert_eq!(detections[0].format, RdfFormat::Turtle);
}
// With only content given, the content heuristics decide the top-ranked
// format: an `@prefix` directive means Turtle, an XML declaration RDF/XML.
#[test]
fn test_format_detection_by_content() {
let turtle_content = "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n";
let detections = FormatDetector::detect(None, Some(turtle_content), None);
assert!(!detections.is_empty());
assert_eq!(detections[0].format, RdfFormat::Turtle);
let xml_content = "<?xml version=\"1.0\"?>\n<rdf:RDF>";
let detections = FormatDetector::detect(None, Some(xml_content), None);
assert!(!detections.is_empty());
assert_eq!(detections[0].format, RdfFormat::RdfXml);
}
// A terminated triple is valid and counted; the same triple without the
// terminating dot is reported as an error.
#[test]
fn test_ntriples_validation() {
let valid_content =
"<http://example.org/s> <http://example.org/p> <http://example.org/o> .\n";
let result = FormatValidator::validate(valid_content, RdfFormat::NTriples).unwrap();
assert!(result.valid);
assert_eq!(result.stats.triple_count, 1);
let invalid_content =
"<http://example.org/s> <http://example.org/p> <http://example.org/o>\n";
let result = FormatValidator::validate(invalid_content, RdfFormat::NTriples).unwrap();
assert!(!result.valid);
assert!(!result.errors.is_empty());
}
}