Skip to main content

oxirs_core/format/
ntriples.rs

//! N-Triples Format Parser and Serializer
//!
//! Extracted and adapted from OxiGraph oxttl with OxiRS enhancements.
//! Based on W3C N-Triples specification: <https://www.w3.org/TR/n-triples/>
5
6use super::error::SerializeResult;
7use super::error::{ParseResult, RdfParseError, TextPosition};
8use super::serializer::QuadSerializer;
9use crate::model::{BlankNode, Literal, NamedNode, Triple, TripleRef};
10use std::io::{Read, Write};
11
/// Represents a parsed N-Triples term
///
/// Holds the raw strings extracted from a single term of a line, before they
/// are converted into the model types.
#[derive(Debug, Clone, PartialEq, Eq)]
enum NTriplesTerm {
    /// IRI text found between `<` and `>`.
    Iri(String),
    /// Blank node label, without the leading `_:`.
    BlankNode(String),
    /// Literal with neither language tag nor datatype; holds the unescaped value.
    Literal(String),
    /// Language-tagged literal: `(unescaped value, language tag)`.
    LanguageLiteral(String, String),
    /// Datatyped literal: `(unescaped value, datatype IRI text)`.
    TypedLiteral(String, String),
}
21
/// N-Triples parser implementation
///
/// The parser carries configuration only; it holds no input or parse state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NTriplesParser {
    /// When `true`, lines that fail to parse are skipped instead of aborting.
    lenient: bool,
}
27
28impl NTriplesParser {
29    /// Create a new N-Triples parser
30    pub fn new() -> Self {
31        Self { lenient: false }
32    }
33
34    /// Enable lenient parsing (skip some validations)
35    pub fn lenient(mut self) -> Self {
36        self.lenient = true;
37        self
38    }
39
40    /// Parse N-Triples from a reader
41    pub fn parse_reader<R: Read>(&self, mut reader: R) -> ParseResult<Vec<Triple>> {
42        // Read all data from the reader
43        let mut buffer = String::new();
44        reader.read_to_string(&mut buffer)?;
45
46        // Use the string parser
47        self.parse_str(&buffer)
48    }
49
50    /// Parse N-Triples from a byte slice
51    pub fn parse_slice(&self, slice: &[u8]) -> ParseResult<Vec<Triple>> {
52        let content = std::str::from_utf8(slice)
53            .map_err(|e| RdfParseError::syntax(format!("Invalid UTF-8: {e}")))?;
54        self.parse_str(content)
55    }
56
57    /// Parse N-Triples from a string
58    pub fn parse_str(&self, input: &str) -> ParseResult<Vec<Triple>> {
59        let mut triples = Vec::new();
60        let mut line_number = 1;
61
62        for line in input.lines() {
63            let trimmed = line.trim();
64
65            // Skip empty lines and comments
66            if trimmed.is_empty() || trimmed.starts_with('#') {
67                line_number += 1;
68                continue;
69            }
70
71            // Parse triple
72            match self.parse_triple_line(trimmed, line_number) {
73                Ok(Some(triple)) => triples.push(triple),
74                Ok(None) => {} // Valid but empty line
75                Err(e) if self.lenient => {
76                    // Skip invalid lines in lenient mode
77                    eprintln!("Warning: Skipping invalid line {line_number}: {e}");
78                }
79                Err(e) => return Err(e),
80            }
81
82            line_number += 1;
83        }
84
85        Ok(triples)
86    }
87
88    /// Parse a single triple line
89    fn parse_triple_line(&self, line: &str, line_number: usize) -> ParseResult<Option<Triple>> {
90        // N-Triples format: <subject> <predicate> <object> .
91        // - Subject: IRI or blank node
92        // - Predicate: IRI
93        // - Object: IRI, blank node, or literal
94
95        // Validate line ends with dot
96        if !line.ends_with('.') {
97            return Err(RdfParseError::syntax_at(
98                "N-Triples line must end with '.'",
99                TextPosition::new(line_number, line.len(), 0),
100            ));
101        }
102
103        // Remove the trailing dot and parse terms
104        let line_without_dot = line[..line.len() - 1].trim();
105
106        // Parse the three terms
107        let mut terms = Vec::new();
108        let mut pos = 0;
109
110        // Parse subject
111        let (subject_term, new_pos) = self.parse_term(line_without_dot, pos, line_number)?;
112        terms.push(subject_term);
113        pos = new_pos;
114
115        // Parse predicate
116        let (predicate_term, new_pos) = self.parse_term(line_without_dot, pos, line_number)?;
117        terms.push(predicate_term);
118        pos = new_pos;
119
120        // Parse object
121        let (object_term, _) = self.parse_term(line_without_dot, pos, line_number)?;
122        terms.push(object_term);
123
124        if terms.len() != 3 {
125            return Err(RdfParseError::syntax_at(
126                "N-Triples line must have exactly 3 terms",
127                TextPosition::new(line_number, 1, 0),
128            ));
129        }
130
131        // Build the triple
132        let subject = self.term_to_subject(&terms[0], line_number)?;
133        let predicate = self.term_to_predicate(&terms[1], line_number)?;
134        let object = self.term_to_object(&terms[2], line_number)?;
135
136        Ok(Some(Triple::new(subject, predicate, object)))
137    }
138
139    /// Check if lenient parsing is enabled
140    pub fn is_lenient(&self) -> bool {
141        self.lenient
142    }
143
144    /// Parse a single term (IRI, blank node, or literal)
145    fn parse_term(
146        &self,
147        input: &str,
148        start_pos: usize,
149        line_number: usize,
150    ) -> ParseResult<(NTriplesTerm, usize)> {
151        let trimmed = input[start_pos..].trim_start();
152        let actual_start = start_pos + (input.len() - start_pos - trimmed.len());
153
154        if trimmed.is_empty() {
155            return Err(RdfParseError::syntax_at(
156                "Expected term but found end of line",
157                TextPosition::new(line_number, actual_start, 0),
158            ));
159        }
160
161        if trimmed.starts_with('<') {
162            // Parse IRI
163            self.parse_iri(trimmed, actual_start, line_number)
164        } else if trimmed.starts_with("_:") {
165            // Parse blank node
166            self.parse_blank_node(trimmed, actual_start, line_number)
167        } else if trimmed.starts_with('"') {
168            // Parse literal
169            self.parse_literal(trimmed, actual_start, line_number)
170        } else {
171            Err(RdfParseError::syntax_at(
172                "Invalid term format. Expected <IRI>, _:blank, or \"literal\"",
173                TextPosition::new(line_number, actual_start, 0),
174            ))
175        }
176    }
177
178    /// Parse an IRI term <...>
179    fn parse_iri(
180        &self,
181        input: &str,
182        start_pos: usize,
183        line_number: usize,
184    ) -> ParseResult<(NTriplesTerm, usize)> {
185        if let Some(end_pos) = input.find('>') {
186            let iri = input[1..end_pos].to_string();
187            let new_pos = start_pos + end_pos + 1;
188            Ok((NTriplesTerm::Iri(iri), new_pos))
189        } else {
190            Err(RdfParseError::syntax_at(
191                "Unterminated IRI - missing '>'",
192                TextPosition::new(line_number, start_pos, 0),
193            ))
194        }
195    }
196
197    /// Parse a blank node term _:...
198    fn parse_blank_node(
199        &self,
200        input: &str,
201        start_pos: usize,
202        line_number: usize,
203    ) -> ParseResult<(NTriplesTerm, usize)> {
204        // Find the end of the blank node ID (whitespace or end)
205        let mut end_pos = 2; // Start after _:
206        for (i, c) in input[2..].char_indices() {
207            if c.is_whitespace() {
208                end_pos = 2 + i;
209                break;
210            }
211            end_pos = 2 + i + c.len_utf8();
212        }
213
214        let blank_id = input[2..end_pos].to_string();
215        if blank_id.is_empty() {
216            return Err(RdfParseError::syntax_at(
217                "Blank node ID cannot be empty",
218                TextPosition::new(line_number, start_pos, 0),
219            ));
220        }
221
222        let new_pos = start_pos + end_pos;
223        Ok((NTriplesTerm::BlankNode(blank_id), new_pos))
224    }
225
226    /// Parse a literal term "..." with optional language tag or datatype
227    fn parse_literal(
228        &self,
229        input: &str,
230        start_pos: usize,
231        line_number: usize,
232    ) -> ParseResult<(NTriplesTerm, usize)> {
233        // Find the closing quote
234        let mut end_quote = None;
235        let mut i = 1; // Start after opening quote
236        let chars: Vec<char> = input.chars().collect();
237
238        while i < chars.len() {
239            if chars[i] == '"' {
240                // Check if it's escaped
241                let mut backslash_count = 0;
242                let mut j = i;
243                while j > 0 && chars[j - 1] == '\\' {
244                    backslash_count += 1;
245                    j -= 1;
246                }
247                if backslash_count % 2 == 0 {
248                    // Even number of backslashes means the quote is not escaped
249                    end_quote = Some(i);
250                    break;
251                }
252            }
253            i += 1;
254        }
255
256        let end_quote = end_quote.ok_or_else(|| {
257            RdfParseError::syntax_at(
258                "Unterminated literal - missing closing quote",
259                TextPosition::new(line_number, start_pos, 0),
260            )
261        })?;
262
263        let literal_value = self.unescape_literal(&input[1..end_quote], line_number, start_pos)?;
264        let mut pos_after_quote = start_pos + end_quote + 1;
265
266        // Check for language tag or datatype
267        let remaining = &input[end_quote + 1..];
268
269        if let Some(stripped) = remaining.strip_prefix('@') {
270            // Language tag
271            let mut lang_end = 1;
272            for (i, c) in stripped.char_indices() {
273                if c.is_whitespace() {
274                    lang_end = 1 + i;
275                    break;
276                }
277                lang_end = 1 + i + c.len_utf8();
278            }
279
280            let language = remaining[1..lang_end].to_string();
281            pos_after_quote = start_pos + end_quote + 1 + lang_end;
282            Ok((
283                NTriplesTerm::LanguageLiteral(literal_value, language),
284                pos_after_quote,
285            ))
286        } else if let Some(stripped) = remaining.strip_prefix("^^") {
287            // Datatype
288            if remaining.len() < 3 || !stripped.starts_with('<') {
289                return Err(RdfParseError::syntax_at(
290                    "Invalid datatype format - expected ^^<datatype>",
291                    TextPosition::new(line_number, pos_after_quote, 0),
292                ));
293            }
294
295            if let Some(datatype_end) = remaining[3..].find('>') {
296                let datatype = remaining[3..3 + datatype_end].to_string();
297                pos_after_quote = start_pos + end_quote + 1 + 3 + datatype_end + 1;
298                Ok((
299                    NTriplesTerm::TypedLiteral(literal_value, datatype),
300                    pos_after_quote,
301                ))
302            } else {
303                Err(RdfParseError::syntax_at(
304                    "Unterminated datatype IRI - missing '>'",
305                    TextPosition::new(line_number, pos_after_quote, 0),
306                ))
307            }
308        } else {
309            // Simple literal
310            Ok((NTriplesTerm::Literal(literal_value), pos_after_quote))
311        }
312    }
313
314    /// Unescape special characters in literal values
315    fn unescape_literal(
316        &self,
317        value: &str,
318        line_number: usize,
319        start_pos: usize,
320    ) -> ParseResult<String> {
321        let mut result = String::new();
322        let mut chars = value.chars();
323
324        while let Some(c) = chars.next() {
325            if c == '\\' {
326                match chars.next() {
327                    Some('"') => result.push('"'),
328                    Some('\\') => result.push('\\'),
329                    Some('n') => result.push('\n'),
330                    Some('r') => result.push('\r'),
331                    Some('t') => result.push('\t'),
332                    Some('u') => {
333                        // Parse \uHHHH Unicode escape
334                        let hex_chars: String = chars.by_ref().take(4).collect();
335                        if hex_chars.len() != 4 {
336                            return Err(RdfParseError::syntax_at(
337                                "Invalid Unicode escape sequence \\uHHHH - expected 4 hex digits",
338                                TextPosition::new(line_number, start_pos, 0),
339                            ));
340                        }
341                        let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
342                            RdfParseError::syntax_at(
343                                "Invalid hex digits in Unicode escape sequence",
344                                TextPosition::new(line_number, start_pos, 0),
345                            )
346                        })?;
347                        let unicode_char = char::from_u32(code_point).ok_or_else(|| {
348                            RdfParseError::syntax_at(
349                                "Invalid Unicode code point",
350                                TextPosition::new(line_number, start_pos, 0),
351                            )
352                        })?;
353                        result.push(unicode_char);
354                    }
355                    Some('U') => {
356                        // Parse \UHHHHHHHH Unicode escape
357                        let hex_chars: String = chars.by_ref().take(8).collect();
358                        if hex_chars.len() != 8 {
359                            return Err(RdfParseError::syntax_at(
360                                "Invalid Unicode escape sequence \\UHHHHHHHH - expected 8 hex digits",
361                                TextPosition::new(line_number, start_pos, 0),
362                            ));
363                        }
364                        let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
365                            RdfParseError::syntax_at(
366                                "Invalid hex digits in Unicode escape sequence",
367                                TextPosition::new(line_number, start_pos, 0),
368                            )
369                        })?;
370                        let unicode_char = char::from_u32(code_point).ok_or_else(|| {
371                            RdfParseError::syntax_at(
372                                "Invalid Unicode code point",
373                                TextPosition::new(line_number, start_pos, 0),
374                            )
375                        })?;
376                        result.push(unicode_char);
377                    }
378                    Some(other) => {
379                        return Err(RdfParseError::syntax_at(
380                            format!("Invalid escape sequence \\{other}"),
381                            TextPosition::new(line_number, start_pos, 0),
382                        ));
383                    }
384                    None => {
385                        return Err(RdfParseError::syntax_at(
386                            "Incomplete escape sequence at end of literal",
387                            TextPosition::new(line_number, start_pos, 0),
388                        ));
389                    }
390                }
391            } else {
392                result.push(c);
393            }
394        }
395
396        Ok(result)
397    }
398
399    /// Convert parsed term to Subject
400    fn term_to_subject(
401        &self,
402        term: &NTriplesTerm,
403        line_number: usize,
404    ) -> ParseResult<crate::model::term::Subject> {
405        match term {
406            NTriplesTerm::Iri(iri) => {
407                let named_node = NamedNode::new(iri).map_err(|e| {
408                    RdfParseError::syntax_at(
409                        format!("Invalid subject IRI: {e}"),
410                        TextPosition::new(line_number, 0, 0),
411                    )
412                })?;
413                Ok(crate::model::term::Subject::NamedNode(named_node))
414            }
415            NTriplesTerm::BlankNode(id) => {
416                let blank_node = BlankNode::new(id).map_err(|e| {
417                    RdfParseError::syntax_at(
418                        format!("Invalid blank node: {e}"),
419                        TextPosition::new(line_number, 0, 0),
420                    )
421                })?;
422                Ok(crate::model::term::Subject::BlankNode(blank_node))
423            }
424            _ => Err(RdfParseError::syntax_at(
425                "Subject must be an IRI or blank node",
426                TextPosition::new(line_number, 0, 0),
427            )),
428        }
429    }
430
431    /// Convert parsed term to Predicate
432    fn term_to_predicate(
433        &self,
434        term: &NTriplesTerm,
435        line_number: usize,
436    ) -> ParseResult<crate::model::term::Predicate> {
437        match term {
438            NTriplesTerm::Iri(iri) => {
439                let named_node = NamedNode::new(iri).map_err(|e| {
440                    RdfParseError::syntax_at(
441                        format!("Invalid predicate IRI: {e}"),
442                        TextPosition::new(line_number, 0, 0),
443                    )
444                })?;
445                Ok(crate::model::term::Predicate::NamedNode(named_node))
446            }
447            _ => Err(RdfParseError::syntax_at(
448                "Predicate must be an IRI",
449                TextPosition::new(line_number, 0, 0),
450            )),
451        }
452    }
453
454    /// Convert parsed term to Object
455    fn term_to_object(
456        &self,
457        term: &NTriplesTerm,
458        line_number: usize,
459    ) -> ParseResult<crate::model::term::Object> {
460        match term {
461            NTriplesTerm::Iri(iri) => {
462                let named_node = NamedNode::new(iri).map_err(|e| {
463                    RdfParseError::syntax_at(
464                        format!("Invalid object IRI: {e}"),
465                        TextPosition::new(line_number, 0, 0),
466                    )
467                })?;
468                Ok(crate::model::term::Object::NamedNode(named_node))
469            }
470            NTriplesTerm::BlankNode(id) => {
471                let blank_node = BlankNode::new(id).map_err(|e| {
472                    RdfParseError::syntax_at(
473                        format!("Invalid blank node: {e}"),
474                        TextPosition::new(line_number, 0, 0),
475                    )
476                })?;
477                Ok(crate::model::term::Object::BlankNode(blank_node))
478            }
479            NTriplesTerm::Literal(value) => {
480                let literal = Literal::new(value);
481                Ok(crate::model::term::Object::Literal(literal))
482            }
483            NTriplesTerm::LanguageLiteral(value, lang) => {
484                let literal = Literal::new_language_tagged_literal(value, lang).map_err(|e| {
485                    RdfParseError::syntax_at(
486                        format!("Invalid language tag: {e}"),
487                        TextPosition::new(line_number, 0, 0),
488                    )
489                })?;
490                Ok(crate::model::term::Object::Literal(literal))
491            }
492            NTriplesTerm::TypedLiteral(value, datatype_iri) => {
493                let datatype = NamedNode::new(datatype_iri).map_err(|e| {
494                    RdfParseError::syntax_at(
495                        format!("Invalid datatype IRI: {e}"),
496                        TextPosition::new(line_number, 0, 0),
497                    )
498                })?;
499                let literal = Literal::new_typed_literal(value, datatype);
500                Ok(crate::model::term::Object::Literal(literal))
501            }
502        }
503    }
504}
505
506impl Default for NTriplesParser {
507    fn default() -> Self {
508        Self::new()
509    }
510}
511
/// N-Triples serializer implementation
///
/// Holds serializer configuration only; output is produced through a
/// writer-bound serializer created from it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NTriplesSerializer {
    /// Whether output validation is requested.
    validate: bool,
}
517
518impl NTriplesSerializer {
519    /// Create a new N-Triples serializer
520    pub fn new() -> Self {
521        Self { validate: true }
522    }
523
524    /// Disable output validation for performance
525    pub fn unvalidated(mut self) -> Self {
526        self.validate = false;
527        self
528    }
529
530    /// Create a writer-based serializer
531    pub fn for_writer<W: Write>(self, writer: W) -> WriterNTriplesSerializer<W> {
532        WriterNTriplesSerializer::new(writer, self)
533    }
534
535    /// Serialize triples to a string
536    pub fn serialize_to_string(&self, triples: &[Triple]) -> SerializeResult<String> {
537        let mut buffer = Vec::new();
538        {
539            let mut serializer = self.clone().for_writer(&mut buffer);
540            for triple in triples {
541                serializer.serialize_triple(triple.as_ref())?;
542            }
543            serializer.finish()?;
544        }
545        String::from_utf8(buffer)
546            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
547    }
548
549    /// Check if validation is enabled
550    pub fn is_validating(&self) -> bool {
551        self.validate
552    }
553}
554
555impl Default for NTriplesSerializer {
556    fn default() -> Self {
557        Self::new()
558    }
559}
560
561/// Writer-based N-Triples serializer
562#[allow(dead_code)]
563pub struct WriterNTriplesSerializer<W: Write> {
564    writer: W,
565    config: NTriplesSerializer,
566}
567
568impl<W: Write> WriterNTriplesSerializer<W> {
569    /// Create a new writer serializer
570    pub fn new(writer: W, config: NTriplesSerializer) -> Self {
571        Self { writer, config }
572    }
573
574    /// Serialize a triple
575    pub fn serialize_triple(&mut self, triple: TripleRef<'_>) -> SerializeResult<()> {
576        // Format: <subject> <predicate> <object> .
577
578        // Serialize subject
579        self.serialize_subject(triple.subject())?;
580        write!(self.writer, " ")?;
581
582        // Serialize predicate
583        self.serialize_predicate(triple.predicate())?;
584        write!(self.writer, " ")?;
585
586        // Serialize object
587        self.serialize_object(triple.object())?;
588        writeln!(self.writer, " .")?;
589
590        Ok(())
591    }
592
593    /// Serialize a subject (IRI or blank node)
594    fn serialize_subject(
595        &mut self,
596        subject: crate::model::triple::SubjectRef<'_>,
597    ) -> SerializeResult<()> {
598        use crate::model::triple::SubjectRef;
599        match subject {
600            SubjectRef::NamedNode(node) => {
601                let escaped_iri = self.escape_iri(node.as_str());
602                write!(self.writer, "<{escaped_iri}>")?;
603            }
604            SubjectRef::BlankNode(node) => {
605                let node_str = node.as_str();
606                write!(self.writer, "_:{node_str}")?;
607            }
608            SubjectRef::Variable(var) => {
609                let var_str = var.as_str();
610                write!(self.writer, "?{var_str}")?;
611            }
612        }
613        Ok(())
614    }
615
616    /// Serialize a predicate (IRI)
617    fn serialize_predicate(
618        &mut self,
619        predicate: crate::model::triple::PredicateRef<'_>,
620    ) -> SerializeResult<()> {
621        use crate::model::triple::PredicateRef;
622        match predicate {
623            PredicateRef::NamedNode(node) => {
624                let escaped_iri = self.escape_iri(node.as_str());
625                write!(self.writer, "<{escaped_iri}>")?;
626            }
627            PredicateRef::Variable(var) => {
628                let var_str = var.as_str();
629                write!(self.writer, "?{var_str}")?;
630            }
631        }
632        Ok(())
633    }
634
635    /// Serialize an object (IRI, blank node, or literal)
636    fn serialize_object(
637        &mut self,
638        object: crate::model::triple::ObjectRef<'_>,
639    ) -> SerializeResult<()> {
640        use crate::model::triple::ObjectRef;
641        match object {
642            ObjectRef::NamedNode(node) => {
643                let escaped_iri = self.escape_iri(node.as_str());
644                write!(self.writer, "<{escaped_iri}>")?;
645            }
646            ObjectRef::BlankNode(node) => {
647                let node_str = node.as_str();
648                write!(self.writer, "_:{node_str}")?;
649            }
650            ObjectRef::Literal(literal) => {
651                self.serialize_literal(literal)?;
652            }
653            ObjectRef::Variable(var) => {
654                let var_str = var.as_str();
655                write!(self.writer, "?{var_str}")?;
656            }
657        }
658        Ok(())
659    }
660
661    /// Serialize a literal
662    fn serialize_literal(&mut self, literal: &Literal) -> SerializeResult<()> {
663        // Write the literal value with proper escaping
664        let escaped_value = self.escape_literal(literal.value());
665        write!(self.writer, "\"{escaped_value}\"")?;
666
667        // Add language tag or datatype if present
668        if let Some(language) = literal.language() {
669            write!(self.writer, "@{language}")?;
670        } else if literal.datatype().as_str() != crate::vocab::xsd::STRING.as_str() {
671            let escaped_datatype = self.escape_iri(literal.datatype().as_str());
672            write!(self.writer, "^^<{escaped_datatype}>")?;
673        }
674
675        Ok(())
676    }
677
678    /// Escape special characters in IRIs
679    fn escape_iri(&self, iri: &str) -> String {
680        // N-Triples IRIs should already be properly encoded
681        // but we can add basic escaping if needed
682        iri.to_string()
683    }
684
685    /// Escape special characters in literal values
686    fn escape_literal(&self, value: &str) -> String {
687        value
688            .chars()
689            .map(|c| match c {
690                '"' => "\\\"".to_string(),
691                '\\' => "\\\\".to_string(),
692                '\n' => "\\n".to_string(),
693                '\r' => "\\r".to_string(),
694                '\t' => "\\t".to_string(),
695                c if !('\u{0020}'..='\u{007E}').contains(&c) => {
696                    // Escape non-ASCII and control characters
697                    if (c as u32) <= 0xFFFF {
698                        format!("\\u{:04X}", c as u32)
699                    } else {
700                        format!("\\U{:08X}", c as u32)
701                    }
702                }
703                _ => c.to_string(),
704            })
705            .collect()
706    }
707
708    /// Finish serialization and return the writer
709    pub fn finish(self) -> SerializeResult<W> {
710        Ok(self.writer)
711    }
712}
713
714impl<W: Write> QuadSerializer<W> for WriterNTriplesSerializer<W> {
715    fn serialize_quad(&mut self, quad: crate::model::QuadRef<'_>) -> SerializeResult<()> {
716        // N-Triples only supports default graph, so ignore named graphs
717        if quad.graph_name().is_default_graph() {
718            self.serialize_triple(quad.triple())
719        } else {
720            // Could log a warning here about ignoring named graph
721            Ok(())
722        }
723    }
724
725    fn finish(self: Box<Self>) -> SerializeResult<W> {
726        Ok(self.writer)
727    }
728}
729
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created parser is strict.
    #[test]
    fn test_ntriples_parser_creation() {
        let parser = NTriplesParser::new();
        assert!(!parser.is_lenient());
    }

    /// `lenient()` switches the parser into lenient mode.
    #[test]
    fn test_ntriples_parser_lenient() {
        let parser = NTriplesParser::new().lenient();
        assert!(parser.is_lenient());
    }

    /// A freshly created serializer validates its output.
    #[test]
    fn test_ntriples_serializer_creation() {
        let serializer = NTriplesSerializer::new();
        assert!(serializer.is_validating());
    }

    /// `unvalidated()` turns validation off.
    #[test]
    fn test_ntriples_serializer_unvalidated() {
        let serializer = NTriplesSerializer::new().unvalidated();
        assert!(!serializer.is_validating());
    }

    /// Empty input yields no triples.
    #[test]
    fn test_empty_ntriples_parsing() {
        let parser = NTriplesParser::new();
        let result = parser.parse_str("");
        assert!(result.is_ok());
        assert!(result.expect("should have value").is_empty());
    }

    /// Comment-only input yields no triples.
    #[test]
    fn test_ntriples_comments() {
        let parser = NTriplesParser::new();
        let ntriples = "# This is a comment\n# Another comment";
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        assert!(result.expect("should have value").is_empty());
    }

    /// Lines without a terminating dot or with too few terms are rejected.
    #[test]
    fn test_ntriples_line_validation() {
        let parser = NTriplesParser::new();

        // Missing dot should fail
        let result = parser.parse_triple_line(
            "<http://example.org/s> <http://example.org/p> <http://example.org/o>",
            1,
        );
        assert!(result.is_err());

        // Too few components should fail
        let result = parser.parse_triple_line("<http://example.org/s> <http://example.org/p> .", 1);
        assert!(result.is_err());
    }

    /// Each supported term shape parses into exactly one triple.
    #[test]
    fn test_ntriples_parsing() {
        let parser = NTriplesParser::new();

        // Test simple triple
        let ntriples = "<http://example.org/s> <http://example.org/p> <http://example.org/o> .";
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);

        // Test with blank node
        let ntriples = "_:s <http://example.org/p> \"literal\" .";
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);

        // Test with language literal
        let ntriples = "<http://example.org/s> <http://example.org/p> \"hello\"@en .";
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);

        // Test with typed literal
        let ntriples = "<http://example.org/s> <http://example.org/p> \"42\"^^<http://www.w3.org/2001/XMLSchema#integer> .";
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);
    }

    /// A simple triple serializes to one dot-terminated line.
    #[test]
    fn test_ntriples_serialization() {
        let serializer = NTriplesSerializer::new();

        // Create a simple triple
        let subject = NamedNode::new("http://example.org/s").expect("valid IRI");
        let predicate = NamedNode::new("http://example.org/p").expect("valid IRI");
        let object = Literal::new("test");
        let triple = Triple::new(subject, predicate, object);

        let result = serializer.serialize_to_string(&[triple]);
        assert!(result.is_ok());
        let output = result.expect("should have value");
        assert!(output.contains("<http://example.org/s>"));
        assert!(output.contains("<http://example.org/p>"));
        assert!(output.contains("\"test\""));
        assert!(output.ends_with(" .\n"));
    }

    /// `\uHHHH` and `\UHHHHHHHH` escapes decode to the corresponding characters.
    #[test]
    fn test_unicode_escape_parsing() {
        let parser = NTriplesParser::new();

        // Test \uHHHH escape sequence (Euro symbol)
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Euro: \u20AC" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);
        if let crate::model::term::Object::Literal(lit) = triples[0].object() {
            assert_eq!(lit.value(), "Euro: €");
        } else {
            panic!("Expected literal object");
        }

        // Test \UHHHHHHHH escape sequence (Emoji)
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Smile: \U0001F600" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);
        if let crate::model::term::Object::Literal(lit) = triples[0].object() {
            assert_eq!(lit.value(), "Smile: 😀");
        } else {
            panic!("Expected literal object");
        }
    }

    /// The short escapes `\n \t \r \\ \"` decode to their characters.
    #[test]
    fn test_escape_sequence_parsing() {
        let parser = NTriplesParser::new();

        // Test all basic escape sequences
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Line 1\nLine 2\tTabbed\rCarriage Return\\Backslash\"Quote" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_ok());
        let triples = result.expect("should have value");
        assert_eq!(triples.len(), 1);
        if let crate::model::term::Object::Literal(lit) = triples[0].object() {
            assert_eq!(
                lit.value(),
                "Line 1\nLine 2\tTabbed\rCarriage Return\\Backslash\"Quote"
            );
        } else {
            panic!("Expected literal object");
        }
    }

    /// Non-ASCII characters in literals are serialized as Unicode escapes.
    #[test]
    fn test_unicode_escape_serialization() {
        let serializer = NTriplesSerializer::new();

        // Create a triple with Unicode characters
        let subject = NamedNode::new("http://example.org/s").expect("valid IRI");
        let predicate = NamedNode::new("http://example.org/p").expect("valid IRI");
        let object = Literal::new("Hello 世界 🌍");
        let triple = Triple::new(subject, predicate, object);

        let result = serializer.serialize_to_string(&[triple]);
        assert!(result.is_ok());
        let output = result.expect("should have value");

        // Should contain Unicode escape sequences for non-ASCII characters
        assert!(output.contains("\\u4E16")); // 世
        assert!(output.contains("\\u754C")); // 界
        assert!(output.contains("\\U0001F30D")); // 🌍
    }

    /// Truncated or non-hex Unicode escapes are rejected.
    #[test]
    fn test_invalid_unicode_escapes() {
        let parser = NTriplesParser::new();

        // Test invalid \u sequence (too few digits)
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Invalid: \u123" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_err());

        // Test invalid \U sequence (too few digits)
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Invalid: \U1234567" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_err());

        // Test invalid hex digits
        let ntriples = r#"<http://example.org/s> <http://example.org/p> "Invalid: \uGHIJ" ."#;
        let result = parser.parse_str(ntriples);
        assert!(result.is_err());
    }
}