1#[cfg(feature = "async")]
147mod async_parser;
148mod format_states;
149
150use format_states::{TrigParserState, TurtleParserState};
151
152#[cfg(feature = "async")]
153pub use async_parser::{AsyncRdfSink, AsyncStreamingParser, MemoryAsyncSink, ParseProgress};
154
155use crate::model::{
157 BlankNode, GraphName, Literal, NamedNode, Object, Predicate, Quad, Subject, Triple,
158};
159use crate::{OxirsError, Result};
160
/// RDF serialization formats this module knows how to name and dispatch on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RdfFormat {
    /// Turtle (`ttl`, `text/turtle`).
    Turtle,
    /// N-Triples (`nt`, `application/n-triples`).
    NTriples,
    /// TriG (`trig`, `application/trig`).
    TriG,
    /// N-Quads (`nq`, `application/n-quads`).
    NQuads,
    /// RDF/XML (`rdf`, `application/rdf+xml`).
    RdfXml,
    /// JSON-LD (`jsonld`, `application/ld+json`).
    JsonLd,
}

impl RdfFormat {
    /// Maps a file extension or short format name to a format.
    ///
    /// Matching is case-insensitive. The extension is expected without a
    /// leading dot; unknown names yield `None`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let normalized = ext.to_lowercase();
        let format = match normalized.as_str() {
            "ttl" | "turtle" => Self::Turtle,
            "nt" | "ntriples" => Self::NTriples,
            "trig" => Self::TriG,
            "nq" | "nquads" => Self::NQuads,
            "rdf" | "xml" | "rdfxml" => Self::RdfXml,
            "jsonld" | "json-ld" => Self::JsonLd,
            _ => return None,
        };
        Some(format)
    }

    /// Returns the media type string associated with this format.
    pub fn media_type(&self) -> &'static str {
        match *self {
            Self::Turtle => "text/turtle",
            Self::NTriples => "application/n-triples",
            Self::TriG => "application/trig",
            Self::NQuads => "application/n-quads",
            Self::RdfXml => "application/rdf+xml",
            Self::JsonLd => "application/ld+json",
        }
    }

    /// Returns the canonical file extension (without a dot) for this format.
    pub fn extension(&self) -> &'static str {
        match *self {
            Self::Turtle => "ttl",
            Self::NTriples => "nt",
            Self::TriG => "trig",
            Self::NQuads => "nq",
            Self::RdfXml => "rdf",
            Self::JsonLd => "jsonld",
        }
    }

    /// Reports whether this format is treated as quad-capable.
    ///
    /// Only TriG and N-Quads answer `true`.
    pub fn supports_quads(&self) -> bool {
        match *self {
            Self::TriG | Self::NQuads => true,
            Self::Turtle | Self::NTriples | Self::RdfXml | Self::JsonLd => false,
        }
    }
}
221
/// Options controlling how a [`Parser`] processes its input.
///
/// The derived `Default` leaves `base_iri` and `max_errors` unset and
/// `ignore_errors` disabled.
#[derive(Debug, Clone, Default)]
pub struct ParserConfig {
    /// Base IRI handed to the format-specific parsers, if any.
    pub base_iri: Option<String>,
    /// When `true`, the parsers in this file log a malformed statement and
    /// keep going instead of returning the error.
    pub ignore_errors: bool,
    /// Presumably an upper bound on tolerated errors.
    /// NOTE(review): no parsing routine visible in this file reads this
    /// field — confirm where (or whether) it is enforced.
    pub max_errors: Option<usize>,
}
232
/// Parser for RDF documents in one fixed [`RdfFormat`].
///
/// Parsed statements are delivered as [`Quad`]s, either collected into a
/// vector or passed one by one to a caller-supplied handler.
#[derive(Debug, Clone)]
pub struct Parser {
    // Format selected at construction; decides which parse routine runs.
    format: RdfFormat,
    // Base IRI and error-tolerance settings.
    config: ParserConfig,
}
239
240impl Parser {
241 pub fn new(format: RdfFormat) -> Self {
243 Parser {
244 format,
245 config: ParserConfig::default(),
246 }
247 }
248
249 pub fn with_config(format: RdfFormat, config: ParserConfig) -> Self {
251 Parser { format, config }
252 }
253
254 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Self {
256 self.config.base_iri = Some(base_iri.into());
257 self
258 }
259
260 pub fn with_error_tolerance(mut self, ignore_errors: bool) -> Self {
262 self.config.ignore_errors = ignore_errors;
263 self
264 }
265
266 pub fn parse_str_to_quads(&self, data: &str) -> Result<Vec<Quad>> {
268 let mut quads = Vec::new();
269 self.parse_str_with_handler(data, |quad| {
270 quads.push(quad);
271 Ok(())
272 })?;
273 Ok(quads)
274 }
275
276 pub fn parse_str_to_triples(&self, data: &str) -> Result<Vec<Triple>> {
278 let quads = self.parse_str_to_quads(data)?;
279 Ok(quads
280 .into_iter()
281 .filter(|quad| quad.is_default_graph())
282 .map(|quad| quad.to_triple())
283 .collect())
284 }
285
286 pub fn parse_str_with_handler<F>(&self, data: &str, handler: F) -> Result<()>
288 where
289 F: FnMut(Quad) -> Result<()>,
290 {
291 match self.format {
292 RdfFormat::Turtle => self.parse_turtle(data, handler),
293 RdfFormat::NTriples => self.parse_ntriples(data, handler),
294 RdfFormat::TriG => self.parse_trig(data, handler),
295 RdfFormat::NQuads => self.parse_nquads(data, handler),
296 RdfFormat::RdfXml => self.parse_rdfxml(data, handler),
297 RdfFormat::JsonLd => self.parse_jsonld(data, handler),
298 }
299 }
300
301 pub fn parse_bytes_to_quads(&self, data: &[u8]) -> Result<Vec<Quad>> {
303 let data_str = std::str::from_utf8(data)
304 .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
305 self.parse_str_to_quads(data_str)
306 }
307
308 fn parse_turtle<F>(&self, data: &str, mut handler: F) -> Result<()>
309 where
310 F: FnMut(Quad) -> Result<()>,
311 {
312 let mut parser = TurtleParserState::new(self.config.base_iri.as_deref());
314
315 for (line_num, line) in data.lines().enumerate() {
316 let line = line.trim();
317
318 if line.is_empty() || line.starts_with('#') {
320 continue;
321 }
322
323 match parser.parse_line(line) {
324 Ok(triples) => {
325 for triple in triples {
326 let quad = Quad::from_triple(triple);
327 handler(quad)?;
328 }
329 }
330 Err(e) => {
331 if self.config.ignore_errors {
332 tracing::warn!("Turtle parse error on line {}: {}", line_num + 1, e);
333 continue;
334 } else {
335 return Err(OxirsError::Parse(format!(
336 "Turtle parse error on line {}: {}",
337 line_num + 1,
338 e
339 )));
340 }
341 }
342 }
343 }
344
345 if let Some(triples) = parser.finalize()? {
347 for triple in triples {
348 let quad = Quad::from_triple(triple);
349 handler(quad)?;
350 }
351 }
352
353 Ok(())
354 }
355
356 fn parse_ntriples<F>(&self, data: &str, mut handler: F) -> Result<()>
357 where
358 F: FnMut(Quad) -> Result<()>,
359 {
360 for (line_num, line) in data.lines().enumerate() {
361 let line = line.trim();
362
363 if line.is_empty() || line.starts_with('#') {
365 continue;
366 }
367
368 match self.parse_ntriples_line(line) {
370 Ok(Some(quad)) => {
371 handler(quad)?;
372 }
373 Ok(None) => {
374 continue;
376 }
377 Err(e) => {
378 if self.config.ignore_errors {
379 tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
380 continue;
381 } else {
382 return Err(OxirsError::Parse(format!(
383 "Parse error on line {}: {}",
384 line_num + 1,
385 e
386 )));
387 }
388 }
389 }
390 }
391
392 Ok(())
393 }
394
395 pub fn parse_ntriples_line(&self, line: &str) -> Result<Option<Quad>> {
396 let line = line.trim();
398
399 if line.is_empty() || line.starts_with('#') {
400 return Ok(None);
401 }
402
403 if !line.ends_with('.') {
405 return Err(OxirsError::Parse("Line must end with '.'".to_string()));
406 }
407
408 let line = &line[..line.len() - 1].trim(); let tokens = self.tokenize_ntriples_line(line)?;
412
413 if tokens.len() != 3 {
414 return Err(OxirsError::Parse(format!(
415 "Expected 3 tokens (subject, predicate, object), found {}",
416 tokens.len()
417 )));
418 }
419
420 let subject = self.parse_subject(&tokens[0])?;
422
423 let predicate = self.parse_predicate(&tokens[1])?;
425
426 let object = self.parse_object(&tokens[2])?;
428
429 let triple = Triple::new(subject, predicate, object);
430 let quad = Quad::from_triple(triple);
431
432 Ok(Some(quad))
433 }
434
435 fn tokenize_ntriples_line(&self, line: &str) -> Result<Vec<String>> {
436 let mut tokens = Vec::new();
437 let mut current_token = String::new();
438 let mut in_quotes = false;
439 let mut escaped = false;
440 let mut chars = line.chars().peekable();
441
442 while let Some(c) = chars.next() {
443 if escaped {
444 current_token.push('\\');
446 current_token.push(c);
447 escaped = false;
448 } else if c == '\\' && in_quotes {
449 escaped = true;
450 } else if c == '"' && !escaped {
451 current_token.push(c);
452 if in_quotes {
453 if let Some(&'@') = chars.peek() {
455 current_token.push(chars.next().expect("peeked '@' should be available"));
457 while let Some(&next_char) = chars.peek() {
458 if next_char.is_alphanumeric() || next_char == '-' {
459 current_token
460 .push(chars.next().expect("peeked char should be available"));
461 } else {
462 break;
463 }
464 }
465 } else if chars.peek() == Some(&'^') {
466 chars.next(); if chars.peek() == Some(&'^') {
469 chars.next(); current_token.push_str("^^");
471 if chars.peek() == Some(&'<') {
472 for next_char in chars.by_ref() {
474 current_token.push(next_char);
475 if next_char == '>' {
476 break;
477 }
478 }
479 }
480 }
481 }
482 in_quotes = false;
483 } else {
484 in_quotes = true;
485 }
486 } else if c == '"' && escaped {
487 current_token.push(c);
489 escaped = false;
490 } else if c.is_whitespace() && !in_quotes {
491 if !current_token.is_empty() {
492 tokens.push(current_token.clone());
493 current_token.clear();
494 }
495 } else {
496 current_token.push(c);
497 }
498 }
499
500 if !current_token.is_empty() {
501 tokens.push(current_token);
502 }
503
504 Ok(tokens)
505 }
506
507 fn parse_subject(&self, token: &str) -> Result<Subject> {
508 if token.starts_with('<') && token.ends_with('>') {
509 let iri = &token[1..token.len() - 1];
510 let named_node = NamedNode::new(iri)?;
511 Ok(Subject::NamedNode(named_node))
512 } else if token.starts_with("_:") {
513 let blank_node = BlankNode::new(token)?;
514 Ok(Subject::BlankNode(blank_node))
515 } else {
516 Err(OxirsError::Parse(format!(
517 "Invalid subject: {token}. Must be IRI or blank node"
518 )))
519 }
520 }
521
522 fn parse_predicate(&self, token: &str) -> Result<Predicate> {
523 if token.starts_with('<') && token.ends_with('>') {
524 let iri = &token[1..token.len() - 1];
525 let named_node = NamedNode::new(iri)?;
526 Ok(Predicate::NamedNode(named_node))
527 } else {
528 Err(OxirsError::Parse(format!(
529 "Invalid predicate: {token}. Must be IRI"
530 )))
531 }
532 }
533
534 fn parse_object(&self, token: &str) -> Result<Object> {
535 if token.starts_with('<') && token.ends_with('>') {
536 let iri = &token[1..token.len() - 1];
538 let named_node = NamedNode::new(iri)?;
539 Ok(Object::NamedNode(named_node))
540 } else if token.starts_with("_:") {
541 let blank_node = BlankNode::new(token)?;
543 Ok(Object::BlankNode(blank_node))
544 } else if token.starts_with('"') {
545 self.parse_literal(token)
547 } else {
548 Err(OxirsError::Parse(format!(
549 "Invalid object: {token}. Must be IRI, blank node, or literal"
550 )))
551 }
552 }
553
554 fn parse_literal(&self, token: &str) -> Result<Object> {
555 if !token.starts_with('"') {
556 return Err(OxirsError::Parse(
557 "Literal must start with quote".to_string(),
558 ));
559 }
560
561 let mut end_quote_pos = None;
563 let mut escaped = false;
564 let chars: Vec<char> = token.chars().collect();
565
566 for (i, &ch) in chars.iter().enumerate().skip(1) {
567 if escaped {
568 escaped = false;
569 continue;
570 }
571
572 if ch == '\\' {
573 escaped = true;
574 } else if ch == '"' {
575 end_quote_pos = Some(i);
576 break;
577 }
578 }
579
580 let end_quote_pos =
581 end_quote_pos.ok_or_else(|| OxirsError::Parse("Unterminated literal".to_string()))?;
582
583 let raw_value: String = chars[1..end_quote_pos].iter().collect();
585 let literal_value = self.unescape_literal_value(&raw_value)?;
586
587 let remaining = &token[end_quote_pos + 1..];
589
590 if let Some(lang_tag) = remaining.strip_prefix('@') {
591 let literal = Literal::new_lang(literal_value, lang_tag)?;
593 Ok(Object::Literal(literal))
594 } else if remaining.starts_with("^^<") && remaining.ends_with('>') {
595 let datatype_iri = &remaining[3..remaining.len() - 1];
597 let datatype = NamedNode::new(datatype_iri)?;
598 let literal = Literal::new_typed(literal_value, datatype);
599 Ok(Object::Literal(literal))
600 } else if remaining.is_empty() {
601 let literal = Literal::new(literal_value);
603 Ok(Object::Literal(literal))
604 } else {
605 Err(OxirsError::Parse(format!(
606 "Invalid literal syntax: {token}"
607 )))
608 }
609 }
610
611 fn parse_trig<F>(&self, data: &str, mut handler: F) -> Result<()>
612 where
613 F: FnMut(Quad) -> Result<()>,
614 {
615 let mut parser = TrigParserState::new(self.config.base_iri.as_deref());
617
618 for (line_num, line) in data.lines().enumerate() {
619 let line = line.trim();
620
621 if line.is_empty() || line.starts_with('#') {
623 continue;
624 }
625
626 match parser.parse_line(line) {
627 Ok(quads) => {
628 for quad in quads {
629 handler(quad)?;
630 }
631 }
632 Err(e) => {
633 if self.config.ignore_errors {
634 tracing::warn!("TriG parse error on line {}: {}", line_num + 1, e);
635 continue;
636 } else {
637 return Err(OxirsError::Parse(format!(
638 "TriG parse error on line {}: {}",
639 line_num + 1,
640 e
641 )));
642 }
643 }
644 }
645 }
646
647 if let Some(quads) = parser.finalize()? {
649 for quad in quads {
650 handler(quad)?;
651 }
652 }
653
654 Ok(())
655 }
656
657 fn parse_nquads<F>(&self, data: &str, mut handler: F) -> Result<()>
658 where
659 F: FnMut(Quad) -> Result<()>,
660 {
661 for (line_num, line) in data.lines().enumerate() {
662 let line = line.trim();
663
664 if line.is_empty() || line.starts_with('#') {
666 continue;
667 }
668
669 match self.parse_nquads_line(line) {
671 Ok(Some(quad)) => {
672 handler(quad)?;
673 }
674 Ok(None) => {
675 continue;
677 }
678 Err(e) => {
679 if self.config.ignore_errors {
680 tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
681 continue;
682 } else {
683 return Err(OxirsError::Parse(format!(
684 "Parse error on line {}: {}",
685 line_num + 1,
686 e
687 )));
688 }
689 }
690 }
691 }
692
693 Ok(())
694 }
695
696 pub fn parse_nquads_line(&self, line: &str) -> Result<Option<Quad>> {
697 let line = line.trim();
699
700 if line.is_empty() || line.starts_with('#') {
701 return Ok(None);
702 }
703
704 if !line.ends_with('.') {
706 return Err(OxirsError::Parse("Line must end with '.'".to_string()));
707 }
708
709 let line = &line[..line.len() - 1].trim(); let tokens = self.tokenize_ntriples_line(line)?;
713
714 if tokens.len() != 4 {
715 return Err(OxirsError::Parse(format!(
716 "Expected 4 tokens (subject, predicate, object, graph), found {}",
717 tokens.len()
718 )));
719 }
720
721 let subject = self.parse_subject(&tokens[0])?;
723
724 let predicate = self.parse_predicate(&tokens[1])?;
726
727 let object = self.parse_object(&tokens[2])?;
729
730 let graph_name = self.parse_graph_name(&tokens[3])?;
732
733 let quad = Quad::new(subject, predicate, object, graph_name);
734
735 Ok(Some(quad))
736 }
737
738 fn parse_graph_name(&self, token: &str) -> Result<GraphName> {
739 if token.starts_with('<') && token.ends_with('>') {
740 let iri = &token[1..token.len() - 1];
741 let named_node = NamedNode::new(iri)?;
742 Ok(GraphName::NamedNode(named_node))
743 } else if token.starts_with("_:") {
744 let blank_node = BlankNode::new(token)?;
745 Ok(GraphName::BlankNode(blank_node))
746 } else {
747 Err(OxirsError::Parse(format!(
748 "Invalid graph name: {token}. Must be IRI or blank node"
749 )))
750 }
751 }
752
753 fn parse_rdfxml<F>(&self, data: &str, mut handler: F) -> Result<()>
754 where
755 F: FnMut(Quad) -> Result<()>,
756 {
757 use crate::rdfxml::wrapper::parse_rdfxml;
758 use std::io::Cursor;
759
760 let reader = Cursor::new(data.as_bytes());
762 let base_iri = self.config.base_iri.as_deref();
763 let quads = parse_rdfxml(reader, base_iri, self.config.ignore_errors)?;
764
765 for quad in quads {
767 handler(quad)?;
768 }
769
770 Ok(())
771 }
772
773 fn parse_jsonld<F>(&self, data: &str, mut handler: F) -> Result<()>
774 where
775 F: FnMut(Quad) -> Result<()>,
776 {
777 use crate::jsonld::to_rdf::JsonLdParser;
779
780 let parser = JsonLdParser::new();
781 let parser = if let Some(base_iri) = &self.config.base_iri {
782 parser
783 .with_base_iri(base_iri.clone())
784 .map_err(|e| OxirsError::Parse(format!("Invalid base IRI: {e}")))?
785 } else {
786 parser
787 };
788
789 for result in parser.for_slice(data.as_bytes()) {
791 match result {
792 Ok(quad) => handler(quad)?,
793 Err(e) => {
794 if self.config.ignore_errors {
795 tracing::warn!("JSON-LD parse error: {}", e);
796 continue;
797 } else {
798 return Err(OxirsError::Parse(format!("JSON-LD parse error: {e}")));
799 }
800 }
801 }
802 }
803
804 Ok(())
805 }
806
807 fn unescape_literal_value(&self, value: &str) -> Result<String> {
809 let mut result = String::new();
810 let mut chars = value.chars();
811
812 while let Some(c) = chars.next() {
813 if c == '\\' {
814 match chars.next() {
815 Some('"') => result.push('"'),
816 Some('\\') => result.push('\\'),
817 Some('n') => result.push('\n'),
818 Some('r') => result.push('\r'),
819 Some('t') => result.push('\t'),
820 Some('u') => {
821 let hex_chars: String = chars.by_ref().take(4).collect();
823 if hex_chars.len() != 4 {
824 return Err(OxirsError::Parse(
825 "Invalid Unicode escape sequence \\uHHHH - expected 4 hex digits"
826 .to_string(),
827 ));
828 }
829 let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
830 OxirsError::Parse(
831 "Invalid hex digits in Unicode escape sequence".to_string(),
832 )
833 })?;
834 let unicode_char = char::from_u32(code_point).ok_or_else(|| {
835 OxirsError::Parse("Invalid Unicode code point".to_string())
836 })?;
837 result.push(unicode_char);
838 }
839 Some('U') => {
840 let hex_chars: String = chars.by_ref().take(8).collect();
842 if hex_chars.len() != 8 {
843 return Err(OxirsError::Parse(
844 "Invalid Unicode escape sequence \\UHHHHHHHH - expected 8 hex digits".to_string()
845 ));
846 }
847 let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
848 OxirsError::Parse(
849 "Invalid hex digits in Unicode escape sequence".to_string(),
850 )
851 })?;
852 let unicode_char = char::from_u32(code_point).ok_or_else(|| {
853 OxirsError::Parse("Invalid Unicode code point".to_string())
854 })?;
855 result.push(unicode_char);
856 }
857 Some(other) => {
858 return Err(OxirsError::Parse(format!(
859 "Invalid escape sequence \\{other}"
860 )));
861 }
862 None => {
863 return Err(OxirsError::Parse(
864 "Incomplete escape sequence at end of literal".to_string(),
865 ));
866 }
867 }
868 } else {
869 result.push(c);
870 }
871 }
872
873 Ok(result)
874 }
875
876 }
878
879pub fn detect_format_from_content(content: &str) -> Option<RdfFormat> {
881 let content = content.trim();
882
883 if content.starts_with("<?xml")
885 || content.starts_with("<rdf:RDF")
886 || content.starts_with("<RDF")
887 {
888 return Some(RdfFormat::RdfXml);
889 }
890
891 if content.starts_with('{') && (content.contains("@context") || content.contains("@type")) {
893 return Some(RdfFormat::JsonLd);
894 }
895
896 if content.contains("@prefix") || content.contains("@base") || content.contains(';') {
898 return Some(RdfFormat::Turtle);
899 }
900
901 if content.contains('{') && content.contains('}') {
903 return Some(RdfFormat::TriG);
904 }
905
906 for line in content.lines() {
908 let line = line.trim();
909 if !line.is_empty() && !line.starts_with('#') {
910 let parts: Vec<&str> = line.split_whitespace().collect();
911 if parts.len() == 4 && parts[3] == "." {
912 return Some(RdfFormat::NTriples);
914 } else if parts.len() == 5 && parts[4] == "." {
915 return Some(RdfFormat::NQuads);
917 } else if parts.len() >= 3 && parts[parts.len() - 1] == "." {
918 return Some(RdfFormat::NTriples);
920 }
921 break; }
923 }
924
925 None
926}
927
928#[cfg(test)]
929mod tests {
930 use super::*;
931 use crate::model::graph::Graph;
932
933 #[test]
934 fn test_format_detection_from_extension() {
935 assert_eq!(RdfFormat::from_extension("ttl"), Some(RdfFormat::Turtle));
936 assert_eq!(RdfFormat::from_extension("turtle"), Some(RdfFormat::Turtle));
937 assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples));
938 assert_eq!(
939 RdfFormat::from_extension("ntriples"),
940 Some(RdfFormat::NTriples)
941 );
942 assert_eq!(RdfFormat::from_extension("trig"), Some(RdfFormat::TriG));
943 assert_eq!(RdfFormat::from_extension("nq"), Some(RdfFormat::NQuads));
944 assert_eq!(RdfFormat::from_extension("rdf"), Some(RdfFormat::RdfXml));
945 assert_eq!(RdfFormat::from_extension("jsonld"), Some(RdfFormat::JsonLd));
946 assert_eq!(RdfFormat::from_extension("unknown"), None);
947 }
948
949 #[test]
950 fn test_format_properties() {
951 assert_eq!(RdfFormat::Turtle.media_type(), "text/turtle");
952 assert_eq!(RdfFormat::NTriples.extension(), "nt");
953 assert!(RdfFormat::TriG.supports_quads());
954 assert!(!RdfFormat::Turtle.supports_quads());
955 }
956
957 #[test]
958 fn test_format_detection_from_content() {
959 let xml_content = "<?xml version=\"1.0\"?>\n<rdf:RDF>";
961 assert_eq!(
962 detect_format_from_content(xml_content),
963 Some(RdfFormat::RdfXml)
964 );
965
966 let jsonld_content = r#"{"@context": "http://example.org", "@type": "Person"}"#;
968 assert_eq!(
969 detect_format_from_content(jsonld_content),
970 Some(RdfFormat::JsonLd)
971 );
972
973 let turtle_content = "@prefix foaf: <http://xmlns.com/foaf/0.1/> .";
975 assert_eq!(
976 detect_format_from_content(turtle_content),
977 Some(RdfFormat::Turtle)
978 );
979
980 let ntriples_content = "<http://example.org/s> <http://example.org/p> \"object\" .";
982 assert_eq!(
983 detect_format_from_content(ntriples_content),
984 Some(RdfFormat::NTriples)
985 );
986 }
987
988 #[test]
989 fn test_ntriples_parsing_simple() {
990 let ntriples_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .
991<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
992_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> ."#;
993
994 let parser = Parser::new(RdfFormat::NTriples);
995 let result = parser.parse_str_to_quads(ntriples_data);
996
997 assert!(result.is_ok());
998 let quads = result.expect("should have value");
999 assert_eq!(quads.len(), 3);
1000
1001 for quad in &quads {
1003 assert!(quad.is_default_graph());
1004 }
1005
1006 let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();
1008
1009 let alice_iri = NamedNode::new("http://example.org/alice").expect("valid IRI");
1011 let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").expect("valid IRI");
1012 let name_literal = Literal::new("Alice Smith");
1013 let expected_triple1 = Triple::new(alice_iri.clone(), name_pred, name_literal);
1014 assert!(triples.contains(&expected_triple1));
1015
1016 let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").expect("valid IRI");
1018 let integer_type =
1019 NamedNode::new("http://www.w3.org/2001/XMLSchema#integer").expect("valid IRI");
1020 let age_literal = Literal::new_typed("30", integer_type);
1021 let expected_triple2 = Triple::new(alice_iri, age_pred, age_literal);
1022 assert!(triples.contains(&expected_triple2));
1023
1024 let blank_node = BlankNode::new("_:person1").expect("valid blank node id");
1026 let knows_pred = NamedNode::new("http://xmlns.com/foaf/0.1/knows").expect("valid IRI");
1027 let bob_iri = NamedNode::new("http://example.org/bob").expect("valid IRI");
1028 let expected_triple3 = Triple::new(blank_node, knows_pred, bob_iri);
1029 assert!(triples.contains(&expected_triple3));
1030 }
1031
1032 #[test]
1033 fn test_ntriples_parsing_language_tag() {
1034 let ntriples_data =
1035 r#"<http://example.org/alice> <http://example.org/description> "Une personne"@fr ."#;
1036
1037 let parser = Parser::new(RdfFormat::NTriples);
1038 let result = parser.parse_str_to_quads(ntriples_data);
1039
1040 assert!(result.is_ok());
1041 let quads = result.expect("should have value");
1042 assert_eq!(quads.len(), 1);
1043
1044 let triple = quads[0].to_triple();
1045 if let Object::Literal(literal) = triple.object() {
1046 assert_eq!(literal.value(), "Une personne");
1047 assert_eq!(literal.language(), Some("fr"));
1048 assert!(literal.is_lang_string());
1049 } else {
1050 panic!("Expected literal object");
1051 }
1052 }
1053
1054 #[test]
1055 fn test_ntriples_parsing_escaped_literals() {
1056 let ntriples_data = r#"<http://example.org/test> <http://example.org/desc> "Text with \"quotes\" and \n newlines" ."#;
1057
1058 let parser = Parser::new(RdfFormat::NTriples);
1059 let result = parser.parse_str_to_quads(ntriples_data);
1060
1061 if let Err(e) = &result {
1062 println!("Parse error: {e}");
1063 }
1064 assert!(result.is_ok(), "Parse failed: {result:?}");
1065
1066 let quads = result.expect("should have value");
1067 assert_eq!(quads.len(), 1);
1068
1069 let triple = quads[0].to_triple();
1070 if let Object::Literal(literal) = triple.object() {
1071 assert!(literal.value().contains("\"quotes\""));
1072 assert!(literal.value().contains("\n"));
1073 } else {
1074 panic!("Expected literal object");
1075 }
1076 }
1077
1078 #[test]
1079 fn test_ntriples_parsing_comments_and_empty_lines() {
1080 let ntriples_data = r#"
1081# This is a comment
1082<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .
1083
1084# Another comment
1085<http://example.org/bob> <http://xmlns.com/foaf/0.1/name> "Bob Jones" .
1086"#;
1087
1088 let parser = Parser::new(RdfFormat::NTriples);
1089 let result = parser.parse_str_to_quads(ntriples_data);
1090
1091 assert!(result.is_ok());
1092 let quads = result.expect("should have value");
1093 assert_eq!(quads.len(), 2);
1094 }
1095
1096 #[test]
1097 fn test_ntriples_parsing_error_handling() {
1098 let invalid_data = "invalid ntriples data";
1100 let parser = Parser::new(RdfFormat::NTriples);
1101 let result = parser.parse_str_to_quads(invalid_data);
1102 assert!(result.is_err());
1103
1104 let mixed_data = r#"<http://example.org/valid> <http://example.org/pred> "Valid triple" .
1106invalid line here
1107<http://example.org/valid2> <http://example.org/pred> "Another valid triple" ."#;
1108
1109 let parser_strict = Parser::new(RdfFormat::NTriples);
1110 let result_strict = parser_strict.parse_str_to_quads(mixed_data);
1111 assert!(result_strict.is_err());
1112
1113 let parser_tolerant = Parser::new(RdfFormat::NTriples).with_error_tolerance(true);
1114 let result_tolerant = parser_tolerant.parse_str_to_quads(mixed_data);
1115 assert!(result_tolerant.is_ok());
1116 let quads = result_tolerant.expect("tolerant parse should succeed");
1117 assert_eq!(quads.len(), 2); }
1119
1120 #[test]
1121 fn test_nquads_parsing() {
1122 let nquads_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" <http://example.org/graph1> .
1123<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph2> .
1124_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> _:graph1 ."#;
1125
1126 let parser = Parser::new(RdfFormat::NQuads);
1127 let result = parser.parse_str_to_quads(nquads_data);
1128
1129 assert!(result.is_ok());
1130 let quads = result.expect("should have value");
1131 assert_eq!(quads.len(), 3);
1132
1133 let first_quad = &quads[0];
1135 assert!(!first_quad.is_default_graph());
1136
1137 if let GraphName::NamedNode(graph_name) = first_quad.graph_name() {
1139 assert!(graph_name.as_str().contains("example.org"));
1140 } else {
1141 panic!("Expected named graph");
1142 }
1143 }
1144
1145 #[test]
1146 fn test_turtle_parsing_basic() {
1147 let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
1148@prefix ex: <http://example.org/> .
1149
1150ex:alice foaf:name "Alice Smith" .
1151ex:alice foaf:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
1152ex:alice foaf:knows ex:bob ."#;
1153
1154 let parser = Parser::new(RdfFormat::Turtle);
1155 let result = parser.parse_str_to_quads(turtle_data);
1156
1157 assert!(result.is_ok());
1158 let quads = result.expect("should have value");
1159 assert_eq!(quads.len(), 3);
1160
1161 for quad in &quads {
1163 assert!(quad.is_default_graph());
1164 }
1165 }
1166
1167 #[test]
1168 fn test_turtle_parsing_prefixes() {
1169 let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
1170foaf:Person a foaf:Person ."#;
1171
1172 let parser = Parser::new(RdfFormat::Turtle);
1173 let result = parser.parse_str_to_quads(turtle_data);
1174
1175 assert!(result.is_ok());
1176 let quads = result.expect("should have value");
1177 assert_eq!(quads.len(), 1);
1178
1179 let triple = quads[0].to_triple();
1180 if let Subject::NamedNode(subj) = triple.subject() {
1182 assert!(subj.as_str().contains("xmlns.com/foaf"));
1183 } else {
1184 panic!("Expected named node subject");
1185 }
1186
1187 if let Predicate::NamedNode(pred) = triple.predicate() {
1189 assert!(pred.as_str().contains("rdf-syntax-ns#type"));
1190 } else {
1191 panic!("Expected named node predicate");
1192 }
1193 }
1194
1195 #[test]
1196 fn test_turtle_parsing_abbreviated_syntax() {
1197 let turtle_data = r#"@prefix ex: <http://example.org/> .
1198@prefix foaf: <http://xmlns.com/foaf/0.1/> .
1199
1200ex:alice foaf:name "Alice" ;
1201 foaf:age "30" ."#;
1202
1203 let parser = Parser::new(RdfFormat::Turtle);
1204 let result = parser.parse_str_to_quads(turtle_data);
1205
1206 assert!(result.is_ok());
1207 let quads = result.expect("should have value");
1208 assert_eq!(quads.len(), 2);
1209
1210 let subjects: Vec<_> = quads
1212 .iter()
1213 .map(|q| q.to_triple().subject().clone())
1214 .collect();
1215 assert_eq!(subjects[0], subjects[1]);
1216 }
1217
1218 #[test]
1219 fn test_turtle_parsing_base_iri() {
1220 let turtle_data = r#"@base <http://example.org/> .
1221<alice> <knows> <bob> ."#;
1222
1223 let parser = Parser::new(RdfFormat::Turtle);
1224 let result = parser.parse_str_to_quads(turtle_data);
1225
1226 assert!(result.is_ok());
1227 let quads = result.expect("should have value");
1228 assert_eq!(quads.len(), 1);
1229
1230 let triple = quads[0].to_triple();
1231 if let Subject::NamedNode(subj) = triple.subject() {
1233 assert!(subj.as_str().contains("example.org"));
1234 } else {
1235 panic!("Expected named node subject");
1236 }
1237 }
1238
1239 #[test]
1240 fn test_turtle_parsing_literals() {
1241 let turtle_data = r#"@prefix ex: <http://example.org/> .
1242ex:alice ex:name "Alice"@en .
1243ex:alice ex:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> ."#;
1244
1245 let parser = Parser::new(RdfFormat::Turtle);
1246 let result = parser.parse_str_to_quads(turtle_data);
1247
1248 assert!(result.is_ok());
1249 let quads = result.expect("should have value");
1250 assert_eq!(quads.len(), 2);
1251
1252 let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();
1254
1255 let mut found_lang_literal = false;
1256 let mut found_typed_literal = false;
1257
1258 for triple in triples {
1259 if let Object::Literal(literal) = triple.object() {
1260 if literal.language().is_some() {
1261 found_lang_literal = true;
1262 assert_eq!(literal.language(), Some("en"));
1263 } else {
1264 let datatype = literal.datatype();
1265 if datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string"
1267 && datatype.as_str()
1268 != "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
1269 {
1270 found_typed_literal = true;
1271 assert!(
1272 datatype.as_str().contains("integer"),
1273 "Expected integer datatype but got: {}",
1274 datatype.as_str()
1275 );
1276 }
1277 }
1278 }
1279 }
1280
1281 assert!(found_lang_literal);
1282 assert!(found_typed_literal);
1283 }
1284
1285 #[test]
1286 fn test_parser_round_trip() {
1287 use crate::serializer::Serializer;
1288
1289 let mut original_graph = Graph::new();
1291
1292 let alice = NamedNode::new("http://example.org/alice").expect("valid IRI");
1293 let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").expect("valid IRI");
1294 let name_literal = Literal::new("Alice Smith");
1295 original_graph.insert(Triple::new(alice.clone(), name_pred, name_literal));
1296
1297 let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").expect("valid IRI");
1298 let age_literal = Literal::new_typed("30", crate::vocab::xsd::INTEGER.clone());
1299 original_graph.insert(Triple::new(alice.clone(), age_pred, age_literal));
1300
1301 let desc_pred = NamedNode::new("http://example.org/description").expect("valid IRI");
1302 let desc_literal =
1303 Literal::new_lang("Une personne", "fr").expect("construction should succeed");
1304 original_graph.insert(Triple::new(alice, desc_pred, desc_literal));
1305
1306 let serializer = Serializer::new(RdfFormat::NTriples);
1308 let ntriples = serializer
1309 .serialize_graph(&original_graph)
1310 .expect("operation should succeed");
1311
1312 let parser = Parser::new(RdfFormat::NTriples);
1314 let quads = parser
1315 .parse_str_to_quads(&ntriples)
1316 .expect("operation should succeed");
1317
1318 let parsed_graph = Graph::from_iter(quads.into_iter().map(|q| q.to_triple()));
1320
1321 assert_eq!(original_graph.len(), parsed_graph.len());
1323
1324 for triple in original_graph.iter() {
1326 assert!(
1327 parsed_graph.contains(triple),
1328 "Parsed graph missing triple: {triple}"
1329 );
1330 }
1331 }
1332
1333 #[test]
1334 fn test_trig_parser() {
1335 let trig_data = r#"
1336@prefix ex: <http://example.org/> .
1337@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
1338
1339# Default graph
1340{
1341 ex:alice rdf:type ex:Person .
1342 ex:alice ex:name "Alice" .
1343}
1344
1345# Named graph
1346ex:graph1 {
1347 ex:bob rdf:type ex:Person .
1348 ex:bob ex:name "Bob" .
1349 ex:bob ex:age "30" .
1350}
1351"#;
1352
1353 let parser = Parser::new(RdfFormat::TriG);
1354 let quads = parser
1355 .parse_str_to_quads(trig_data)
1356 .expect("operation should succeed");
1357
1358 assert!(
1360 quads.len() >= 5,
1361 "Should parse at least 5 quads, got {}",
1362 quads.len()
1363 );
1364
1365 let default_graph_count = quads.iter().filter(|q| q.is_default_graph()).count();
1367 let named_graph_count = quads.len() - default_graph_count;
1368
1369 assert!(
1370 default_graph_count >= 2,
1371 "Should have at least 2 default graph quads, got {default_graph_count}"
1372 );
1373 assert!(
1374 named_graph_count >= 3,
1375 "Should have at least 3 named graph quads, got {named_graph_count}"
1376 );
1377
1378 let alice_uri = "http://example.org/alice";
1380 let bob_uri = "http://example.org/bob";
1381 let person_uri = "http://example.org/Person";
1382
1383 let alice_type_found = quads.iter().any(|q| {
1385 q.is_default_graph()
1386 && q.subject().to_string().contains(alice_uri)
1387 && q.object().to_string().contains(person_uri)
1388 });
1389 assert!(
1390 alice_type_found,
1391 "Should find Alice type assertion in default graph"
1392 );
1393
1394 let bob_in_named_graph = quads
1396 .iter()
1397 .any(|q| !q.is_default_graph() && q.subject().to_string().contains(bob_uri));
1398 assert!(
1399 bob_in_named_graph,
1400 "Should find Bob statements in named graph"
1401 );
1402 }
1403
1404 #[test]
1405 fn test_trig_parser_prefixes() {
1406 let trig_data = r#"
1407@prefix ex: <http://example.org/> .
1408@prefix foaf: <http://xmlns.com/foaf/0.1/> .
1409
1410ex:person1 foaf:name "John Doe" .
1411"#;
1412
1413 let parser = Parser::new(RdfFormat::TriG);
1414 let quads = parser
1415 .parse_str_to_quads(trig_data)
1416 .expect("operation should succeed");
1417
1418 assert!(!quads.is_empty(), "Should parse prefixed statements");
1419
1420 let expanded_found = quads.iter().any(|q| {
1422 q.subject()
1423 .to_string()
1424 .contains("http://example.org/person1")
1425 && q.predicate()
1426 .to_string()
1427 .contains("http://xmlns.com/foaf/0.1/name")
1428 });
1429 assert!(expanded_found, "Should expand prefixes correctly");
1430 }
1431
1432 #[test]
1433 fn test_jsonld_parser() {
1434 let jsonld_data = r#"{
1435 "@context": {
1436 "name": "http://xmlns.com/foaf/0.1/name",
1437 "Person": "http://schema.org/Person"
1438 },
1439 "@type": "Person",
1440 "@id": "http://example.org/john",
1441 "name": "John Doe"
1442}"#;
1443
1444 let parser = Parser::new(RdfFormat::JsonLd);
1445 let result = parser.parse_str_to_quads(jsonld_data);
1446
1447 match result {
1448 Ok(quads) => {
1449 println!("JSON-LD parsed {} quads:", quads.len());
1450 for quad in &quads {
1451 println!(" {quad}");
1452 }
1453 assert!(!quads.is_empty(), "Should parse some quads from JSON-LD");
1454 }
1455 Err(e) => {
1456 println!("JSON-LD parsing error (expected during development): {e}");
1458 }
1460 }
1461 }
1462
1463 #[test]
1464 fn test_jsonld_parser_simple() {
1465 let jsonld_data = r#"{
1466 "@context": "http://schema.org/",
1467 "@type": "Person",
1468 "name": "Alice"
1469}"#;
1470
1471 let parser = Parser::new(RdfFormat::JsonLd);
1472 let result = parser.parse_str_to_quads(jsonld_data);
1473
1474 match result {
1476 Ok(quads) => {
1477 println!("Simple JSON-LD parsed {} quads", quads.len());
1478 }
1479 Err(e) => {
1480 println!("Simple JSON-LD parsing error: {e}");
1481 }
1483 }
1484 }
1485}