1#[cfg(feature = "async")]
147mod async_parser;
148mod format_states;
149
150use format_states::{TrigParserState, TurtleParserState};
151
152#[cfg(feature = "async")]
153pub use async_parser::{AsyncRdfSink, AsyncStreamingParser, MemoryAsyncSink, ParseProgress};
154
155use crate::model::{
157 BlankNode, GraphName, Literal, NamedNode, Object, Predicate, Quad, Subject, Triple,
158};
159use crate::{OxirsError, Result};
160
/// The RDF serialization formats this module can identify and dispatch on.
///
/// The type is a plain tag: it is `Copy`, comparable and hashable so it can be
/// used as a map key or passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RdfFormat {
    /// Turtle.
    Turtle,
    /// N-Triples.
    NTriples,
    /// TriG.
    TriG,
    /// N-Quads.
    NQuads,
    /// RDF/XML.
    RdfXml,
    /// JSON-LD.
    JsonLd,
}
177
178impl RdfFormat {
179 pub fn from_extension(ext: &str) -> Option<Self> {
181 match ext.to_lowercase().as_str() {
182 "ttl" | "turtle" => Some(RdfFormat::Turtle),
183 "nt" | "ntriples" => Some(RdfFormat::NTriples),
184 "trig" => Some(RdfFormat::TriG),
185 "nq" | "nquads" => Some(RdfFormat::NQuads),
186 "rdf" | "xml" | "rdfxml" => Some(RdfFormat::RdfXml),
187 "jsonld" | "json-ld" => Some(RdfFormat::JsonLd),
188 _ => None,
189 }
190 }
191
192 pub fn media_type(&self) -> &'static str {
194 match self {
195 RdfFormat::Turtle => "text/turtle",
196 RdfFormat::NTriples => "application/n-triples",
197 RdfFormat::TriG => "application/trig",
198 RdfFormat::NQuads => "application/n-quads",
199 RdfFormat::RdfXml => "application/rdf+xml",
200 RdfFormat::JsonLd => "application/ld+json",
201 }
202 }
203
204 pub fn extension(&self) -> &'static str {
206 match self {
207 RdfFormat::Turtle => "ttl",
208 RdfFormat::NTriples => "nt",
209 RdfFormat::TriG => "trig",
210 RdfFormat::NQuads => "nq",
211 RdfFormat::RdfXml => "rdf",
212 RdfFormat::JsonLd => "jsonld",
213 }
214 }
215
216 pub fn supports_quads(&self) -> bool {
218 matches!(self, RdfFormat::TriG | RdfFormat::NQuads)
219 }
220}
221
/// Tunable options for a [`Parser`].
///
/// The `Default` value has no base IRI, strict error handling and no error
/// limit.
#[derive(Debug, Clone, Default)]
pub struct ParserConfig {
    /// Base IRI handed to the format-specific parsers; `None` supplies no base.
    pub base_iri: Option<String>,
    /// When `true`, malformed input is logged and skipped instead of aborting
    /// the parse.
    pub ignore_errors: bool,
    /// Optional cap on the number of errors.
    ///
    /// NOTE(review): nothing in this part of the file reads this field —
    /// confirm where (or whether) it is enforced.
    pub max_errors: Option<usize>,
}
232
233#[derive(Debug, Clone)]
235pub struct Parser {
236 format: RdfFormat,
237 config: ParserConfig,
238}
239
240impl Parser {
241 pub fn new(format: RdfFormat) -> Self {
243 Parser {
244 format,
245 config: ParserConfig::default(),
246 }
247 }
248
249 pub fn with_config(format: RdfFormat, config: ParserConfig) -> Self {
251 Parser { format, config }
252 }
253
254 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Self {
256 self.config.base_iri = Some(base_iri.into());
257 self
258 }
259
260 pub fn with_error_tolerance(mut self, ignore_errors: bool) -> Self {
262 self.config.ignore_errors = ignore_errors;
263 self
264 }
265
266 pub fn parse_str_to_quads(&self, data: &str) -> Result<Vec<Quad>> {
268 let mut quads = Vec::new();
269 self.parse_str_with_handler(data, |quad| {
270 quads.push(quad);
271 Ok(())
272 })?;
273 Ok(quads)
274 }
275
276 pub fn parse_str_to_triples(&self, data: &str) -> Result<Vec<Triple>> {
278 let quads = self.parse_str_to_quads(data)?;
279 Ok(quads
280 .into_iter()
281 .filter(|quad| quad.is_default_graph())
282 .map(|quad| quad.to_triple())
283 .collect())
284 }
285
286 pub fn parse_str_with_handler<F>(&self, data: &str, handler: F) -> Result<()>
288 where
289 F: FnMut(Quad) -> Result<()>,
290 {
291 match self.format {
292 RdfFormat::Turtle => self.parse_turtle(data, handler),
293 RdfFormat::NTriples => self.parse_ntriples(data, handler),
294 RdfFormat::TriG => self.parse_trig(data, handler),
295 RdfFormat::NQuads => self.parse_nquads(data, handler),
296 RdfFormat::RdfXml => self.parse_rdfxml(data, handler),
297 RdfFormat::JsonLd => self.parse_jsonld(data, handler),
298 }
299 }
300
301 pub fn parse_bytes_to_quads(&self, data: &[u8]) -> Result<Vec<Quad>> {
303 let data_str = std::str::from_utf8(data)
304 .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
305 self.parse_str_to_quads(data_str)
306 }
307
308 fn parse_turtle<F>(&self, data: &str, mut handler: F) -> Result<()>
309 where
310 F: FnMut(Quad) -> Result<()>,
311 {
312 let mut parser = TurtleParserState::new(self.config.base_iri.as_deref());
314
315 for (line_num, line) in data.lines().enumerate() {
316 let line = line.trim();
317
318 if line.is_empty() || line.starts_with('#') {
320 continue;
321 }
322
323 match parser.parse_line(line) {
324 Ok(triples) => {
325 for triple in triples {
326 let quad = Quad::from_triple(triple);
327 handler(quad)?;
328 }
329 }
330 Err(e) => {
331 if self.config.ignore_errors {
332 tracing::warn!("Turtle parse error on line {}: {}", line_num + 1, e);
333 continue;
334 } else {
335 return Err(OxirsError::Parse(format!(
336 "Turtle parse error on line {}: {}",
337 line_num + 1,
338 e
339 )));
340 }
341 }
342 }
343 }
344
345 if let Some(triples) = parser.finalize()? {
347 for triple in triples {
348 let quad = Quad::from_triple(triple);
349 handler(quad)?;
350 }
351 }
352
353 Ok(())
354 }
355
356 fn parse_ntriples<F>(&self, data: &str, mut handler: F) -> Result<()>
357 where
358 F: FnMut(Quad) -> Result<()>,
359 {
360 for (line_num, line) in data.lines().enumerate() {
361 let line = line.trim();
362
363 if line.is_empty() || line.starts_with('#') {
365 continue;
366 }
367
368 match self.parse_ntriples_line(line) {
370 Ok(Some(quad)) => {
371 handler(quad)?;
372 }
373 Ok(None) => {
374 continue;
376 }
377 Err(e) => {
378 if self.config.ignore_errors {
379 tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
380 continue;
381 } else {
382 return Err(OxirsError::Parse(format!(
383 "Parse error on line {}: {}",
384 line_num + 1,
385 e
386 )));
387 }
388 }
389 }
390 }
391
392 Ok(())
393 }
394
395 pub fn parse_ntriples_line(&self, line: &str) -> Result<Option<Quad>> {
396 let line = line.trim();
398
399 if line.is_empty() || line.starts_with('#') {
400 return Ok(None);
401 }
402
403 if !line.ends_with('.') {
405 return Err(OxirsError::Parse("Line must end with '.'".to_string()));
406 }
407
408 let line = &line[..line.len() - 1].trim(); let tokens = self.tokenize_ntriples_line(line)?;
412
413 if tokens.len() != 3 {
414 return Err(OxirsError::Parse(format!(
415 "Expected 3 tokens (subject, predicate, object), found {}",
416 tokens.len()
417 )));
418 }
419
420 let subject = self.parse_subject(&tokens[0])?;
422
423 let predicate = self.parse_predicate(&tokens[1])?;
425
426 let object = self.parse_object(&tokens[2])?;
428
429 let triple = Triple::new(subject, predicate, object);
430 let quad = Quad::from_triple(triple);
431
432 Ok(Some(quad))
433 }
434
435 fn tokenize_ntriples_line(&self, line: &str) -> Result<Vec<String>> {
436 let mut tokens = Vec::new();
437 let mut current_token = String::new();
438 let mut in_quotes = false;
439 let mut escaped = false;
440 let mut chars = line.chars().peekable();
441
442 while let Some(c) = chars.next() {
443 if escaped {
444 current_token.push('\\');
446 current_token.push(c);
447 escaped = false;
448 } else if c == '\\' && in_quotes {
449 escaped = true;
450 } else if c == '"' && !escaped {
451 current_token.push(c);
452 if in_quotes {
453 if let Some(&'@') = chars.peek() {
455 current_token.push(chars.next().expect("peeked '@' should be available"));
457 while let Some(&next_char) = chars.peek() {
458 if next_char.is_alphanumeric() || next_char == '-' {
459 current_token
460 .push(chars.next().expect("peeked char should be available"));
461 } else {
462 break;
463 }
464 }
465 } else if chars.peek() == Some(&'^') {
466 chars.next(); if chars.peek() == Some(&'^') {
469 chars.next(); current_token.push_str("^^");
471 if chars.peek() == Some(&'<') {
472 for next_char in chars.by_ref() {
474 current_token.push(next_char);
475 if next_char == '>' {
476 break;
477 }
478 }
479 }
480 }
481 }
482 in_quotes = false;
483 } else {
484 in_quotes = true;
485 }
486 } else if c == '"' && escaped {
487 current_token.push(c);
489 escaped = false;
490 } else if c.is_whitespace() && !in_quotes {
491 if !current_token.is_empty() {
492 tokens.push(current_token.clone());
493 current_token.clear();
494 }
495 } else {
496 current_token.push(c);
497 }
498 }
499
500 if !current_token.is_empty() {
501 tokens.push(current_token);
502 }
503
504 Ok(tokens)
505 }
506
507 fn parse_subject(&self, token: &str) -> Result<Subject> {
508 if token.starts_with('<') && token.ends_with('>') {
509 let iri = &token[1..token.len() - 1];
510 let named_node = NamedNode::new(iri)?;
511 Ok(Subject::NamedNode(named_node))
512 } else if token.starts_with("_:") {
513 let blank_node = BlankNode::new(token)?;
514 Ok(Subject::BlankNode(blank_node))
515 } else {
516 Err(OxirsError::Parse(format!(
517 "Invalid subject: {token}. Must be IRI or blank node"
518 )))
519 }
520 }
521
522 fn parse_predicate(&self, token: &str) -> Result<Predicate> {
523 if token.starts_with('<') && token.ends_with('>') {
524 let iri = &token[1..token.len() - 1];
525 let named_node = NamedNode::new(iri)?;
526 Ok(Predicate::NamedNode(named_node))
527 } else {
528 Err(OxirsError::Parse(format!(
529 "Invalid predicate: {token}. Must be IRI"
530 )))
531 }
532 }
533
534 fn parse_object(&self, token: &str) -> Result<Object> {
535 if token.starts_with('<') && token.ends_with('>') {
536 let iri = &token[1..token.len() - 1];
538 let named_node = NamedNode::new(iri)?;
539 Ok(Object::NamedNode(named_node))
540 } else if token.starts_with("_:") {
541 let blank_node = BlankNode::new(token)?;
543 Ok(Object::BlankNode(blank_node))
544 } else if token.starts_with('"') {
545 self.parse_literal(token)
547 } else {
548 Err(OxirsError::Parse(format!(
549 "Invalid object: {token}. Must be IRI, blank node, or literal"
550 )))
551 }
552 }
553
554 fn parse_literal(&self, token: &str) -> Result<Object> {
555 if !token.starts_with('"') {
556 return Err(OxirsError::Parse(
557 "Literal must start with quote".to_string(),
558 ));
559 }
560
561 let mut end_quote_pos = None;
563 let mut escaped = false;
564 let chars: Vec<char> = token.chars().collect();
565
566 for (i, &ch) in chars.iter().enumerate().skip(1) {
567 if escaped {
568 escaped = false;
569 continue;
570 }
571
572 if ch == '\\' {
573 escaped = true;
574 } else if ch == '"' {
575 end_quote_pos = Some(i);
576 break;
577 }
578 }
579
580 let end_quote_pos =
581 end_quote_pos.ok_or_else(|| OxirsError::Parse("Unterminated literal".to_string()))?;
582
583 let raw_value: String = chars[1..end_quote_pos].iter().collect();
585 let literal_value = self.unescape_literal_value(&raw_value)?;
586
587 let remaining = &token[end_quote_pos + 1..];
589
590 if let Some(lang_tag) = remaining.strip_prefix('@') {
591 let literal = Literal::new_lang(literal_value, lang_tag)?;
593 Ok(Object::Literal(literal))
594 } else if remaining.starts_with("^^<") && remaining.ends_with('>') {
595 let datatype_iri = &remaining[3..remaining.len() - 1];
597 let datatype = NamedNode::new(datatype_iri)?;
598 let literal = Literal::new_typed(literal_value, datatype);
599 Ok(Object::Literal(literal))
600 } else if remaining.is_empty() {
601 let literal = Literal::new(literal_value);
603 Ok(Object::Literal(literal))
604 } else {
605 Err(OxirsError::Parse(format!(
606 "Invalid literal syntax: {token}"
607 )))
608 }
609 }
610
611 fn parse_trig<F>(&self, data: &str, mut handler: F) -> Result<()>
612 where
613 F: FnMut(Quad) -> Result<()>,
614 {
615 let mut parser = TrigParserState::new(self.config.base_iri.as_deref());
617
618 for (line_num, line) in data.lines().enumerate() {
619 let line = line.trim();
620
621 if line.is_empty() || line.starts_with('#') {
623 continue;
624 }
625
626 match parser.parse_line(line) {
627 Ok(quads) => {
628 for quad in quads {
629 handler(quad)?;
630 }
631 }
632 Err(e) => {
633 if self.config.ignore_errors {
634 tracing::warn!("TriG parse error on line {}: {}", line_num + 1, e);
635 continue;
636 } else {
637 return Err(OxirsError::Parse(format!(
638 "TriG parse error on line {}: {}",
639 line_num + 1,
640 e
641 )));
642 }
643 }
644 }
645 }
646
647 if let Some(quads) = parser.finalize()? {
649 for quad in quads {
650 handler(quad)?;
651 }
652 }
653
654 Ok(())
655 }
656
657 fn parse_nquads<F>(&self, data: &str, mut handler: F) -> Result<()>
658 where
659 F: FnMut(Quad) -> Result<()>,
660 {
661 for (line_num, line) in data.lines().enumerate() {
662 let line = line.trim();
663
664 if line.is_empty() || line.starts_with('#') {
666 continue;
667 }
668
669 match self.parse_nquads_line(line) {
671 Ok(Some(quad)) => {
672 handler(quad)?;
673 }
674 Ok(None) => {
675 continue;
677 }
678 Err(e) => {
679 if self.config.ignore_errors {
680 tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
681 continue;
682 } else {
683 return Err(OxirsError::Parse(format!(
684 "Parse error on line {}: {}",
685 line_num + 1,
686 e
687 )));
688 }
689 }
690 }
691 }
692
693 Ok(())
694 }
695
696 pub fn parse_nquads_line(&self, line: &str) -> Result<Option<Quad>> {
697 let line = line.trim();
699
700 if line.is_empty() || line.starts_with('#') {
701 return Ok(None);
702 }
703
704 if !line.ends_with('.') {
706 return Err(OxirsError::Parse("Line must end with '.'".to_string()));
707 }
708
709 let line = &line[..line.len() - 1].trim(); let tokens = self.tokenize_ntriples_line(line)?;
713
714 if tokens.len() != 4 {
715 return Err(OxirsError::Parse(format!(
716 "Expected 4 tokens (subject, predicate, object, graph), found {}",
717 tokens.len()
718 )));
719 }
720
721 let subject = self.parse_subject(&tokens[0])?;
723
724 let predicate = self.parse_predicate(&tokens[1])?;
726
727 let object = self.parse_object(&tokens[2])?;
729
730 let graph_name = self.parse_graph_name(&tokens[3])?;
732
733 let quad = Quad::new(subject, predicate, object, graph_name);
734
735 Ok(Some(quad))
736 }
737
738 fn parse_graph_name(&self, token: &str) -> Result<GraphName> {
739 if token.starts_with('<') && token.ends_with('>') {
740 let iri = &token[1..token.len() - 1];
741 let named_node = NamedNode::new(iri)?;
742 Ok(GraphName::NamedNode(named_node))
743 } else if token.starts_with("_:") {
744 let blank_node = BlankNode::new(token)?;
745 Ok(GraphName::BlankNode(blank_node))
746 } else {
747 Err(OxirsError::Parse(format!(
748 "Invalid graph name: {token}. Must be IRI or blank node"
749 )))
750 }
751 }
752
753 fn parse_rdfxml<F>(&self, data: &str, mut handler: F) -> Result<()>
754 where
755 F: FnMut(Quad) -> Result<()>,
756 {
757 use crate::rdfxml::wrapper::parse_rdfxml;
758 use std::io::Cursor;
759
760 let reader = Cursor::new(data.as_bytes());
762 let base_iri = self.config.base_iri.as_deref();
763 let quads = parse_rdfxml(reader, base_iri, self.config.ignore_errors)?;
764
765 for quad in quads {
767 handler(quad)?;
768 }
769
770 Ok(())
771 }
772
773 fn parse_jsonld<F>(&self, data: &str, mut handler: F) -> Result<()>
774 where
775 F: FnMut(Quad) -> Result<()>,
776 {
777 use crate::jsonld::to_rdf::JsonLdParser;
779
780 let parser = JsonLdParser::new();
781 let parser = if let Some(base_iri) = &self.config.base_iri {
782 parser
783 .with_base_iri(base_iri.clone())
784 .map_err(|e| OxirsError::Parse(format!("Invalid base IRI: {e}")))?
785 } else {
786 parser
787 };
788
789 for result in parser.for_slice(data.as_bytes()) {
791 match result {
792 Ok(quad) => handler(quad)?,
793 Err(e) => {
794 if self.config.ignore_errors {
795 tracing::warn!("JSON-LD parse error: {}", e);
796 continue;
797 } else {
798 return Err(OxirsError::Parse(format!("JSON-LD parse error: {e}")));
799 }
800 }
801 }
802 }
803
804 Ok(())
805 }
806
807 fn unescape_literal_value(&self, value: &str) -> Result<String> {
809 let mut result = String::new();
810 let mut chars = value.chars();
811
812 while let Some(c) = chars.next() {
813 if c == '\\' {
814 match chars.next() {
815 Some('"') => result.push('"'),
816 Some('\\') => result.push('\\'),
817 Some('n') => result.push('\n'),
818 Some('r') => result.push('\r'),
819 Some('t') => result.push('\t'),
820 Some('u') => {
821 let hex_chars: String = chars.by_ref().take(4).collect();
823 if hex_chars.len() != 4 {
824 return Err(OxirsError::Parse(
825 "Invalid Unicode escape sequence \\uHHHH - expected 4 hex digits"
826 .to_string(),
827 ));
828 }
829 let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
830 OxirsError::Parse(
831 "Invalid hex digits in Unicode escape sequence".to_string(),
832 )
833 })?;
834 let unicode_char = char::from_u32(code_point).ok_or_else(|| {
835 OxirsError::Parse("Invalid Unicode code point".to_string())
836 })?;
837 result.push(unicode_char);
838 }
839 Some('U') => {
840 let hex_chars: String = chars.by_ref().take(8).collect();
842 if hex_chars.len() != 8 {
843 return Err(OxirsError::Parse(
844 "Invalid Unicode escape sequence \\UHHHHHHHH - expected 8 hex digits".to_string()
845 ));
846 }
847 let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
848 OxirsError::Parse(
849 "Invalid hex digits in Unicode escape sequence".to_string(),
850 )
851 })?;
852 let unicode_char = char::from_u32(code_point).ok_or_else(|| {
853 OxirsError::Parse("Invalid Unicode code point".to_string())
854 })?;
855 result.push(unicode_char);
856 }
857 Some(other) => {
858 return Err(OxirsError::Parse(format!(
859 "Invalid escape sequence \\{other}"
860 )));
861 }
862 None => {
863 return Err(OxirsError::Parse(
864 "Incomplete escape sequence at end of literal".to_string(),
865 ));
866 }
867 }
868 } else {
869 result.push(c);
870 }
871 }
872
873 Ok(result)
874 }
875
876 }
878
879pub fn detect_format_from_content(content: &str) -> Option<RdfFormat> {
881 let content = content.trim();
882
883 if content.starts_with("<?xml")
885 || content.starts_with("<rdf:RDF")
886 || content.starts_with("<RDF")
887 {
888 return Some(RdfFormat::RdfXml);
889 }
890
891 if content.starts_with('{') && (content.contains("@context") || content.contains("@type")) {
893 return Some(RdfFormat::JsonLd);
894 }
895
896 if content.contains("@prefix") || content.contains("@base") || content.contains(';') {
898 return Some(RdfFormat::Turtle);
899 }
900
901 if content.contains('{') && content.contains('}') {
903 return Some(RdfFormat::TriG);
904 }
905
906 for line in content.lines() {
908 let line = line.trim();
909 if !line.is_empty() && !line.starts_with('#') {
910 let parts: Vec<&str> = line.split_whitespace().collect();
911 if parts.len() == 4 && parts[3] == "." {
912 return Some(RdfFormat::NTriples);
914 } else if parts.len() == 5 && parts[4] == "." {
915 return Some(RdfFormat::NQuads);
917 } else if parts.len() >= 3 && parts[parts.len() - 1] == "." {
918 return Some(RdfFormat::NTriples);
920 }
921 break; }
923 }
924
925 None
926}
927
928#[cfg(test)]
929mod tests {
930 use super::*;
931 use crate::model::graph::Graph;
932
    // Checks the extension/name lookup for each format plus an unknown name.
    #[test]
    fn test_format_detection_from_extension() {
        assert_eq!(RdfFormat::from_extension("ttl"), Some(RdfFormat::Turtle));
        assert_eq!(RdfFormat::from_extension("turtle"), Some(RdfFormat::Turtle));
        assert_eq!(RdfFormat::from_extension("nt"), Some(RdfFormat::NTriples));
        assert_eq!(
            RdfFormat::from_extension("ntriples"),
            Some(RdfFormat::NTriples)
        );
        assert_eq!(RdfFormat::from_extension("trig"), Some(RdfFormat::TriG));
        assert_eq!(RdfFormat::from_extension("nq"), Some(RdfFormat::NQuads));
        assert_eq!(RdfFormat::from_extension("rdf"), Some(RdfFormat::RdfXml));
        assert_eq!(RdfFormat::from_extension("jsonld"), Some(RdfFormat::JsonLd));
        // Unrecognised names yield `None`.
        assert_eq!(RdfFormat::from_extension("unknown"), None);
    }
948
    // Spot-checks media type, extension and quad support on a few formats.
    #[test]
    fn test_format_properties() {
        assert_eq!(RdfFormat::Turtle.media_type(), "text/turtle");
        assert_eq!(RdfFormat::NTriples.extension(), "nt");
        assert!(RdfFormat::TriG.supports_quads());
        assert!(!RdfFormat::Turtle.supports_quads());
    }
956
    // Exercises the content sniffing heuristics with one sample per format.
    #[test]
    fn test_format_detection_from_content() {
        // RDF/XML: recognised by the XML declaration.
        let xml_content = "<?xml version=\"1.0\"?>\n<rdf:RDF>";
        assert_eq!(
            detect_format_from_content(xml_content),
            Some(RdfFormat::RdfXml)
        );

        // JSON-LD: an object containing `@context`.
        let jsonld_content = r#"{"@context": "http://example.org", "@type": "Person"}"#;
        assert_eq!(
            detect_format_from_content(jsonld_content),
            Some(RdfFormat::JsonLd)
        );

        // Turtle: recognised by the `@prefix` directive.
        let turtle_content = "@prefix foaf: <http://xmlns.com/foaf/0.1/> .";
        assert_eq!(
            detect_format_from_content(turtle_content),
            Some(RdfFormat::Turtle)
        );

        // N-Triples: a single four-part statement ending in `.`.
        let ntriples_content = "<http://example.org/s> <http://example.org/p> \"object\" .";
        assert_eq!(
            detect_format_from_content(ntriples_content),
            Some(RdfFormat::NTriples)
        );
    }
987
    // Parses three N-Triples statements (plain literal, typed literal, blank
    // node subject) and checks the exact triples produced.
    #[test]
    fn test_ntriples_parsing_simple() {
        let ntriples_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .
<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> ."#;

        let parser = Parser::new(RdfFormat::NTriples);
        let result = parser.parse_str_to_quads(ntriples_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 3);

        // N-Triples has no graph component, so everything is in the default graph.
        for quad in &quads {
            assert!(quad.is_default_graph());
        }

        let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();

        // Plain literal.
        let alice_iri = NamedNode::new("http://example.org/alice").unwrap();
        let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").unwrap();
        let name_literal = Literal::new("Alice Smith");
        let expected_triple1 = Triple::new(alice_iri.clone(), name_pred, name_literal);
        assert!(triples.contains(&expected_triple1));

        // Typed literal.
        let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").unwrap();
        let integer_type = NamedNode::new("http://www.w3.org/2001/XMLSchema#integer").unwrap();
        let age_literal = Literal::new_typed("30", integer_type);
        let expected_triple2 = Triple::new(alice_iri, age_pred, age_literal);
        assert!(triples.contains(&expected_triple2));

        // Blank node subject.
        let blank_node = BlankNode::new("_:person1").unwrap();
        let knows_pred = NamedNode::new("http://xmlns.com/foaf/0.1/knows").unwrap();
        let bob_iri = NamedNode::new("http://example.org/bob").unwrap();
        let expected_triple3 = Triple::new(blank_node, knows_pred, bob_iri);
        assert!(triples.contains(&expected_triple3));
    }
1030
    // Checks that a `"..."@fr` literal keeps both its value and language tag.
    #[test]
    fn test_ntriples_parsing_language_tag() {
        let ntriples_data =
            r#"<http://example.org/alice> <http://example.org/description> "Une personne"@fr ."#;

        let parser = Parser::new(RdfFormat::NTriples);
        let result = parser.parse_str_to_quads(ntriples_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 1);

        let triple = quads[0].to_triple();
        if let Object::Literal(literal) = triple.object() {
            assert_eq!(literal.value(), "Une personne");
            assert_eq!(literal.language(), Some("fr"));
            assert!(literal.is_lang_string());
        } else {
            panic!("Expected literal object");
        }
    }
1052
    // Checks that `\"` and `\n` escapes inside a literal are decoded.
    #[test]
    fn test_ntriples_parsing_escaped_literals() {
        let ntriples_data = r#"<http://example.org/test> <http://example.org/desc> "Text with \"quotes\" and \n newlines" ."#;

        let parser = Parser::new(RdfFormat::NTriples);
        let result = parser.parse_str_to_quads(ntriples_data);

        // Print the error first so a failure is easier to diagnose.
        if let Err(e) = &result {
            println!("Parse error: {e}");
        }
        assert!(result.is_ok(), "Parse failed: {result:?}");

        let quads = result.unwrap();
        assert_eq!(quads.len(), 1);

        let triple = quads[0].to_triple();
        if let Object::Literal(literal) = triple.object() {
            assert!(literal.value().contains("\"quotes\""));
            assert!(literal.value().contains("\n"));
        } else {
            panic!("Expected literal object");
        }
    }
1076
    // Checks that comment lines and blank lines are skipped.
    #[test]
    fn test_ntriples_parsing_comments_and_empty_lines() {
        let ntriples_data = r#"
# This is a comment
<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .

# Another comment
<http://example.org/bob> <http://xmlns.com/foaf/0.1/name> "Bob Jones" .
"#;

        let parser = Parser::new(RdfFormat::NTriples);
        let result = parser.parse_str_to_quads(ntriples_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        // Only the two statements count.
        assert_eq!(quads.len(), 2);
    }
1094
    // Contrasts strict parsing (first bad line fails) with tolerant parsing
    // (bad lines are skipped).
    #[test]
    fn test_ntriples_parsing_error_handling() {
        // Wholly invalid input fails in strict mode.
        let invalid_data = "invalid ntriples data";
        let parser = Parser::new(RdfFormat::NTriples);
        let result = parser.parse_str_to_quads(invalid_data);
        assert!(result.is_err());

        // One invalid line between two valid statements.
        let mixed_data = r#"<http://example.org/valid> <http://example.org/pred> "Valid triple" .
invalid line here
<http://example.org/valid2> <http://example.org/pred> "Another valid triple" ."#;

        let parser_strict = Parser::new(RdfFormat::NTriples);
        let result_strict = parser_strict.parse_str_to_quads(mixed_data);
        assert!(result_strict.is_err());

        let parser_tolerant = Parser::new(RdfFormat::NTriples).with_error_tolerance(true);
        let result_tolerant = parser_tolerant.parse_str_to_quads(mixed_data);
        assert!(result_tolerant.is_ok());
        let quads = result_tolerant.unwrap();
        // The invalid line is dropped; both valid statements survive.
        assert_eq!(quads.len(), 2);
    }
1118
    // Parses three N-Quads statements with IRI and blank-node graph labels.
    #[test]
    fn test_nquads_parsing() {
        let nquads_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" <http://example.org/graph1> .
<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph2> .
_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> _:graph1 ."#;

        let parser = Parser::new(RdfFormat::NQuads);
        let result = parser.parse_str_to_quads(nquads_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 3);

        // The first statement names a graph, so it is not in the default graph.
        let first_quad = &quads[0];
        assert!(!first_quad.is_default_graph());

        if let GraphName::NamedNode(graph_name) = first_quad.graph_name() {
            assert!(graph_name.as_str().contains("example.org"));
        } else {
            panic!("Expected named graph");
        }
    }
1143
    // Parses a small Turtle document with prefixes and three statements.
    #[test]
    fn test_turtle_parsing_basic() {
        let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex: <http://example.org/> .

ex:alice foaf:name "Alice Smith" .
ex:alice foaf:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
ex:alice foaf:knows ex:bob ."#;

        let parser = Parser::new(RdfFormat::Turtle);
        let result = parser.parse_str_to_quads(turtle_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 3);

        // Turtle triples are wrapped into default-graph quads.
        for quad in &quads {
            assert!(quad.is_default_graph());
        }
    }
1165
    // Checks prefix expansion and that the `a` keyword maps to rdf:type.
    #[test]
    fn test_turtle_parsing_prefixes() {
        let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
foaf:Person a foaf:Person ."#;

        let parser = Parser::new(RdfFormat::Turtle);
        let result = parser.parse_str_to_quads(turtle_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 1);

        let triple = quads[0].to_triple();
        // The prefixed subject must have been expanded to a full IRI.
        if let Subject::NamedNode(subj) = triple.subject() {
            assert!(subj.as_str().contains("xmlns.com/foaf"));
        } else {
            panic!("Expected named node subject");
        }

        // `a` must have been expanded to the rdf:type IRI.
        if let Predicate::NamedNode(pred) = triple.predicate() {
            assert!(pred.as_str().contains("rdf-syntax-ns#type"));
        } else {
            panic!("Expected named node predicate");
        }
    }
1193
    // Checks the `;` predicate-list abbreviation: two triples, one subject.
    #[test]
    fn test_turtle_parsing_abbreviated_syntax() {
        let turtle_data = r#"@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

ex:alice foaf:name "Alice" ;
         foaf:age "30" ."#;

        let parser = Parser::new(RdfFormat::Turtle);
        let result = parser.parse_str_to_quads(turtle_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 2);

        // Both triples must share the subject stated before the `;`.
        let subjects: Vec<_> = quads
            .iter()
            .map(|q| q.to_triple().subject().clone())
            .collect();
        assert_eq!(subjects[0], subjects[1]);
    }
1216
    // Checks that relative IRIs are resolved against an `@base` directive.
    #[test]
    fn test_turtle_parsing_base_iri() {
        let turtle_data = r#"@base <http://example.org/> .
<alice> <knows> <bob> ."#;

        let parser = Parser::new(RdfFormat::Turtle);
        let result = parser.parse_str_to_quads(turtle_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 1);

        let triple = quads[0].to_triple();
        // The resolved subject must contain the base authority.
        if let Subject::NamedNode(subj) = triple.subject() {
            assert!(subj.as_str().contains("example.org"));
        } else {
            panic!("Expected named node subject");
        }
    }
1237
    // Checks that Turtle language-tagged and typed literals are both parsed.
    #[test]
    fn test_turtle_parsing_literals() {
        let turtle_data = r#"@prefix ex: <http://example.org/> .
ex:alice ex:name "Alice"@en .
ex:alice ex:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> ."#;

        let parser = Parser::new(RdfFormat::Turtle);
        let result = parser.parse_str_to_quads(turtle_data);

        assert!(result.is_ok());
        let quads = result.unwrap();
        assert_eq!(quads.len(), 2);

        let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();

        let mut found_lang_literal = false;
        let mut found_typed_literal = false;

        for triple in triples {
            if let Object::Literal(literal) = triple.object() {
                if literal.language().is_some() {
                    found_lang_literal = true;
                    assert_eq!(literal.language(), Some("en"));
                } else {
                    let datatype = literal.datatype();
                    // Only literals whose datatype is neither xsd:string nor
                    // rdf:langString count as explicitly typed here.
                    if datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string"
                        && datatype.as_str()
                            != "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
                    {
                        found_typed_literal = true;
                        assert!(
                            datatype.as_str().contains("integer"),
                            "Expected integer datatype but got: {}",
                            datatype.as_str()
                        );
                    }
                }
            }
        }

        assert!(found_lang_literal);
        assert!(found_typed_literal);
    }
1283
    // Serializes a small graph to N-Triples, parses it back and checks that
    // every original triple is present in the parsed graph.
    #[test]
    fn test_parser_round_trip() {
        use crate::serializer::Serializer;

        // Build a graph with a plain, a typed and a language-tagged literal.
        let mut original_graph = Graph::new();

        let alice = NamedNode::new("http://example.org/alice").unwrap();
        let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").unwrap();
        let name_literal = Literal::new("Alice Smith");
        original_graph.insert(Triple::new(alice.clone(), name_pred, name_literal));

        let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").unwrap();
        let age_literal = Literal::new_typed("30", crate::vocab::xsd::INTEGER.clone());
        original_graph.insert(Triple::new(alice.clone(), age_pred, age_literal));

        let desc_pred = NamedNode::new("http://example.org/description").unwrap();
        let desc_literal = Literal::new_lang("Une personne", "fr").unwrap();
        original_graph.insert(Triple::new(alice, desc_pred, desc_literal));

        // Serialize.
        let serializer = Serializer::new(RdfFormat::NTriples);
        let ntriples = serializer.serialize_graph(&original_graph).unwrap();

        // Parse back.
        let parser = Parser::new(RdfFormat::NTriples);
        let quads = parser.parse_str_to_quads(&ntriples).unwrap();

        let parsed_graph = Graph::from_iter(quads.into_iter().map(|q| q.to_triple()));

        // Same size and same contents.
        assert_eq!(original_graph.len(), parsed_graph.len());

        for triple in original_graph.iter() {
            assert!(
                parsed_graph.contains(triple),
                "Parsed graph missing triple: {triple}"
            );
        }
    }
1326
    // Parses a TriG document with one default-graph block and one named-graph
    // block, and checks how the quads are distributed between them.
    #[test]
    fn test_trig_parser() {
        let trig_data = r#"
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

# Default graph
{
    ex:alice rdf:type ex:Person .
    ex:alice ex:name "Alice" .
}

# Named graph
ex:graph1 {
    ex:bob rdf:type ex:Person .
    ex:bob ex:name "Bob" .
    ex:bob ex:age "30" .
}
"#;

        let parser = Parser::new(RdfFormat::TriG);
        let quads = parser.parse_str_to_quads(trig_data).unwrap();

        // Lower bounds only: the assertions tolerate additional quads.
        assert!(
            quads.len() >= 5,
            "Should parse at least 5 quads, got {}",
            quads.len()
        );

        let default_graph_count = quads.iter().filter(|q| q.is_default_graph()).count();
        let named_graph_count = quads.len() - default_graph_count;

        assert!(
            default_graph_count >= 2,
            "Should have at least 2 default graph quads, got {default_graph_count}"
        );
        assert!(
            named_graph_count >= 3,
            "Should have at least 3 named graph quads, got {named_graph_count}"
        );

        let alice_uri = "http://example.org/alice";
        let bob_uri = "http://example.org/bob";
        let person_uri = "http://example.org/Person";

        // Alice's type statement must be in the default graph.
        let alice_type_found = quads.iter().any(|q| {
            q.is_default_graph()
                && q.subject().to_string().contains(alice_uri)
                && q.object().to_string().contains(person_uri)
        });
        assert!(
            alice_type_found,
            "Should find Alice type assertion in default graph"
        );

        // Bob's statements must be in a non-default graph.
        let bob_in_named_graph = quads
            .iter()
            .any(|q| !q.is_default_graph() && q.subject().to_string().contains(bob_uri));
        assert!(
            bob_in_named_graph,
            "Should find Bob statements in named graph"
        );
    }
1395
    // Checks that the TriG parser expands prefixed names outside graph blocks.
    #[test]
    fn test_trig_parser_prefixes() {
        let trig_data = r#"
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

ex:person1 foaf:name "John Doe" .
"#;

        let parser = Parser::new(RdfFormat::TriG);
        let quads = parser.parse_str_to_quads(trig_data).unwrap();

        assert!(!quads.is_empty(), "Should parse prefixed statements");

        // Both subject and predicate must appear in expanded form.
        let expanded_found = quads.iter().any(|q| {
            q.subject()
                .to_string()
                .contains("http://example.org/person1")
                && q.predicate()
                    .to_string()
                    .contains("http://xmlns.com/foaf/0.1/name")
        });
        assert!(expanded_found, "Should expand prefixes correctly");
    }
1421
    // Smoke test for JSON-LD parsing with an inline context.
    //
    // NOTE: a parse error is only printed, not asserted on, so this test
    // passes even when JSON-LD parsing fails.
    #[test]
    fn test_jsonld_parser() {
        let jsonld_data = r#"{
    "@context": {
        "name": "http://xmlns.com/foaf/0.1/name",
        "Person": "http://schema.org/Person"
    },
    "@type": "Person",
    "@id": "http://example.org/john",
    "name": "John Doe"
}"#;

        let parser = Parser::new(RdfFormat::JsonLd);
        let result = parser.parse_str_to_quads(jsonld_data);

        match result {
            Ok(quads) => {
                println!("JSON-LD parsed {} quads:", quads.len());
                for quad in &quads {
                    println!("  {quad}");
                }
                assert!(!quads.is_empty(), "Should parse some quads from JSON-LD");
            }
            Err(e) => {
                // Tolerated: the error is reported but does not fail the test.
                println!("JSON-LD parsing error (expected during development): {e}");
            }
        }
    }
1452
    // Smoke test for JSON-LD parsing with a context given as an IRI string.
    //
    // NOTE: neither branch asserts anything, so this test only checks that
    // the call returns without panicking.
    #[test]
    fn test_jsonld_parser_simple() {
        let jsonld_data = r#"{
    "@context": "http://schema.org/",
    "@type": "Person",
    "name": "Alice"
}"#;

        let parser = Parser::new(RdfFormat::JsonLd);
        let result = parser.parse_str_to_quads(jsonld_data);

        match result {
            Ok(quads) => {
                println!("Simple JSON-LD parsed {} quads", quads.len());
            }
            Err(e) => {
                println!("Simple JSON-LD parsing error: {e}");
            }
        }
    }
1475}