1use super::error::{ParseResult, RdfParseError, RdfSyntaxError, TextPosition};
7use super::toolkit::{char_utils, BufferProvider, TokenRecognizer};
8use std::fmt;
9
/// A lexical token produced by the N3 lexer in this file.
///
/// The notation shown for each variant is the text its `Display`
/// implementation writes.
#[derive(Debug, Clone, PartialEq)]
pub enum N3Token {
    /// `.`
    Dot,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// The `@prefix` directive keyword.
    Prefix,
    /// The `@base` directive keyword.
    Base,
    /// The bare keyword `a` (presumably the `rdf:type` shorthand; confirm with the parser).
    A,
    /// An IRI reference written as `<...>`; holds the text between the angle
    /// brackets with escapes already decoded.
    Iri(String),
    /// A prefixed name such as `ex:name` or `:name`.
    PrefixedName {
        /// Text before the `:`; `None` when nothing precedes the colon.
        prefix: Option<String>,
        /// Text after the `:`.
        local: String,
    },
    /// A blank node label, i.e. the part after `_:`.
    BlankNode(String),
    /// A quoted string literal with its optional suffix.
    Literal {
        /// The string content with escape sequences already decoded.
        value: String,
        /// Datatype IRI given after `^^<...>`, if any.
        datatype: Option<String>,
        /// Language tag given after `@`, if any.
        language: Option<String>,
    },
    /// A variable name, i.e. the part after `?` or `$`.
    Variable(String),
    /// `<<`
    QuotedTripleStart,
    /// `>>`
    QuotedTripleEnd,
    /// The keyword `true`.
    True,
    /// The keyword `false`.
    False,
    /// A number written without a decimal point or exponent.
    Integer(i64),
    /// A number written with a decimal point but no exponent.
    Decimal(f64),
    /// A number written with an exponent.
    Double(f64),

    /// Comment text following `#` up to the end of the line; only emitted when
    /// the lexer is configured not to skip whitespace.
    Comment(String),
    /// A single whitespace character; only emitted when the lexer is
    /// configured not to skip whitespace.
    Whitespace,

    /// End of input.
    Eof,
}
64
65impl fmt::Display for N3Token {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67 match self {
68 N3Token::Dot => write!(f, "."),
69 N3Token::Semicolon => write!(f, ";"),
70 N3Token::Comma => write!(f, ","),
71 N3Token::LeftBracket => write!(f, "["),
72 N3Token::RightBracket => write!(f, "]"),
73 N3Token::LeftParen => write!(f, "("),
74 N3Token::RightParen => write!(f, ")"),
75 N3Token::LeftBrace => write!(f, "{{"),
76 N3Token::RightBrace => write!(f, "}}"),
77 N3Token::Prefix => write!(f, "@prefix"),
78 N3Token::Base => write!(f, "@base"),
79 N3Token::A => write!(f, "a"),
80 N3Token::Iri(iri) => write!(f, "<{iri}>"),
81 N3Token::PrefixedName {
82 prefix: Some(prefix),
83 local,
84 } => write!(f, "{prefix}:{local}"),
85 N3Token::PrefixedName {
86 prefix: None,
87 local,
88 } => write!(f, ":{local}"),
89 N3Token::BlankNode(label) => write!(f, "_:{label}"),
90 N3Token::Literal {
91 value,
92 datatype: Some(dt),
93 language: None,
94 } => write!(f, "\"{value}\"^^<{dt}>"),
95 N3Token::Literal {
96 value,
97 datatype: None,
98 language: Some(lang),
99 } => write!(f, "\"{value}\"@{lang}"),
100 N3Token::Literal {
101 value,
102 datatype: None,
103 language: None,
104 } => write!(f, "\"{value}\""),
105 N3Token::Literal {
106 value,
107 datatype: Some(dt),
108 language: Some(lang),
109 } => write!(f, "\"{value}\"@{lang}^^<{dt}>"),
110 N3Token::Variable(var) => write!(f, "?{var}"),
111 N3Token::QuotedTripleStart => write!(f, "<<"),
112 N3Token::QuotedTripleEnd => write!(f, ">>"),
113 N3Token::True => write!(f, "true"),
114 N3Token::False => write!(f, "false"),
115 N3Token::Integer(i) => write!(f, "{i}"),
116 N3Token::Decimal(d) => write!(f, "{d}"),
117 N3Token::Double(d) => write!(f, "{d}"),
118 N3Token::Comment(comment) => write!(f, "# {comment}"),
119 N3Token::Whitespace => write!(f, " "),
120 N3Token::Eof => write!(f, "EOF"),
121 }
122 }
123}
124
/// Lexer configuration for tokenizing N3 text into `N3Token`s.
#[derive(Debug, Clone)]
pub struct N3Lexer {
    /// When `true`, whitespace runs and `#` comments are skipped instead of
    /// being returned as `Whitespace` / `Comment` tokens.
    pub skip_whitespace: bool,
    /// When `true`, a leading `?` or `$` starts a `Variable` token.
    pub parse_variables: bool,
}
133
134impl Default for N3Lexer {
135 fn default() -> Self {
136 Self {
137 skip_whitespace: true,
138 parse_variables: false,
139 }
140 }
141}
142
143impl N3Lexer {
144 pub fn new() -> Self {
145 Self::default()
146 }
147
148 pub fn with_variables(mut self) -> Self {
149 self.parse_variables = true;
150 self
151 }
152
153 pub fn with_whitespace(mut self) -> Self {
154 self.skip_whitespace = false;
155 self
156 }
157
158 fn read_iri(&self, buffer: &mut dyn BufferProvider) -> ParseResult<String> {
160 let mut iri = String::new();
161
162 buffer.advance();
164
165 while let Some(ch) = buffer.current() {
166 match ch {
167 '>' => {
168 buffer.advance();
169 return Ok(iri);
170 }
171 '\\' => {
172 buffer.advance();
173 match buffer.current() {
174 Some('u') => {
175 buffer.advance();
176 let unicode = self.read_unicode_escape(buffer, 4)?;
177 iri.push(unicode);
178 }
179 Some('U') => {
180 buffer.advance();
181 let unicode = self.read_unicode_escape(buffer, 8)?;
182 iri.push(unicode);
183 }
184 Some(escaped) => {
185 match escaped {
187 't' => iri.push('\t'),
188 'n' => iri.push('\n'),
189 'r' => iri.push('\r'),
190 '\\' => iri.push('\\'),
191 '>' => iri.push('>'),
192 _ => {
193 return Err(RdfParseError::Syntax(
194 RdfSyntaxError::with_position(
195 format!("Invalid IRI escape sequence: \\{escaped}"),
196 *buffer.position(),
197 ),
198 ));
199 }
200 }
201 buffer.advance();
202 }
203 None => {
204 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
205 "Unexpected end of IRI".to_string(),
206 *buffer.position(),
207 )));
208 }
209 }
210 }
211 ch if char_utils::is_iri_char(ch) => {
212 iri.push(ch);
213 buffer.advance();
214 }
215 _ => {
216 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
217 format!("Invalid character in IRI: '{ch}'"),
218 *buffer.position(),
219 )));
220 }
221 }
222 }
223
224 Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
225 "Unclosed IRI".to_string(),
226 *buffer.position(),
227 )))
228 }
229
230 fn read_unicode_escape(
232 &self,
233 buffer: &mut dyn BufferProvider,
234 digits: usize,
235 ) -> ParseResult<char> {
236 let mut unicode_str = String::new();
237
238 for _ in 0..digits {
239 match buffer.current() {
240 Some(ch) if char_utils::is_hex_digit(ch) => {
241 unicode_str.push(ch);
242 buffer.advance();
243 }
244 Some(ch) => {
245 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
246 format!("Invalid hex digit in Unicode escape: '{ch}'"),
247 *buffer.position(),
248 )));
249 }
250 None => {
251 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
252 "Unexpected end of Unicode escape".to_string(),
253 *buffer.position(),
254 )));
255 }
256 }
257 }
258
259 let code_point = u32::from_str_radix(&unicode_str, 16).map_err(|_| {
260 RdfParseError::Syntax(RdfSyntaxError::with_position(
261 "Invalid Unicode code point".to_string(),
262 *buffer.position(),
263 ))
264 })?;
265
266 char::from_u32(code_point).ok_or_else(|| {
267 RdfParseError::Syntax(RdfSyntaxError::with_position(
268 "Invalid Unicode code point".to_string(),
269 *buffer.position(),
270 ))
271 })
272 }
273
274 fn read_prefixed_name(&self, buffer: &mut dyn BufferProvider) -> ParseResult<N3Token> {
276 let mut prefix = String::new();
277
278 while let Some(ch) = buffer.current() {
280 if ch == ':' {
281 buffer.advance();
282 break;
283 } else if char_utils::is_pn_chars(ch) {
284 prefix.push(ch);
285 buffer.advance();
286 } else {
287 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
288 format!("Invalid character in prefix: '{ch}'"),
289 *buffer.position(),
290 )));
291 }
292 }
293
294 let mut local = String::new();
296 while let Some(ch) = buffer.current() {
297 if char_utils::is_pn_chars(ch) || ch == '.' {
298 local.push(ch);
299 buffer.advance();
300 } else {
301 break;
302 }
303 }
304
305 let prefix_opt = if prefix.is_empty() {
306 None
307 } else {
308 Some(prefix)
309 };
310
311 Ok(N3Token::PrefixedName {
312 prefix: prefix_opt,
313 local,
314 })
315 }
316
317 fn read_blank_node(&self, buffer: &mut dyn BufferProvider) -> ParseResult<String> {
319 buffer.advance(); if buffer.current() != Some(':') {
322 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
323 "Expected ':' after '_' in blank node".to_string(),
324 *buffer.position(),
325 )));
326 }
327 buffer.advance(); let mut label = String::new();
330
331 match buffer.current() {
333 Some(ch) if char_utils::is_pn_chars_base(ch) || char_utils::is_digit(ch) => {
334 label.push(ch);
335 buffer.advance();
336 }
337 Some(ch) => {
338 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
339 format!("Invalid first character in blank node label: '{ch}'"),
340 *buffer.position(),
341 )));
342 }
343 None => {
344 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
345 "Expected blank node label after '_:'".to_string(),
346 *buffer.position(),
347 )));
348 }
349 }
350
351 while let Some(ch) = buffer.current() {
353 if char_utils::is_pn_chars(ch) {
354 label.push(ch);
355 buffer.advance();
356 } else {
357 break;
358 }
359 }
360
361 Ok(label)
362 }
363
364 fn read_string_literal(
366 &self,
367 buffer: &mut dyn BufferProvider,
368 quote_char: char,
369 ) -> ParseResult<N3Token> {
370 buffer.advance(); let mut value = String::new();
373 let mut triple_quoted = false;
374
375 if buffer.current() == Some(quote_char) {
377 buffer.advance();
378 if buffer.current() == Some(quote_char) {
379 buffer.advance();
380 triple_quoted = true;
381 } else {
382 return self.read_literal_suffix(buffer, value);
384 }
385 }
386
387 while let Some(ch) = buffer.current() {
388 match ch {
389 c if c == quote_char => {
390 if triple_quoted {
391 buffer.advance();
393 if buffer.current() == Some(quote_char) {
394 buffer.advance();
395 if buffer.current() == Some(quote_char) {
396 buffer.advance();
397 break; } else {
399 value.push(quote_char);
401 value.push(quote_char);
402 }
403 } else {
404 value.push(quote_char);
406 }
407 } else {
408 buffer.advance();
409 break; }
411 }
412 '\\' => {
413 buffer.advance();
414 match buffer.current() {
415 Some('t') => value.push('\t'),
416 Some('n') => value.push('\n'),
417 Some('r') => value.push('\r'),
418 Some('b') => value.push('\u{0008}'),
419 Some('f') => value.push('\u{000C}'),
420 Some('"') => value.push('"'),
421 Some('\'') => value.push('\''),
422 Some('\\') => value.push('\\'),
423 Some('u') => {
424 buffer.advance();
425 let unicode = self.read_unicode_escape(buffer, 4)?;
426 value.push(unicode);
427 continue; }
429 Some('U') => {
430 buffer.advance();
431 let unicode = self.read_unicode_escape(buffer, 8)?;
432 value.push(unicode);
433 continue; }
435 Some(other) => {
436 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
437 format!("Invalid escape sequence: \\{other}"),
438 *buffer.position(),
439 )));
440 }
441 None => {
442 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
443 "Unexpected end of string literal".to_string(),
444 *buffer.position(),
445 )));
446 }
447 }
448 buffer.advance();
449 }
450 '\n' | '\r' if !triple_quoted => {
451 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
452 "Newline in single-quoted string literal".to_string(),
453 *buffer.position(),
454 )));
455 }
456 _ => {
457 value.push(ch);
458 buffer.advance();
459 }
460 }
461 }
462
463 self.read_literal_suffix(buffer, value)
464 }
465
466 fn read_literal_suffix(
468 &self,
469 buffer: &mut dyn BufferProvider,
470 value: String,
471 ) -> ParseResult<N3Token> {
472 let mut datatype = None;
473 let mut language = None;
474
475 match buffer.current() {
477 Some('@') => {
478 buffer.advance();
479 let mut lang = String::new();
480 while let Some(ch) = buffer.current() {
481 if ch.is_ascii_alphanumeric() || ch == '-' {
482 lang.push(ch);
483 buffer.advance();
484 } else {
485 break;
486 }
487 }
488 language = Some(lang);
489 }
490 Some('^') => {
491 buffer.advance();
492 if buffer.current() == Some('^') {
493 buffer.advance();
494 if buffer.current() == Some('<') {
495 let dt = self.read_iri(buffer)?;
496 datatype = Some(dt);
497 } else {
498 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
499 "Expected '<' after '^^' in datatype".to_string(),
500 *buffer.position(),
501 )));
502 }
503 } else {
504 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
505 "Expected '^' after first '^' in datatype".to_string(),
506 *buffer.position(),
507 )));
508 }
509 }
510 _ => {} }
512
513 Ok(N3Token::Literal {
514 value,
515 datatype,
516 language,
517 })
518 }
519
520 fn read_numeric(&self, buffer: &mut dyn BufferProvider) -> ParseResult<N3Token> {
522 let mut number_str = String::new();
523 let mut has_decimal = false;
524 let mut has_exponent = false;
525
526 if matches!(buffer.current(), Some('+') | Some('-')) {
528 number_str.push(
529 buffer
530 .current()
531 .expect("sign character validated by matches!"),
532 );
533 buffer.advance();
534 }
535
536 while let Some(ch) = buffer.current() {
538 if char_utils::is_digit(ch) {
539 number_str.push(ch);
540 buffer.advance();
541 } else {
542 break;
543 }
544 }
545
546 if buffer.current() == Some('.') {
548 has_decimal = true;
549 number_str.push('.');
550 buffer.advance();
551
552 while let Some(ch) = buffer.current() {
554 if char_utils::is_digit(ch) {
555 number_str.push(ch);
556 buffer.advance();
557 } else {
558 break;
559 }
560 }
561 }
562
563 if matches!(buffer.current(), Some('e') | Some('E')) {
565 has_exponent = true;
566 number_str.push(
567 buffer
568 .current()
569 .expect("exponent character validated by matches!"),
570 );
571 buffer.advance();
572
573 if matches!(buffer.current(), Some('+') | Some('-')) {
575 number_str.push(
576 buffer
577 .current()
578 .expect("sign character validated by matches!"),
579 );
580 buffer.advance();
581 }
582
583 while let Some(ch) = buffer.current() {
585 if char_utils::is_digit(ch) {
586 number_str.push(ch);
587 buffer.advance();
588 } else {
589 break;
590 }
591 }
592 }
593
594 if has_exponent {
596 let value = number_str.parse::<f64>().map_err(|_| {
598 RdfParseError::Syntax(RdfSyntaxError::with_position(
599 format!("Invalid double literal: {number_str}"),
600 *buffer.position(),
601 ))
602 })?;
603 Ok(N3Token::Double(value))
604 } else if has_decimal {
605 let value = number_str.parse::<f64>().map_err(|_| {
607 RdfParseError::Syntax(RdfSyntaxError::with_position(
608 format!("Invalid decimal literal: {number_str}"),
609 *buffer.position(),
610 ))
611 })?;
612 Ok(N3Token::Decimal(value))
613 } else {
614 let value = number_str.parse::<i64>().map_err(|_| {
616 RdfParseError::Syntax(RdfSyntaxError::with_position(
617 format!("Invalid integer literal: {number_str}"),
618 *buffer.position(),
619 ))
620 })?;
621 Ok(N3Token::Integer(value))
622 }
623 }
624
625 fn read_keyword(&self, buffer: &mut dyn BufferProvider) -> ParseResult<N3Token> {
627 let mut keyword = String::new();
628
629 while let Some(ch) = buffer.current() {
630 if char_utils::is_pn_chars(ch) {
631 keyword.push(ch);
632 buffer.advance();
633 } else {
634 break;
635 }
636 }
637
638 match keyword.as_str() {
639 "true" => Ok(N3Token::True),
640 "false" => Ok(N3Token::False),
641 "a" => Ok(N3Token::A),
642 _ => {
643 if buffer.current() == Some(':') {
645 buffer.advance();
646 let mut local = String::new();
647 while let Some(ch) = buffer.current() {
648 if char_utils::is_pn_chars(ch) || ch == '.' {
649 local.push(ch);
650 buffer.advance();
651 } else {
652 break;
653 }
654 }
655 Ok(N3Token::PrefixedName {
656 prefix: Some(keyword),
657 local,
658 })
659 } else {
660 Ok(N3Token::PrefixedName {
662 prefix: None,
663 local: keyword,
664 })
665 }
666 }
667 }
668 }
669
670 fn read_comment(&self, buffer: &mut dyn BufferProvider) -> ParseResult<String> {
672 buffer.advance(); let mut comment = String::new();
675 while let Some(ch) = buffer.current() {
676 if ch == '\n' || ch == '\r' {
677 break;
678 }
679 comment.push(ch);
680 buffer.advance();
681 }
682
683 Ok(comment)
684 }
685
686 fn skip_whitespace(&self, buffer: &mut dyn BufferProvider) {
688 while let Some(ch) = buffer.current() {
689 if char_utils::is_whitespace(ch) {
690 buffer.advance();
691 } else {
692 break;
693 }
694 }
695 }
696}
697
698impl TokenRecognizer for N3Lexer {
699 type Token = N3Token;
700 fn recognize_next_token(
701 &mut self,
702 buffer: &mut dyn BufferProvider,
703 _position: &mut TextPosition,
704 ) -> ParseResult<Option<N3Token>> {
705 loop {
706 match buffer.current() {
707 None => return Ok(Some(N3Token::Eof)),
708
709 Some(ch) if char_utils::is_whitespace(ch) => {
710 if self.skip_whitespace {
711 self.skip_whitespace(buffer);
712 continue;
713 } else {
714 buffer.advance();
715 return Ok(Some(N3Token::Whitespace));
716 }
717 }
718
719 Some('#') => {
720 if self.skip_whitespace {
721 self.read_comment(buffer)?;
722 continue;
723 } else {
724 let comment = self.read_comment(buffer)?;
725 return Ok(Some(N3Token::Comment(comment)));
726 }
727 }
728
729 Some('.') => {
731 buffer.advance();
732 return Ok(Some(N3Token::Dot));
733 }
734 Some(';') => {
735 buffer.advance();
736 return Ok(Some(N3Token::Semicolon));
737 }
738 Some(',') => {
739 buffer.advance();
740 return Ok(Some(N3Token::Comma));
741 }
742 Some('[') => {
743 buffer.advance();
744 return Ok(Some(N3Token::LeftBracket));
745 }
746 Some(']') => {
747 buffer.advance();
748 return Ok(Some(N3Token::RightBracket));
749 }
750 Some('(') => {
751 buffer.advance();
752 return Ok(Some(N3Token::LeftParen));
753 }
754 Some(')') => {
755 buffer.advance();
756 return Ok(Some(N3Token::RightParen));
757 }
758 Some('{') => {
759 buffer.advance();
760 return Ok(Some(N3Token::LeftBrace));
761 }
762 Some('}') => {
763 buffer.advance();
764 return Ok(Some(N3Token::RightBrace));
765 }
766
767 Some('<') => {
769 if buffer.peek() == Some('<') {
771 buffer.advance(); buffer.advance(); return Ok(Some(N3Token::QuotedTripleStart));
774 } else {
775 let iri = self.read_iri(buffer)?;
776 return Ok(Some(N3Token::Iri(iri)));
777 }
778 }
779
780 Some('>') => {
782 if buffer.peek() == Some('>') {
784 buffer.advance(); buffer.advance(); return Ok(Some(N3Token::QuotedTripleEnd));
787 } else {
788 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
789 "Unexpected '>' character".to_string(),
790 *buffer.position(),
791 )));
792 }
793 }
794
795 Some('_') => {
797 let label = self.read_blank_node(buffer)?;
798 return Ok(Some(N3Token::BlankNode(label)));
799 }
800
801 Some('"') => {
803 let literal = self.read_string_literal(buffer, '"')?;
804 return Ok(Some(literal));
805 }
806 Some('\'') => {
807 let literal = self.read_string_literal(buffer, '\'')?;
808 return Ok(Some(literal));
809 }
810
811 Some('?') | Some('$') if self.parse_variables => {
813 let _var_char = buffer
814 .current()
815 .expect("variable start character validated by matches!");
816 buffer.advance();
817 let mut var_name = String::new();
818 while let Some(ch) = buffer.current() {
819 if char_utils::is_pn_chars(ch) {
820 var_name.push(ch);
821 buffer.advance();
822 } else {
823 break;
824 }
825 }
826 return Ok(Some(N3Token::Variable(var_name)));
827 }
828
829 Some('@') => {
831 buffer.advance();
832 let mut directive = String::new();
833 while let Some(ch) = buffer.current() {
834 if char_utils::is_pn_chars(ch) {
835 directive.push(ch);
836 buffer.advance();
837 } else {
838 break;
839 }
840 }
841
842 match directive.as_str() {
843 "prefix" => return Ok(Some(N3Token::Prefix)),
844 "base" => return Ok(Some(N3Token::Base)),
845 _ => {
846 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
847 format!("Unknown directive: @{directive}"),
848 *buffer.position(),
849 )));
850 }
851 }
852 }
853
854 Some(ch) if char_utils::is_numeric_start(ch) => {
856 let token = self.read_numeric(buffer)?;
857 return Ok(Some(token));
858 }
859
860 Some(':') => {
862 let token = self.read_prefixed_name(buffer)?;
863 return Ok(Some(token));
864 }
865
866 Some(ch) if char_utils::is_pn_chars_base(ch) => {
868 let token = self.read_keyword(buffer)?;
869 return Ok(Some(token));
870 }
871
872 Some(ch) => {
873 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
874 format!("Unexpected character: '{ch}'"),
875 *buffer.position(),
876 )));
877 }
878 }
879 }
880 }
881}
882
#[cfg(test)]
mod tests {
    use super::super::toolkit::StringBuffer;
    use super::*;

    /// Runs `lexer` over `input` and collects every token before `Eof`.
    fn run_lexer(mut lexer: N3Lexer, input: &str) -> ParseResult<Vec<N3Token>> {
        let mut buffer = StringBuffer::new(input.to_string());
        let mut collected = Vec::new();

        while let Some(token) =
            lexer.recognize_next_token(&mut buffer, &mut TextPosition::start())?
        {
            if token == N3Token::Eof {
                break;
            }
            collected.push(token);
        }

        Ok(collected)
    }

    /// Tokenizes `input` with a default-configured lexer.
    fn tokenize_string(input: &str) -> ParseResult<Vec<N3Token>> {
        run_lexer(N3Lexer::new(), input)
    }

    /// Builds the expected `PrefixedName` token.
    fn pname(prefix: Option<&str>, local: &str) -> N3Token {
        N3Token::PrefixedName {
            prefix: prefix.map(str::to_string),
            local: local.to_string(),
        }
    }

    /// Builds the expected `Literal` token.
    fn literal(value: &str, datatype: Option<&str>, language: Option<&str>) -> N3Token {
        N3Token::Literal {
            value: value.to_string(),
            datatype: datatype.map(str::to_string),
            language: language.map(str::to_string),
        }
    }

    #[test]
    fn test_basic_punctuation() {
        let expected = vec![
            N3Token::Dot,
            N3Token::Semicolon,
            N3Token::Comma,
            N3Token::LeftBracket,
            N3Token::RightBracket,
            N3Token::LeftParen,
            N3Token::RightParen,
            N3Token::LeftBrace,
            N3Token::RightBrace,
        ];
        let tokens = tokenize_string(". ; , [ ] ( ) { }").expect("tokenization should succeed");
        assert_eq!(tokens, expected);
    }

    #[test]
    fn test_iri() {
        let tokens = tokenize_string("<http://example.org>").expect("tokenization should succeed");
        assert_eq!(tokens, vec![N3Token::Iri("http://example.org".to_string())]);
    }

    #[test]
    fn test_prefixed_name() {
        let tokens = tokenize_string("ex:name :name").expect("tokenization should succeed");
        assert_eq!(
            tokens,
            vec![pname(Some("ex"), "name"), pname(None, "name")]
        );
    }

    #[test]
    fn test_blank_node() {
        let tokens = tokenize_string("_:blank1").expect("tokenization should succeed");
        assert_eq!(tokens, vec![N3Token::BlankNode("blank1".to_string())]);
    }

    #[test]
    fn test_string_literal() {
        let tokens = tokenize_string("\"hello world\"").expect("tokenization should succeed");
        assert_eq!(tokens, vec![literal("hello world", None, None)]);
    }

    #[test]
    fn test_string_literal_with_language() {
        let tokens = tokenize_string("\"hello\"@en").expect("tokenization should succeed");
        assert_eq!(tokens, vec![literal("hello", None, Some("en"))]);
    }

    #[test]
    fn test_string_literal_with_datatype() {
        let tokens = tokenize_string("\"42\"^^<http://www.w3.org/2001/XMLSchema#integer>")
            .expect("tokenization should succeed");
        assert_eq!(
            tokens,
            vec![literal(
                "42",
                Some("http://www.w3.org/2001/XMLSchema#integer"),
                None
            )]
        );
    }

    #[test]
    fn test_numeric_literals() {
        let tokens = tokenize_string("42 3.14 1.5e10").expect("tokenization should succeed");
        assert_eq!(
            tokens,
            vec![
                N3Token::Integer(42),
                #[allow(clippy::approx_constant)]
                N3Token::Decimal(3.14),
                N3Token::Double(1.5e10),
            ]
        );
    }

    #[test]
    fn test_boolean_literals() {
        let tokens = tokenize_string("true false").expect("tokenization should succeed");
        assert_eq!(tokens, vec![N3Token::True, N3Token::False]);
    }

    #[test]
    fn test_directives() {
        let tokens = tokenize_string("@prefix @base").expect("tokenization should succeed");
        assert_eq!(tokens, vec![N3Token::Prefix, N3Token::Base]);
    }

    #[test]
    fn test_type_shorthand() {
        let tokens = tokenize_string("a").expect("tokenization should succeed");
        assert_eq!(tokens, vec![N3Token::A]);
    }

    #[test]
    fn test_variables() {
        // Variable syntax is opt-in, so this case needs a configured lexer.
        let tokens = run_lexer(N3Lexer::new().with_variables(), "?x $y")
            .expect("operation should succeed");

        assert_eq!(
            tokens,
            vec![
                N3Token::Variable("x".to_string()),
                N3Token::Variable("y".to_string()),
            ]
        );
    }
}