/// The lexical category of a token produced by the tokenizer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    /// A reserved word such as `SELECT` or `WHERE` (see `SPARQL_KEYWORDS`);
    /// matched case-insensitively.
    Keyword,
    /// An IRI enclosed in angle brackets, e.g. `<http://example.org/>`.
    Iri,
    /// A prefixed name such as `rdf:type` (prefix, colon, optional local part).
    PrefixedName,
    /// A variable introduced by `?` or `$`, e.g. `?x`.
    Variable,
    /// A string or numeric literal, including any language tag or datatype
    /// suffix; bare words that are not keywords also land here.
    Literal,
    /// One- or two-character punctuation/operators such as `{`, `.`, `!=`.
    Punctuation,
    /// A run of whitespace characters (kept by `tokenize`, dropped by
    /// `tokenize_filtered`).
    Whitespace,
    /// A `#` comment extending to the end of the line.
    Comment,
    /// Synthetic end-of-input marker appended by the tokenizer.
    Eof,
}
29
/// A single lexical token: its category, raw text, and start position.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// Lexical category of this token.
    pub kind: TokenKind,
    /// The raw text of the token exactly as it appeared in the input.
    pub value: String,
    /// Index of the token's first character in the input's character
    /// sequence (a char index, not a byte offset — see `Tokenizer::tokenize`).
    pub position: usize,
}
40
41impl Token {
42 pub fn new(kind: TokenKind, value: impl Into<String>, position: usize) -> Self {
44 Token {
45 kind,
46 value: value.into(),
47 position,
48 }
49 }
50}
51
/// A cursor over a token vector, used in a persistent/functional style.
///
/// Parsers take a `TokenStream` by value and return the advanced stream;
/// backtracking is done by cloning a snapshot first (see `optional`, `many0`).
#[derive(Debug, Clone)]
pub struct TokenStream {
    // All tokens, including any trailing `Eof` marker.
    tokens: Vec<Token>,
    // Index of the next token to be yielded by `peek`/`next`.
    pos: usize,
}
58
/// Outcome of a parser: the parsed value plus the remaining stream, or a `ParseError`.
pub type ParseResult<T> = Result<(T, TokenStream), ParseError>;
61
/// Error produced by the tokenizer or by a parser combinator.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what was expected and what was found.
    pub message: String,
    /// Input position the error refers to (0 when no token position is known,
    /// e.g. at end of stream).
    pub position: usize,
}
70
71impl ParseError {
72 pub fn new(message: impl Into<String>, position: usize) -> Self {
74 ParseError {
75 message: message.into(),
76 position,
77 }
78 }
79}
80
81impl std::fmt::Display for ParseError {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 write!(f, "parse error at {}: {}", self.position, self.message)
84 }
85}
86
// Marker impl: ParseError carries no source error, so the default methods suffice.
impl std::error::Error for ParseError {}
88
89impl TokenStream {
92 pub fn new(tokens: Vec<Token>) -> Self {
94 TokenStream { tokens, pos: 0 }
95 }
96
97 pub fn peek(&self) -> Option<&Token> {
99 self.tokens.get(self.pos)
100 }
101
102 pub fn next(mut self) -> (Option<Token>, TokenStream) {
104 if self.pos < self.tokens.len() {
105 let tok = self.tokens[self.pos].clone();
106 self.pos += 1;
107 (Some(tok), self)
108 } else {
109 (None, self)
110 }
111 }
112
113 pub fn is_empty(&self) -> bool {
115 self.remaining() == 0
116 }
117
118 pub fn remaining(&self) -> usize {
120 let total = self.tokens.len();
121 if total == 0 {
122 return 0;
123 }
124 let remaining_tokens = &self.tokens[self.pos..];
126 remaining_tokens
127 .iter()
128 .filter(|t| t.kind != TokenKind::Eof)
129 .count()
130 }
131
132 pub fn position(&self) -> usize {
134 self.pos
135 }
136
137 pub fn byte_offset(&self) -> usize {
139 self.tokens.get(self.pos).map(|t| t.position).unwrap_or(0)
140 }
141}
142
143pub fn expect_keyword(stream: TokenStream, keyword: &str) -> ParseResult<()> {
147 match stream.peek() {
148 Some(tok) if tok.kind == TokenKind::Keyword && tok.value.eq_ignore_ascii_case(keyword) => {
149 let (_, rest) = stream.next();
150 Ok(((), rest))
151 }
152 Some(tok) => Err(ParseError::new(
153 format!(
154 "expected keyword '{}', found {:?} '{}'",
155 keyword, tok.kind, tok.value
156 ),
157 tok.position,
158 )),
159 None => Err(ParseError::new(
160 format!("expected keyword '{}', reached end of stream", keyword),
161 0,
162 )),
163 }
164}
165
166pub fn expect_iri(stream: TokenStream) -> ParseResult<String> {
168 match stream.peek() {
169 Some(tok) if tok.kind == TokenKind::Iri => {
170 let value = tok.value.clone();
171 let (_, rest) = stream.next();
172 Ok((value, rest))
173 }
174 Some(tok) if tok.kind == TokenKind::PrefixedName => {
175 let value = tok.value.clone();
176 let (_, rest) = stream.next();
177 Ok((value, rest))
178 }
179 Some(tok) => Err(ParseError::new(
180 format!("expected IRI, found {:?} '{}'", tok.kind, tok.value),
181 tok.position,
182 )),
183 None => Err(ParseError::new("expected IRI, reached end of stream", 0)),
184 }
185}
186
187pub fn expect_variable(stream: TokenStream) -> ParseResult<String> {
189 match stream.peek() {
190 Some(tok) if tok.kind == TokenKind::Variable => {
191 let value = tok.value.clone();
192 let (_, rest) = stream.next();
193 Ok((value, rest))
194 }
195 Some(tok) => Err(ParseError::new(
196 format!("expected variable, found {:?} '{}'", tok.kind, tok.value),
197 tok.position,
198 )),
199 None => Err(ParseError::new(
200 "expected variable, reached end of stream",
201 0,
202 )),
203 }
204}
205
206pub fn optional<T, F>(stream: TokenStream, f: F) -> ParseResult<Option<T>>
208where
209 F: Fn(TokenStream) -> ParseResult<T>,
210{
211 let snapshot = stream.clone();
212 match f(stream) {
213 Ok((value, rest)) => Ok((Some(value), rest)),
214 Err(_) => Ok((None, snapshot)),
215 }
216}
217
218pub fn many0<T, F>(stream: TokenStream, f: F) -> ParseResult<Vec<T>>
221where
222 F: Fn(TokenStream) -> ParseResult<T>,
223{
224 let mut results = Vec::new();
225 let mut current = stream;
226 loop {
227 let snapshot = current.clone();
228 match f(current) {
229 Ok((value, rest)) => {
230 results.push(value);
231 current = rest;
232 }
233 Err(_) => {
234 current = snapshot;
235 break;
236 }
237 }
238 }
239 Ok((results, current))
240}
241
242pub fn choice<T>(
244 stream: TokenStream,
245 parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<T>>>,
246) -> ParseResult<T> {
247 let mut last_err = ParseError::new("no alternatives in choice", stream.byte_offset());
248 for parser in &parsers {
249 let snapshot = stream.clone();
250 match parser(snapshot) {
251 Ok(result) => return Ok(result),
252 Err(e) => last_err = e,
253 }
254 }
255 Err(last_err)
256}
257
/// Reserved words recognized by the tokenizer, matched case-insensitively.
///
/// Covers SPARQL query and update keywords plus common built-in function
/// names; bare words not in this list are tokenized as `TokenKind::Literal`.
const SPARQL_KEYWORDS: &[&str] = &[
    "BASE",
    "PREFIX",
    "SELECT",
    "DISTINCT",
    "REDUCED",
    "CONSTRUCT",
    "DESCRIBE",
    "ASK",
    "FROM",
    "NAMED",
    "WHERE",
    "ORDER",
    "BY",
    "ASC",
    "DESC",
    "LIMIT",
    "OFFSET",
    "HAVING",
    "GROUP",
    "UNION",
    "OPTIONAL",
    "MINUS",
    "GRAPH",
    "SERVICE",
    "BIND",
    "VALUES",
    "FILTER",
    "EXISTS",
    "NOT",
    "IN",
    "AS",
    "SEPARATOR",
    "COUNT",
    "SUM",
    "MIN",
    "MAX",
    "AVG",
    "SAMPLE",
    "REGEX",
    "LANG",
    "DATATYPE",
    "IRI",
    "URI",
    "BNODE",
    "STR",
    "STRDT",
    "STRLANG",
    "TRUE",
    "FALSE",
    "UNDEF",
    "LOAD",
    "CLEAR",
    "DROP",
    "CREATE",
    "ADD",
    "MOVE",
    "COPY",
    "INSERT",
    "DELETE",
    "WITH",
    "USING",
    "DATA",
    "INTO",
    "ALL",
    "DEFAULT",
    "SILENT",
    "UPDATE",
    "SPARQL",
];
331
/// Namespace for the tokenizer entry points (`tokenize`, `tokenize_filtered`).
pub struct Tokenizer;
334
335impl Tokenizer {
336 pub fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
338 let mut tokens = Vec::new();
339 let chars: Vec<char> = input.chars().collect();
340 let mut i = 0;
341
342 while i < chars.len() {
343 let start = i;
344 let ch = chars[i];
345
346 if ch.is_whitespace() {
348 let mut end = i;
349 while end < chars.len() && chars[end].is_whitespace() {
350 end += 1;
351 }
352 let value: String = chars[start..end].iter().collect();
353 tokens.push(Token::new(TokenKind::Whitespace, value, start));
354 i = end;
355 continue;
356 }
357
358 if ch == '#' {
360 let mut end = i;
361 while end < chars.len() && chars[end] != '\n' {
362 end += 1;
363 }
364 let value: String = chars[start..end].iter().collect();
365 tokens.push(Token::new(TokenKind::Comment, value, start));
366 i = end;
367 continue;
368 }
369
370 if ch == '<' && !(i + 1 < chars.len() && chars[i + 1] == '=') {
372 let mut end = i + 1;
373 while end < chars.len() && chars[end] != '>' {
374 if chars[end] == '\n' || chars[end] == ' ' {
375 return Err(ParseError::new(
376 "unterminated IRI: unexpected whitespace inside angle brackets",
377 start,
378 ));
379 }
380 end += 1;
381 }
382 if end >= chars.len() {
383 return Err(ParseError::new("unterminated IRI: missing '>'", start));
384 }
385 end += 1; let value: String = chars[start..end].iter().collect();
387 tokens.push(Token::new(TokenKind::Iri, value, start));
388 i = end;
389 continue;
390 }
391
392 if ch == '"' || ch == '\'' {
394 let quote = ch;
395 let triple = i + 2 < chars.len() && chars[i + 1] == quote && chars[i + 2] == quote;
397 let (delim_len, close_seq): (usize, Vec<char>) = if triple {
398 (3, vec![quote, quote, quote])
399 } else {
400 (1, vec![quote])
401 };
402 let mut end = i + delim_len;
403 loop {
404 if end + close_seq.len() > chars.len() {
405 return Err(ParseError::new("unterminated string literal", start));
406 }
407 let window: Vec<char> = chars[end..end + close_seq.len()].to_vec();
408 if window == close_seq {
409 end += close_seq.len();
410 break;
411 }
412 if chars[end] == '\\' {
413 end += 2; } else {
415 end += 1;
416 }
417 }
418 if end < chars.len() && chars[end] == '@' {
420 end += 1;
421 while end < chars.len() && (chars[end].is_alphanumeric() || chars[end] == '-') {
422 end += 1;
423 }
424 } else if end + 1 < chars.len() && chars[end] == '^' && chars[end + 1] == '^' {
425 end += 2;
426 if end < chars.len() && chars[end] == '<' {
427 while end < chars.len() && chars[end] != '>' {
428 end += 1;
429 }
430 if end < chars.len() {
431 end += 1;
432 }
433 } else {
434 while end < chars.len()
436 && (chars[end].is_alphanumeric()
437 || chars[end] == ':'
438 || chars[end] == '_')
439 {
440 end += 1;
441 }
442 }
443 }
444 let value: String = chars[start..end].iter().collect();
445 tokens.push(Token::new(TokenKind::Literal, value, start));
446 i = end;
447 continue;
448 }
449
450 if ch == '?' || ch == '$' {
452 let mut end = i + 1;
453 while end < chars.len() && (chars[end].is_alphanumeric() || chars[end] == '_') {
454 end += 1;
455 }
456 let value: String = chars[start..end].iter().collect();
457 tokens.push(Token::new(TokenKind::Variable, value, start));
458 i = end;
459 continue;
460 }
461
462 if ch.is_ascii_digit()
464 || (ch == '-' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit())
465 {
466 let mut end = i;
467 if chars[end] == '-' {
468 end += 1;
469 }
470 while end < chars.len() && chars[end].is_ascii_digit() {
471 end += 1;
472 }
473 if end < chars.len() && chars[end] == '.' {
474 end += 1;
475 while end < chars.len() && chars[end].is_ascii_digit() {
476 end += 1;
477 }
478 }
479 if end < chars.len() && (chars[end] == 'e' || chars[end] == 'E') {
481 end += 1;
482 if end < chars.len() && (chars[end] == '+' || chars[end] == '-') {
483 end += 1;
484 }
485 while end < chars.len() && chars[end].is_ascii_digit() {
486 end += 1;
487 }
488 }
489 let value: String = chars[start..end].iter().collect();
490 tokens.push(Token::new(TokenKind::Literal, value, start));
491 i = end;
492 continue;
493 }
494
495 if ch.is_alphabetic() || ch == '_' {
497 let mut end = i;
498 while end < chars.len()
499 && (chars[end].is_alphanumeric() || chars[end] == '_' || chars[end] == '-')
500 {
501 end += 1;
502 }
503 let word: String = chars[start..end].iter().collect();
504
505 if end < chars.len() && chars[end] == ':' {
507 end += 1; while end < chars.len()
510 && (chars[end].is_alphanumeric()
511 || chars[end] == '_'
512 || chars[end] == '-'
513 || chars[end] == '.')
514 {
515 end += 1;
516 }
517 let full: String = chars[start..end].iter().collect();
518 tokens.push(Token::new(TokenKind::PrefixedName, full, start));
519 i = end;
520 continue;
521 }
522
523 if SPARQL_KEYWORDS
525 .iter()
526 .any(|kw| kw.eq_ignore_ascii_case(&word))
527 {
528 tokens.push(Token::new(TokenKind::Keyword, word, start));
529 } else {
530 tokens.push(Token::new(TokenKind::Literal, word, start));
532 }
533 i = end;
534 continue;
535 }
536
537 let punct_chars: &[char] = &[
539 '{', '}', '(', ')', '[', ']', '.', ',', ';', '|', '/', '^', '+', '*', '!', '=',
540 '<', '>', '&', '@',
541 ];
542 if punct_chars.contains(&ch) {
543 let two: String = if i + 1 < chars.len() {
545 chars[i..i + 2].iter().collect()
546 } else {
547 String::new()
548 };
549 if matches!(two.as_str(), "!=" | "<=" | ">=" | "&&" | "||" | "^^") {
550 tokens.push(Token::new(TokenKind::Punctuation, two, start));
551 i += 2;
552 } else {
553 tokens.push(Token::new(TokenKind::Punctuation, ch.to_string(), start));
554 i += 1;
555 }
556 continue;
557 }
558
559 return Err(ParseError::new(
560 format!("unexpected character '{}'", ch),
561 start,
562 ));
563 }
564
565 tokens.push(Token::new(TokenKind::Eof, "", input.len()));
566 Ok(tokens)
567 }
568
569 pub fn tokenize_filtered(input: &str) -> Result<Vec<Token>, ParseError> {
571 let tokens = Self::tokenize(input)?;
572 Ok(tokens
573 .into_iter()
574 .filter(|t| t.kind != TokenKind::Whitespace && t.kind != TokenKind::Comment)
575 .collect())
576 }
577}
578
#[cfg(test)]
mod tests {
    use super::*;

    // --- Tokenizer: token kinds and positions ---

    #[test]
    fn test_tokenize_keyword_select() {
        let tokens = Tokenizer::tokenize("SELECT").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        assert_eq!(tokens[0].value, "SELECT");
    }

    #[test]
    fn test_tokenize_keyword_case_insensitive() {
        let tokens = Tokenizer::tokenize("select").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_where() {
        let tokens = Tokenizer::tokenize("WHERE").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        assert_eq!(tokens[0].value, "WHERE");
    }

    #[test]
    fn test_tokenize_keyword_prefix() {
        let tokens = Tokenizer::tokenize("PREFIX").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_optional() {
        let tokens = Tokenizer::tokenize("OPTIONAL").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_iri() {
        let tokens = Tokenizer::tokenize("<http://example.org/foo>").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Iri);
        assert_eq!(tokens[0].value, "<http://example.org/foo>");
        assert_eq!(tokens[0].position, 0);
    }

    #[test]
    fn test_tokenize_iri_position() {
        let tokens = Tokenizer::tokenize("  <http://example.org/>").expect("valid SPARQL input");
        let iri = tokens
            .iter()
            .find(|t| t.kind == TokenKind::Iri)
            .expect("should find element");
        assert_eq!(iri.position, 2);
    }

    #[test]
    fn test_tokenize_variable_question_mark() {
        let tokens = Tokenizer::tokenize("?name").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Variable);
        assert_eq!(tokens[0].value, "?name");
    }

    #[test]
    fn test_tokenize_variable_dollar() {
        let tokens = Tokenizer::tokenize("$subject").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Variable);
        assert_eq!(tokens[0].value, "$subject");
    }

    #[test]
    fn test_tokenize_string_literal_double_quote() {
        let tokens = Tokenizer::tokenize("\"hello\"").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "\"hello\"");
    }

    #[test]
    fn test_tokenize_string_literal_single_quote() {
        let tokens = Tokenizer::tokenize("'world'").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
    }

    #[test]
    fn test_tokenize_numeric_literal_integer() {
        let tokens = Tokenizer::tokenize("42").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "42");
    }

    #[test]
    fn test_tokenize_numeric_literal_float() {
        let tokens = Tokenizer::tokenize("3.14").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "3.14");
    }

    #[test]
    fn test_tokenize_prefixed_name() {
        let tokens = Tokenizer::tokenize("rdf:type").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::PrefixedName);
        assert_eq!(tokens[0].value, "rdf:type");
    }

    #[test]
    fn test_tokenize_prefixed_name_empty_local() {
        // A prefixed name with an empty local part ("ex:") is still valid.
        let tokens = Tokenizer::tokenize("ex:").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::PrefixedName);
    }

    #[test]
    fn test_tokenize_punctuation_brace() {
        let tokens = Tokenizer::tokenize("{").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "{");
    }

    #[test]
    fn test_tokenize_punctuation_dot() {
        let tokens = Tokenizer::tokenize(".").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, ".");
    }

    #[test]
    fn test_tokenize_whitespace() {
        let tokens = Tokenizer::tokenize("   ").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Whitespace);
    }

    #[test]
    fn test_tokenize_comment() {
        let tokens = Tokenizer::tokenize("# this is a comment\n").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Comment);
        assert!(tokens[0].value.starts_with('#'));
    }

    #[test]
    fn test_tokenize_eof_appended() {
        let tokens = Tokenizer::tokenize("SELECT").expect("valid SPARQL input");
        assert_eq!(
            tokens.last().expect("collection should not be empty").kind,
            TokenKind::Eof
        );
    }

    #[test]
    fn test_tokenize_multiple_tokens() {
        let tokens =
            Tokenizer::tokenize_filtered("SELECT ?x WHERE { ?x rdf:type <http://a.org/A> }")
                .expect("operation should succeed");
        let kinds: Vec<&TokenKind> = tokens.iter().map(|t| &t.kind).collect();
        assert!(kinds.contains(&&TokenKind::Keyword));
        assert!(kinds.contains(&&TokenKind::Variable));
        assert!(kinds.contains(&&TokenKind::PrefixedName));
        assert!(kinds.contains(&&TokenKind::Iri));
        assert!(kinds.contains(&&TokenKind::Punctuation));
    }

    #[test]
    fn test_tokenize_filtered_removes_whitespace() {
        let all = Tokenizer::tokenize("SELECT ?x").expect("valid SPARQL input");
        let filtered = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        assert!(all.len() > filtered.len());
        assert!(!filtered.iter().any(|t| t.kind == TokenKind::Whitespace));
    }

    #[test]
    fn test_tokenize_filtered_removes_comments() {
        let filtered =
            Tokenizer::tokenize_filtered("SELECT # comment\n?x").expect("valid SPARQL input");
        assert!(!filtered.iter().any(|t| t.kind == TokenKind::Comment));
    }

    #[test]
    fn test_tokenize_string_with_language_tag() {
        let tokens = Tokenizer::tokenize("\"hello\"@en").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert!(tokens[0].value.contains("@en"));
    }

    #[test]
    fn test_tokenize_unterminated_iri_error() {
        let result = Tokenizer::tokenize("<http://unclosed");
        assert!(result.is_err());
    }

    // --- TokenStream cursor behavior ---

    #[test]
    fn test_stream_peek_first_token() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let tok = stream.peek().expect("stream should have tokens");
        assert_eq!(tok.kind, TokenKind::Keyword);
    }

    #[test]
    fn test_stream_peek_empty() {
        let stream = TokenStream::new(vec![]);
        assert!(stream.peek().is_none());
    }

    #[test]
    fn test_stream_next_advances() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (tok, rest) = stream.next();
        assert!(tok.is_some());
        assert_eq!(tok.expect("should have value").kind, TokenKind::Keyword);
        let (tok2, _) = rest.next();
        assert_eq!(
            tok2.expect("should have second token").kind,
            TokenKind::Variable
        );
    }

    #[test]
    fn test_stream_remaining_count() {
        // remaining() excludes the trailing Eof token from the count.
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert_eq!(stream.remaining(), 3);
    }

    #[test]
    fn test_stream_is_empty_after_consuming_all() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        // Consume the keyword, then the Eof marker; the stream is then empty.
        let (_, rest) = stream.next();
        let (_, rest2) = rest.next();
        assert!(rest2.is_empty());
    }

    #[test]
    fn test_stream_position_zero_initially() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert_eq!(stream.position(), 0);
    }

    #[test]
    fn test_stream_position_advances() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = stream.next();
        assert_eq!(rest.position(), 1);
    }

    // --- expect_keyword ---

    #[test]
    fn test_expect_keyword_success() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_ok());
    }

    #[test]
    fn test_expect_keyword_case_insensitive() {
        let tokens = Tokenizer::tokenize_filtered("select").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert!(expect_keyword(stream, "SELECT").is_ok());
    }

    #[test]
    fn test_expect_keyword_wrong_keyword() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("SELECT"));
    }

    #[test]
    fn test_expect_keyword_not_a_keyword() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_err());
    }

    #[test]
    fn test_expect_keyword_consumes_token() {
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = expect_keyword(stream, "SELECT").expect("keyword parse should succeed");
        assert!(expect_keyword(rest, "WHERE").is_ok());
    }

    // --- expect_iri ---

    #[test]
    fn test_expect_iri_success() {
        let tokens =
            Tokenizer::tokenize_filtered("<http://example.org/>").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_ok());
        assert_eq!(
            result.expect("should have value").0,
            "<http://example.org/>"
        );
    }

    #[test]
    fn test_expect_iri_prefixed_name() {
        // Prefixed names are accepted wherever a full IRI is expected.
        let tokens = Tokenizer::tokenize_filtered("rdf:type").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_ok());
    }

    #[test]
    fn test_expect_iri_failure_on_variable() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_err());
    }

    // --- expect_variable ---

    #[test]
    fn test_expect_variable_success() {
        let tokens = Tokenizer::tokenize_filtered("?subject").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_ok());
        assert_eq!(result.expect("should have value").0, "?subject");
    }

    #[test]
    fn test_expect_variable_dollar_prefix() {
        let tokens = Tokenizer::tokenize_filtered("$pred").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_ok());
        assert_eq!(result.expect("should have value").0, "$pred");
    }

    #[test]
    fn test_expect_variable_failure_on_keyword() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_err());
    }

    // --- optional combinator ---

    #[test]
    fn test_optional_hit() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, _) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert!(result.is_some());
    }

    #[test]
    fn test_optional_miss_returns_none() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, rest) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert!(result.is_none());
        assert_eq!(rest.position(), 0);
    }

    #[test]
    fn test_optional_miss_does_not_advance_stream() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let pos_before = stream.position();
        let (_, rest) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert_eq!(rest.position(), pos_before);
    }

    // --- many0 combinator ---

    #[test]
    fn test_many0_zero_matches() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, rest) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 0);
        assert_eq!(rest.position(), 0);
    }

    #[test]
    fn test_many0_one_match() {
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, _) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_many0_multiple_matches() {
        let tokens =
            Tokenizer::tokenize_filtered("SELECT SELECT SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, rest) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 3);
        assert!(rest.remaining() >= 1);
    }

    #[test]
    fn test_many0_variables() {
        let tokens = Tokenizer::tokenize_filtered("?a ?b ?c WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (vars, _) = many0(stream, expect_variable).expect("repetition parse should succeed");
        assert_eq!(vars.len(), 3);
        assert_eq!(vars[0], "?a");
        assert_eq!(vars[1], "?b");
        assert_eq!(vars[2], "?c");
    }

    // --- choice combinator ---

    #[test]
    fn test_choice_first_alternative() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        let (result, _) = choice(stream, parsers).expect("choice parse should succeed");
        assert_eq!(result, "SELECT");
    }

    #[test]
    fn test_choice_second_alternative() {
        let tokens = Tokenizer::tokenize_filtered("ASK").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        let (result, _) = choice(stream, parsers).expect("choice parse should succeed");
        assert_eq!(result, "ASK");
    }

    #[test]
    fn test_choice_no_match_returns_error() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        assert!(choice(stream, parsers).is_err());
    }

    #[test]
    fn test_choice_empty_parsers_returns_error() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<String>>> = vec![];
        assert!(choice(stream, parsers).is_err());
    }

    // --- error reporting ---

    #[test]
    fn test_parse_error_position() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let err = expect_keyword(stream, "SELECT").unwrap_err();
        assert_eq!(err.position, 0);
    }

    #[test]
    fn test_parse_error_message_contains_expected() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let err = expect_keyword(stream, "SELECT").unwrap_err();
        assert!(err.message.contains("SELECT"));
    }

    #[test]
    fn test_parse_error_display() {
        let err = ParseError::new("test error", 42);
        let display = format!("{}", err);
        assert!(display.contains("42"));
        assert!(display.contains("test error"));
    }

    // --- small end-to-end parses ---

    #[test]
    fn test_parse_simple_triple_pattern() {
        let tokens = Tokenizer::tokenize_filtered("?s rdf:type ?o").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);

        let (subj, rest) = expect_variable(stream).expect("variable parse should succeed");
        let (pred, rest) = expect_iri(rest).expect("IRI parse should succeed");
        let (obj, _) = expect_variable(rest).expect("variable parse should succeed");

        assert_eq!(subj, "?s");
        assert_eq!(pred, "rdf:type");
        assert_eq!(obj, "?o");
    }

    #[test]
    fn test_parse_select_query_skeleton() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = expect_keyword(stream, "SELECT").expect("keyword parse should succeed");
        let (vars, rest) = many0(rest, expect_variable).expect("repetition parse should succeed");
        let (_, _) = expect_keyword(rest, "WHERE").expect("keyword parse should succeed");
        assert_eq!(vars, vec!["?x"]);
    }

    #[test]
    fn test_token_new() {
        let tok = Token::new(TokenKind::Keyword, "SELECT", 0);
        assert_eq!(tok.kind, TokenKind::Keyword);
        assert_eq!(tok.value, "SELECT");
        assert_eq!(tok.position, 0);
    }

    #[test]
    fn test_parse_error_new() {
        let err = ParseError::new("oops", 5);
        assert_eq!(err.position, 5);
        assert_eq!(err.message, "oops");
    }

    #[test]
    fn test_tokenize_two_char_operator_neq() {
        let tokens = Tokenizer::tokenize("!=").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "!=");
    }

    #[test]
    fn test_tokenize_two_char_operator_leq() {
        // "<=" must be punctuation, not the start of an IRI.
        let tokens = Tokenizer::tokenize("<=").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "<=");
    }

    #[test]
    fn test_tokenize_keyword_filter() {
        let tokens = Tokenizer::tokenize("FILTER").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_bind() {
        let tokens = Tokenizer::tokenize("BIND").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_stream_clone_independence() {
        // Cloning a stream must not share cursor state with the original.
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let clone = stream.clone();
        let (_, advanced) = stream.next();
        assert_eq!(clone.position(), 0);
        assert_eq!(advanced.position(), 1);
    }

    #[test]
    fn test_many0_with_iri() {
        let tokens = Tokenizer::tokenize_filtered("<http://a.org/> <http://b.org/> ?x")
            .expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (iris, rest) = many0(stream, expect_iri).expect("repetition parse should succeed");
        assert_eq!(iris.len(), 2);
        assert_eq!(iris[0], "<http://a.org/>");
        assert_eq!(iris[1], "<http://b.org/>");
        assert_eq!(
            rest.peek().expect("stream should have tokens").kind,
            TokenKind::Variable
        );
    }

    #[test]
    fn test_optional_iri_hit() {
        let tokens = Tokenizer::tokenize_filtered("<http://example.org/> WHERE")
            .expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, _) = optional(stream, expect_iri).expect("optional parse should succeed");
        assert!(result.is_some());
    }

    #[test]
    fn test_optional_variable_miss_on_keyword() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, rest) =
            optional(stream, expect_variable).expect("optional parse should succeed");
        assert!(result.is_none());
        assert_eq!(rest.position(), 0);
    }
}