/// Lexical categories produced by the SPARQL tokenizer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    /// Reserved word such as `SELECT` or `WHERE` (matched case-insensitively).
    Keyword,
    /// Full IRI in angle brackets, e.g. `<http://example.org/>`.
    Iri,
    /// Prefixed name such as `rdf:type` (local part may be empty).
    PrefixedName,
    /// Query variable introduced by `?` or `$`.
    Variable,
    /// String, numeric, or bare-word literal.
    Literal,
    /// One- or two-character punctuation/operator (`{`, `!=`, `^^`, ...).
    Punctuation,
    /// Run of consecutive whitespace characters.
    Whitespace,
    /// `#` comment extending to (but not including) the end of line.
    Comment,
    /// Synthetic end-of-input marker appended by the tokenizer.
    Eof,
}
/// A single lexeme: its category, exact source text, and where it started.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// Lexical category of this token.
    pub kind: TokenKind,
    /// Exact text as it appeared in the input, delimiters included.
    pub value: String,
    /// Character index (not byte offset) where the token begins.
    pub position: usize,
}
impl Token {
pub fn new(kind: TokenKind, value: impl Into<String>, position: usize) -> Self {
Token {
kind,
value: value.into(),
position,
}
}
}
/// Immutable cursor over a token vector. Parsing functions take the stream
/// by value and hand back the advanced stream, so backtracking is a `clone`.
#[derive(Debug, Clone)]
pub struct TokenStream {
    // Full token list; never mutated after construction.
    tokens: Vec<Token>,
    // Index of the next token to consume; invariant: pos <= tokens.len().
    pos: usize,
}
/// Outcome of a parser: the parsed value plus the remaining stream, or an error.
pub type ParseResult<T> = Result<(T, TokenStream), ParseError>;
/// Error produced by the tokenizer or a parser, carrying a human-readable
/// message and the character offset where the problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Description of what was expected / what went wrong.
    pub message: String,
    /// Character index in the input where the error occurred.
    pub position: usize,
}
impl ParseError {
pub fn new(message: impl Into<String>, position: usize) -> Self {
ParseError {
message: message.into(),
position,
}
}
}
impl std::fmt::Display for ParseError {
    /// Renders as `parse error at <position>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { message, position } = self;
        write!(f, "parse error at {}: {}", position, message)
    }
}
// Marker impl so ParseError can be used as a boxed `dyn Error`.
impl std::error::Error for ParseError {}
impl TokenStream {
    /// Wraps a token vector in a stream positioned at the first token.
    pub fn new(tokens: Vec<Token>) -> Self {
        TokenStream { tokens, pos: 0 }
    }

    /// Returns the token at the current position without consuming it.
    pub fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.pos)
    }

    /// Consumes the stream, returning the current token (if any) together
    /// with the advanced stream. Passing ownership keeps parsers purely
    /// functional; callers clone the stream when they need to backtrack.
    pub fn next(mut self) -> (Option<Token>, TokenStream) {
        match self.tokens.get(self.pos).cloned() {
            Some(tok) => {
                self.pos += 1;
                (Some(tok), self)
            }
            None => (None, self),
        }
    }

    /// True when no meaningful (non-EOF) tokens remain.
    pub fn is_empty(&self) -> bool {
        self.remaining() == 0
    }

    /// Counts the tokens left to consume, excluding the trailing `Eof`
    /// marker so an exhausted stream reports zero.
    pub fn remaining(&self) -> usize {
        // `pos` never exceeds `tokens.len()` (see `next`), so slicing is
        // always in bounds; the previous explicit empty-check was redundant.
        self.tokens[self.pos..]
            .iter()
            .filter(|t| t.kind != TokenKind::Eof)
            .count()
    }

    /// Index of the current token within the underlying token vector.
    pub fn position(&self) -> usize {
        self.pos
    }

    /// Source character offset of the current token, or 0 once the stream
    /// is exhausted.
    pub fn byte_offset(&self) -> usize {
        self.tokens.get(self.pos).map(|t| t.position).unwrap_or(0)
    }
}
/// Consumes one token if it is the given keyword (ASCII case-insensitive);
/// otherwise reports what was found instead.
pub fn expect_keyword(stream: TokenStream, keyword: &str) -> ParseResult<()> {
    let hit = matches!(
        stream.peek(),
        Some(t) if t.kind == TokenKind::Keyword && t.value.eq_ignore_ascii_case(keyword)
    );
    if hit {
        let (_, rest) = stream.next();
        return Ok(((), rest));
    }
    match stream.peek() {
        Some(tok) => Err(ParseError::new(
            format!(
                "expected keyword '{}', found {:?} '{}'",
                keyword, tok.kind, tok.value
            ),
            tok.position,
        )),
        None => Err(ParseError::new(
            format!("expected keyword '{}', reached end of stream", keyword),
            0,
        )),
    }
}
/// Consumes one token if it is an IRI or a prefixed name, yielding its text.
pub fn expect_iri(stream: TokenStream) -> ParseResult<String> {
    match stream.peek() {
        // Full IRIs and prefixed names are both acceptable here.
        Some(tok) if matches!(tok.kind, TokenKind::Iri | TokenKind::PrefixedName) => {
            let value = tok.value.clone();
            let (_, rest) = stream.next();
            Ok((value, rest))
        }
        Some(tok) => Err(ParseError::new(
            format!("expected IRI, found {:?} '{}'", tok.kind, tok.value),
            tok.position,
        )),
        None => Err(ParseError::new("expected IRI, reached end of stream", 0)),
    }
}
/// Consumes one token if it is a variable (`?x` / `$x`), yielding its text.
pub fn expect_variable(stream: TokenStream) -> ParseResult<String> {
    let err = match stream.peek() {
        Some(tok) if tok.kind == TokenKind::Variable => {
            let name = tok.value.clone();
            let (_, rest) = stream.next();
            return Ok((name, rest));
        }
        Some(tok) => ParseError::new(
            format!("expected variable, found {:?} '{}'", tok.kind, tok.value),
            tok.position,
        ),
        None => ParseError::new("expected variable, reached end of stream", 0),
    };
    Err(err)
}
/// Tries `f`; on success wraps the value in `Some`, on failure restores the
/// untouched stream and yields `None`. Never fails itself.
pub fn optional<T, F>(stream: TokenStream, f: F) -> ParseResult<Option<T>>
where
    F: Fn(TokenStream) -> ParseResult<T>,
{
    // Checkpoint taken up front so a failed attempt can be rolled back.
    let checkpoint = stream.clone();
    f(stream)
        .map(|(value, rest)| (Some(value), rest))
        .or(Ok((None, checkpoint)))
}
/// Applies `f` zero or more times, collecting every success. Stops at the
/// first failure and returns the stream as it was just before that attempt.
pub fn many0<T, F>(stream: TokenStream, f: F) -> ParseResult<Vec<T>>
where
    F: Fn(TokenStream) -> ParseResult<T>,
{
    let mut collected = Vec::new();
    let mut cursor = stream;
    loop {
        // Checkpoint lets us undo the final, failing attempt.
        let checkpoint = cursor.clone();
        match f(cursor) {
            Ok((item, rest)) => {
                collected.push(item);
                cursor = rest;
            }
            Err(_) => return Ok((collected, checkpoint)),
        }
    }
}
/// Tries each parser in order against a fresh clone of the stream and
/// returns the first success. On total failure, yields the error from the
/// last alternative (or a placeholder if the list was empty).
pub fn choice<T>(
    stream: TokenStream,
    parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<T>>>,
) -> ParseResult<T> {
    let mut failure = ParseError::new("no alternatives in choice", stream.byte_offset());
    for parser in parsers.iter() {
        match parser(stream.clone()) {
            ok @ Ok(_) => return ok,
            Err(e) => failure = e,
        }
    }
    Err(failure)
}
/// Reserved words the tokenizer classifies as `TokenKind::Keyword`
/// (matched ASCII case-insensitively). Covers SPARQL query/update keywords
/// and built-in function names.
const SPARQL_KEYWORDS: &[&str] = &[
    "BASE", "PREFIX", "SELECT", "DISTINCT", "REDUCED", "CONSTRUCT", "DESCRIBE", "ASK", "FROM",
    "NAMED", "WHERE", "ORDER", "BY", "ASC", "DESC", "LIMIT", "OFFSET", "HAVING", "GROUP", "UNION",
    "OPTIONAL", "MINUS", "GRAPH", "SERVICE", "BIND", "VALUES", "FILTER", "EXISTS", "NOT", "IN",
    "AS", "SEPARATOR", "COUNT", "SUM", "MIN", "MAX", "AVG", "SAMPLE", "REGEX", "LANG", "DATATYPE",
    "IRI", "URI", "BNODE", "STR", "STRDT", "STRLANG", "TRUE", "FALSE", "UNDEF", "LOAD", "CLEAR",
    "DROP", "CREATE", "ADD", "MOVE", "COPY", "INSERT", "DELETE", "WITH", "USING", "DATA", "INTO",
    "ALL", "DEFAULT", "SILENT", "UPDATE", "SPARQL",
];
/// Namespace-only unit struct grouping the tokenizing entry points.
pub struct Tokenizer;
impl Tokenizer {
    /// Splits `input` into SPARQL tokens, keeping whitespace and comment
    /// tokens, and appends a final `Eof` marker.
    ///
    /// All recorded `position` values are *character* indices into the
    /// input (the scanner works over a `Vec<char>`), not byte offsets.
    ///
    /// # Errors
    /// Fails on an unterminated IRI, whitespace inside an IRI, an
    /// unterminated string literal, or a character that starts no token.
    pub fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
        let mut tokens = Vec::new();
        let chars: Vec<char> = input.chars().collect();
        let mut i = 0;
        while i < chars.len() {
            let start = i;
            let ch = chars[i];
            // Whitespace: collapse a run into one token.
            if ch.is_whitespace() {
                let mut end = i;
                while end < chars.len() && chars[end].is_whitespace() {
                    end += 1;
                }
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Whitespace, value, start));
                i = end;
                continue;
            }
            // Comment: '#' through end of line (newline not consumed).
            if ch == '#' {
                let mut end = i;
                while end < chars.len() && chars[end] != '\n' {
                    end += 1;
                }
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Comment, value, start));
                i = end;
                continue;
            }
            // IRI: '<...>'. A '<' followed by '=' is the comparison
            // operator and falls through to the punctuation branch.
            if ch == '<' && !(i + 1 < chars.len() && chars[i + 1] == '=') {
                let mut end = i + 1;
                while end < chars.len() && chars[end] != '>' {
                    if chars[end] == '\n' || chars[end] == ' ' {
                        return Err(ParseError::new(
                            "unterminated IRI: unexpected whitespace inside angle brackets",
                            start,
                        ));
                    }
                    end += 1;
                }
                if end >= chars.len() {
                    return Err(ParseError::new("unterminated IRI: missing '>'", start));
                }
                end += 1; // include the closing '>'
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Iri, value, start));
                i = end;
                continue;
            }
            // String literal: single- or triple-quoted, with an optional
            // language tag (@en) or datatype (^^<iri> / ^^prefixed) suffix
            // folded into the same Literal token.
            if ch == '"' || ch == '\'' {
                let quote = ch;
                let triple = i + 2 < chars.len() && chars[i + 1] == quote && chars[i + 2] == quote;
                let (delim_len, close_seq): (usize, Vec<char>) = if triple {
                    (3, vec![quote, quote, quote])
                } else {
                    (1, vec![quote])
                };
                let mut end = i + delim_len;
                loop {
                    if end + close_seq.len() > chars.len() {
                        return Err(ParseError::new("unterminated string literal", start));
                    }
                    let window: Vec<char> = chars[end..end + close_seq.len()].to_vec();
                    if window == close_seq {
                        end += close_seq.len();
                        break;
                    }
                    if chars[end] == '\\' {
                        end += 2; // skip the escaped character
                    } else {
                        end += 1;
                    }
                }
                // Optional suffix: language tag or datatype annotation.
                if end < chars.len() && chars[end] == '@' {
                    end += 1;
                    while end < chars.len() && (chars[end].is_alphanumeric() || chars[end] == '-') {
                        end += 1;
                    }
                } else if end + 1 < chars.len() && chars[end] == '^' && chars[end + 1] == '^' {
                    end += 2;
                    if end < chars.len() && chars[end] == '<' {
                        // Datatype given as a full IRI.
                        while end < chars.len() && chars[end] != '>' {
                            end += 1;
                        }
                        if end < chars.len() {
                            end += 1;
                        }
                    } else {
                        // Datatype given as a prefixed name.
                        while end < chars.len()
                            && (chars[end].is_alphanumeric()
                                || chars[end] == ':'
                                || chars[end] == '_')
                        {
                            end += 1;
                        }
                    }
                }
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Literal, value, start));
                i = end;
                continue;
            }
            // Variable: '?' or '$' followed by alphanumerics/underscores.
            if ch == '?' || ch == '$' {
                let mut end = i + 1;
                while end < chars.len() && (chars[end].is_alphanumeric() || chars[end] == '_') {
                    end += 1;
                }
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Variable, value, start));
                i = end;
                continue;
            }
            // Numeric literal: optional leading '-', digits, optional
            // fractional part and optional exponent.
            if ch.is_ascii_digit()
                || (ch == '-' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit())
            {
                let mut end = i;
                if chars[end] == '-' {
                    end += 1;
                }
                while end < chars.len() && chars[end].is_ascii_digit() {
                    end += 1;
                }
                if end < chars.len() && chars[end] == '.' {
                    end += 1;
                    while end < chars.len() && chars[end].is_ascii_digit() {
                        end += 1;
                    }
                }
                if end < chars.len() && (chars[end] == 'e' || chars[end] == 'E') {
                    end += 1;
                    if end < chars.len() && (chars[end] == '+' || chars[end] == '-') {
                        end += 1;
                    }
                    while end < chars.len() && chars[end].is_ascii_digit() {
                        end += 1;
                    }
                }
                let value: String = chars[start..end].iter().collect();
                tokens.push(Token::new(TokenKind::Literal, value, start));
                i = end;
                continue;
            }
            // Word: becomes a prefixed name when followed by ':', otherwise
            // a keyword if in SPARQL_KEYWORDS, otherwise a bare literal.
            if ch.is_alphabetic() || ch == '_' {
                let mut end = i;
                while end < chars.len()
                    && (chars[end].is_alphanumeric() || chars[end] == '_' || chars[end] == '-')
                {
                    end += 1;
                }
                let word: String = chars[start..end].iter().collect();
                if end < chars.len() && chars[end] == ':' {
                    // Consume ':' plus the (possibly empty) local part.
                    end += 1;
                    while end < chars.len()
                        && (chars[end].is_alphanumeric()
                            || chars[end] == '_'
                            || chars[end] == '-'
                            || chars[end] == '.')
                    {
                        end += 1;
                    }
                    let full: String = chars[start..end].iter().collect();
                    tokens.push(Token::new(TokenKind::PrefixedName, full, start));
                    i = end;
                    continue;
                }
                if SPARQL_KEYWORDS
                    .iter()
                    .any(|kw| kw.eq_ignore_ascii_case(&word))
                {
                    tokens.push(Token::new(TokenKind::Keyword, word, start));
                } else {
                    tokens.push(Token::new(TokenKind::Literal, word, start));
                }
                i = end;
                continue;
            }
            // Punctuation/operators; two-character operators take priority
            // over their one-character prefixes.
            let punct_chars: &[char] = &[
                '{', '}', '(', ')', '[', ']', '.', ',', ';', '|', '/', '^', '+', '*', '!', '=',
                '<', '>', '&', '@',
            ];
            if punct_chars.contains(&ch) {
                let two: String = if i + 1 < chars.len() {
                    chars[i..i + 2].iter().collect()
                } else {
                    String::new()
                };
                if matches!(two.as_str(), "!=" | "<=" | ">=" | "&&" | "||" | "^^") {
                    tokens.push(Token::new(TokenKind::Punctuation, two, start));
                    i += 2;
                } else {
                    tokens.push(Token::new(TokenKind::Punctuation, ch.to_string(), start));
                    i += 1;
                }
                continue;
            }
            return Err(ParseError::new(
                format!("unexpected character '{}'", ch),
                start,
            ));
        }
        // Position the EOF marker at the character count, not `input.len()`
        // (a byte count): every other token position is a char index, and
        // the two differ on non-ASCII input.
        tokens.push(Token::new(TokenKind::Eof, "", chars.len()));
        Ok(tokens)
    }

    /// Like `tokenize`, but drops whitespace and comment tokens — the form
    /// parsers normally consume.
    pub fn tokenize_filtered(input: &str) -> Result<Vec<Token>, ParseError> {
        let tokens = Self::tokenize(input)?;
        Ok(tokens
            .into_iter()
            .filter(|t| t.kind != TokenKind::Whitespace && t.kind != TokenKind::Comment)
            .collect())
    }
}
// Unit tests for the tokenizer, the token stream, and the parser combinators.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Tokenizer: keyword recognition ---

    #[test]
    fn test_tokenize_keyword_select() {
        let tokens = Tokenizer::tokenize("SELECT").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        assert_eq!(tokens[0].value, "SELECT");
    }

    #[test]
    fn test_tokenize_keyword_case_insensitive() {
        let tokens = Tokenizer::tokenize("select").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_where() {
        let tokens = Tokenizer::tokenize("WHERE").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
        assert_eq!(tokens[0].value, "WHERE");
    }

    #[test]
    fn test_tokenize_keyword_prefix() {
        let tokens = Tokenizer::tokenize("PREFIX").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_optional() {
        let tokens = Tokenizer::tokenize("OPTIONAL").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    // --- Tokenizer: IRIs ---

    #[test]
    fn test_tokenize_iri() {
        let tokens = Tokenizer::tokenize("<http://example.org/foo>").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Iri);
        assert_eq!(tokens[0].value, "<http://example.org/foo>");
        assert_eq!(tokens[0].position, 0);
    }

    #[test]
    fn test_tokenize_iri_position() {
        // Leading whitespace shifts the IRI's recorded start position.
        let tokens = Tokenizer::tokenize("  <http://example.org/>").expect("valid SPARQL input");
        let iri = tokens
            .iter()
            .find(|t| t.kind == TokenKind::Iri)
            .expect("should find element");
        assert_eq!(iri.position, 2);
    }

    // --- Tokenizer: variables ---

    #[test]
    fn test_tokenize_variable_question_mark() {
        let tokens = Tokenizer::tokenize("?name").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Variable);
        assert_eq!(tokens[0].value, "?name");
    }

    #[test]
    fn test_tokenize_variable_dollar() {
        let tokens = Tokenizer::tokenize("$subject").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Variable);
        assert_eq!(tokens[0].value, "$subject");
    }

    // --- Tokenizer: literals ---

    #[test]
    fn test_tokenize_string_literal_double_quote() {
        let tokens = Tokenizer::tokenize("\"hello\"").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "\"hello\"");
    }

    #[test]
    fn test_tokenize_string_literal_single_quote() {
        let tokens = Tokenizer::tokenize("'world'").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
    }

    #[test]
    fn test_tokenize_numeric_literal_integer() {
        let tokens = Tokenizer::tokenize("42").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "42");
    }

    #[test]
    fn test_tokenize_numeric_literal_float() {
        let tokens = Tokenizer::tokenize("3.14").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert_eq!(tokens[0].value, "3.14");
    }

    // --- Tokenizer: prefixed names ---

    #[test]
    fn test_tokenize_prefixed_name() {
        let tokens = Tokenizer::tokenize("rdf:type").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::PrefixedName);
        assert_eq!(tokens[0].value, "rdf:type");
    }

    #[test]
    fn test_tokenize_prefixed_name_empty_local() {
        // A prefix with no local part (e.g. in PREFIX declarations).
        let tokens = Tokenizer::tokenize("ex:").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::PrefixedName);
    }

    // --- Tokenizer: punctuation, whitespace, comments, EOF ---

    #[test]
    fn test_tokenize_punctuation_brace() {
        let tokens = Tokenizer::tokenize("{").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "{");
    }

    #[test]
    fn test_tokenize_punctuation_dot() {
        let tokens = Tokenizer::tokenize(".").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, ".");
    }

    #[test]
    fn test_tokenize_whitespace() {
        let tokens = Tokenizer::tokenize("   ").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Whitespace);
    }

    #[test]
    fn test_tokenize_comment() {
        let tokens = Tokenizer::tokenize("# this is a comment\n").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Comment);
        assert!(tokens[0].value.starts_with('#'));
    }

    #[test]
    fn test_tokenize_eof_appended() {
        let tokens = Tokenizer::tokenize("SELECT").expect("valid SPARQL input");
        assert_eq!(
            tokens.last().expect("collection should not be empty").kind,
            TokenKind::Eof
        );
    }

    // --- Tokenizer: combined inputs and filtering ---

    #[test]
    fn test_tokenize_multiple_tokens() {
        let tokens =
            Tokenizer::tokenize_filtered("SELECT ?x WHERE { ?x rdf:type <http://a.org/A> }")
                .expect("operation should succeed");
        let kinds: Vec<&TokenKind> = tokens.iter().map(|t| &t.kind).collect();
        assert!(kinds.contains(&&TokenKind::Keyword));
        assert!(kinds.contains(&&TokenKind::Variable));
        assert!(kinds.contains(&&TokenKind::PrefixedName));
        assert!(kinds.contains(&&TokenKind::Iri));
        assert!(kinds.contains(&&TokenKind::Punctuation));
    }

    #[test]
    fn test_tokenize_filtered_removes_whitespace() {
        let all = Tokenizer::tokenize("SELECT ?x").expect("valid SPARQL input");
        let filtered = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        assert!(all.len() > filtered.len());
        assert!(!filtered.iter().any(|t| t.kind == TokenKind::Whitespace));
    }

    #[test]
    fn test_tokenize_filtered_removes_comments() {
        let filtered =
            Tokenizer::tokenize_filtered("SELECT # comment\n?x").expect("valid SPARQL input");
        assert!(!filtered.iter().any(|t| t.kind == TokenKind::Comment));
    }

    #[test]
    fn test_tokenize_string_with_language_tag() {
        // The language tag is folded into the same Literal token.
        let tokens = Tokenizer::tokenize("\"hello\"@en").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Literal);
        assert!(tokens[0].value.contains("@en"));
    }

    #[test]
    fn test_tokenize_unterminated_iri_error() {
        let result = Tokenizer::tokenize("<http://unclosed");
        assert!(result.is_err());
    }

    // --- TokenStream behavior ---

    #[test]
    fn test_stream_peek_first_token() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let tok = stream.peek().expect("stream should have tokens");
        assert_eq!(tok.kind, TokenKind::Keyword);
    }

    #[test]
    fn test_stream_peek_empty() {
        let stream = TokenStream::new(vec![]);
        assert!(stream.peek().is_none());
    }

    #[test]
    fn test_stream_next_advances() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (tok, rest) = stream.next();
        assert!(tok.is_some());
        assert_eq!(tok.expect("should have value").kind, TokenKind::Keyword);
        let (tok2, _) = rest.next();
        assert_eq!(
            tok2.expect("should have second token").kind,
            TokenKind::Variable
        );
    }

    #[test]
    fn test_stream_remaining_count() {
        // remaining() excludes the trailing Eof token.
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert_eq!(stream.remaining(), 3);
    }

    #[test]
    fn test_stream_is_empty_after_consuming_all() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        // Consume the keyword, then the Eof token.
        let (_, rest) = stream.next();
        let (_, rest2) = rest.next();
        assert!(rest2.is_empty());
    }

    #[test]
    fn test_stream_position_zero_initially() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert_eq!(stream.position(), 0);
    }

    #[test]
    fn test_stream_position_advances() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = stream.next();
        assert_eq!(rest.position(), 1);
    }

    // --- expect_keyword ---

    #[test]
    fn test_expect_keyword_success() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_ok());
    }

    #[test]
    fn test_expect_keyword_case_insensitive() {
        let tokens = Tokenizer::tokenize_filtered("select").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        assert!(expect_keyword(stream, "SELECT").is_ok());
    }

    #[test]
    fn test_expect_keyword_wrong_keyword() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_err());
        assert!(result.unwrap_err().message.contains("SELECT"));
    }

    #[test]
    fn test_expect_keyword_not_a_keyword() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_keyword(stream, "SELECT");
        assert!(result.is_err());
    }

    #[test]
    fn test_expect_keyword_consumes_token() {
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = expect_keyword(stream, "SELECT").expect("keyword parse should succeed");
        assert!(expect_keyword(rest, "WHERE").is_ok());
    }

    // --- expect_iri ---

    #[test]
    fn test_expect_iri_success() {
        let tokens =
            Tokenizer::tokenize_filtered("<http://example.org/>").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_ok());
        assert_eq!(
            result.expect("should have value").0,
            "<http://example.org/>"
        );
    }

    #[test]
    fn test_expect_iri_prefixed_name() {
        // Prefixed names are accepted wherever an IRI is expected.
        let tokens = Tokenizer::tokenize_filtered("rdf:type").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_ok());
    }

    #[test]
    fn test_expect_iri_failure_on_variable() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_iri(stream);
        assert!(result.is_err());
    }

    // --- expect_variable ---

    #[test]
    fn test_expect_variable_success() {
        let tokens = Tokenizer::tokenize_filtered("?subject").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_ok());
        assert_eq!(result.expect("should have value").0, "?subject");
    }

    #[test]
    fn test_expect_variable_dollar_prefix() {
        let tokens = Tokenizer::tokenize_filtered("$pred").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_ok());
        assert_eq!(result.expect("should have value").0, "$pred");
    }

    #[test]
    fn test_expect_variable_failure_on_keyword() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let result = expect_variable(stream);
        assert!(result.is_err());
    }

    // --- optional combinator ---

    #[test]
    fn test_optional_hit() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, _) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert!(result.is_some());
    }

    #[test]
    fn test_optional_miss_returns_none() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, rest) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert!(result.is_none());
        assert_eq!(rest.position(), 0);
    }

    #[test]
    fn test_optional_miss_does_not_advance_stream() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let pos_before = stream.position();
        let (_, rest) = optional(stream, |s| expect_keyword(s, "SELECT"))
            .expect("optional parse should succeed");
        assert_eq!(rest.position(), pos_before);
    }

    // --- many0 combinator ---

    #[test]
    fn test_many0_zero_matches() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, rest) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 0);
        assert_eq!(rest.position(), 0);
    }

    #[test]
    fn test_many0_one_match() {
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, _) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_many0_multiple_matches() {
        let tokens =
            Tokenizer::tokenize_filtered("SELECT SELECT SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (results, rest) = many0(stream, |s| expect_keyword(s, "SELECT"))
            .expect("repetition parse should succeed");
        assert_eq!(results.len(), 3);
        assert!(rest.remaining() >= 1);
    }

    #[test]
    fn test_many0_variables() {
        let tokens = Tokenizer::tokenize_filtered("?a ?b ?c WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (vars, _) = many0(stream, expect_variable).expect("repetition parse should succeed");
        assert_eq!(vars.len(), 3);
        assert_eq!(vars[0], "?a");
        assert_eq!(vars[1], "?b");
        assert_eq!(vars[2], "?c");
    }

    // --- choice combinator ---

    #[test]
    fn test_choice_first_alternative() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        let (result, _) = choice(stream, parsers).expect("choice parse should succeed");
        assert_eq!(result, "SELECT");
    }

    #[test]
    fn test_choice_second_alternative() {
        let tokens = Tokenizer::tokenize_filtered("ASK").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        let (result, _) = choice(stream, parsers).expect("choice parse should succeed");
        assert_eq!(result, "ASK");
    }

    #[test]
    fn test_choice_no_match_returns_error() {
        let tokens = Tokenizer::tokenize_filtered("WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<&'static str>>> = vec![
            Box::new(|s| expect_keyword(s, "SELECT").map(|(_, r)| ("SELECT", r))),
            Box::new(|s| expect_keyword(s, "ASK").map(|(_, r)| ("ASK", r))),
        ];
        assert!(choice(stream, parsers).is_err());
    }

    #[test]
    fn test_choice_empty_parsers_returns_error() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let parsers: Vec<Box<dyn Fn(TokenStream) -> ParseResult<String>>> = vec![];
        assert!(choice(stream, parsers).is_err());
    }

    // --- ParseError ---

    #[test]
    fn test_parse_error_position() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let err = expect_keyword(stream, "SELECT").unwrap_err();
        assert_eq!(err.position, 0);
    }

    #[test]
    fn test_parse_error_message_contains_expected() {
        let tokens = Tokenizer::tokenize_filtered("?x").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let err = expect_keyword(stream, "SELECT").unwrap_err();
        assert!(err.message.contains("SELECT"));
    }

    #[test]
    fn test_parse_error_display() {
        let err = ParseError::new("test error", 42);
        let display = format!("{}", err);
        assert!(display.contains("42"));
        assert!(display.contains("test error"));
    }

    // --- End-to-end parsing sequences ---

    #[test]
    fn test_parse_simple_triple_pattern() {
        let tokens = Tokenizer::tokenize_filtered("?s rdf:type ?o").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (subj, rest) = expect_variable(stream).expect("variable parse should succeed");
        let (pred, rest) = expect_iri(rest).expect("IRI parse should succeed");
        let (obj, _) = expect_variable(rest).expect("variable parse should succeed");
        assert_eq!(subj, "?s");
        assert_eq!(pred, "rdf:type");
        assert_eq!(obj, "?o");
    }

    #[test]
    fn test_parse_select_query_skeleton() {
        let tokens = Tokenizer::tokenize_filtered("SELECT ?x WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (_, rest) = expect_keyword(stream, "SELECT").expect("keyword parse should succeed");
        let (vars, rest) = many0(rest, expect_variable).expect("repetition parse should succeed");
        let (_, _) = expect_keyword(rest, "WHERE").expect("keyword parse should succeed");
        assert_eq!(vars, vec!["?x"]);
    }

    // --- Constructors and operators ---

    #[test]
    fn test_token_new() {
        let tok = Token::new(TokenKind::Keyword, "SELECT", 0);
        assert_eq!(tok.kind, TokenKind::Keyword);
        assert_eq!(tok.value, "SELECT");
        assert_eq!(tok.position, 0);
    }

    #[test]
    fn test_parse_error_new() {
        let err = ParseError::new("oops", 5);
        assert_eq!(err.position, 5);
        assert_eq!(err.message, "oops");
    }

    #[test]
    fn test_tokenize_two_char_operator_neq() {
        let tokens = Tokenizer::tokenize("!=").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "!=");
    }

    #[test]
    fn test_tokenize_two_char_operator_leq() {
        // '<=' must tokenize as an operator, not start an IRI.
        let tokens = Tokenizer::tokenize("<=").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Punctuation);
        assert_eq!(tokens[0].value, "<=");
    }

    #[test]
    fn test_tokenize_keyword_filter() {
        let tokens = Tokenizer::tokenize("FILTER").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_tokenize_keyword_bind() {
        let tokens = Tokenizer::tokenize("BIND").expect("valid SPARQL input");
        assert_eq!(tokens[0].kind, TokenKind::Keyword);
    }

    #[test]
    fn test_stream_clone_independence() {
        // Advancing one stream must not move a previously taken clone.
        let tokens = Tokenizer::tokenize_filtered("SELECT WHERE").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let clone = stream.clone();
        let (_, advanced) = stream.next();
        assert_eq!(clone.position(), 0);
        assert_eq!(advanced.position(), 1);
    }

    #[test]
    fn test_many0_with_iri() {
        let tokens = Tokenizer::tokenize_filtered("<http://a.org/> <http://b.org/> ?x")
            .expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (iris, rest) = many0(stream, expect_iri).expect("repetition parse should succeed");
        assert_eq!(iris.len(), 2);
        assert_eq!(iris[0], "<http://a.org/>");
        assert_eq!(iris[1], "<http://b.org/>");
        assert_eq!(
            rest.peek().expect("stream should have tokens").kind,
            TokenKind::Variable
        );
    }

    #[test]
    fn test_optional_iri_hit() {
        let tokens = Tokenizer::tokenize_filtered("<http://example.org/> WHERE")
            .expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, _) = optional(stream, expect_iri).expect("optional parse should succeed");
        assert!(result.is_some());
    }

    #[test]
    fn test_optional_variable_miss_on_keyword() {
        let tokens = Tokenizer::tokenize_filtered("SELECT").expect("valid SPARQL input");
        let stream = TokenStream::new(tokens);
        let (result, rest) =
            optional(stream, expect_variable).expect("optional parse should succeed");
        assert!(result.is_none());
        assert_eq!(rest.position(), 0);
    }
}