use crate::error::ParseError;
use crate::expr::{Expr, RawAtom};
use crate::span::Span;
/// Parses a boolean query string into an [`Expr`] tree whose leaves are
/// [`RawAtom`]s.
///
/// Leading and trailing spaces/tabs are ignored. The whole input must form
/// exactly one expression; anything left over after it is an error.
///
/// # Errors
///
/// * [`ParseError::EmptyExpression`] when the input is empty or contains only
///   spaces and tabs.
/// * [`ParseError::UnmatchedCloseParen`] when a `)` follows a complete
///   expression.
/// * [`ParseError::UnexpectedChar`] when any other character follows a
///   complete expression.
/// * Any error produced while parsing the expression itself.
pub fn parse(input: &str) -> Result<Expr<RawAtom>, ParseError> {
    let mut cursor = Parser::new(input);

    cursor.skip_ws();
    if cursor.is_at_end() {
        return Err(ParseError::EmptyExpression);
    }

    let tree = cursor.parse_or()?;

    // Whatever is still unread after trailing blanks cannot belong to the
    // expression; classify it by its first character.
    cursor.skip_ws();
    let position = cursor.pos;
    match cursor.peek_char() {
        None => Ok(tree),
        Some(')') => Err(ParseError::UnmatchedCloseParen { position }),
        Some(ch) => Err(ParseError::UnexpectedChar { ch, position }),
    }
}
/// Cursor state for a single parse: the borrowed query text plus the byte
/// offset of the next unread byte.
struct Parser<'a> {
    /// The complete query string being parsed.
    input: &'a str,
    /// Byte offset into `input` of the next byte to examine.
    pos: usize,
}
impl<'a> Parser<'a> {
fn new(input: &'a str) -> Self {
Self { input, pos: 0 }
}
fn is_at_end(&self) -> bool {
self.pos >= self.input.len()
}
fn peek_byte(&self) -> Option<u8> {
self.input.as_bytes().get(self.pos).copied()
}
fn peek_char(&self) -> Option<char> {
self.input[self.pos..].chars().next()
}
fn skip_ws(&mut self) {
while matches!(self.peek_byte(), Some(b' ' | b'\t')) {
self.pos += 1;
}
}
fn parse_or(&mut self) -> Result<Expr<RawAtom>, ParseError> {
let mut left = self.parse_and()?;
loop {
self.skip_ws();
if self.peek_byte() != Some(b'|') {
break;
}
self.pos += 1;
let right = self.parse_and()?;
left = Expr::Or(Box::new(left), Box::new(right));
}
Ok(left)
}
fn parse_and(&mut self) -> Result<Expr<RawAtom>, ParseError> {
let mut left = self.parse_not()?;
loop {
self.skip_ws();
if self.peek_byte() != Some(b'&') {
break;
}
self.pos += 1;
let right = self.parse_not()?;
left = Expr::And(Box::new(left), Box::new(right));
}
Ok(left)
}
fn parse_not(&mut self) -> Result<Expr<RawAtom>, ParseError> {
self.skip_ws();
if self.peek_byte() == Some(b'!') {
self.pos += 1;
let inner = self.parse_atom()?;
Ok(Expr::Not(Box::new(inner)))
} else {
self.parse_atom()
}
}
fn parse_atom(&mut self) -> Result<Expr<RawAtom>, ParseError> {
self.skip_ws();
match self.peek_byte() {
Some(b'(') => {
let open = self.pos;
self.pos += 1;
self.skip_ws();
if self.peek_byte() == Some(b')') {
return Err(ParseError::ExpectedAtom { position: self.pos });
}
let inner = self.parse_or()?;
self.skip_ws();
if self.peek_byte() != Some(b')') {
return Err(ParseError::UnmatchedOpenParen { position: open });
}
self.pos += 1;
Ok(inner)
}
None => Err(ParseError::ExpectedAtom { position: self.pos }),
Some(byte @ (b'|' | b'&' | b')' | b'!')) => Err(ParseError::UnexpectedOperator {
op: char::from(byte),
position: self.pos,
}),
Some(_) => {
let atom = self.read_atom_text();
Ok(Expr::Atom(atom))
}
}
}
fn read_atom_text(&mut self) -> RawAtom {
let start = self.pos;
let mut end = start;
for (offset, ch) in self.input[start..].char_indices() {
if matches!(ch, '|' | '&' | '!' | '(' | ')' | ' ' | '\t') {
break;
}
end = start + offset + ch.len_utf8();
}
let text = self.input[start..end].to_owned();
self.pos = end;
RawAtom {
text,
span: Span { start, end },
}
}
}
// Unit tests for `parse`: accepted syntax, operator precedence and
// associativity, the byte spans recorded on atoms, and the exact error variant
// and position reported for each kind of malformed input.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `Expr::Atom` for `text` covering bytes
    /// `start..end` of the input.
    fn atom(text: &str, start: usize, end: usize) -> Expr<RawAtom> {
        Expr::Atom(RawAtom {
            text: text.to_owned(),
            span: Span { start, end },
        })
    }

    // ---- atoms ----------------------------------------------------------

    #[test]
    fn qry_002_parses_single_atom() {
        let expr = parse("foo").unwrap();
        assert_eq!(expr, atom("foo", 0, 3));
    }

    #[test]
    fn qry_002_parses_atom_with_colon_separator() {
        let expr = parse("name:foo").unwrap();
        assert_eq!(expr, atom("name:foo", 0, 8));
    }

    #[test]
    fn qry_002_parses_atom_with_glob_chars() {
        let expr = parse("src/**/*.rs").unwrap();
        assert_eq!(expr, atom("src/**/*.rs", 0, 11));
    }

    #[test]
    fn qry_002_parses_atom_with_path_chars_including_brackets() {
        let expr = parse("src/[ab]/file.rs").unwrap();
        assert_eq!(expr, atom("src/[ab]/file.rs", 0, 16));
    }

    // ---- operators, associativity and precedence ------------------------

    #[test]
    fn qry_002_parses_unary_not() {
        let expr = parse("!foo").unwrap();
        assert_eq!(expr, Expr::Not(Box::new(atom("foo", 1, 4))));
    }

    #[test]
    fn qry_002_parses_binary_and() {
        let expr = parse("a & b").unwrap();
        assert_eq!(
            expr,
            Expr::And(Box::new(atom("a", 0, 1)), Box::new(atom("b", 4, 5))),
        );
    }

    #[test]
    fn qry_002_parses_binary_or() {
        let expr = parse("a | b").unwrap();
        assert_eq!(
            expr,
            Expr::Or(Box::new(atom("a", 0, 1)), Box::new(atom("b", 4, 5))),
        );
    }

    #[test]
    fn qry_002_chains_and_left_associative() {
        let expr = parse("a & b & c").unwrap();
        assert_eq!(
            expr,
            Expr::And(
                Box::new(Expr::And(
                    Box::new(atom("a", 0, 1)),
                    Box::new(atom("b", 4, 5)),
                )),
                Box::new(atom("c", 8, 9)),
            ),
        );
    }

    #[test]
    fn qry_002_chains_or_left_associative() {
        let expr = parse("a | b | c").unwrap();
        assert_eq!(
            expr,
            Expr::Or(
                Box::new(Expr::Or(
                    Box::new(atom("a", 0, 1)),
                    Box::new(atom("b", 4, 5)),
                )),
                Box::new(atom("c", 8, 9)),
            ),
        );
    }

    #[test]
    fn qry_002_and_binds_tighter_than_or_left() {
        let expr = parse("a & b | c").unwrap();
        assert_eq!(
            expr,
            Expr::Or(
                Box::new(Expr::And(
                    Box::new(atom("a", 0, 1)),
                    Box::new(atom("b", 4, 5)),
                )),
                Box::new(atom("c", 8, 9)),
            ),
        );
    }

    #[test]
    fn qry_002_and_binds_tighter_than_or_right() {
        let expr = parse("a | b & c").unwrap();
        assert_eq!(
            expr,
            Expr::Or(
                Box::new(atom("a", 0, 1)),
                Box::new(Expr::And(
                    Box::new(atom("b", 4, 5)),
                    Box::new(atom("c", 8, 9)),
                )),
            ),
        );
    }

    #[test]
    fn qry_002_not_binds_tighter_than_and() {
        let expr = parse("!a & b").unwrap();
        assert_eq!(
            expr,
            Expr::And(
                Box::new(Expr::Not(Box::new(atom("a", 1, 2)))),
                Box::new(atom("b", 5, 6)),
            ),
        );
    }

    // ---- parentheses ----------------------------------------------------

    #[test]
    fn qry_002_parens_override_precedence() {
        let expr = parse("(a | b) & c").unwrap();
        assert_eq!(
            expr,
            Expr::And(
                Box::new(Expr::Or(
                    Box::new(atom("a", 1, 2)),
                    Box::new(atom("b", 5, 6)),
                )),
                Box::new(atom("c", 10, 11)),
            ),
        );
    }

    #[test]
    fn qry_002_parens_around_single_atom_are_transparent() {
        let expr = parse("(foo)").unwrap();
        assert_eq!(expr, atom("foo", 1, 4));
    }

    #[test]
    fn qry_002_nested_parens_are_transparent() {
        let expr = parse("((foo))").unwrap();
        assert_eq!(expr, atom("foo", 2, 5));
    }

    #[test]
    fn qry_002_not_applies_to_parenthesised_group() {
        let expr = parse("!(a & b)").unwrap();
        assert_eq!(
            expr,
            Expr::Not(Box::new(Expr::And(
                Box::new(atom("a", 2, 3)),
                Box::new(atom("b", 6, 7)),
            ))),
        );
    }

    // ---- whitespace and spans -------------------------------------------

    #[test]
    fn qry_002_whitespace_around_operators_is_ignored() {
        // The literal deliberately uses runs of several blanks, and the spans
        // below are byte offsets into exactly this string:
        //   blanks 0-1, `a` at 2, blanks 3-5, `&` at 6, blanks 7-9,
        //   `b` at 10, blanks 11-12.
        // With single blanks the atoms would sit at 1..2 and 5..6 and the
        // assertion could not hold.
        let expr = parse("  a   &   b  ").unwrap();
        assert_eq!(
            expr,
            Expr::And(Box::new(atom("a", 2, 3)), Box::new(atom("b", 10, 11))),
        );
    }

    #[test]
    fn qry_002_tabs_are_whitespace() {
        let expr = parse("a\t&\tb").unwrap();
        assert_eq!(
            expr,
            Expr::And(Box::new(atom("a", 0, 1)), Box::new(atom("b", 4, 5))),
        );
    }

    #[test]
    fn qry_002_operators_without_surrounding_whitespace() {
        let expr = parse("a&b").unwrap();
        assert_eq!(
            expr,
            Expr::And(Box::new(atom("a", 0, 1)), Box::new(atom("b", 2, 3))),
        );
    }

    #[test]
    fn qry_002_atom_spans_record_byte_offsets() {
        let expr = parse("alpha | beta").unwrap();
        let Expr::Or(left, right) = expr else {
            panic!("expected Or, got {expr:?}");
        };
        assert_eq!(*left, atom("alpha", 0, 5));
        assert_eq!(*right, atom("beta", 8, 12));
    }

    #[test]
    fn qry_002_supports_unicode_atom_text() {
        // U+00E9 occupies two bytes in UTF-8, so the first atom spans five
        // bytes although it has four characters.
        let input = "caf\u{00e9} | other";
        let expr = parse(input).unwrap();
        let Expr::Or(left, right) = expr else {
            panic!("expected Or, got {expr:?}");
        };
        assert_eq!(*left, atom("caf\u{00e9}", 0, 5));
        assert_eq!(*right, atom("other", 8, 13));
    }

    // ---- rejected input -------------------------------------------------

    #[test]
    fn qry_002_rejects_empty_input() {
        assert_eq!(parse(""), Err(ParseError::EmptyExpression));
    }

    #[test]
    fn qry_002_rejects_whitespace_only_input() {
        assert_eq!(parse(" \t "), Err(ParseError::EmptyExpression));
    }

    #[test]
    fn qry_002_rejects_trailing_atom_with_no_operator() {
        assert_eq!(
            parse("a b"),
            Err(ParseError::UnexpectedChar {
                ch: 'b',
                position: 2,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_trailing_and_operator() {
        assert_eq!(
            parse("a &"),
            Err(ParseError::ExpectedAtom { position: 3 }),
        );
    }

    #[test]
    fn qry_002_rejects_trailing_or_operator() {
        assert_eq!(
            parse("a |"),
            Err(ParseError::ExpectedAtom { position: 3 }),
        );
    }

    #[test]
    fn qry_002_rejects_leading_and_operator() {
        assert_eq!(
            parse("& a"),
            Err(ParseError::UnexpectedOperator {
                op: '&',
                position: 0,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_leading_or_operator() {
        assert_eq!(
            parse("| a"),
            Err(ParseError::UnexpectedOperator {
                op: '|',
                position: 0,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_double_and_operator() {
        // The first `&` is consumed as the operator; the second one is found
        // where an operand was expected.
        assert_eq!(
            parse("a && b"),
            Err(ParseError::UnexpectedOperator {
                op: '&',
                position: 3,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_double_or_operator() {
        assert_eq!(
            parse("a || b"),
            Err(ParseError::UnexpectedOperator {
                op: '|',
                position: 3,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_unmatched_open_paren() {
        assert_eq!(
            parse("(a"),
            Err(ParseError::UnmatchedOpenParen { position: 0 }),
        );
    }

    #[test]
    fn qry_002_rejects_unmatched_close_paren() {
        assert_eq!(
            parse("a)"),
            Err(ParseError::UnmatchedCloseParen { position: 1 }),
        );
    }

    #[test]
    fn qry_002_rejects_empty_paren_group() {
        assert_eq!(
            parse("()"),
            Err(ParseError::ExpectedAtom { position: 1 }),
        );
    }

    #[test]
    fn qry_002_rejects_double_negation() {
        assert_eq!(
            parse("!!a"),
            Err(ParseError::UnexpectedOperator {
                op: '!',
                position: 1,
            }),
        );
    }

    #[test]
    fn qry_002_rejects_negation_with_no_atom() {
        assert_eq!(
            parse("!"),
            Err(ParseError::ExpectedAtom { position: 1 }),
        );
    }

    #[test]
    fn qry_002_rejects_paren_then_operator() {
        assert_eq!(
            parse("(&a)"),
            Err(ParseError::UnexpectedOperator {
                op: '&',
                position: 1,
            }),
        );
    }
}