orql 0.1.0

A toy SQL parser for a subset of the Oracle dialect.
Documentation
//! Provides a lazily parsing iterator over a SQL source.

use std::ops::{Deref, Range};

use super::{Error, Location, MetaTracker, Metadata, ParserInner, Token, TokenType};
use crate::ast::Statement;

/// A parsing error while [iterating](super::iter) a source's statements.
#[derive(Debug)]
pub struct IterError<'s> {
    /// The section / slice of the (full) source that failed to parse into a
    /// [Statement] and is consequently skipped by the originating iterator.
    pub skipped: SourceSlice<'s>,

    /// The parser error itself.
    ///
    /// Note: location data referred to by the error corresponds to the full
    /// source being parsed.
    pub error: Error,
}

impl<'s> From<IterError<'s>> for Error {
    fn from(value: IterError) -> Self {
        value.error
    }
}

/// The source section that failed to parse and was skipped.
///
/// Served by [Iter] as part of an [IterError].
#[derive(Debug)]
pub struct SourceSlice<'s> {
    /// The (skipped) text extracted from the (full) source.
    pub text: &'s str,

    /// The location span of `text` (within the full source).
    pub span: Range<Location>,
}

/// Lets a [SourceSlice] be used wherever a `&str` of the skipped text is
/// expected.
impl<'s> Deref for SourceSlice<'s> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        let Self { text, .. } = self;
        text
    }
}

// ----------------------------------------------------------------------------

/// The type of items published by a [statement iterator](Iter): either a
/// successfully parsed [Statement] or an [IterError] describing the source
/// section that was skipped.
pub type IterItem<'s, ID> = std::result::Result<Statement<'s, ID>, IterError<'s>>;

/// An iterator over [statements](Statement), skipping unparsable source
/// sections.
///
/// Every item is an [IterItem].
pub trait Iter<'s, ID>: Iterator<Item = IterItem<'s, ID>> {
    /// Retrieves temporary (borrowed) access to the metadata.
    fn metadata(&self) -> &impl Metadata<'s, NodeId = ID>;

    /// Consumes this iterator, returning all metadata collected so far.
    fn into_metadata(self) -> impl Metadata<'s, NodeId = ID>;
}

/// The [Iter] implementation handed out by `ParserInner::into_iter`.
struct IterImpl<'s, M: MetaTracker<'s>> {
    /// The parser driving the iteration.
    parser: ParserInner<'s, M>,
    /// Token stream position recorded after the previously served item; used
    /// as the start of the source range reported for a skipped section.
    last_pos: usize,
    /// Source location corresponding to `last_pos`; used as the start of the
    /// span reported for a skipped section.
    last_loc: Location,
}

impl<'s, M> IterImpl<'s, M>
where
    M: MetaTracker<'s>,
{
    /// Wraps `parser`, recording the token stream's current position and
    /// location as the start of the first section that might get skipped.
    fn new(parser: ParserInner<'s, M>) -> Self {
        let last_pos = parser.tokens.position();
        let last_loc = parser.tokens.location();
        Self {
            parser,
            last_pos,
            last_loc,
        }
    }
}

impl<'s, M: MetaTracker<'s>> Iter<'s, M::NodeId> for IterImpl<'s, M> {
    /// Borrows the metadata exposed by the parser's meta tracker.
    fn metadata(&self) -> &impl Metadata<'s, NodeId = M::NodeId> {
        let tracker = &self.parser.meta_tracker;
        tracker.metadata()
    }

    /// Consumes the iterator and hands back the result of finishing the
    /// parser's meta tracker.
    fn into_metadata(self) -> impl Metadata<'s, NodeId = M::NodeId> {
        let Self { parser, .. } = self;
        parser.meta_tracker.finish()
    }
}

impl<'s, M> Iterator for IterImpl<'s, M>
where
    M: MetaTracker<'s>,
{
    type Item = IterItem<'s, <<M as MetaTracker<'s>>::Metadata as Metadata<'s>>::NodeId>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.parser.parse_statement_().transpose() {
            Some(Err(error)) => {
                // ~ advance the token stream right after the next semicolon and
                // return the error along with skipped section
                loop {
                    match self.parser.next_token() {
                        Err(_)
                        | Ok(None)
                        | Ok(Some(Token {
                            ttype: TokenType::Semicolon,
                            ..
                        })) => break,
                        _ => {
                            // ~ keep going
                        }
                    }
                }
                // ~ do not associate the skippped comments with the next token
                self.parser.meta_tracker.on_node_end();
                // ~ extract the piece of text we skipped
                let (last_pos, last_loc) = (self.last_pos, self.last_loc);
                let (next_pos, next_loc) =
                    (self.parser.tokens.position(), self.parser.tokens.location());
                let skipped = SourceSlice {
                    text: &self.parser.tokens.source_range(last_pos..next_pos),
                    span: last_loc..next_loc,
                };
                self.last_pos = next_pos;
                self.last_loc = next_loc;
                Some(Err(IterError { skipped, error }))
            }
            Some(Ok(stmt)) => {
                // ~ advance the parser past any comment so that they get
                // registered and can be served as "trailing" comments to the
                // last token in `stmt`
                self.last_pos = self.parser.tokens.position();
                self.last_loc = self.parser.tokens.location();
                if let Ok((pos, loc)) = self.parser.skip_comments() {
                    self.last_pos = pos;
                    self.last_loc = loc;
                } else {
                    // XXX push back the error to serve it on the next `.next()` call
                }
                Some(Ok(stmt))
            }
            None => None,
        }
    }
}

impl<'s, M: MetaTracker<'s> + 's> ParserInner<'s, M> {
    /// Converts this parser into an iterator which parses statements lazily,
    /// i.e. one per `.next()` call.
    ///
    /// Unparsable source sections are served as [IterError]s.
    pub(super) fn into_iter(self) -> impl Iter<'s, M::NodeId, Item = IterItem<'s, M::NodeId>> {
        IterImpl::new(self)
    }
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use std::borrow::Cow;

    use super::*;
    use crate::{
        ast::{Node, QueryBlock, QuerySelect, StatementType},
        parser::{Comment, CommentStyle, DefaultTracker, Location, VoidTracker},
        scanner::Scanner,
    };

    fn unpack_select<'s, ID>(
        stmt: Statement<'s, ID>,
    ) -> (QuerySelect<'s, ID>, Option<Node<(), ID>>) {
        let StatementType::Select(select) = stmt.statement else {
            panic!("not a select statement")
        };
        let QueryBlock::Select(select) = select.query.body.block else {
            panic!("not a plain select query");
        };
        (select, stmt.terminator)
    }

    #[test]
    fn test_iter_with_invalid_statements() {
        // ~ two empty statements surrounding one unparsable statement
        // (`bad`); the final `/*appendage*/` comment must not produce yet
        // another item
        let mut iter = ParserInner::new(
            Scanner::new("/*one*/; /*two*/ bad; /*three*/; /*appendage*/"),
            VoidTracker,
        )
        .into_iter();

        // ~ first item: the empty statement terminated by the first `;`
        assert!(matches!(
            iter.next(),
            Some(Ok(Statement {
                statement: StatementType::Empty,
                terminator: Some(Node((), _))
            }))
        ));
        // ~ second item: the parse error caused by `bad`
        assert!(matches!(
            iter.next(),
            Some(Err(
                IterError {
                    skipped,
                    error: Error::Unexpected {
                        unexpected: Cow::Owned(unexpected),
                        expected: "a statement",
                        loc: Location { line: 1, col: 18 },
                    }
                }))
            // ~ we expect the "trailing /*two*/" comment to be consumed and
            // not reported as "skipped", as it's tracked for the preceding
            // statement
            if unexpected == "'bad'" && &*skipped == " bad;"
        ));
        // ~ third item: the empty statement following the skipped section
        assert!(matches!(
            iter.next(),
            Some(Ok(Statement {
                statement: StatementType::Empty,
                terminator: Some(Node((), _))
            }))
        ));
        // ~ nothing but a comment is left in the source
        assert!(iter.next().is_none());
    }

    // Verifies which comments the iterator's metadata reports for selected
    // tokens of two consecutive `select` statements. Each `meta.comments(..)`
    // result is compared against a pair of comment slices; judging by the
    // expected values below, the first slice holds the comments preceding the
    // token and the second one those following it.
    #[test]
    fn test_iter_with_comments() {
        let mut iter = ParserInner::new(
            Scanner::new(
                r"
/*one*/  select /*two*/ /*2a*/ /*2b*/ 1 from /*three*/ dual ;
-- three and a half
/*four*/ select 2         from           dual /*five*/ ;
-- six",
            ),
            DefaultTracker::default(),
        )
        .into_iter();

        // ~ first statement --------------------------------------------------
        let stmt = iter.next().unwrap().unwrap();
        let meta = iter.metadata();
        let (
            QuerySelect {
                select_token, from, ..
            },
            terminator,
        ) = unpack_select(stmt);
        // ~ first statement: `select` token
        assert_eq!(
            meta.comments(select_token.1),
            (
                &[Comment {
                    text: "one",
                    style: CommentStyle::Block,
                    loc: Location { line: 2, col: 1 }
                },][..],
                &[
                    Comment {
                        text: "two",
                        style: CommentStyle::Block,
                        loc: Location { line: 2, col: 17 },
                    },
                    Comment {
                        text: "2a",
                        style: CommentStyle::Block,
                        loc: Location { line: 2, col: 25 },
                    },
                    Comment {
                        text: "2b",
                        style: CommentStyle::Block,
                        loc: Location { line: 2, col: 32 },
                    }
                ][..]
            )
        );
        // ~ first statement: `from` token
        assert_eq!(
            meta.comments(from.from_token.1),
            (
                &[][..],
                &[Comment {
                    text: "three",
                    style: CommentStyle::Block,
                    loc: Location { line: 2, col: 46 }
                }][..]
            )
        );
        // ~ first statement: terminator token, i.e. `;`
        assert_eq!(
            meta.location(terminator.as_ref().unwrap().1),
            Location { line: 2, col: 61 }
        );
        // ~ the comments between the two statements are already available
        // right after the first statement was served
        assert_eq!(
            meta.comments(terminator.as_ref().unwrap().1),
            (
                &[][..],
                &[
                    Comment {
                        text: " three and a half",
                        style: CommentStyle::Line,
                        loc: Location { line: 3, col: 1 },
                    },
                    Comment {
                        text: "four",
                        style: CommentStyle::Block,
                        loc: Location { line: 4, col: 1 },
                    }
                ][..]
            )
        );

        // ~ second statement -------------------------------------------------
        let stmt = iter.next().unwrap().unwrap();
        let meta = iter.metadata();
        let (QuerySelect { select_token, .. }, terminator) = unpack_select(stmt);
        // ~ second statement: `select` token; the very same comments reported
        // for the first statement's terminator show up here again
        assert_eq!(
            meta.comments(select_token.1),
            (
                &[
                    Comment {
                        text: " three and a half",
                        style: CommentStyle::Line,
                        loc: Location { line: 3, col: 1 },
                    },
                    Comment {
                        text: "four",
                        style: CommentStyle::Block,
                        loc: Location { line: 4, col: 1 }
                    }
                ][..],
                &[][..]
            )
        );
        // ~ second statement: terminator token, i.e. `;`
        assert_eq!(
            meta.comments(terminator.unwrap().1),
            (
                &[Comment {
                    text: "five",
                    style: CommentStyle::Block,
                    loc: Location { line: 4, col: 47 },
                },][..],
                &[Comment {
                    text: " six",
                    style: CommentStyle::Line,
                    loc: Location { line: 5, col: 1 }
                }][..],
            )
        );

        // ~ no more statements expected --------------------------------------
        assert!(iter.next().is_none());
    }
}