// rust_dot 0.6.0 - Docs.rs

/*!
This takes the tokens Rust gives and iterates over them, turning them into the lexemes of DOT.
*/
#![macro_use]
use std::{fmt::Write, iter::Peekable};

use proc_macro2::{
    Delimiter, Group, Literal, Spacing, TokenStream,
    TokenTree::{self, *},
};

/// Non-consuming lookahead: `true` iff the iterator has a next item and that item matches the
/// given pattern(s), optionally restricted by an `if` guard.
macro_rules! peek {
    ($iter:expr, $($pat:pat_param)|+ $(if $($guard:expr)+)?) => {
        match $iter.peek() {
            $(Some($pat))|+ $(if $($guard)+)? => true,
            _ => false,
        }
    }
}

/// Conditional advance: takes the same arguments as `peek!`. If the next item exists and matches,
/// it is consumed and the result is `true`; otherwise nothing is consumed and the result is `false`.
/// The 2nd form, `iter => 'c'`, does this for a `Punct` with that character and yields the
/// character as a `&str` if it was consumed, or `""` if it was not.
macro_rules! next_if {
    ($iter:expr, $($pat:tt)+) => {
        // Only advance after a successful peek; the peeked item is then what `next()` returns.
        peek!($iter, $($pat)+) && $iter.next().is_some()
    };
    ($iter:expr => $char:literal) => {
        match next_if!($iter, Punct(x) if x.as_char() == $char) {
            true => concat!($char),
            false => "",
        }
    }
}

/// All the kinds of things we look for in a `TokenStream`
#[derive(Debug)]
pub(crate) enum Lexeme {
    /// Keyword `strict` (keywords are matched case-insensitively).
    Strict,
    /// Keyword `graph`.
    Graph,
    /// Keyword `digraph`.
    DiGraph,
    /// A brace-delimited group; also what `subgraph [id] { … }` lexes to.
    Block(Group),
    /// Keyword `node`.
    Node,
    /// Keyword `edge`.
    Edge,
    /// An identifier, number or (possibly concatenated) string, with string quoting stripped.
    Id(String),
    /// Same content as `Id`, but a port followed, so it must name a node.
    NodeId(String), // it's a NodeId if it was followed by :port[:compass_pt]
    /// The text gathered between `<` and its balancing `>`.
    Html(String),
    /// A bracket-delimited group; its contents are currently not kept.
    Attrs(()),    //Attrs(TokenStream),
    /// Edge operator: `true` for `->`, `false` for `--`.
    EdgeOp(bool), // directed?
    /// Any other punctuation character.
    Other(char),
}

use Lexeme::*;
/* impl Default for Lexeme {
    fn default() -> Self { None }
} */

/* struct Lexeme {
    value: Value,
    span: Span
}
const NONE: Lexeme = Lexeme { value: None, } */

/// An Iterable that skips `#` to line end pseudo comments.
///
/// Through its `Iterator` impl it turns the wrapped raw tokens into [`Lexeme`]s.
pub(crate) struct LexerIter {
    /// The raw token iterator; peekable so that lookahead can decide before consuming.
    inner: Peekable<proc_macro2::token_stream::IntoIter>,
    /// `true` when constructed for `parse_*()`, `false` when constructed for `rust_dot!`.
    /// Enables `#` comment skipping and changes how string literals are treated.
    parse_fn: bool,
}

impl LexerIter {
    /// Constructor which takes a `bool` to differentiate call from `parse_*()` vs. `rust_dot!`.
    pub fn new(input: TokenStream, parse_fn: bool) -> Self {
        LexerIter {
            // inner is peekable for skip_cpp()
            inner: input.into_iter().peekable(),
            parse_fn,
        }
    }

    /// Discard, on the inner iter, every run of `#` to line end pseudo comments that comes next.
    /// If called from rust_dot!{} this is a nop, as stable Rust provides no line numbers.
    fn skip_cpp(&mut self) {
        if !self.parse_fn {
            return;
        }
        loop {
            // Only a `#` starts a comment; anything else (or the end of input) ends the skipping.
            let first = match self.inner.peek() {
                Some(Punct(hash)) if hash.as_char() == '#' => hash.span().start().line,
                _ => return,
            };
            self.inner.next();
            // Swallow every following token that starts on the same line as the `#`.
            while next_if!(self.inner, this if first == this.span().start().line) {}
        }
    }

    /// Yield the next raw token, having first discarded any `#` to line end pseudo comments
    /// in front of it.
    fn next_tt(&mut self) -> Option<TokenTree> {
        self.skip_cpp();
        self.inner.next()
    }

    /// Turn the literal into its source text. The `bool` tells whether it was a plain or raw
    /// string; if so, the returned text has its quoting removed, otherwise it is returned as is.
    fn string(&mut self, lit: Literal) -> (bool, String) {
        // There is no kind accessor on the literal, so the text itself has to be inspected.
        let mut text = lit.to_string();
        let plain = text.starts_with('"');
        // if called from parse_* there is no r"", as there is a work around for "\N" etc.
        let raw = !plain && !self.parse_fn && text.starts_with('r');
        if plain {
            // drop the closing and the opening quote
            let _ = text.pop();
            text.remove(0);
        } else if raw {
            // The opening `r###"` is one longer than the closing `"###`.
            let quote = text.find('"').unwrap();
            let tail = text.len() - quote;
            text.replace_range(tail.., "");
            text.replace_range(..quote + 1, "");
        }
        (plain || raw, text)
    }

    /// Append the rest of an HTML-like label to `str`, token by token.
    ///
    /// With `group == false` this runs up to the `>` that balances the opening `<` (which the
    /// caller has already consumed) and panics if the tokens run out first. With `group == true`,
    /// as used for the recursive call on the contents of a delimited group, it runs until the
    /// tokens run out.
    ///
    /// Most tokens are written with a leading space; `>` and the punctuation `, ; . : ! ?`
    /// are written without one.
    fn html_acc(&mut self, str: &mut String, group: bool) {
        // Opening and closing text for each kind of group delimiter.
        let delim = |g: &Group| match g.delimiter() {
            Delimiter::Brace => ("{", "}"),
            Delimiter::Bracket => ("[", "]"),
            Delimiter::Parenthesis => ("(", ")"),
            Delimiter::None => ("", ""),
        };
        // Count of `<` seen minus count of `>` seen at this level.
        let mut nest = 0;
        loop {
            let tt = match self.next_tt() {
                Some(tt) => tt,
                // The end of a group's tokens is the normal way out of the recursive call.
                _ if group => break,
                _ => panic!("Unterminated html '{str}'"),
            };
            // Every arm yields the `fmt::Result` of its last write, unwrapped after the match.
            match tt {
                Punct(p) => match p.as_char() {
                    '>' => {
                        // A balanced `>` outside of a group closes the label and is not written.
                        if nest == 0 && !group {
                            break;
                        }
                        nest -= 1;
                        write!(str, ">")
                    }
                    '<' => {
                        nest += 1;
                        // `<`, an optional `/`, then whatever token comes next as the tag.
                        write!(
                            str,
                            " <{}{}",
                            next_if!(self.inner => '/'),
                            self.inner.next().expect("Tag expected")
                        )
                    }
                    // `&`, an optional `#`, whatever token comes next, then an optional `;`.
                    '&' => write!(
                        str,
                        " &{}{}{}",
                        next_if!(self.inner => '#'),
                        self.inner.next().expect("Entity expected"),
                        next_if!(self.inner => ';')
                    ),
                    ',' | ';' | '.' | ':' | '!' | '?' => write!(str, "{p}"),
                    _ => write!(str, " {p}"),
                },
                Group(g) => {
                    // Delimiters are written around the contents, which a nested lexer appends.
                    write!(str, " {}", delim(&g).0).unwrap();
                    Self::new(g.stream(), self.parse_fn).html_acc(str, true);
                    write!(str, " {}", delim(&g).1)
                }
                Ident(i) => write!(str, " {i}"),
                Literal(l) => write!(str, " {l}"),
            }
            .unwrap()
        }
    }
    /// Lex an HTML-like label, whose opening `<` has already been consumed, into `Html`.
    fn html(&mut self) -> Lexeme {
        let mut markup = String::new();
        self.html_acc(&mut markup, false);
        Html(markup)
    }

    /// Transform Literal to Lexeme. For strings cope with `+` and `:port:compass_pt`.
    /// For a number merge in negation and deal with `.1` which Rust doesn’t accept.
    ///
    /// `negate` and `fract` tell whether a `-` and/or a `.` preceded the literal.
    ///
    /// # Panics
    /// - if a string was preceded by `-` and/or `.`,
    /// - if a `+` after a string is not followed by another string,
    /// - if the literal is neither a string nor a number (byte, C, char or, from `parse_*()`,
    ///   raw string literals).
    fn literal(&mut self, lit: Literal, negate: bool, fract: bool) -> Lexeme {
        let (str, mut lit) = self.string(lit);
        if str {
            if negate || fract {
                panic!("Can't have `-` and/or `.` before a string {lit}");
            }
            loop {
                // Pseudo comments may sit between the pieces of a concatenation, so skip them
                // both before looking for `+` and (via next_tt) before the following string.
                self.skip_cpp();
                if !next_if!(self.inner, Punct(punct) if punct.as_char() == '+') {
                    break;
                }
                let piece = match self.next_tt() {
                    Some(Literal(next)) => Some(self.string(next)),
                    _ => None,
                };
                match piece {
                    Some((true, piece)) => lit += &piece,
                    _ => panic!("Expected another string after `+`"),
                }
            }
            if self.parse_fn {
                // todo, pass down and test esc instead
                // Collapse every doubled backslash into a single one.
                lit = lit.replace(r"\\", r"\");
            }
        } else if lit.starts_with(['b', 'c', 'r', '\'']) {
            panic!("Unsupported literal: {lit}");
        } else {
            // number: put back the sign and/or leading dot that Rust tokenized separately
            match (negate, fract) {
                (true, true) => lit.insert_str(0, "-."),
                (true, false) => lit.insert(0, '-'),
                (false, true) => lit.insert(0, '.'),
                (false, false) => {}
            }
        }
        self.maybe_node_id(lit)
    }

    // `1.` is a literal but `.1` is not. But that's valid DOT, so emulate it.
    /// Called after a `.` or `-.`: the very next token must be a literal, which is then lexed
    /// as the fractional part, negated if `negate` is set. Panics on anything else.
    fn fract(&mut self, negate: bool) -> Lexeme {
        match self.inner.next() {
            Some(Literal(digits)) => self.literal(digits, negate, true),
            _ => panic!("After `.` or `-.` expected number"),
        }
    }

    /// Decide between `Id` and `NodeId` for an Id that was just read: if a `:port[:compass_pt]`
    /// follows, that is gobbled and the result is certainly a `NodeId`, else it stays an `Id`.
    /// Panics if a `:` is not followed by something that lexes to an Id.
    fn maybe_node_id(&mut self, str: String) -> Lexeme {
        self.skip_cpp();
        if !next_if!(self.inner, Punct(punct) if punct.as_char() == ':') {
            return Id(str);
        }
        // Lexing the port recurses into here, which gobbles an optional :compass_pt too.
        match self.next() {
            Some(NodeId(_) | Id(_)) => NodeId(str),
            _ => panic!("Expected :port[:compass_pt] after {str}"),
        }
    }
}

impl Iterator for LexerIter {
    type Item = Lexeme;

    /// Get high level next item from `TokenStream`.
    // next() is the only required method
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.next_tt()?;
        Some(match item {
            Group(group) => {
                use Delimiter::*;
                match group.delimiter() {
                    Brace => Block(group),
                    Parenthesis => todo!("Exprs"), //Expr(group),
                    Bracket => Attrs(()),          // Attrs(group.stream()),
                    None => panic!("Unknown group delimiter, not '(', '{{' or '['."),
                }
            }
            Ident(ref ident) => {
                let str = ident.to_string();
                match str.to_ascii_lowercase().as_ref() {
                    "strict" => Strict,
                    "graph" => Graph,
                    "digraph" => DiGraph,
                    "node" => Node,
                    "edge" => Edge,
                    "subgraph" => {
                        let mut next = self.next();
                        if let Some(Id(_)) = next {
                            next = self.next();
                        }
                        if let Some(Block(_)) = next {
                            next.unwrap()
                        } else {
                            panic!("Expected {{ block }} after keyword {str}")
                        }
                    }
                    _ => self.maybe_node_id(str),
                }
            }
            Literal(lit) => self.literal(lit, false, false),
            Punct(punct) => {
                match punct.as_char() {
                    '-' => {
                        let next = self.next_tt();
                        match next {
                            Some(Literal(lit)) => self.literal(lit, true, false),
                            Some(Punct(second)) => {
                                match second.as_char() {
                                    '-' => EdgeOp(false), // never Joint as Rust doesn't check for --
                                    '>' if punct.spacing() == Spacing::Joint => EdgeOp(true),
                                    '.' => self.fract(true),
                                    ch => panic!("Expected --, -> or -.fraction, got -{ch}"),
                                }
                            }
                            _ => panic!("Expected --, -> or -number, got -{next:?}"),
                        }
                    }
                    '.' => self.fract(false),
                    '<' => self.html(),
                    char => Other(char),
                }
            }
        })
    }
}

/// A `Peekable` iter over `Lexeme`, as returned by `lexer()`.
pub type Lexer = Peekable<LexerIter>;

/// Wrap a `TokenStream` into a `Lexer`, i.e. a `Peekable` iter over `Lexeme`.
/// The `bool` differentiates a call from `parse_*()` (`true`) from one by `rust_dot!` (`false`).
pub fn lexer(input: TokenStream, parse_fn: bool) -> Lexer {
    let lexemes = LexerIter::new(input, parse_fn);
    lexemes.peekable()
}

#[cfg(test)]
mod tests {
    use super::*;
    use quote::quote;

    /// Test helper, called as `validate! { { dot source } Expected, Lexemes, }`.
    /// Lexes the source and checks it yields exactly the given lexeme patterns, in order.
    macro_rules! validate {
        // Workaround for matching &str to String
        // These three arms rewrite `Variant("text")` into a binding plus an equality guard.
        ($lexer:ident Id($result:literal), $($rest:tt)*) => {
            validate!($lexer Id(str) if str == $result, $($rest)*)
        };
        ($lexer:ident NodeId($result:literal), $($rest:tt)*) => {
            validate!($lexer NodeId(str) if str == $result, $($rest)*)
        };
        ($lexer:ident Html($result:literal), $($rest:tt)*) => {
            validate!($lexer Html(str) if str == $result, $($rest)*)
        };
        // Check one expected pattern against the next lexeme, then recurse on the rest.
        ($lexer:ident $result:pat $(if $($guard:expr)+)?, $($rest:tt)*) => {
            match $lexer.peek() {
                Some($result) $(if $($guard)+)? => { $lexer.next(); }
                _ => panic!("wanted: {}\ngot: {:?}",
                    stringify!($result $(if $($guard)+)?),
                    $lexer.next())
            }
            validate!($lexer $($rest)*)
        };
        // No expectations left: end of recursion.
        ($lexer:ident) => {};
        // Entry point: lex the braced source as `rust_dot!` would (parse_fn = false), check
        // all expectations, then check that no lexemes are left over.
        ({$($graph:tt)*} $($result:tt)*) => {
            let mut lexer = lexer(quote!($($graph)*), false);
            validate!(lexer $($result)*);
            assert!(lexer.peek().is_none());
            assert!(lexer.next().is_none());
        }
    }

    #[test]
    // Do the things Peekable does have an overhead?
    // Checks that wrapping these types in an `Option` does not make them any bigger.
    fn zero_cost() {
        fn opt_eq<T>() {
            assert_eq!(std::mem::size_of::<T>(), std::mem::size_of::<Option<T>>())
        }
        opt_eq::<&Lexeme>();
        opt_eq::<Lexeme>();
        opt_eq::<Option<Lexeme>>();
    }

    /// Keywords in any case, identifiers, plain/raw/concatenated strings, numbers with sign
    /// and/or leading dot, `:port[:compass_pt]` suffixes and HTML-like labels.
    #[test]
    fn id() {
        validate! {
            {
                STRICT  Strict  strict  "strict"
                foo  "bar"  baz:p  r"raw"  "aha":q:nw
                async  _  true
                2  02  -2  -02  -0_2:r
                2.0  -2.00  2.  -2.  .2  -.2
                "foo" + "bar" + r"baz"  "foo" + "bar":q:nw
                <html>  <<B>bold</B>>
                <<I>[an italic, cursive]</I>, a normal &amp; <U>{Yes! an under&#65;line}</U>>
            }
            Strict,  Strict,  Strict,  Id("strict"),
            Id("foo"),  Id("bar"),  NodeId("baz"),  Id("raw"),  NodeId("aha"),
            Id("async"),  Id("_"),  Id("true"),
            Id("2"),  Id("02"),  Id("-2"),  Id("-02"),  NodeId("-0_2"),
            Id("2.0"),  Id("-2.00"),  Id("2."),  Id("-2."),  Id(".2"),  Id("-.2"),
            Id("foobarbaz"),  NodeId("foobar"),
            Html(" html"),  Html(" <B> bold </B>"),
            Html(" <I> [ an italic, cursive ] </I>, a normal &amp; <U> { Yes! an under &#65; line } </U>"),
        }
    }

    /// Bracket groups lex to `Attrs`; brace groups, with or without a leading `subgraph`
    /// keyword (in any case) and optional name, lex to `Block`.
    #[test]
    fn group() {
        validate! {
            {
                //(1 * 2)
                [ label = wow ]
                { A; 1 }
                subgraph { A; 2 }
                SubGraph AB { A; 3 }
                SUBGRAPH "AB" { A; 4 }
            }
            //Expr(_),
            Attrs(_),
            Block(_),
            Block(_),
            Block(_),
            Block(_),
        }
    }

    /// Both edge operators, and other punctuation passed through as `Other`.
    #[test]
    fn sym() {
        validate! {
            {
                -- ->
                = ; ,
            }
            EdgeOp(false),  EdgeOp(true),
            Other('='),  Other(';'),  Other(','),
        }
    }
}