#![macro_use]
use std::{fmt::Write, iter::Peekable};
use proc_macro2::{
Delimiter, Group, Literal, Spacing, TokenStream,
TokenTree::{self, *},
};
macro_rules! peek {
($iter:expr, $($pat:pat_param)|+ $(if $($guard:expr)+)?) => {
matches!($iter.peek(), $(Some($pat))|+ $(if $($guard)+)?)
}
}
macro_rules! next_if {
($iter:expr, $($pat:tt)+) => {
if peek!($iter, $($pat)+) { let _ = $iter.next(); true } else { false }
};
($iter:expr => $char:literal) => {
if next_if!($iter, Punct(x) if x.as_char() == $char) { concat!($char) } else { "" }
}
}
#[derive(Debug)]
pub(crate) enum Lexeme {
Strict,
Graph,
DiGraph,
Block(Group),
Node,
Edge,
Id(String),
NodeId(String), Html(String),
Attrs(()), EdgeOp(bool), Other(char),
}
use Lexeme::*;
pub(crate) struct LexerIter {
inner: Peekable<proc_macro2::token_stream::IntoIter>,
parse_fn: bool,
}
impl LexerIter {
pub fn new(input: TokenStream, parse_fn: bool) -> Self {
LexerIter {
inner: input.into_iter().peekable(),
parse_fn,
}
}
fn skip_cpp(&mut self) {
if !self.parse_fn {
return;
}
while let Some(Punct(ref punct)) = self.inner.peek() {
if punct.as_char() == '#' {
let first = punct.span().start().line;
self.inner.next();
while next_if!(self.inner, this if first == this.span().start().line) {}
} else {
break;
}
}
}
fn next_tt(&mut self) -> Option<TokenTree> {
self.skip_cpp();
self.inner.next()
}
fn string(&mut self, lit: Literal) -> (bool, String) {
let mut lit = lit.to_string();
let str = lit.starts_with('"');
let str_or_raw = str || (!self.parse_fn && lit.starts_with('r'));
if str_or_raw {
if str {
let _ = lit.pop();
lit.remove(0);
} else {
let i = lit.find('"').unwrap();
lit.replace_range(lit.len() - i.., "");
lit.replace_range(..i + 1, "");
}
}
(str_or_raw, lit)
}
fn html_acc(&mut self, str: &mut String, group: bool) {
let delim = |g: &Group| match g.delimiter() {
Delimiter::Brace => ("{", "}"),
Delimiter::Bracket => ("[", "]"),
Delimiter::Parenthesis => ("(", ")"),
Delimiter::None => ("", ""),
};
let mut nest = 0;
loop {
let tt = match self.next_tt() {
Some(tt) => tt,
_ if group => break,
_ => panic!("Unterminated html '{str}'"),
};
match tt {
Punct(p) => match p.as_char() {
'>' => {
if nest == 0 && !group {
break;
}
nest -= 1;
write!(str, ">")
}
'<' => {
nest += 1;
write!(
str,
" <{}{}",
next_if!(self.inner => '/'),
self.inner.next().expect("Tag expected")
)
}
'&' => write!(
str,
" &{}{}{}",
next_if!(self.inner => '#'),
self.inner.next().expect("Entity expected"),
next_if!(self.inner => ';')
),
',' | ';' | '.' | ':' | '!' | '?' => write!(str, "{p}"),
_ => write!(str, " {p}"),
},
Group(g) => {
write!(str, " {}", delim(&g).0).unwrap();
Self::new(g.stream(), self.parse_fn).html_acc(str, true);
write!(str, " {}", delim(&g).1)
}
Ident(i) => write!(str, " {i}"),
Literal(l) => write!(str, " {l}"),
}
.unwrap()
}
}
fn html(&mut self) -> Lexeme {
let mut str = String::new();
self.html_acc(&mut str, false);
Html(str)
}
fn literal(&mut self, lit: Literal, negate: bool, fract: bool) -> Lexeme {
let (str, mut lit) = self.string(lit);
if str {
if negate || fract {
panic!("Can't have `-` and/or `.` before a string {lit}");
}
while next_if!(self.inner, Punct(punct) if punct.as_char() == '+') {
if let Some(Literal(lit2)) = self.inner.next() {
let (str, lit2) = self.string(lit2);
if str {
lit += &lit2;
} else {
panic!("Expected another string after `+`");
}
} else {
panic!("Expected another string after `+`");
}
}
if self.parse_fn {
let mut slice = &lit[0..];
while let Some(i) = slice.rfind(r"\\") {
lit.remove(i);
slice = &lit[..i];
}
}
} else if lit.starts_with(['b', 'c', 'r', '\'']) {
panic!("Unsupported literal: {lit}");
} else {
if negate ^ fract {
lit.insert(0, if fract { '.' } else { '-' });
} else if fract {
lit.insert_str(0, "-.")
}
}
self.maybe_node_id(lit)
}
fn fract(&mut self, negate: bool) -> Lexeme {
if let Some(Literal(lit)) = self.inner.next() {
self.literal(lit, negate, true)
} else {
panic!("After `.` or `-.` expected number")
}
}
fn maybe_node_id(&mut self, str: String) -> Lexeme {
self.skip_cpp();
if next_if!(self.inner, Punct(punct) if punct.as_char() == ':') {
if let Some(NodeId(_)) | Some(Id(_)) = self.next() {
return NodeId(str);
}
panic!("Expected :port[:compass_pt] after {str}");
}
Id(str)
}
}
impl Iterator for LexerIter {
type Item = Lexeme;
fn next(&mut self) -> Option<Self::Item> {
let item = self.next_tt()?;
Some(match item {
Group(group) => {
use Delimiter::*;
match group.delimiter() {
Brace => Block(group),
Parenthesis => todo!("Exprs"), Bracket => Attrs(()), None => panic!("Unknown group delimiter, not '(', '{{' or '['."),
}
}
Ident(ref ident) => {
let str = ident.to_string();
match str.to_ascii_lowercase().as_ref() {
"strict" => Strict,
"graph" => Graph,
"digraph" => DiGraph,
"node" => Node,
"edge" => Edge,
"subgraph" => {
let mut next = self.next();
if let Some(Id(_)) = next {
next = self.next();
}
if let Some(Block(_)) = next {
next.unwrap()
} else {
panic!("Expected {{ block }} after keyword {str}")
}
}
_ => self.maybe_node_id(str),
}
}
Literal(lit) => self.literal(lit, false, false),
Punct(punct) => {
match punct.as_char() {
'-' => {
let next = self.next_tt();
match next {
Some(Literal(lit)) => self.literal(lit, true, false),
Some(Punct(second)) => {
match second.as_char() {
'-' => EdgeOp(false), '>' if punct.spacing() == Spacing::Joint => EdgeOp(true),
'.' => self.fract(true),
ch => panic!("Expected --, -> or -.fraction, got -{ch}"),
}
}
_ => panic!("Expected --, -> or -number, got -{next:?}"),
}
}
'.' => self.fract(false),
'<' => self.html(),
char => Other(char),
}
}
})
}
}
pub type Lexer = Peekable<LexerIter>;
pub fn lexer(input: TokenStream, parse_fn: bool) -> Lexer {
LexerIter::new(input, parse_fn).peekable()
}
#[cfg(test)]
mod tests {
use super::*;
use quote::quote;
macro_rules! validate {
($lexer:ident Id($result:literal), $($rest:tt)*) => {
validate!($lexer Id(str) if str == $result, $($rest)*)
};
($lexer:ident NodeId($result:literal), $($rest:tt)*) => {
validate!($lexer NodeId(str) if str == $result, $($rest)*)
};
($lexer:ident Html($result:literal), $($rest:tt)*) => {
validate!($lexer Html(str) if str == $result, $($rest)*)
};
($lexer:ident $result:pat $(if $($guard:expr)+)?, $($rest:tt)*) => {
match $lexer.peek() {
Some($result) $(if $($guard)+)? => { $lexer.next(); }
_ => panic!("wanted: {}\ngot: {:?}",
stringify!($result $(if $($guard)+)?),
$lexer.next())
}
validate!($lexer $($rest)*)
};
($lexer:ident) => {};
({$($graph:tt)*} $($result:tt)*) => {
let mut lexer = lexer(quote!($($graph)*), false);
validate!(lexer $($result)*);
assert!(lexer.peek().is_none());
assert!(lexer.next().is_none());
}
}
#[test]
fn zero_cost() {
use std::mem::size_of;
macro_rules! opt_eq {
($type:ty) => {
assert_eq!(size_of::<$type>(), size_of::<Option<$type>>())
};
}
opt_eq!(&Lexeme);
opt_eq!(Lexeme);
opt_eq!(Option<Lexeme>);
}
#[test]
fn id() {
validate! {
{
STRICT Strict strict "strict"
foo "bar" baz:p r"raw" "aha":q:nw
async _ true
2 02 -2 -02 -0_2:r
2.0 -2.00 2. -2. .2 -.2
"foo" + "bar" + r"baz" "foo" + "bar":q:nw
<html> <<B>bold</B>>
<<I>[an italic, cursive]</I>, a normal & <U>{Yes! an underAline}</U>>
}
Strict, Strict, Strict, Id("strict"),
Id("foo"), Id("bar"), NodeId("baz"), Id("raw"), NodeId("aha"),
Id("async"), Id("_"), Id("true"),
Id("2"), Id("02"), Id("-2"), Id("-02"), NodeId("-0_2"),
Id("2.0"), Id("-2.00"), Id("2."), Id("-2."), Id(".2"), Id("-.2"),
Id("foobarbaz"), NodeId("foobar"),
Html(" html"), Html(" <B> bold </B>"),
Html(" <I> [ an italic, cursive ] </I>, a normal & <U> { Yes! an under A line } </U>"),
}
}
#[test]
fn group() {
validate! {
{
[ label = wow ]
{ A; 1 }
subgraph { A; 2 }
SubGraph AB { A; 3 }
SUBGRAPH "AB" { A; 4 }
}
Attrs(_),
Block(_),
Block(_),
Block(_),
Block(_),
}
}
#[test]
fn sym() {
validate! {
{
-- ->
= ; ,
}
EdgeOp(false), EdgeOp(true),
Other('='), Other(';'), Other(','),
}
}
}