use thiserror::Error;
/// Error type used by the generated lexer/parser and by the hand-written
/// `Parse` impls in this file.
///
/// `Unknown` is the `Default` value. The `Display` text of each variant comes
/// from its `#[error(...)]` attribute.
#[derive(Error, Debug, Clone, Default, PartialEq)]
pub enum LexError {
/// Fallback error carrying no further detail; this is what
/// `LexError::default()` produces.
#[default]
#[error("unknown lexing error")]
Unknown,
/// A token was present but was not the expected one. Built by
/// `LexError::expected`.
#[error("expected {expect}, found {found}")]
Expected { expect: &'static str, found: String },
/// Something was expected but there was nothing to read. Built by
/// `LexError::empty`.
#[error("expected {expect}, found EOF")]
Empty { expect: &'static str },
/// Another `LexError` paired with a span. Built by
/// `SpannedError::with_span`. The displayed message is the inner error's
/// message only; `span` is not part of the text.
#[error("{source}")]
Spanned {
// Boxed because the enum refers to itself here.
#[source]
source: Box<LexError>,
span: Span,
},
}
// Declares the token set and parser scaffolding for the struct grammar.
// NOTE(review): the names used throughout this file but not declared in it
// (`Token`, `tokens::*Token`, `Span`, `Spanned`, `TokenStream`, `Printer`,
// `Parse`, `Peek`, `ToTokens`, `Diagnostic`) presumably come from this
// expansion — confirm against the synkit documentation.
// Only `//` comments are used inside the invocation: `///` would be turned
// into `#[doc]` attributes and handed to the macro as input.
synkit::parser_kit! {
// Error type of the generated API; the `Parse` impls below return it.
error: LexError,
// Whitespace tokens. They still show up in `TokenStream::all()` (the tests
// filter them out by hand), but `next()` steps over them
// (see `test_token_stream_skips_whitespace`).
skip_tokens: [Space, Tab, Newline],
tokens: {
// NOTE(review): `priority = 0` presumably gives these the lowest matching
// priority — confirm against the lexer documentation.
#[token(" ", priority = 0)]
Space,
#[token("\t", priority = 0)]
Tab,
// Accepts both `\n` and `\r\n`.
#[regex(r"\r?\n")]
// `#[fmt(..)]` sets the text returned by the token type's `fmt()`; tokens
// without it report their literal (`test_diagnostic_fmt` sees "struct" for
// `KwStructToken`).
#[fmt("newline")]
Newline,
#[token("{")]
LBrace,
#[token("}")]
RBrace,
#[token("(")]
LParen,
#[token(")")]
RParen,
#[token("[")]
LBracket,
#[token("]")]
RBracket,
#[token(":")]
Colon,
#[token(",")]
Comma,
#[token(";")]
Semi,
// Keywords also match the identifier regex below; the lexer tests show
// `struct` coming out as `KwStruct`, not `Ident`.
#[token("struct")]
KwStruct,
#[token("enum")]
KwEnum,
#[token("type")]
KwType,
// ASCII identifier: a letter or `_`, then letters, digits or `_`.
// The payload is an owned copy of the matched text.
#[regex(r"[A-Za-z_][A-Za-z0-9_]*", |lex| lex.slice().to_string())]
#[fmt("identifier")]
// NOTE(review): presumably extra derives for the generated `IdentToken` — confirm.
#[derive(PartialOrd, Ord, Hash, Eq)]
Ident(String),
// Unsigned decimal digits. `parse()` fails for values that do not fit in
// `i64`, in which case the callback returns `None`.
// NOTE(review): presumably the lexer reports `None` as a lexing error — confirm.
#[regex(r"[0-9]+", |lex| lex.slice().parse().ok())]
#[fmt("number")]
Number(i64),
// Double-quoted string; a backslash escapes the following character.
#[regex(r#""([^"\\]|\\.)*""#, |lex| {
let s = lex.slice();
// Drop the surrounding quotes. Escape sequences are kept verbatim, not decoded.
s[1..s.len()-1].to_string()
})]
#[fmt("string")]
String(String),
},
// Names each opening/closing token pair.
// NOTE(review): what the macro generates from these is not visible in this file.
delimiters: {
Brace => (LBrace, RBrace),
Paren => (LParen, RParen),
Bracket => (LBracket, RBracket),
},
// NOTE(review): presumably the derive lists applied to the generated span and
// token types. `LexError` derives `Debug`, `Clone` and `PartialEq` while
// holding a `Span`, so `Span` must keep implementing those three.
span_derives: [Debug, Clone, PartialEq, Eq, Hash],
token_derives: [Clone, PartialEq, Debug],
}
impl LexError {
    /// Builds a [`LexError::Expected`] for a token mismatch.
    ///
    /// `D` identifies what the caller wanted (its `D::fmt()` text becomes
    /// `expect`); `found` is the token that was actually seen, stored as its
    /// `Display` rendering.
    pub fn expected<D: Diagnostic>(found: &Token) -> Self {
        let expect = D::fmt();
        let found = found.to_string();
        Self::Expected { expect, found }
    }

    /// Builds a [`LexError::Empty`] for the case where `D` was wanted but
    /// there was no token to look at.
    pub fn empty<D: Diagnostic>() -> Self {
        let expect = D::fmt();
        Self::Empty { expect }
    }
}
impl synkit::SpannedError for LexError {
    type Span = Span;

    /// Wraps `self` in [`LexError::Spanned`], boxing it as the source error
    /// and storing `span` next to it.
    fn with_span(self, span: Span) -> Self {
        let source = Box::new(self);
        Self::Spanned { source, span }
    }

    /// Returns the span stored on a [`LexError::Spanned`] value.
    ///
    /// Only the outermost error is inspected; every other variant yields
    /// `None`.
    fn span(&self) -> Option<&Span> {
        match self {
            Self::Spanned { span, .. } => Some(span),
            Self::Unknown | Self::Expected { .. } | Self::Empty { .. } => None,
        }
    }
}
/// One `name: type` entry inside a struct body.
#[derive(Debug, Clone)]
pub struct StructField {
/// Field name.
pub name: Spanned<tokens::IdentToken>,
/// The `:` between the name and the type.
pub colon: Spanned<tokens::ColonToken>,
/// Field type, held as a single identifier token.
pub ty: Spanned<tokens::IdentToken>,
}
impl Peek for StructField {
/// A field can begin wherever the next token is an identifier.
fn is(token: &Token) -> bool {
tokens::IdentToken::is(token)
}
}
impl Parse for StructField {
    /// Reads a field as three consecutive items: name, `:`, type.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by the stream; the remaining
    /// parts are not attempted after a failure.
    fn parse(stream: &mut TokenStream) -> Result<Self, LexError> {
        let name = stream.parse()?;
        let colon = stream.parse()?;
        let ty = stream.parse()?;
        Ok(Self { name, colon, ty })
    }
}
impl ToTokens for StructField {
    /// Prints the field as name, colon, one space, then the type
    /// (for example `x: i32`).
    fn write(&self, p: &mut Printer) {
        use synkit::Printer as _;

        let Self { name, colon, ty } = self;
        p.token(&name.value.token());
        p.token(&colon.value.token());
        // The only space in a field sits between the colon and the type.
        p.space();
        p.token(&ty.value.token());
    }
}
/// A parsed `struct Name { ... }` definition.
///
/// Keyword, name and both braces are kept with their spans. The commas
/// between fields are consumed by the parser but not stored.
#[derive(Debug, Clone)]
pub struct StructDef {
/// The `struct` keyword.
pub kw_struct: Spanned<tokens::KwStructToken>,
/// Name of the struct.
pub name: Spanned<tokens::IdentToken>,
/// Opening `{`.
pub lbrace: Spanned<tokens::LBraceToken>,
/// Fields in source order; empty for `struct Name { }`.
pub fields: Vec<Spanned<StructField>>,
/// Closing `}`.
pub rbrace: Spanned<tokens::RBraceToken>,
}
impl Peek for StructDef {
/// A struct definition begins with the `struct` keyword.
fn is(token: &Token) -> bool {
tokens::KwStructToken::is(token)
}
}
impl Parse for StructDef {
    /// Parses `struct Name { field, field, ... }`.
    ///
    /// Fields are separated by commas and a trailing comma after the last
    /// field is accepted. An empty body (`struct Name { }`) yields an empty
    /// `fields` vector.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by the stream. Two fields that
    /// are not separated by a comma are rejected: the second field is left
    /// unconsumed, so the closing-brace parse fails on it.
    fn parse(stream: &mut TokenStream) -> Result<Self, LexError> {
        let kw_struct = stream.parse()?;
        let name = stream.parse()?;
        let lbrace = stream.parse()?;

        let mut fields = Vec::new();
        while stream.peek::<StructField>() {
            fields.push(stream.parse()?);
            // Only a comma may introduce another field. Without one the
            // list is over and whatever follows must be the closing brace.
            if !stream.peek::<tokens::CommaToken>() {
                break;
            }
            let _comma: Spanned<tokens::CommaToken> = stream.parse()?;
        }

        let rbrace = stream.parse()?;
        Ok(Self {
            kw_struct,
            name,
            lbrace,
            fields,
            rbrace,
        })
    }
}
impl ToTokens for StructDef {
    /// Prints the definition with one field per line.
    ///
    /// The header is `struct`, space, name, space, `{`. Each field goes on
    /// its own line inside an indent level, with a `,` after every field
    /// except the last. The closing brace goes on a new line after the
    /// dedent.
    fn write(&self, p: &mut Printer) {
        use synkit::Printer as _;

        p.token(&self.kw_struct.value.token());
        p.space();
        p.token(&self.name.value.token());
        p.space();
        p.token(&self.lbrace.value.token());

        p.indent();
        let field_count = self.fields.len();
        for (position, field) in self.fields.iter().enumerate() {
            p.newline();
            field.value.write(p);
            // Separator only between fields: nothing after the final one.
            let is_last = position + 1 == field_count;
            if !is_last {
                p.word(",");
            }
        }
        p.dedent();

        p.newline();
        p.token(&self.rbrace.value.token());
    }
}
pub trait AstVisitor {
fn visit_struct_def(&mut self, node: &StructDef) {
self.walk_struct_def(node);
}
fn visit_struct_field(&mut self, node: &StructField) {
self.walk_struct_field(node);
}
fn visit_ident(&mut self, _node: &tokens::IdentToken) {}
fn walk_struct_def(&mut self, node: &StructDef) {
self.visit_ident(&node.name.value);
for field in &node.fields {
self.visit_struct_field(&field.value);
}
}
fn walk_struct_field(&mut self, node: &StructField) {
self.visit_ident(&node.name.value);
self.visit_ident(&node.ty.value);
}
}
/// Visitor that records every identifier it is shown, in visit order.
struct IdentCollector {
/// Collected identifier texts.
idents: Vec<String>,
}
impl AstVisitor for IdentCollector {
/// Appends a clone of the identifier token's first tuple field to `idents`.
fn visit_ident(&mut self, node: &tokens::IdentToken) {
self.idents.push(node.0.clone());
}
}
/// Visitor that counts how many struct fields it visits.
struct FieldCounter {
/// Number of fields seen so far.
count: usize,
}
impl AstVisitor for FieldCounter {
/// Counts the field, then continues with the default traversal so the
/// field's identifiers are still visited.
fn visit_struct_field(&mut self, node: &StructField) {
self.count += 1;
self.walk_struct_field(node);
}
}
// Tests for the generated lexer/stream/printer and for the hand-written
// `StructDef` / `StructField` parsers, printers and visitors.
#[cfg(test)]
mod tests {
use super::*;
// Lexes a minimal struct and checks the non-whitespace token kinds in order.
#[test]
fn test_lexer_basic_tokens() {
let source = "struct Foo { }";
let ts = stream::TokenStream::lex(source).expect("lexing failed");
let all = ts.all();
assert!(!all.is_empty());
// `all()` includes whitespace tokens, so they are filtered out by hand.
let kinds: Vec<_> = all
.iter()
.filter(|t| !matches!(&t.value, Token::Space | Token::Tab | Token::Newline))
.map(|t| &t.value)
.collect();
assert!(matches!(kinds[0], Token::KwStruct));
assert!(matches!(kinds[1], Token::Ident(s) if s == "Foo"));
assert!(matches!(kinds[2], Token::LBrace));
assert!(matches!(kinds[3], Token::RBrace));
}
// Same idea with newlines in the input and one field in the body.
#[test]
fn test_lexer_with_whitespace() {
let source = "struct Foo\n{\n x: i32\n}";
let ts = stream::TokenStream::lex(source).expect("lexing failed");
let all = ts.all();
let kinds: Vec<_> = all
.iter()
.map(|t| &t.value)
.filter(|t| !matches!(t, Token::Space | Token::Tab | Token::Newline))
.collect();
assert!(matches!(kinds[0], Token::KwStruct));
assert!(matches!(kinds[1], Token::Ident(s) if s == "Foo"));
assert!(matches!(kinds[2], Token::LBrace));
assert!(matches!(kinds[3], Token::Ident(s) if s == "x"));
assert!(matches!(kinds[4], Token::Colon));
assert!(matches!(kinds[5], Token::Ident(s) if s == "i32"));
assert!(matches!(kinds[6], Token::RBrace));
}
// `next()` must step over the space between the two tokens.
#[test]
fn test_token_stream_skips_whitespace() {
use synkit::TokenStream as _;
let source = "struct Foo";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let tok1 = ts.next().expect("expected token");
assert!(matches!(tok1.value, Token::KwStruct));
let tok2 = ts.next().expect("expected token");
assert!(matches!(tok2.value, Token::Ident(s) if s == "Foo"));
}
// Parses a two-field struct written across several lines.
#[test]
fn test_parse_struct() {
let source = r#"struct Point {
x: i32,
y: i32
}"#;
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
assert_eq!(*parsed.value.name.value, "Point");
assert_eq!(parsed.value.fields.len(), 2);
assert_eq!(*parsed.value.fields[0].value.name.value, "x");
assert_eq!(*parsed.value.fields[0].value.ty.value, "i32");
assert_eq!(*parsed.value.fields[1].value.name.value, "y");
assert_eq!(*parsed.value.fields[1].value.ty.value, "i32");
}
// Checks the byte offsets recorded on lexed tokens.
#[test]
fn test_span_tracking() {
use synkit::SpanLike;
let source = "struct Foo { }";
let ts = stream::TokenStream::lex(source).expect("lexing failed");
let all = ts.all();
// Index 0 is `struct` (bytes 0..6).
assert_eq!(all[0].span.start(), 0);
assert_eq!(all[0].span.end(), 6);
// Index 1 is the space, so index 2 is `Foo` (bytes 7..10).
assert_eq!(all[2].span.start(), 7);
assert_eq!(all[2].span.end(), 10);
}
// A number where an identifier is required must be a parse error.
// NOTE(review): despite the name, only `is_err()` is asserted; the span on
// the returned error is never inspected.
#[test]
fn test_error_with_span() {
let source = "struct 123";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let _kw: Spanned<tokens::KwStructToken> = ts.parse().expect("parse struct kw");
let err: Result<Spanned<tokens::IdentToken>, _> = ts.parse();
assert!(err.is_err());
}
// `peek` may be called repeatedly without advancing the stream.
#[test]
fn test_peek_without_consume() {
let source = "struct Foo";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
assert!(ts.peek::<tokens::KwStructToken>());
assert!(ts.peek::<tokens::KwStructToken>());
let _: Spanned<tokens::KwStructToken> = ts.parse().unwrap();
assert!(ts.peek::<tokens::IdentToken>());
assert!(!ts.peek::<tokens::KwStructToken>());
}
// Saves the cursor, consumes two tokens, rewinds and re-parses the first.
// NOTE(review): only `cursor()` / `rewind()` are exercised; nothing here
// calls a fork operation even though the name mentions one.
#[test]
fn test_fork_and_rewind() {
use synkit::TokenStream as _;
let source = "struct Foo { }";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let pos = ts.cursor();
let _: Spanned<tokens::KwStructToken> = ts.parse().unwrap();
let _: Spanned<tokens::IdentToken> = ts.parse().unwrap();
ts.rewind(pos);
let kw: Spanned<tokens::KwStructToken> = ts.parse().unwrap();
assert_eq!(kw.value.token(), Token::KwStruct);
}
// `fmt()` returns the `#[fmt(..)]` text when one is given, otherwise the
// token literal.
#[test]
fn test_diagnostic_fmt() {
assert_eq!(tokens::IdentToken::fmt(), "identifier");
assert_eq!(tokens::NumberToken::fmt(), "number");
assert_eq!(tokens::KwStructToken::fmt(), "struct");
}
// `Display` on a token gives its literal text or its payload.
#[test]
fn test_token_display() {
assert_eq!(format!("{}", Token::KwStruct), "struct");
assert_eq!(format!("{}", Token::LBrace), "{");
assert_eq!(format!("{}", Token::Ident("foo".to_string())), "foo");
assert_eq!(format!("{}", Token::Number(42)), "42");
}
// Tokens and an explicit space are written out verbatim.
#[test]
fn test_printer_basic() {
use synkit::Printer as _;
let mut printer = printer::Printer::new();
printer.token(&Token::KwStruct);
printer.space();
printer.token(&Token::Ident("Foo".to_string()));
assert_eq!(printer.into_string(), "struct Foo");
}
// A word written after `indent()` + `newline()` must be preceded by
// whitespace in the output.
#[test]
fn test_printer_indentation() {
use synkit::Printer as _;
let mut printer = printer::Printer::new();
printer.token(&Token::LBrace);
printer.indent();
printer.newline();
printer.word("content");
printer.dedent();
printer.newline();
printer.token(&Token::RBrace);
let output = printer.into_string();
assert!(output.contains(" content"));
}
// Number and string payloads; the string payload has its quotes removed.
#[test]
fn test_numbers_and_strings() {
let source = r#"123 "hello world""#;
let ts = stream::TokenStream::lex(source).expect("lexing failed");
let all = ts.all();
assert!(matches!(&all[0].value, Token::Number(123)));
// Index 1 is the space between the two literals.
assert!(matches!(&all[2].value, Token::String(s) if s == "hello world"));
}
// A struct with no fields parses to an empty `fields` vector.
#[test]
fn test_empty_struct() {
let source = "struct Empty { }";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
assert_eq!(*parsed.value.name.value, "Empty");
assert!(parsed.value.fields.is_empty());
}
// Table-driven check of the struct name and field names for several inputs.
#[test_case::test_case("struct A { x: T }", "A", &["x"]; "single field")]
#[test_case::test_case("struct B { a: X, b: Y, c: Z }", "B", &["a", "b", "c"]; "multiple fields")]
#[test_case::test_case("struct C { }", "C", &[]; "empty struct")]
fn test_struct_parsing_variants(source: &str, expected_name: &str, expected_fields: &[&str]) {
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
assert_eq!(&**parsed.value.name.value, expected_name);
// The length check comes first because `zip` below would silently stop at
// the shorter side.
assert_eq!(parsed.value.fields.len(), expected_fields.len());
for (field, expected) in parsed.value.fields.iter().zip(expected_fields.iter()) {
assert_eq!(&**field.value.name.value, *expected);
}
}
// Identifiers are visited as: struct name, then each field's name and type.
#[test]
fn test_visitor_collect_idents() {
let source = "struct Point { x: i32, y: f64 }";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
let mut collector = IdentCollector { idents: Vec::new() };
collector.visit_struct_def(&parsed.value);
assert_eq!(collector.idents, vec!["Point", "x", "i32", "y", "f64"]);
}
// The overriding visitor sees every field exactly once.
#[test]
fn test_visitor_count_fields() {
let source = "struct Data { a: A, b: B, c: C, d: D }";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
let mut counter = FieldCounter { count: 0 };
counter.visit_struct_def(&parsed.value);
assert_eq!(counter.count, 4);
}
// Prints a hand-built field; the spans passed to `Spanned::new` do not
// affect the output.
#[test]
fn test_to_tokens_struct_field() {
let field = StructField {
name: Spanned::new(0, 1, tokens::IdentToken::new("x".to_string())),
colon: Spanned::new(1, 2, tokens::ColonToken::new()),
ty: Spanned::new(3, 6, tokens::IdentToken::new("i32".to_string())),
};
let output = field.to_string_formatted();
assert_eq!(output, "x: i32");
}
// Parse-then-print smoke test: checks fragments of the output, not the exact layout.
#[test]
fn test_to_tokens_struct_def() {
let source = "struct Point { x: i32, y: i32 }";
let mut ts = stream::TokenStream::lex(source).expect("lexing failed");
let parsed: Spanned<StructDef> = ts.parse().expect("parsing failed");
let output = parsed.value.to_string_formatted();
assert!(output.contains("struct"));
assert!(output.contains("Point"));
assert!(output.contains("x: i32"));
assert!(output.contains("y: i32"));
}
}