// ungrammar/lib.rs

//! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
//!
//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
//! ambiguous, non LL, non LR, etc.
//!
//! See this
//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
//! for details.

#![deny(missing_debug_implementations)]
#![deny(missing_docs)]
#![deny(rust_2018_idioms)]

mod error;
mod lexer;
mod parser;

use std::{ops, str::FromStr};

pub use error::{Error, Result};

22/// Returns a Rust grammar.
23pub fn rust_grammar() -> Grammar {
24    let src = include_str!("../rust.ungram");
25    src.parse().unwrap()
26}
27
/// A grammar node, like `A = 'b' | 'c'`.
///
/// A [`Node`] is a cheap copyable handle; index a [`Grammar`] with it to get
/// the corresponding [`NodeData`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Node(usize);

/// A grammar token, written in single quotes, like `'+'` or `'struct'`.
///
/// A [`Token`] is a cheap copyable handle; index a [`Grammar`] with it to get
/// the corresponding [`TokenData`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Token(usize);

42/// An Ungrammar grammar.
43#[derive(Default, Debug)]
44pub struct Grammar {
45    nodes: Vec<NodeData>,
46    tokens: Vec<TokenData>,
47}
48
49impl FromStr for Grammar {
50    type Err = Error;
51    fn from_str(s: &str) -> Result<Self> {
52        let tokens = lexer::tokenize(s)?;
53        parser::parse(tokens)
54    }
55}
56
57impl Grammar {
58    /// Returns an iterator over all nodes in the grammar.
59    pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
60        (0..self.nodes.len()).map(Node)
61    }
62
63    /// Returns an iterator over all tokens in the grammar.
64    pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
65        (0..self.tokens.len()).map(Token)
66    }
67}
68
69impl ops::Index<Node> for Grammar {
70    type Output = NodeData;
71    fn index(&self, Node(index): Node) -> &NodeData {
72        &self.nodes[index]
73    }
74}
75
76impl ops::Index<Token> for Grammar {
77    type Output = TokenData;
78    fn index(&self, Token(index): Token) -> &TokenData {
79        &self.tokens[index]
80    }
81}
82
83/// Data about a node.
84#[derive(Debug)]
85pub struct NodeData {
86    /// The name of the node.
87    ///
88    /// In the rule `A = 'b' | 'c'`, this is `"A"`.
89    pub name: String,
90    /// The rule for this node.
91    ///
92    /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
93    pub rule: Rule,
94}
95
/// Data about a token.
#[derive(Debug)]
pub struct TokenData {
    /// The token's name (its quoted spelling in the grammar source).
    pub name: String,
}

103/// A production rule.
104#[derive(Debug, Eq, PartialEq)]
105pub enum Rule {
106    /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
107    Labeled {
108        /// The label.
109        label: String,
110        /// The rule.
111        rule: Box<Rule>,
112    },
113    /// A node, like `A`.
114    Node(Node),
115    /// A token, like `'struct'`.
116    Token(Token),
117    /// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
118    Seq(Vec<Rule>),
119    /// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
120    Alt(Vec<Rule>),
121    /// An optional rule, like `A?`.
122    Opt(Box<Rule>),
123    /// A repeated rule, like `A*`.
124    Rep(Box<Rule>),
125}
126
#[test]
fn smoke() {
    // The grammar of Ungrammar itself must round-trip through the parser.
    let src = include_str!("../ungrammar.ungram");
    src.parse::<Grammar>().unwrap();
}

#[test]
fn test_rust_grammar() {
    // Ensure the bundled Rust grammar parses without panicking.
    rust_grammar();
}