whenever_parser/
ast.rs

1// Grammar:
// The grammar has been refined so that alternations and sequences don't mix,
// which allows all alternations to be expressed through traits and all
// sequences through structs.
5// Lowercase means nonterminal, uppercase means terminal (tokens).
6//
7// Root:
8// line := NUMBER statement SEMICOLON
9//
10// number := absolutenumber | unopnum | binopnum | parensnum | stringtonum
11// absolutenumber := absnum // This avoids alternation in alternation
12// absnum := NUMBER | n | read
13// unmathop := PLUS | MINUS
14// unopnum := unmathop number
15// binmathop := PLUS | MINUS | MATHOP
16// binopnum := number binmathop number
17// parensnum := LPAREN number RPAREN
18//
19// boolean := unopbool | binopbool | binopnumbool | parensbool | numtobool
20// unopbool := UNBOOLOP boolean
21// binopbool := boolean BINBOOLOP boolean
22// binopnumbool := number BINNUMBOOLOP number
23// parensbool := LPAREN boolean RPAREN
24//
25// n := N LPAREN number RPAREN
26// read := READ LPAREN RPAREN
27//
28// statement := lineoperations | again | defer | forget | print
29// lineoperations := lineops // ditto
30// lineops := lineop | lineoplist
31// lineop := singlelineop // ditto
32// singlelineop := numtolineop | countlineop
33// countlineop := number SHARP number
34// lineoplist := lineop COMMA lineops
35// again := AGAIN LPAREN boolean RPAREN statement
36// defer := DEFER LPAREN boolean RPAREN statement
37// forget := FORGET LPAREN boolean RPAREN statement
38// print := PRINT LPAREN string RPAREN
39//
40// string := STRING | u | concat | numtostring
41// concat := string PLUS string
42// u := U LPAREN absnum RPAREN
43//
// These conversions are not straightforward and will require their own nodes in
// the AST:
46// numtobool := number
47// numtolineop := number
48// numtostring := number
49// stringtonum := string
50
51use std::ptr;
52
53// Traits defining relationships between nodes
/// Interface implemented by the nodes of the syntax tree so that the tree can
/// be rendered in Graphviz's dot format.
pub trait Graph<'a> {
    /// Returns the slice of text spanned by the node.
    fn get_str(&self) -> &'a str;
    /// Returns the label of the node.
    ///
    /// `to_dot` inserts it verbatim after `label=`, so the returned text has
    /// to carry its own quoting.
    fn get_label(&self) -> String;
    /// Returns the dot statements describing the children of the node: one
    /// arrow per child, each followed by the child's own `to_dot` output.
    ///
    /// TODO: Find a non-intrusive fashion to do the same thing.
    fn to_dot_recurse(&self) -> String;

    /// Returns a unique identifier for this node.
    fn get_id(&self) -> String;
    /// Returns the subtree rooted at `self` in Graphviz's dot format: the
    /// declaration of this node followed by the output of `to_dot_recurse`.
    fn to_dot(&self) -> String
    {
        let declaration = format!("  \"{}\" [label={}];\n",
                                  self.get_id(), self.get_label());
        declaration + &self.to_dot_recurse()
    }
}
78
79/// Returns the output of `to_dot()` with identifiers normalized for
80/// consistency.
81///
82/// Replaces all occurrences of pointers with their offset from
83/// `root.get_str().as_ptr()`.
84pub fn to_dot_normalized<'a>(root: &dyn Graph<'a>) -> String
85{
86    let dot = root.to_dot();
87    let base = root.get_str().as_ptr() as usize;
88
89    let hexsize = format!("{:x}", base).len(); // TODO: atoi maybe ?
90    let mut split = dot.split("0x");
91
92    let mut normalized = String::from(split.next().unwrap());
93
94    for abnormal in split
95    {
96        let (addr, remainder) = abnormal.split_at(hexsize);
97        let offset = usize::from_str_radix(addr, 16).unwrap() - base;
98        normalized += format!("{:#x}", offset).as_str();
99        normalized += remainder;
100    }
101
102    normalized
103}
104
105// Traits for all alternations, and struct wrappers
106//
107// Alternation structs are not part of the graph, they are just here to wrap
108// around a Box<> and provide a consistent interface.
/// Interface of the alternation wrapper structs.
///
/// The implementations generated by `define_alternation!` forward each method
/// to the boxed alternative held by the wrapper.
trait Alternation<'a>
{
    /// Returns the text range of the wrapped alternative.
    fn get_str(&self) -> &'a str;
    /// Returns the identifier of the wrapped alternative.
    fn get_id(&self) -> String;
    /// Returns the dot representation of the wrapped alternative.
    fn to_dot(&self) -> String;
}
// Declares one alternation of the grammar.
//
// `define_alternation!(Wrapper, Marker)` generates:
// - `Marker`, an empty trait (with `Graph` as supertrait) to be implemented by
//   every node that is a valid alternative;
// - `Wrapper`, a struct owning one boxed alternative in its `alt` field;
// - an `Alternation` implementation for `Wrapper` that forwards every call to
//   the boxed alternative.
macro_rules! define_alternation
{
    ($wrapper:ident, $marker:ident) =>
    {
        pub trait $marker<'a>: Graph<'a> {}

        pub struct $wrapper<'a>
        {
            pub alt: Box<dyn $marker<'a> + 'a>
        }

        impl<'a> Alternation<'a> for $wrapper<'a>
        {
            fn get_str(&self) -> &'a str
            {
                self.alt.get_str()
            }
            fn get_id(&self) -> String
            {
                self.alt.get_id()
            }
            fn to_dot(&self) -> String
            {
                self.alt.to_dot()
            }
        }
    }
}
// Marks a node as a valid alternative of one or more alternations.
//
// `implement_alternations!(Node, MarkerA, MarkerB)` generates one empty
// implementation of each listed marker trait for `Node<'a>`.
macro_rules! implement_alternations
{
    ($node:ident, $($marker:ident),*) =>
    {
        $(
            impl<'a> $marker<'a> for $node<'a> {}
        )*
    }
}
139
// One wrapper struct and one marker trait per alternation of the grammar
// (arguments: wrapper struct name, then marker trait name).
define_alternation!(Number, NumberAlt);
define_alternation!(AbsNumber, AbsNumAlt);
define_alternation!(UnMathOp, UnMathOpAlt);
define_alternation!(BinMathOp, BinMathOpAlt);
define_alternation!(Boolean, BoolAlt);
define_alternation!(Statement, StatementAlt);
define_alternation!(LineOps, LineOpsAlt);
define_alternation!(SingleLineOp, SingleLineOpAlt);
define_alternation!(String_, StringAlt); // Fixing collision is too much work
149
150// Structs defining terminals
// Declares a terminal node (a token) named `$name`.
//
// The generated struct only holds `tok`, the slice of text covered by the
// token, and implements `Graph` as a leaf:
// - `get_str` returns `tok`;
// - `get_id` is built from the address and the length of `tok`;
// - `get_label` is `tok` as a double-quoted dot string, with `\` and `"`
//   escaped so that tokens containing them still produce a well-formed label
//   showing the token text as written;
// - `to_dot_recurse` is empty since a terminal has no children.
macro_rules! define_terminal
{
    ($name: ident) =>
    {
        pub struct $name<'a>
        {
            pub tok: &'a str
        }
        impl<'a> Graph<'a> for $name<'a>
        {
            fn get_str(&self) -> &'a str { self.tok }
            fn get_id(&self) -> String
            {
                format!("{:p}_{}", self.tok.as_ptr(), self.tok.len())
            }
            fn get_label(&self) -> String
            {
                // Backslashes first, otherwise the backslashes added for the
                // quotes would be escaped a second time.
                let escaped = self.tok
                    .replace('\\', "\\\\")
                    .replace('"', "\\\"");
                format!("\"{}\"", escaped)
            }
            fn to_dot_recurse(&self) -> String { String::new() }
        }
    };
}
171
// Keyword tokens.
define_terminal!(AgainToken);
define_terminal!(DeferToken);
define_terminal!(ForgetToken);
define_terminal!(NToken);
define_terminal!(PrintToken);
define_terminal!(ReadToken);
define_terminal!(UToken);

// PLUS and MINUS are valid both as unary and as binary math operators.
define_terminal!(PlusToken);
implement_alternations!(PlusToken, UnMathOpAlt, BinMathOpAlt);
define_terminal!(MinusToken);
implement_alternations!(MinusToken, UnMathOpAlt, BinMathOpAlt);

// STRING token and remaining operator tokens; MATHOP is binary only.
define_terminal!(StringToken);
implement_alternations!(StringToken, StringAlt);
define_terminal!(UnBoolOpToken);
define_terminal!(BinBoolOpToken);
define_terminal!(BinNumBoolOpToken);
define_terminal!(MathOpToken);
implement_alternations!(MathOpToken, BinMathOpAlt);

// Punctuation tokens.
define_terminal!(CommaToken);
define_terminal!(LeftParensToken);
define_terminal!(RightParensToken);
define_terminal!(SemicolonToken);
define_terminal!(SharpToken);
198
199pub struct NumberToken<'a>
200{
201    pub tok: &'a str,
202    pub val: usize
203}
204impl<'a> Graph<'a> for NumberToken<'a>
205{
206    fn get_str(&self) -> &'a str { self.tok }
207    fn get_id(&self) -> String
208    {
209        format!("{:p}_{}", self.tok.as_ptr(), self.tok.len())
210    }
211    fn get_label(&self) -> String
212    {
213        format!("\"{} ({})\"", self.tok, self.val)
214    }
215    fn to_dot_recurse(&self) -> String { String::new() }
216}
// absnum := NUMBER | n | read
implement_alternations!(NumberToken, AbsNumAlt);
218
219// Structs defining nonterminals
// Declares a nonterminal node (a sequence of the grammar) named `$name`.
//
// The arguments after the name are `field, Type` pairs, one per child, in
// order. The generated struct holds the children plus `range`, the slice of
// text going from the start of the leftmost child to the end of the rightmost
// one. It gets a `new` constructor computing `range` and a `Graph`
// implementation that draws one arrow per child.
macro_rules! define_nonterminal
{
    ($name:ident, $($field:ident, $fieldtype:ident),*) =>
    {
        pub struct $name<'a>
        {
            pub range: &'a str,
            $(pub $field: $fieldtype<'a>,)*
        }

        impl<'a> $name<'a>
        {
            // Builds the node from its children and computes the range they
            // span. Panics when the resulting range is empty or has no start.
            pub fn new($($field: $fieldtype<'a>,)*) -> $name<'a>
            {
                let mut first: *const u8 = ptr::null();
                let mut lowest = std::usize::MAX;
                let mut highest = 0usize;
                $(
                    let text = $field.get_str();
                    let begin = text.as_ptr() as usize;
                    let end = begin + text.len();

                    // Keep the child starting at the lowest address...
                    if begin < lowest
                    {
                        first = text.as_ptr();
                        lowest = begin;
                    }
                    // ...and the highest end address.
                    if end > highest
                    {
                        highest = end;
                    }
                )*
                let len = highest - lowest;
                if first.is_null() || len < 1
                {
                    panic!("Invalid range: {:p} len: {}", first, len);
                }
                // SAFETY (assumption, not checked here): every child borrows
                // from the same source buffer living for 'a, so the bytes
                // between the lowest start and the highest end belong to it.
                let bytes: &'a [u8] = unsafe {
                    std::slice::from_raw_parts(first, len)
                };
                let range: &'a str = std::str::from_utf8(bytes).unwrap();
                $name { range, $($field,)* }
            }
        }

        impl<'a> Graph<'a> for $name<'a>
        {
            fn get_str(&self) -> &'a str
            {
                self.range
            }
            fn get_id(&self) -> String
            {
                // Address and length of the range, then the node type.
                format!("{:p}_{}_{}", self.range.as_ptr(), self.range.len(),
                        stringify!($name))
            }
            fn get_label(&self) -> String
            {
                concat!("<<I>", stringify!($name), "</I>>").to_string()
            }
            fn to_dot_recurse(&self) -> String
            {
                let parent = self.get_id();
                let mut out = String::new();
                $(
                    out.push_str(&format!("  \"{}\" -> \"{}\";\n",
                                          parent, self.$field.get_id()));
                    out.push_str(&self.$field.to_dot());
                )*
                out
            }
        }
    }
}
// Root: line := NUMBER statement SEMICOLON
define_nonterminal!(Line, num, NumberToken,
                          stmt, Statement,
                          semi, SemicolonToken);
// absolutenumber := absnum
define_nonterminal!(AbsoluteNumber, num, AbsNumber);
implement_alternations!(AbsoluteNumber, NumberAlt);
// unopnum := unmathop number
define_nonterminal!(UnOpNumber, op, UnMathOp,
                                num, Number);
implement_alternations!(UnOpNumber, NumberAlt);
// binopnum := number binmathop number
define_nonterminal!(BinOpNumber, num1, Number,
                                 op, BinMathOp,
                                 num2, Number);
implement_alternations!(BinOpNumber, NumberAlt);
// parensnum := LPAREN number RPAREN
define_nonterminal!(ParensNumber, lparen, LeftParensToken,
                                  num, Number,
                                  rparen, RightParensToken);
implement_alternations!(ParensNumber, NumberAlt);
// unopbool := UNBOOLOP boolean
define_nonterminal!(UnOpBoolean, op, UnBoolOpToken,
                                 boolean, Boolean);
implement_alternations!(UnOpBoolean, BoolAlt);
// binopbool := boolean BINBOOLOP boolean
define_nonterminal!(BinOpBoolean, boolean1, Boolean,
                                  op, BinBoolOpToken,
                                  boolean2, Boolean);
implement_alternations!(BinOpBoolean, BoolAlt);
// binopnumbool := number BINNUMBOOLOP number
define_nonterminal!(BinOpNumBoolean, num1, Number,
                                     op, BinNumBoolOpToken,
                                     num2, Number);
implement_alternations!(BinOpNumBoolean, BoolAlt);
// parensbool := LPAREN boolean RPAREN
define_nonterminal!(ParensBoolean, lparen, LeftParensToken,
                                   boolean, Boolean,
                                   rparen, RightParensToken);
implement_alternations!(ParensBoolean, BoolAlt);
// n := N LPAREN number RPAREN
define_nonterminal!(N, keyword, NToken,
                       lparen, LeftParensToken,
                       num, Number,
                       rparen, RightParensToken);
implement_alternations!(N, AbsNumAlt);
// read := READ LPAREN RPAREN
define_nonterminal!(Read, keyword, ReadToken,
                          lparen, LeftParensToken,
                          rparen, RightParensToken);
implement_alternations!(Read, AbsNumAlt);
// lineoperations := lineops
define_nonterminal!(LineOperations, lineops, LineOps);
implement_alternations!(LineOperations, StatementAlt);
// lineop := singlelineop
define_nonterminal!(LineOp, slo, SingleLineOp);
implement_alternations!(LineOp, LineOpsAlt);
// countlineop := number SHARP number
define_nonterminal!(CountLineOp, line, Number,
                                 sharp, SharpToken,
                                 count, Number);
implement_alternations!(CountLineOp, SingleLineOpAlt);
// lineoplist := lineop COMMA lineops
define_nonterminal!(LineOpList, lineop, LineOp,
                                comma, CommaToken,
                                list, LineOps);
implement_alternations!(LineOpList, LineOpsAlt);
// again := AGAIN LPAREN boolean RPAREN statement
define_nonterminal!(Again, keyword, AgainToken,
                           lparen, LeftParensToken,
                           boolean, Boolean,
                           rparen, RightParensToken,
                           statement, Statement);
implement_alternations!(Again, StatementAlt);
// defer := DEFER LPAREN boolean RPAREN statement
define_nonterminal!(Defer, keyword, DeferToken,
                           lparen, LeftParensToken,
                           boolean, Boolean,
                           rparen, RightParensToken,
                           statement, Statement);
implement_alternations!(Defer, StatementAlt);
// forget := FORGET LPAREN boolean RPAREN statement
define_nonterminal!(Forget, keyword, ForgetToken,
                            lparen, LeftParensToken,
                            boolean, Boolean,
                            rparen, RightParensToken,
                            statement, Statement);
implement_alternations!(Forget, StatementAlt);
// print := PRINT LPAREN string RPAREN
define_nonterminal!(Print, keyword, PrintToken,
                           lparen, LeftParensToken,
                           string, String_,
                           rparen, RightParensToken);
implement_alternations!(Print, StatementAlt);
// concat := string PLUS string
define_nonterminal!(Concat, str1, String_,
                            op, PlusToken,
                            str2, String_);
implement_alternations!(Concat, StringAlt);
// u := U LPAREN absnum RPAREN
define_nonterminal!(U, keyword, UToken,
                       lparen, LeftParensToken,
                       num, AbsNumber,
                       rparen, RightParensToken);
implement_alternations!(U, StringAlt);
378
379// Nonterminals for conversions
// Declares a conversion node named `$name` wrapping a single child of type
// `$from`, stored in the field `$varname`.
//
// `$to` is only used in the label ("$from ⮕ $to"). The node spans the same
// text as its child, its identifier is the child's identifier followed by
// "_$name", and it draws a single arrow towards the child.
macro_rules! define_conversion
{
    ($name:ident, $from:ident, $to:ty, $varname:ident) =>
    {
        pub struct $name<'a>
        {
            pub $varname: $from<'a>
        }

        impl<'a> Graph<'a> for $name<'a>
        {
            fn get_str(&self) -> &'a str
            {
                self.$varname.get_str()
            }
            fn get_id(&self) -> String
            {
                let mut id = self.$varname.get_id();
                id.push_str(concat!('_', stringify!($name)));
                id
            }
            fn get_label(&self) -> String
            {
                concat!("<<I>", stringify!($from), " ⮕ ",
                        stringify!($to), "</I>>").to_string()
            }
            fn to_dot_recurse(&self) -> String
            {
                let mut out = format!("  \"{}\" -> \"{}\";\n",
                                      self.get_id(),
                                      self.$varname.get_id());
                out.push_str(&self.$varname.to_dot());
                out
            }
        }
    }
}
// numtobool := number
define_conversion!(NumToBool, Number, Boolean, num);
implement_alternations!(NumToBool, BoolAlt);
// numtolineop := number
define_conversion!(NumToLineOp, Number, SingleLineOp, num);
implement_alternations!(NumToLineOp, SingleLineOpAlt);
// numtostring := number
define_conversion!(NumToString, Number, String, num);
implement_alternations!(NumToString, StringAlt);
415pub struct StringToNum<'a>
416{
417    pub string: String_<'a>
418}
419impl<'a> Graph<'a> for StringToNum<'a>
420{
421    fn get_str(&self) -> &'a str { self.string.get_str() }
422    fn get_id(&self) -> String { self.string.get_id() + "_StringToNum" }
423    fn get_label(&self) -> String { String::from("<<I>String ⮕ Number</I>>") }
424    fn to_dot_recurse(&self) -> String
425    {
426        format!("  \"{}\" -> \"{}\";\n", self.get_id(), self.string.get_id())
427            + self.string.to_dot().as_str()
428    }
429}
// number := absolutenumber | unopnum | binopnum | parensnum | stringtonum
implement_alternations!(StringToNum, NumberAlt);