simploxide_bindgen/syntax/
mod.rs

//! A simple greedy lexer for the SimpleX API command syntax, plus a `binding` submodule
//! that generates the actual interpreters.
3
4pub mod binding;
5
6pub use binding::Interpretable;
7
/// A single token produced by the [`Lexer`].
///
/// Every variant borrows from the syntax string that was passed to [`lex`], so no text is
/// copied while lexing.
#[derive(Debug, Clone, Copy)]
pub enum SyntaxElement<'a> {
    /// Verbatim text such as `/_command`. The lexer keeps a single terminating space or `=`
    /// inside the literal (e.g. `"/_address "`, `"ttl="`).
    Literal(&'a str),
    /// A `|`-separated list of alternatives, e.g. `@|#|*|`.
    EnumSubstitutions(EnumSubstitutions<'a>),
    /// One of `on`, `off` or `on|off`.
    MaybeBool(MaybeBool),
    /// `<member_name>`
    TrivialMemberSubstitution { member_name: &'a str },
    /// `<str(member_name)>`
    DelegateMemberSubstitution { member_name: &'a str },
    /// `<json(member_name)>`
    JsonMemberSubstitution { member_name: &'a str },
    /// `<member_name[0]>[,<member_name[1]>...]`
    ///
    /// `delim` is the text found between the `[` of the continuation and the following `<`
    /// (`,` in the example above).
    VecMemberSubstitution {
        member_name: &'a str,
        delim: &'a str,
    },
    /// `[whatever]`
    ///
    /// The text between the brackets is stored as-is; pass it to [`lex`] again to tokenize it.
    Optional { unparsed: &'a str },
}
30
31impl<'a> SyntaxElement<'a> {
32    fn new_enum_substitutions(subs: &'a str) -> Self {
33        Self::EnumSubstitutions(EnumSubstitutions(subs))
34    }
35
36    #[cfg(test)]
37    fn optional(&self) -> Option<&'a str> {
38        if let Self::Optional { unparsed } = self {
39            Some(unparsed)
40        } else {
41            None
42        }
43    }
44}
45
/// The raw text of a `|`-separated list of alternatives (e.g. `full|entity|messages`).
#[derive(Debug, Clone, Copy)]
pub struct EnumSubstitutions<'a>(&'a str);

impl<'a> EnumSubstitutions<'a> {
    /// Iterates over the individual alternatives.
    ///
    /// Each alternative is trimmed of surrounding whitespace; alternatives that are empty after
    /// trimming (such as the one after a trailing `|`) are skipped.
    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
        let raw: &'a str = self.0;

        raw.split('|').filter_map(|alternative| {
            let alternative = alternative.trim();
            if alternative.is_empty() {
                None
            } else {
                Some(alternative)
            }
        })
    }
}
54
/// A boolean switch in the command syntax: `on`, `off` or `on|off`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MaybeBool {
    /// The syntax contains the fixed value `on`.
    On,
    /// The syntax contains the fixed value `off`.
    Off,
    /// The syntax contains `on|off`, i.e. both values are accepted.
    Either,
}
62
/// Creates a [`Lexer`] over the command syntax string `s`.
///
/// The returned iterator yields one `Result<SyntaxElement, String>` per recognized element.
pub fn lex(s: &str) -> Lexer<'_> {
    Lexer::new(s)
}
66
/// An iterator that breaks a command syntax string into syntax elements.
pub struct Lexer<'a> {
    /// The part of the input that has not been consumed yet.
    syntax: &'a str,
}

impl<'a> Lexer<'a> {
    /// Starts lexing at the beginning of `syntax`; nothing is consumed until the iterator is
    /// advanced.
    fn new(syntax: &'a str) -> Self {
        Lexer { syntax }
    }
}
77
78// TODO: This impl needs some love. Refactor it to remove unwraps and handle errors uniformly
79impl<'a> Iterator for Lexer<'a> {
80    type Item = Result<SyntaxElement<'a>, String>;
81
82    fn next(&mut self) -> Option<Self::Item> {
83        const SIGNIFICANT_CHARS: &[char] = &[' ', '=', '<', '['];
84
85        enum NaughtyLiteral<'a> {
86            Literal(&'a str),
87            EnumSubs(&'a str),
88            Bool(MaybeBool),
89        }
90
91        // Need to distinguish literals to know how to deal with the trailing spaces. Normal
92        // literals include them while enum substitutions exclude them so spaces must become
93        // separate literals before/after EnumSubs
94        fn deduce_literal(s: &str) -> Result<NaughtyLiteral<'_>, String> {
95            if s.contains('|') {
96                if s.ends_with('=') {
97                    return Err(format!(
98                        "Unexpected `=` operator after enum substitution {s}"
99                    ));
100                }
101                let s = s.trim();
102
103                match s {
104                    "on|off" => Ok(NaughtyLiteral::Bool(MaybeBool::Either)),
105                    _ => Ok(NaughtyLiteral::EnumSubs(s)),
106                }
107            } else {
108                match s {
109                    "on" => Ok(NaughtyLiteral::Bool(MaybeBool::On)),
110                    "off" => Ok(NaughtyLiteral::Bool(MaybeBool::Off)),
111                    _ => Ok(NaughtyLiteral::Literal(s)),
112                }
113            }
114        }
115
116        if self.syntax.is_empty() {
117            return None;
118        }
119
120        let mut pos = match self.syntax.find(SIGNIFICANT_CHARS) {
121            Some(pos) => pos,
122            None => {
123                let s = std::mem::take(&mut self.syntax);
124                // No extra handling is required because Space is a significant character so it
125                // cannot be present in the None branch.
126                match deduce_literal(s) {
127                    Ok(NaughtyLiteral::Literal(lit)) => {
128                        return Some(Ok(SyntaxElement::Literal(lit)));
129                    }
130                    Ok(NaughtyLiteral::EnumSubs(enum_subs)) => {
131                        return Some(Ok(SyntaxElement::new_enum_substitutions(enum_subs)));
132                    }
133                    Ok(NaughtyLiteral::Bool(b)) => {
134                        return Some(Ok(SyntaxElement::MaybeBool(b)));
135                    }
136                    Err(e) => return Some(Err(e)),
137                }
138            }
139        };
140
141        if pos != 0 {
142            let mut ends_with_space = false;
143
144            let space_offset = ' '.len_utf8();
145            if self.syntax[pos..].starts_with(' ') || self.syntax[pos..].starts_with('=') {
146                pos += space_offset;
147                ends_with_space = true;
148            }
149
150            match deduce_literal(&self.syntax[..pos]) {
151                Ok(NaughtyLiteral::Literal(literal)) => {
152                    let (_, unparsed) = self.syntax.split_at(pos);
153                    self.syntax = unparsed;
154
155                    Some(Ok(SyntaxElement::Literal(literal)))
156                }
157                Ok(special) => {
158                    if ends_with_space {
159                        pos -= space_offset;
160                    }
161
162                    let (_, unparsed) = self.syntax.split_at(pos);
163                    self.syntax = unparsed;
164
165                    match special {
166                        NaughtyLiteral::EnumSubs(enum_subs) => {
167                            Some(Ok(SyntaxElement::new_enum_substitutions(enum_subs)))
168                        }
169                        NaughtyLiteral::Bool(b) => Some(Ok(SyntaxElement::MaybeBool(b))),
170                        NaughtyLiteral::Literal(_) => unreachable!(),
171                    }
172                }
173                Err(e) => {
174                    self.syntax = "";
175                    Some(Err(e))
176                }
177            }
178        } else if self.syntax.starts_with(' ') {
179            let end = self.syntax.find(|c: char| !c.is_whitespace())?;
180            let (spaces, unparsed) = self.syntax.split_at(end);
181            self.syntax = unparsed;
182
183            Some(Ok(SyntaxElement::Literal(spaces)))
184        } else if self.syntax.starts_with('=') {
185            let err = Err(format!(
186                "'=' is expected to be captured by the literals. Got uncaptured '=' at `{}`",
187                self.syntax
188            ));
189            self.syntax = "";
190            Some(err)
191        } else if self.syntax.starts_with('[') {
192            let end = self.syntax.find(']').unwrap();
193            let inner = &self.syntax['['.len_utf8()..end];
194            self.syntax = &self.syntax[end + ']'.len_utf8()..];
195
196            Some(Ok(SyntaxElement::Optional { unparsed: inner }))
197        } else if self.syntax.starts_with('<') {
198            let end = self.syntax.find('>').unwrap();
199            let inner = &self.syntax['<'.len_utf8()..end];
200            self.syntax = &self.syntax[end + '>'.len_utf8()..];
201
202            if let Some(name) = inner.strip_prefix("str(") {
203                let member_name = name.strip_suffix(')').unwrap();
204                Some(Ok(SyntaxElement::DelegateMemberSubstitution {
205                    member_name,
206                }))
207            } else if let Some(name) = inner.strip_prefix("json(") {
208                let member_name = name.strip_suffix(')').unwrap();
209                Some(Ok(SyntaxElement::JsonMemberSubstitution { member_name }))
210            } else if let Some(member_name) = inner.strip_suffix("[0]") {
211                let Some(mut delim_beg) = self.syntax.find('[') else {
212                    return Some(Err(format!(
213                        "Failed to find array continuation(`[<delim><member_name[1]>...]`) by looking ahead for '[' at {}",
214                        self.syntax
215                    )));
216                };
217                let Some(delim_end) = self.syntax[delim_beg..].find('<') else {
218                    return Some(Err(format!(
219                        "Failed to delimiter boundary in array conitnuation (`[<delim><member_name[1]>...]`) by looking ahead for '<' at {}",
220                        self.syntax
221                    )));
222                };
223
224                delim_beg += '['.len_utf8();
225
226                let delim = &self.syntax[delim_beg..delim_end];
227                let mut new_end = self.syntax.find("...]").unwrap();
228                new_end += "...]".len();
229                self.syntax = &self.syntax[new_end..];
230
231                Some(Ok(SyntaxElement::VecMemberSubstitution {
232                    member_name,
233                    delim,
234                }))
235            } else {
236                Some(Ok(SyntaxElement::TrivialMemberSubstitution {
237                    member_name: inner,
238                }))
239            }
240        } else {
241            let err = Err(format!("Unexpected syntax at: {}", self.syntax));
242            self.syntax = "";
243            Some(err)
244        }
245    }
246}
247
// Snapshot tests for the lexer. Each test lexes a command syntax string and compares the
// `Debug` output of the collected tokens against the inline `expect!` snapshot. Tests that
// produce `Optional` elements additionally lex the bracket contents ("nested") and snapshot
// those tokens too.
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::expect;

    // A literal that keeps its trailing space, followed by a plain `<member>` substitution.
    #[test]
    fn lexer_simple1() {
        let syntax = "/_address <userId>";
        let tokens: Vec<_> = lex(syntax).collect();

        expect![[r#"
            [
                Ok(
                    Literal(
                        "/_address ",
                    ),
                ),
                Ok(
                    TrivialMemberSubstitution {
                        member_name: "userId",
                    },
                ),
            ]
        "#]]
        .assert_debug_eq(&tokens);
    }

    // Literal text directly adjacent to an optional section on both sides; the optional
    // contents are then lexed separately.
    #[test]
    fn lexer_messy() {
        let syntax = "(_support[:<groupMemberId_>])";
        let tokens: Vec<_> = lex(syntax).collect();
        expect![[r#"
            [
                Ok(
                    Literal(
                        "(_support",
                    ),
                ),
                Ok(
                    Optional {
                        unparsed: ":<groupMemberId_>",
                    },
                ),
                Ok(
                    Literal(
                        ")",
                    ),
                ),
            ]
        "#]]
        .assert_debug_eq(&tokens);

        // Lex the contents of every `Optional` token.
        let nested: Vec<_> = tokens
            .iter()
            .filter_map(|tok| tok.as_ref().unwrap().optional())
            .map(|subsyntax| lex(subsyntax).collect::<Vec<_>>())
            .collect();

        expect![[r#"
            [
                [
                    Ok(
                        Literal(
                            ":",
                        ),
                    ),
                    Ok(
                        TrivialMemberSubstitution {
                            member_name: "groupMemberId_",
                        },
                    ),
                ],
            ]
        "#]]
        .assert_debug_eq(&nested);
    }

    // An enum substitution immediately followed by an optional section whose contents hold a
    // leading space, a `name=` literal and a fixed `off` value.
    #[test]
    fn lexer_moderate1() {
        let syntax = "full|entity|messages[ notify=off]";

        let tokens: Vec<_> = lex(syntax).collect();
        expect![[r#"
            [
                Ok(
                    EnumSubstitutions(
                        EnumSubstitutions(
                            "full|entity|messages",
                        ),
                    ),
                ),
                Ok(
                    Optional {
                        unparsed: " notify=off",
                    },
                ),
            ]
        "#]]
        .assert_debug_eq(&tokens);

        // Lex the contents of every `Optional` token.
        let nested: Vec<_> = tokens
            .iter()
            .filter_map(|tok| tok.as_ref().unwrap().optional())
            .map(|subsyntax| lex(subsyntax).collect::<Vec<_>>())
            .collect();

        expect![[r#"
            [
                [
                    Ok(
                        Literal(
                            " ",
                        ),
                    ),
                    Ok(
                        Literal(
                            "notify=",
                        ),
                    ),
                    Ok(
                        MaybeBool(
                            Off,
                        ),
                    ),
                ],
            ]
        "#]]
        .assert_debug_eq(&nested);
    }

    // `str(...)` and `json(...)` substitutions mixed with two consecutive optional sections.
    #[test]
    fn lexer_moderate2() {
        let syntax = "/_send <str(sendRef)>[ live=on][ ttl=<ttl>] json <json(composedMessages)>";
        let tokens: Vec<_> = lex(syntax).collect();

        expect![[r#"
            [
                Ok(
                    Literal(
                        "/_send ",
                    ),
                ),
                Ok(
                    DelegateMemberSubstitution {
                        member_name: "sendRef",
                    },
                ),
                Ok(
                    Optional {
                        unparsed: " live=on",
                    },
                ),
                Ok(
                    Optional {
                        unparsed: " ttl=<ttl>",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    Literal(
                        "json ",
                    ),
                ),
                Ok(
                    JsonMemberSubstitution {
                        member_name: "composedMessages",
                    },
                ),
            ]
        "#]]
        .assert_debug_eq(&tokens);

        // Lex the contents of every `Optional` token.
        let nested: Vec<_> = tokens
            .iter()
            .filter_map(|tok| tok.as_ref().unwrap().optional())
            .map(|s| lex(s).collect::<Vec<_>>())
            .collect();

        expect![[r#"
            [
                [
                    Ok(
                        Literal(
                            " ",
                        ),
                    ),
                    Ok(
                        Literal(
                            "live=",
                        ),
                    ),
                    Ok(
                        MaybeBool(
                            On,
                        ),
                    ),
                ],
                [
                    Ok(
                        Literal(
                            " ",
                        ),
                    ),
                    Ok(
                        Literal(
                            "ttl=",
                        ),
                    ),
                    Ok(
                        TrivialMemberSubstitution {
                            member_name: "ttl",
                        },
                    ),
                ],
            ]
        "#]]
        .assert_debug_eq(&nested);
    }

    // Array substitutions (`<x[0]>[,<x[1]>...]`) followed by an enum substitution in the first
    // syntax and by a `name=on|off` switch in the second one.
    #[test]
    fn lexer_complex1() {
        let syntax1 = "/_member role #<groupId> <groupMemberIds[0]>[,<groupMemberIds[1]>...] observer|author|member|moderator|admin|owner";
        let syntax2 =
            "/_block #<groupId> <groupMemberIds[0]>[,<groupMemberIds[1]>...] blocked=on|off";

        let tokens1: Vec<_> = lex(syntax1).collect();
        let tokens2: Vec<_> = lex(syntax2).collect();

        expect![[r##"
            [
                Ok(
                    Literal(
                        "/_member ",
                    ),
                ),
                Ok(
                    Literal(
                        "role ",
                    ),
                ),
                Ok(
                    Literal(
                        "#",
                    ),
                ),
                Ok(
                    TrivialMemberSubstitution {
                        member_name: "groupId",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    VecMemberSubstitution {
                        member_name: "groupMemberIds",
                        delim: ",",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    EnumSubstitutions(
                        EnumSubstitutions(
                            "observer|author|member|moderator|admin|owner",
                        ),
                    ),
                ),
            ]
        "##]]
        .assert_debug_eq(&tokens1);

        expect![[r##"
            [
                Ok(
                    Literal(
                        "/_block ",
                    ),
                ),
                Ok(
                    Literal(
                        "#",
                    ),
                ),
                Ok(
                    TrivialMemberSubstitution {
                        member_name: "groupId",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    VecMemberSubstitution {
                        member_name: "groupMemberIds",
                        delim: ",",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    Literal(
                        "blocked=",
                    ),
                ),
                Ok(
                    MaybeBool(
                        Either,
                    ),
                ),
            ]
        "##]]
        .assert_debug_eq(&tokens2);
    }

    // An array substitution immediately followed by an optional section, which must not be
    // mistaken for part of the array continuation.
    #[test]
    fn lexer_complex2() {
        let syntax =
            "/_remove #<groupId> <groupMemberIds[0]>[,<groupMemberIds[1]>...][ messages=on]";

        let tokens: Vec<_> = lex(syntax).collect();

        expect![[r##"
            [
                Ok(
                    Literal(
                        "/_remove ",
                    ),
                ),
                Ok(
                    Literal(
                        "#",
                    ),
                ),
                Ok(
                    TrivialMemberSubstitution {
                        member_name: "groupId",
                    },
                ),
                Ok(
                    Literal(
                        " ",
                    ),
                ),
                Ok(
                    VecMemberSubstitution {
                        member_name: "groupMemberIds",
                        delim: ",",
                    },
                ),
                Ok(
                    Optional {
                        unparsed: " messages=on",
                    },
                ),
            ]
        "##]]
        .assert_debug_eq(&tokens);

        // Lex the contents of every `Optional` token.
        let nested: Vec<_> = tokens
            .iter()
            .filter_map(|tok| tok.as_ref().unwrap().optional())
            .map(|s| lex(s).collect::<Vec<_>>())
            .collect();

        expect![[r#"
            [
                [
                    Ok(
                        Literal(
                            " ",
                        ),
                    ),
                    Ok(
                        Literal(
                            "messages=",
                        ),
                    ),
                    Ok(
                        MaybeBool(
                            On,
                        ),
                    ),
                ],
            ]
        "#]]
        .assert_debug_eq(&nested);
    }
}
650}