// champ/amp/parse.rs

1use crate::{amp, fail, lex, util};
2use std::fmt::Write;
3
/// Parses text content into a sequence of [`Stmt`]s.
///
/// The lexer is kept as a field so its token storage can be reused
/// across repeated `parse()` calls.
#[derive(Default)]
pub struct Parser {
    /// Statements produced by the most recent call to `parse()`.
    pub stmts: Vec<Stmt>,
    // Internal lexer; re-tokenizes on every parse() call.
    lexer: lex::Lexer,
}
9
/// A parsed statement: a byte range within the source content plus its kind.
#[derive(Debug, Eq, PartialEq, Clone, Default)]
pub struct Stmt {
    /// Byte range in the original content that this statement covers.
    pub range: Range,
    /// Whether this statement is plain text or an Amp path.
    pub kind: Kind,
}
15
/// The two kinds of statement: plain text or an Amp metadata path.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Kind {
    /// Plain text, stored as the owned substring of the content.
    Text(String),
    /// Amp metadata, e.g. `&todo` or `&!:prio:~priority`.
    Amp(amp::Path),
}
21
/// Controls where an Amp statement is allowed to start within the content.
#[derive(PartialEq, Eq, Clone)]
pub enum Match {
    /// An Amp may start anywhere in the content.
    Everywhere,
    /// An Amp may only start at the very beginning of the content.
    OnlyStart,
}
27
// Byte-offset range into the parsed content (also reused as a token-index range).
type Range = std::ops::Range<usize>;
29
/// Grouping state: whether the tokens currently collected belong to
/// plain text or to an Amp metadata item.
#[derive(Clone, Debug)]
enum State {
    Text,
    Amp,
}
35
36impl std::fmt::Display for State {
37    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38        match self {
39            State::Text => write!(f, "Text"),
40            State::Amp => write!(f, "Amp"),
41        }
42    }
43}
44
45impl Default for Kind {
46    fn default() -> Kind {
47        Kind::Text(String::new())
48    }
49}
50
51impl Stmt {
52    pub fn new(range: Range, kind: Kind) -> Stmt {
53        Stmt { range, kind }
54    }
55}
56
57impl std::fmt::Display for Stmt {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match &self.kind {
60            Kind::Text(text) => write!(f, "({text})"),
61            Kind::Amp(path) => write!(f, "[{}]", path),
62        }
63    }
64}
65
66impl Parser {
67    pub fn new() -> Parser {
68        Parser::default()
69    }
70
71    pub fn parse(&mut self, content: &str, m: &Match) -> util::Result<()> {
72        self.stmts.clear();
73
74        self.lexer.tokenize(content);
75
76        let mut grouper = Grouper::new();
77        grouper.create_groups(&self.lexer.tokens, m);
78
79        // Translate the Groups into Stmts
80        self.stmts = grouper
81            .groups
82            .iter()
83            .map(|group: &Group| -> util::Result<Stmt> {
84                let mut stmt = Stmt::default();
85
86                if let Some(token) = group.tokens.first() {
87                    stmt.range = token.range.clone();
88                }
89                if let Some(token) = group.tokens.last() {
90                    stmt.range.end = token.range.end;
91                }
92
93                match group.state {
94                    State::Text => {
95                        let s = content.get(stmt.range.clone()).unwrap_or_else(|| "");
96                        stmt.kind = Kind::Text(s.into());
97                    }
98                    State::Amp => {
99                        let mut range = 0..group.tokens.len();
100                        let mut pop = |kind: &lex::Kind| -> bool {
101                            if let Some(token) = group.tokens.get(range.start) {
102                                if &token.kind == kind {
103                                    range.start += 1;
104                                    return true;
105                                }
106                            }
107                            return false;
108                        };
109
110                        if !pop(&lex::Kind::Ampersand) {
111                            fail!("Expected group to start with `&`");
112                        }
113                        let is_definition = pop(&lex::Kind::Bang);
114                        let is_absolute = pop(&lex::Kind::Colon);
115
116                        let mut parts = Vec::<String>::new();
117                        let mut part = None;
118                        for ix in range {
119                            if let Some(token) = group.tokens.get(ix) {
120                                match token.kind {
121                                    lex::Kind::Colon => match token.range.len() {
122                                        // &todo: Support multi-colon
123                                        _ => {
124                                            if let Some(part) = part {
125                                                parts.push(part);
126                                            }
127                                            part = None;
128                                        }
129                                    },
130                                    _ => {
131                                        if let Some(s) = content.get(token.range.clone()) {
132                                            if let Some(part) = &mut part {
133                                                part.push_str(s);
134                                            } else {
135                                                part = Some(s.to_owned());
136                                            }
137                                        }
138                                    }
139                                }
140                            } else {
141                                fail!("Could not find Token {ix}");
142                            }
143                        }
144                        if let Some(part) = part {
145                            parts.push(part);
146                        }
147
148                        let parts = parts
149                            .into_iter()
150                            .map(|part| amp::Part::Text(part))
151                            .collect();
152                        let path = amp::Path {
153                            is_definition,
154                            is_absolute,
155                            parts,
156                        };
157                        stmt.kind = Kind::Amp(path);
158                    }
159                }
160
161                Ok(stmt)
162            })
163            .collect::<util::Result<Vec<Stmt>>>()?;
164
165        Ok(())
166    }
167}
168
// A sequence of Tokens that can be translated into a Stmt
#[derive(Debug)]
struct Group<'a> {
    // Whether these tokens form plain text or an Amp item.
    state: State,
    // Borrowed slice of the lexer's token buffer.
    tokens: &'a [lex::Token],
}
175
176impl<'a> std::fmt::Display for Group<'a> {
177    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178        write!(f, "[Group](state:{})", &self.state)?;
179        for token in self.tokens.iter() {
180            write!(f, "{token}")?;
181        }
182        Ok(())
183    }
184}
185
// Groups a sequence of Tokens into Groups that can be easily translated into a Stmt
struct Grouper<'a> {
    // State the tokens currently being collected belong to.
    state: State,
    // Index range into the token slice for the group under construction.
    token_range: Range,
    // Completed groups, in source order.
    groups: Vec<Group<'a>>,
}
192
impl<'a> Grouper<'a> {
    /// Starts in Text state with an empty token range and no groups.
    fn new() -> Grouper<'a> {
        Grouper {
            state: State::Text,
            token_range: 0..0,
            groups: Vec::new(),
        }
    }
    /// Clears all state so the Grouper can be reused on a new token slice.
    fn reset(&mut self) {
        self.state = State::Text;
        self.token_range = 0..0;
        self.groups.clear();
    }

    /// Splits `tokens` into Groups, alternating between Text and Amp state.
    ///
    /// `m` controls whether an Amp group may only start at the very first
    /// token or anywhere in the stream.
    fn create_groups(&mut self, tokens: &'a [lex::Token], m: &Match) {
        self.reset();

        let mut is_first = true;
        let mut last_was_space = true;
        // Local copy: `m` is upgraded to Everywhere after the first match.
        let mut m: Match = m.clone();
        for token in tokens {
            match self.state {
                State::Text => {
                    // A single `&` switches to Amp mode when position rules allow it.
                    if token.kind == lex::Kind::Ampersand
                        && token.range.len() == 1
                        && (is_first || m == Match::Everywhere)
                        // &spec: ampersand can only start Amp at start or after a space
                        && last_was_space
                    {
                        self.start_new_group(State::Amp, tokens);
                        // &spec: as soon as we found a match, we allow matches everywhere
                        m = Match::Everywhere;
                    }
                    self.token_range.end += 1;
                }
                State::Amp => match token.kind {
                    lex::Kind::Ampersand => {
                        // A new `&` ends the current Amp group and starts another.
                        self.start_new_group(State::Amp, tokens);
                        self.token_range.end += 1;
                    }
                    lex::Kind::Space => {
                        // A space ends the Amp group; subsequent tokens are Text.
                        self.start_new_group(State::Text, tokens);
                        self.token_range.end += 1;
                    }
                    lex::Kind::Semicolon => {
                        // &spec: a semicolon cannot occur in Amp.
                        // &todo: make this more precise: an Amp cannot _end_ with a semicolon
                        self.state = State::Text;
                        self.token_range.end += 1;
                        self.start_new_group(State::Text, tokens);
                    }
                    _ => {
                        self.token_range.end += 1;
                    }
                },
            }
            last_was_space = token.kind == lex::Kind::Space;
            is_first = false;
        }
        // Might require more than one additional group at the end, eg, if content ends with `@todo:`:
        // - The `:` is split last-minute into a new group
        while !self.token_range.is_empty() {
            self.start_new_group(State::Text, tokens);
        }
    }

    /// Pushes the pending token range as a Group (if non-empty) and switches
    /// to `state` for the tokens that follow.
    ///
    /// Uses a `while` loop because an Amp group ending on `;` or `,` is first
    /// demoted to Text and only pushed on the next iteration.
    fn start_new_group(&mut self, state: State, tokens: &'a [lex::Token]) {
        let end = self.token_range.end;

        while !self.token_range.is_empty() {
            // Shadowing: narrow to the tokens of the pending group.
            let tokens = &tokens[self.token_range.clone()];

            let mut push_group = || {
                self.groups.push(Group::<'a> {
                    state: self.state.clone(),
                    tokens,
                });
                // Mark the pending range as consumed, ending the loop.
                self.token_range.start = self.token_range.end;
            };

            match self.state {
                State::Text => push_group(),
                State::Amp => match tokens.last().unwrap().kind {
                    lex::Kind::Semicolon | lex::Kind::Comma => {
                        // &spec: Group ending on `;` or `,` is considered as Text
                        // - &nbsp; occurs often in Markdown and is considered a false positive
                        // - &param, occurs in commented-out C/C++/Rust source code
                        self.state = State::Text;
                    }
                    _ => push_group(),
                },
            }
        }

        self.state = state;
        self.token_range.end = end;
    }
}
291
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse() {
        // Scenarios: (match mode, input content, expected rendering of stmts).
        let scns = [
            // String
            (&Match::Everywhere, "todo", "(todo)"),
            (&Match::Everywhere, "&&", "(&&)"),
            (&Match::Everywhere, "a,b", "(a,b)"),
            (&Match::Everywhere, "&nbsp;", "(&nbsp;)"),
            (&Match::Everywhere, "&nbsp;abc", "(&nbsp;)(abc)"),
            (&Match::Everywhere, "&param,", "(&param,)"),
            (&Match::Everywhere, "r&d", "(r&d)"),
            // Metadata
            (&Match::Everywhere, "&todo", "[todo]"),
            (&Match::Everywhere, "&todo:", "[todo]"),
            (
                &Match::Everywhere,
                "&!:prio:~priority",
                "[!:prio:~priority]",
            ),
        ];

        let mut parser = Parser::new();
        for (m, content, exp) in scns {
            // Fix: the Result was previously dropped, so a parse failure
            // would go unnoticed and the stale stmts would be compared.
            parser.parse(content, m).unwrap();
            let mut s = String::new();
            for stmt in &parser.stmts {
                write!(&mut s, "{stmt}").unwrap();
            }
            assert_eq!(&s, exp, "unexpected parse of {content:?}");
        }
    }
}
327}