scarf_parser/lexer/
postprocess.rs

// =======================================================================
// postprocess.rs
// =======================================================================
// Postprocessing to perform on a sequence of lexed tokens

use crate::*;
use logos::Span;

// -----------------------------------------------------------------------
// block_comment_merge_postprocess
// -----------------------------------------------------------------------
// Merge content into a single BlockComment based on the start and end
// delimiters

15pub fn block_comment_merge_postprocess<'a>(
16    stream: Vec<(Result<Token<'a>, String>, Span)>,
17    src: &'a str,
18) -> Vec<(Result<Token<'a>, String>, Span)> {
19    let mut block_comment_started = false;
20    let mut block_comment_start_span = Span::default();
21    let mut new_vec: Vec<(Result<Token<'a>, String>, Span)> = Vec::new();
22    for chunk in stream {
23        match chunk {
24            (Ok(Token::BlockCommentStart), start_span) => {
25                if !block_comment_started {
26                    block_comment_started = true;
27                    block_comment_start_span = start_span.clone();
28                }
29            }
30            (Ok(Token::BlockCommentEnd), end_span) => {
31                if !block_comment_started {
32                    new_vec.push((
33                        Err("Ending block comment without beginning".to_owned()),
34                        end_span,
35                    ))
36                } else {
37                    let comment_span = Span {
38                        start: block_comment_start_span.start,
39                        end: end_span.end,
40                    };
41                    let text_span = Span {
42                        start: block_comment_start_span.end,
43                        end: end_span.start,
44                    };
45                    let comment_text = &src[text_span.start..text_span.end];
46                    new_vec.push((Ok(Token::BlockComment(comment_text)), comment_span));
47                    block_comment_started = false;
48                }
49            }
50            _ => {
51                if !block_comment_started {
52                    new_vec.push(chunk)
53                }
54            }
55        }
56    }
57    if block_comment_started {
58        new_vec.push((
59            Err("Block comment with no ending".to_owned()),
60            block_comment_start_span,
61        ))
62    }
63    new_vec
64}

// -----------------------------------------------------------------------
// keyword_postprocess
// -----------------------------------------------------------------------
// Turns keywords into identifiers based on which keywords are reserved
// based on the current standard

72pub fn keyword_postprocess<'a>(stream: &mut Vec<(Result<Token<'a>, String>, Span)>, _: &'a str) {
73    let mut curr_standard = vec![(StandardVersion::IEEE1800_2023, Span::default())];
74    let mut keyword_standard_ended = true;
75    let mut begin_keywords_started = false;
76    let mut begin_keywords_started_span = Span::default();
77    for chunk in stream.iter_mut() {
78        match chunk {
79            (Ok(Token::DirBeginKeywords), span) => {
80                begin_keywords_started = true;
81                keyword_standard_ended = false;
82                begin_keywords_started_span = span.clone();
83            }
84            (Ok(Token::StringLiteral(specifier)), span) => {
85                if begin_keywords_started {
86                    match *specifier {
87                        "1800-2023" => {
88                            curr_standard.push((StandardVersion::IEEE1800_2023, span.clone()))
89                        }
90                        "1800-2017" => {
91                            curr_standard.push((StandardVersion::IEEE1800_2017, span.clone()))
92                        }
93                        "1800-2012" => {
94                            curr_standard.push((StandardVersion::IEEE1800_2012, span.clone()))
95                        }
96                        "1800-2009" => {
97                            curr_standard.push((StandardVersion::IEEE1800_2009, span.clone()))
98                        }
99                        "1800-2005" => {
100                            curr_standard.push((StandardVersion::IEEE1800_2005, span.clone()))
101                        }
102                        "1364-2005" => {
103                            curr_standard.push((StandardVersion::IEEE1364_2005, span.clone()))
104                        }
105                        "1364-2001" => {
106                            curr_standard.push((StandardVersion::IEEE1364_2001, span.clone()))
107                        }
108                        "1364-2001-noconfig" => curr_standard
109                            .push((StandardVersion::IEEE1364_2001Noconfig, span.clone())),
110                        "1364-1995" => {
111                            curr_standard.push((StandardVersion::IEEE1364_1995, span.clone()))
112                        }
113                        _ => {
114                            *chunk = (
115                                Err(format!("Invalid version specifier '{}'", specifier)),
116                                span.clone(),
117                            )
118                        }
119                    }
120                    begin_keywords_started = false;
121                }
122            }
123            (Ok(Token::DirEndKeywords), span) => {
124                if curr_standard.len() > 1 {
125                    keyword_standard_ended = true;
126                    curr_standard.pop();
127                } else {
128                    *chunk = (
129                        Err("end_keywords directive with no begin_keywords".to_owned()),
130                        span.clone(),
131                    );
132                }
133            }
134            (result, span) => {
135                if begin_keywords_started {
136                    *chunk = (Err("Expected version specifier".to_owned()), span.clone());
137                    begin_keywords_started = false;
138                } else {
139                    if let Ok(token) = result {
140                        if token.keyword_replace(curr_standard.last().unwrap().clone().0) {
141                            *chunk = (Ok(Token::SimpleIdentifier(token.as_str())), span.clone());
142                        }
143                    }
144                }
145            }
146        }
147    }
148    if !keyword_standard_ended {
149        stream.push((
150            Err("New keyword standard with no ending".to_owned()),
151            begin_keywords_started_span,
152        ))
153    }
154}

// -----------------------------------------------------------------------
// time_unit_postprocess
// -----------------------------------------------------------------------
// Convert identifiers into time units if they follow a number

161pub fn time_unit_postprocess<'a>(stream: &mut Vec<(Result<Token<'a>, String>, Span)>) {
162    let mut previous_number = false;
163    for chunk in stream.iter_mut() {
164        match chunk {
165            (Ok(Token::UnsignedNumber(_)), _) => previous_number = true,
166            (Ok(Token::FixedPointNumber(_)), _) => previous_number = true,
167            (Ok(Token::SimpleIdentifier(text)), span) => {
168                if previous_number {
169                    match *text {
170                        "s" | "ms" | "us" | "ns" | "ps" | "fs" => {
171                            *chunk = (Ok(Token::TimeUnit(text)), span.clone())
172                        }
173                        _ => (),
174                    }
175                    previous_number = false;
176                }
177            }
178            _ => previous_number = false,
179        }
180    }
181}

// -----------------------------------------------------------------------
// triple_quote_string_postprocess
// -----------------------------------------------------------------------
// Form triple-quoted strings based on the start and end delimiters

188pub fn triple_quote_string_postprocess<'a>(
189    stream: Vec<(Result<Token<'a>, String>, Span)>,
190    src: &'a str,
191) -> Vec<(Result<Token<'a>, String>, Span)> {
192    let mut triple_quote_string_started = false;
193    let mut triple_quote_string_start_span = Span::default();
194    let mut new_vec: Vec<(Result<Token<'a>, String>, Span)> = Vec::new();
195    for chunk in stream {
196        match chunk {
197            (Ok(Token::QuoteQuoteQuote), span) => {
198                if triple_quote_string_started {
199                    let string_span = Span {
200                        start: triple_quote_string_start_span.start,
201                        end: span.end,
202                    };
203                    let text_span = Span {
204                        start: triple_quote_string_start_span.end,
205                        end: span.start,
206                    };
207                    let string_text = &src[text_span.start..text_span.end];
208                    new_vec.push((
209                        Ok(Token::TripleQuoteStringLiteral(string_text)),
210                        string_span,
211                    ));
212                    triple_quote_string_started = false;
213                } else {
214                    triple_quote_string_started = true;
215                    triple_quote_string_start_span = span;
216                }
217            }
218            _ => {
219                if !triple_quote_string_started {
220                    new_vec.push(chunk)
221                }
222            }
223        }
224    }
225    if triple_quote_string_started {
226        new_vec.push((
227            Err("Triple-quote string with no ending".to_owned()),
228            triple_quote_string_start_span,
229        ))
230    }
231    new_vec
232}

// -----------------------------------------------------------------------
// postprocess
// -----------------------------------------------------------------------
// Apply all post-processing passes

239pub fn postprocess<'a>(
240    stream: Vec<(Result<Token<'a>, String>, Span)>,
241    src: &'a str,
242) -> Vec<(Result<Token<'a>, String>, Span)> {
243    let stream = block_comment_merge_postprocess(stream, src);
244    let mut stream = triple_quote_string_postprocess(stream, src);
245    keyword_postprocess(&mut stream, src);
246    time_unit_postprocess(&mut stream);
247    stream
248}