Skip to main content

scarf_parser/lexer/
mod.rs

1// =======================================================================
2// mod.rs
3// =======================================================================
4//! Lexing a source file into semantic tokens
5
6pub(crate) mod callbacks;
7pub(crate) mod keywords;
8pub(crate) mod tokens;
9use crate::SpannedToken;
10use ariadne::Report;
11use ariadne::{Color, Label, ReportKind};
12pub use keywords::StandardVersion;
13use logos::Logos;
14use logos::Span as ByteSpan;
15use scarf_syntax::Span;
16use std::fs::{self, File};
17use std::io::{self, BufWriter, Write};
18use std::path::Path;
19pub use tokens::Token;
20
21fn report_lex_result<'a>(
22    lex_result: (Result<Token<'a>, String>, Span<'a>),
23) -> Option<Report<'a, (&'a str, ByteSpan)>> {
24    let (result, span) = lex_result;
25    if let Err(ref text) = result {
26        let report = if text.len() == 0 {
27            Report::build(ReportKind::Error, (span.file, span.bytes.clone()))
28                .with_code("L1")
29                .with_config(
30                    ariadne::Config::new()
31                        .with_index_type(ariadne::IndexType::Byte),
32                )
33                .with_message("Unrecognized token")
34                .with_label(
35                    Label::new((span.file, span.bytes.clone()))
36                        .with_message("Unrecognized token")
37                        .with_color(Color::Red),
38                )
39                .finish()
40        } else {
41            Report::build(ReportKind::Error, (span.file, span.bytes.clone()))
42                .with_code("L2")
43                .with_config(
44                    ariadne::Config::new()
45                        .with_index_type(ariadne::IndexType::Byte),
46                )
47                .with_message(text.clone())
48                .with_label(
49                    Label::new((span.file, span.bytes.clone()))
50                        .with_message(text)
51                        .with_color(Color::Red),
52                )
53                .finish()
54        };
55        Some(report)
56    } else {
57        None
58    }
59}
60
61fn map_lex_result<'a>(
62    lex_result: (Result<Token<'a>, String>, Span<'a>),
63) -> SpannedToken<'a> {
64    match lex_result.0 {
65        Ok(tok) => SpannedToken(tok, lex_result.1),
66        Err(_) => SpannedToken(Token::Error, lex_result.1),
67    }
68}
69
70/// An iterator over syntactical tokens for a SystemVerilog source
71pub trait LexedSource<'a>:
72    Iterator<Item = (Result<Token<'a>, String>, Span<'a>)> + Clone
73{
74    /// Generate error reports for any errors encountered in lexing
75    fn report_errors(
76        &self,
77    ) -> impl Iterator<Item = Report<'a, (&'a str, ByteSpan)>> {
78        self.clone().filter_map(report_lex_result)
79    }
80
81    /// Dump a representation of the lexed source to a file, for debugging
82    fn dump(&self, file_path: &Path) -> io::Result<()> {
83        if let Some(parent_dir) = file_path.parent() {
84            fs::create_dir_all(parent_dir)?;
85        }
86        let file = File::create(file_path)?;
87        let mut writer = BufWriter::new(file);
88        for (result, span) in self.clone() {
89            let dump_str = format!(
90                "[{:>2}:{:>2}] {}\n",
91                span.bytes.start,
92                span.bytes.end,
93                match result {
94                    Ok(token) => token,
95                    Err(_) => Token::Error,
96                }
97            );
98            writer.write_all(dump_str.as_bytes())?;
99        }
100        writer.flush()?;
101        Ok(())
102    }
103
104    /// Process the lexing of the source, storing the result
105    ///
106    /// While this does incur memory overhead, it avoid processing
107    /// the source multiple times if cloned.
108    fn process(
109        self,
110    ) -> std::vec::IntoIter<(Result<Token<'a>, String>, Span<'a>)> {
111        self.collect::<Vec<_>>().into_iter()
112    }
113
114    /// Convert lexer results into tokens, turning errors into [`Token::Error`]
115    fn tokens(self) -> impl Iterator<Item = SpannedToken<'a>> {
116        self.into_iter().map(map_lex_result)
117    }
118}
119impl<'a, T> LexedSource<'a> for T where
120    T: Iterator<Item = (Result<Token<'a>, String>, Span<'a>)> + Clone
121{
122}
123
124fn token_span_mapper<'a>(
125    file_name: &'a str,
126    included_from: Option<&'a Span<'a>>,
127) -> impl Fn(
128    (Result<Token<'a>, String>, ByteSpan),
129) -> (Result<Token<'a>, String>, Span<'a>)
130+ Clone {
131    move |(token_result, byte_span)| {
132        (
133            token_result,
134            Span {
135                file: file_name,
136                bytes: byte_span,
137                expanded_from: None,
138                included_from,
139            },
140        )
141    }
142}
143
144pub(crate) fn lex_helper<'a>(
145    src: &'a str,
146    file_name: &'a str,
147    included_from: Option<&'a Span<'a>>,
148) -> impl LexedSource<'a> {
149    let span_mapper = token_span_mapper(file_name, included_from);
150    Token::lexer(src).spanned().map(span_mapper)
151}
152
153/// Separate a source file into syntactic tokens
154///
155/// ```rust
156/// # use scarf_parser::*;
157/// let file_contents = "module test_module; endmodule";
158/// let mut tokens = lex(file_contents, "test_file.v");
159/// assert!(matches!(tokens.next().unwrap(), (Ok(Token::Module), _)));
160/// assert!(matches!(tokens.next().unwrap(), (Ok(Token::SimpleIdentifier("test_module")), _)));
161/// assert!(matches!(tokens.next().unwrap(), (Ok(Token::SColon), _)));
162/// assert!(matches!(tokens.next().unwrap(), (Ok(Token::Endmodule), _)));
163/// assert!(tokens.next().is_none());
164/// ```
165///
166/// If the lexer encounters an error, the resulting `Err(string)` may contain
167/// more information if possible to discern.
168pub fn lex<'a>(src: &'a str, file_name: &'a str) -> impl LexedSource<'a> {
169    lex_helper(src, file_name, None)
170}