syntax_parser_generator/lex/
lexeme.rs

1use crate::lex::regex::Regex;
2
3/// Describes a category of lexemes with similar syntactic meanings.
4///
5/// This is used as part of a lexical-analyzer's specification, as it is built to recognize
6/// different types of lexemes.
7pub struct LexemeDescriptor<LexemeType> {
8    /// The type of lexemes being described.
9    pub lexeme_type: LexemeType,
10
11    /// A regular-expression pattern that matches the lexemes of the specified type.
12    pub pattern: Regex,
13}
14
15impl<LexemeType> LexemeDescriptor<LexemeType> {
16    /// Creates a [LexemeDescriptor] describing the specified `lexeme_type` with the specified
17    ///`pattern`.
18    pub fn new(lexeme_type: LexemeType, pattern: Regex) -> Self {
19        LexemeDescriptor {
20            lexeme_type,
21            pattern,
22        }
23    }
24
25    /// Creates a new [LexemeDescriptor] that describes a keyword.
26    ///
27    /// A keyword is a type of lexeme that only matches some hard-coded string (such as `if` or
28    /// `int`). This function can be used to efficiently describe such keywords.
29    ///
30    /// # Example
31    /// ```rust
32    /// # use syntax_parser_generator::lex::LexemeDescriptor;
33    /// enum MyLexemeType { If, While }
34    /// let my_lexeme_descriptors = vec![
35    ///     LexemeDescriptor::keyword(MyLexemeType::If, "if"),
36    ///     LexemeDescriptor::keyword(MyLexemeType::While, "while"),
37    /// ];
38    /// ```
39    pub fn keyword(lexeme_type: LexemeType, name: &str) -> Self {
40        Self::new(lexeme_type, Regex::constant_string(name))
41    }
42
43    /// Creates a new [LexemeDescriptor] that describes a special character.
44    ///
45    /// A special character is a type of lexeme that only matches some hard-coded character (such as
46    /// operators: `+`, `*`). This function can be used to efficiently describe such characters.
47    ///
48    /// # Example
49    /// ```rust
50    /// # use syntax_parser_generator::lex::LexemeDescriptor;
51    /// enum MyLexemeType { Addition, Subtraction }
52    /// let my_lexeme_descriptors = vec![
53    ///     LexemeDescriptor::special_char(MyLexemeType::Addition, '+'),
54    ///     LexemeDescriptor::special_char(MyLexemeType::Subtraction, '-'),
55    /// ];
56    /// ```
57    pub fn special_char(lexeme_type: LexemeType, value: char) -> Self {
58        Self::new(lexeme_type, Regex::single_char(value))
59    }
60}
61
/// A lexeme extracted from input text by a lexical analyzer.
///
/// Lexemes, also known as "tokens", are sequences of consecutive characters separated from
/// input text, and classified into categories (such as keywords, identifiers, operators), during
/// the lexical analysis phase of the syntax-parsing pipeline. They represent atomic units of
/// syntactic meaning.
///
/// Each derived trait (`Debug`, `Clone`, `PartialEq`, `Eq`) is available for a `Lexeme` whenever
/// `LexemeType` implements that same trait.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Lexeme<LexemeType> {
    /// The type (category) of the lexeme.
    pub lexeme_type: LexemeType,

    /// The original text that constituted the lexeme.
    pub contents: String,
}
76
77impl<LexemeType> Lexeme<LexemeType> {
78    /// Creates a new [Lexeme] of the given `lexeme_type` with the given `contents`.
79    pub fn new(lexeme_type: LexemeType, contents: &str) -> Self {
80        Self {
81            lexeme_type,
82            contents: String::from(contents),
83        }
84    }
85}