syntax_parser_generator/lex/lexeme.rs
1use crate::lex::regex::Regex;
2
3/// Describes a category of lexemes with similar syntactic meanings.
4///
5/// This is used as part of a lexical-analyzer's specification, as it is built to recognize
6/// different types of lexemes.
7pub struct LexemeDescriptor<LexemeType> {
8 /// The type of lexemes being described.
9 pub lexeme_type: LexemeType,
10
11 /// A regular-expression pattern that matches the lexemes of the specified type.
12 pub pattern: Regex,
13}
14
15impl<LexemeType> LexemeDescriptor<LexemeType> {
16 /// Creates a [LexemeDescriptor] describing the specified `lexeme_type` with the specified
17 ///`pattern`.
18 pub fn new(lexeme_type: LexemeType, pattern: Regex) -> Self {
19 LexemeDescriptor {
20 lexeme_type,
21 pattern,
22 }
23 }
24
25 /// Creates a new [LexemeDescriptor] that describes a keyword.
26 ///
27 /// A keyword is a type of lexeme that only matches some hard-coded string (such as `if` or
28 /// `int`). This function can be used to efficiently describe such keywords.
29 ///
30 /// # Example
31 /// ```rust
32 /// # use syntax_parser_generator::lex::LexemeDescriptor;
33 /// enum MyLexemeType { If, While }
34 /// let my_lexeme_descriptors = vec![
35 /// LexemeDescriptor::keyword(MyLexemeType::If, "if"),
36 /// LexemeDescriptor::keyword(MyLexemeType::While, "while"),
37 /// ];
38 /// ```
39 pub fn keyword(lexeme_type: LexemeType, name: &str) -> Self {
40 Self::new(lexeme_type, Regex::constant_string(name))
41 }
42
43 /// Creates a new [LexemeDescriptor] that describes a special character.
44 ///
45 /// A special character is a type of lexeme that only matches some hard-coded character (such as
46 /// operators: `+`, `*`). This function can be used to efficiently describe such characters.
47 ///
48 /// # Example
49 /// ```rust
50 /// # use syntax_parser_generator::lex::LexemeDescriptor;
51 /// enum MyLexemeType { Addition, Subtraction }
52 /// let my_lexeme_descriptors = vec![
53 /// LexemeDescriptor::special_char(MyLexemeType::Addition, '+'),
54 /// LexemeDescriptor::special_char(MyLexemeType::Subtraction, '-'),
55 /// ];
56 /// ```
57 pub fn special_char(lexeme_type: LexemeType, value: char) -> Self {
58 Self::new(lexeme_type, Regex::single_char(value))
59 }
60}
61
/// A lexeme extracted from input text by a lexical analyzer.
///
/// Lexemes, also known as "tokens", are sequences of consecutive characters separated from
/// input text, and classified into categories (such as keywords, identifiers, operators), during
/// the lexical analysis phase of the syntax-parsing pipeline. They represent atomic units of
/// syntactic meaning.
///
/// Besides `Debug`, `PartialEq` and `Eq`, this type derives `Clone` and `Hash`, so lexemes can be
/// duplicated and stored in hashed collections. Each derived implementation is only available
/// when `LexemeType` implements the corresponding trait.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Lexeme<LexemeType> {
    /// The type (category) of the lexeme.
    pub lexeme_type: LexemeType,

    /// The original text that constituted the lexeme.
    pub contents: String,
}
76
77impl<LexemeType> Lexeme<LexemeType> {
78 /// Creates a new [Lexeme] of the given `lexeme_type` with the given `contents`.
79 pub fn new(lexeme_type: LexemeType, contents: &str) -> Self {
80 Self {
81 lexeme_type,
82 contents: String::from(contents),
83 }
84 }
85}