scnr/
pattern.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
//! Module with the pattern types and their methods.
use serde::{Deserialize, Serialize};

/// A lookahead constraint that can be attached to a pattern.
///
/// The lookahead is a regular expression that is checked after the pattern itself. It can be
/// positive or negative:
///
/// * positive: the lookahead must match for the pattern to be considered a match;
/// * negative: the lookahead must not match for the pattern to be considered a match.
///
/// A lookahead is optional. The characters read by the lookahead are not included in the match.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Lookahead {
    /// `true` for a positive lookahead, `false` for a negative one.
    pub is_positive: bool,
    /// The regular expression of the lookahead, kept as text.
    pub pattern: String,
}

impl Lookahead {
    /// Create a new lookahead.
    pub fn new(is_positive: bool, pattern: String) -> Self {
        Self {
            is_positive,
            pattern,
        }
    }

    /// Get the pattern.
    #[inline]
    pub fn pattern(&self) -> &str {
        &self.pattern
    }

    /// Check if the lookahead is positive.
    #[inline]
    pub fn is_positive(&self) -> bool {
        self.is_positive
    }
}

impl std::fmt::Display for Lookahead {
    /// Writes the lookahead as `(?=…)` when it is positive and as `(?!…)` when it is negative.
    ///
    /// The pattern text is passed through `str::escape_default` before it is written.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let escaped = self.pattern.escape_default();
        match self.is_positive {
            true => write!(f, "(?={})", escaped),
            false => write!(f, "(?!{})", escaped),
        }
    }
}

/// A pattern that is used to match the input.
///
/// A pattern consists of a regular expression and a token type number. The token type number
/// identifies the pattern in the scanner. A pattern can additionally carry an optional
/// [`Lookahead`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Pattern {
    /// The regular expression of the pattern, kept as text.
    pattern: String,
    /// The token type number that identifies this pattern.
    token_type: usize,
    /// The optional lookahead; the field is left out of serialized output when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    lookahead: Option<Lookahead>,
}

impl Pattern {
    /// Create a new pattern.
    pub fn new(pattern: String, token_type: usize) -> Self {
        Self {
            pattern,
            token_type,
            lookahead: None,
        }
    }

    /// Create a new pattern with lookahead.
    pub fn with_lookahead(self, lookahead: Lookahead) -> Self {
        Self {
            pattern: self.pattern,
            token_type: self.token_type,
            lookahead: Some(lookahead),
        }
    }

    /// Get the pattern.
    #[inline]
    pub fn pattern(&self) -> &str {
        &self.pattern
    }

    /// Get the token type.
    #[inline]
    pub fn terminal_id(&self) -> usize {
        self.token_type
    }

    /// Get the lookahead.
    #[inline]
    pub fn lookahead(&self) -> Option<&Lookahead> {
        self.lookahead.as_ref()
    }
}

impl std::fmt::Display for Pattern {
    /// Writes the escaped pattern text, directly followed by the lookahead if one is present.
    ///
    /// The pattern text is passed through `str::escape_default`; the lookahead is written via
    /// its own `Display` implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.pattern.escape_default())?;
        match self.lookahead.as_ref() {
            Some(la) => write!(f, "{}", la),
            None => Ok(()),
        }
    }
}