Skip to main content

oak_regex/ast/
mod.rs

1use core::range::Range;
2use serde::{Deserialize, Serialize};
3
/// Identifier node: a name string together with a `span` range.
4#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
5pub struct Identifier {
6    pub name: String, // Text of the identifier
7    #[serde(with = "oak_core::serde_range")]
8    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the name — TODO confirm units
9}
10
11/// Strongly-typed AST root: owns the top-level [`Pattern`] nodes and carries no `span` of its own.
12#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
13pub struct RegexRoot {
14    pub alternatives: Vec<Pattern>, // NOTE(review): named `alternatives` but holds `Pattern`s, each of which has its own `alternatives` — confirm the intended nesting
15}
16
17/// Regular expression pattern: a list of [`Alternative`]s plus a `span` range.
18#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
19pub struct Pattern {
20    pub alternatives: Vec<Alternative>, // Presumably the branches separated by `|` — confirm against the parser
21    #[serde(with = "oak_core::serde_range")]
22    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the pattern — TODO confirm units
23}
24
25/// A single alternative of a [`Pattern`] (presumably one branch of a `|` alternation): a list of pattern elements.
26#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
27pub struct Alternative {
28    pub elements: Vec<PatternElement>, // Elements of this branch, in the order stored by the producer
29    #[serde(with = "oak_core::serde_range")]
30    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of this branch — TODO confirm units
31}
32
33/// Pattern element: one entry of an [`Alternative`]; each variant wraps the node type of the same name.
34#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
35pub enum PatternElement {
36    /// Character class, see [`CharacterClass`]
37    CharacterClass(CharacterClass),
38    /// Quantifier applied to another element, see [`Quantifier`]
39    Quantifier(Quantifier),
40    /// Group wrapping another element, see [`Group`]
41    Group(Group),
42    /// Assertion, see [`Assertion`]
43    Assertion(Assertion),
44    /// Literal text, see [`Literal`]
45    Literal(Literal),
46    /// Special character or escape, see [`Special`]
47    Special(Special),
48}
49
50/// Character class: a list of [`CharacterRange`]s with a negation flag.
51#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
52pub struct CharacterClass {
53    pub negated: bool, // Presumably true for a negated class such as `[^...]` — confirm against the parser
54    pub ranges: Vec<CharacterRange>, // Members of the class; a single character is a range whose `end` is `None`
55    #[serde(with = "oak_core::serde_range")]
56    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the class — TODO confirm units
57}
58
59/// Character range: either a single character or a `start`..`end` pair inside a character class.
60#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
61pub struct CharacterRange {
62    pub start: char, // First character of the range, or the only character when `end` is `None`
63    pub end: Option<char>, // None means single character; NOTE(review): presumably inclusive when present (as in `a-z`) — confirm
64    #[serde(with = "oak_core::serde_range")]
65    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of this entry — TODO confirm units
66}
67
68/// Quantifier: a repetition kind applied to a single pattern element.
69#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
70pub struct Quantifier {
71    pub element: Box<PatternElement>, // The quantified element; boxed because `PatternElement` itself contains `Quantifier`
72    pub kind: QuantifierKind, // Which repetition is requested (`?`, `*`, `+`, `{n}`, `{n,m}`)
73    pub greedy: bool, // Presumably false for lazy forms such as `*?` — confirm against the parser
74    #[serde(with = "oak_core::serde_range")]
75    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably covers element and quantifier — TODO confirm
76}
77
78/// Quantifier type: the repetition requested by a [`Quantifier`].
79#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
80pub enum QuantifierKind {
81    /// Zero or one (?)
82    ZeroOrOne,
83    /// Zero or more (*)
84    ZeroOrMore,
85    /// One or more (+)
86    OneOrMore,
87    /// Exact count {n}; the field holds n
88    Exact(u32),
89    /// Range count {n,m}; the first field is presumably the lower bound n, the second the upper bound m
90    Range(u32, Option<u32>), // None means no upper limit
91}
92
93/// Group: a single pattern element wrapped with a [`GroupKind`].
94#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
95pub struct Group {
96    pub kind: GroupKind, // Capturing, non-capturing, lookaround, atomic or conditional
97    pub element: Box<PatternElement>, // Content of the group; boxed because `PatternElement` itself contains `Group`
98    #[serde(with = "oak_core::serde_range")]
99    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the group — TODO confirm units
100}
101
102/// Group type: the flavour of a [`Group`] (capturing, non-capturing, lookaround, atomic or conditional).
103#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
104pub enum GroupKind {
105    /// Capturing group (...) with optional explicit number
106    Capturing(Option<u32>), // Number, None means auto-numbered
107    /// Non-capturing group (?:...)
108    NonCapturing,
109    /// Positive lookahead (?=...)
110    Lookahead,
111    /// Negative lookahead (?!...)
112    NegativeLookahead,
113    /// Positive lookbehind (?<=...)
114    Lookbehind,
115    /// Negative lookbehind (?<!...)
116    NegativeLookbehind,
117    /// Atomic group (?>...)
118    Atomic,
119    /// Conditional group (?(condition)...); the payload is the [`Condition`] being tested
120    Conditional(Condition),
121}
122
123/// Condition: the test carried by [`GroupKind::Conditional`].
124#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
125pub enum Condition {
126    /// Group number condition; the field is presumably the number of the referenced capture group — confirm
127    GroupNumber(u32),
128    /// Recursion condition (no payload)
129    Recursion,
130    /// Assertion condition; NOTE(review): the payload is any `PatternElement`, the type does not restrict it to assertions
131    Assertion(Box<PatternElement>),
132}
133
134/// Assertion: an [`AssertionKind`] together with a `span` range.
135#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
136pub struct Assertion {
137    pub kind: AssertionKind, // Which anchor or boundary this node represents
138    #[serde(with = "oak_core::serde_range")]
139    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the assertion — TODO confirm units
140}
141
142/// Assertion type: the anchor or boundary represented by an [`Assertion`].
143#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
144pub enum AssertionKind {
145    /// Start of string ^
146    Start,
147    /// End of string $
148    End,
149    /// Word boundary \b (NOTE(review): `\b` is also documented on `SpecialKind::Backspace` — confirm how the two are told apart)
150    WordBoundary,
151    /// Non-word boundary \B
152    NonWordBoundary,
153}
154
155/// Literal: a string value together with a `span` range.
156#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
157pub struct Literal {
158    pub value: String, // Literal text; NOTE(review): whether escapes are already resolved is not visible here — confirm
159    #[serde(with = "oak_core::serde_range")]
160    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the literal — TODO confirm units
161}
162
163/// Special character: a [`SpecialKind`] together with a `span` range.
164#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
165pub struct Special {
166    pub kind: SpecialKind, // Which wildcard, class shorthand or escape this node represents
167    #[serde(with = "oak_core::serde_range")]
168    pub span: Range<usize>, // (De)serialized through `oak_core::serde_range`; presumably the source range of the escape — TODO confirm units
169}
170
171/// Special character type: the wildcard, class shorthand, escape or named reference held by a [`Special`].
172#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
173pub enum SpecialKind {
174    /// Any character .
175    Any,
176    /// Digit \d
177    Digit,
178    /// Non-digit \D
179    NonDigit,
180    /// Word character \w
181    Word,
182    /// Non-word character \W
183    NonWord,
184    /// Whitespace \s
185    Whitespace,
186    /// Non-whitespace \S
187    NonWhitespace,
188    /// Backspace \b (NOTE(review): `\b` is also documented on `AssertionKind::WordBoundary` — presumably this one applies inside a character class; confirm)
189    Backspace,
190    /// Form feed \f
191    FormFeed,
192    /// Newline \n
193    Newline,
194    /// Carriage return \r
195    CarriageReturn,
196    /// Tab \t
197    Tab,
198    /// Vertical tab \v
199    VerticalTab,
200    /// Octal escape \ooo; the field is presumably the decoded numeric value — confirm
201    Octal(u32),
202    /// Hexadecimal escape \xhh; the field is presumably the decoded numeric value — confirm
203    Hexadecimal(u32),
204    /// Unicode escape \uhhhh or \U{hhhhh}; stored as a raw `u32`, not a `char`
205    Unicode(u32),
206    /// Control character \cX; the field is presumably the letter X — confirm
207    Control(char),
208    /// Named reference \k<name>; the field is presumably the referenced group name (a reference, not a character)
209    NamedReference(String),
210}