regex_anre/
route.rs

1// Copyright (c) 2024 Hemashushu <hippospark@gmail.com>, All rights reserved.
2//
3// This Source Code Form is subject to the terms of
4// the Mozilla Public License version 2.0 and additional exceptions,
5// more details in file LICENSE, LICENSE.additional and CONTRIBUTING.
6
7use crate::transition::Transition;
8
9pub const MAIN_LINE_INDEX: usize = 0;
10
11// route
12// |-- line --\
13// |          |-- node --\
14// |          |          |- transition item
15// |          |          |- transition item
16// |          |          |- ...
17// |          |          \- transition item
18// |          |-- node
19// |          |-- ...
20// |          |-- node
21// |
22// |-- line
23
24// the compile target
25#[derive(Debug)]
26pub struct Route {
27    pub lines: Vec<Line>,
28    pub capture_groups: Vec<CaptureGroup>,
29}
30
31#[derive(Debug)]
32pub struct Line {
33    pub nodes: Vec<Node>,
34    pub start_node_index: usize,
35    pub end_node_index: usize,
36
37    // it is true when the expression starts with `^`,
38    // or encounters is_after and is_before sub-thread.
39    // it's used to optimise efficiency by directly exiting
40    // the thread looop when a thread fails.
41    pub fixed_start_position: bool,
42}
43
44// every node has one or more transitions,
45// except the exit node has no transition.
46#[derive(Debug)]
47pub struct Node {
48    pub transition_items: Vec<TransitionItem>,
49}
50
51#[derive(Debug)]
52pub struct TransitionItem {
53    pub transition: Transition,   // the type of transition
54    pub target_node_index: usize, // the index of next node
55}
56
57#[derive(Debug)]
58pub struct CaptureGroup {
59    pub name: Option<String>,
60}
61
62impl Route {
63    pub fn new() -> Self {
64        Route {
65            lines: vec![],
66            capture_groups: vec![],
67        }
68    }
69
70    pub fn new_line(&mut self) -> usize {
71        let line = Line {
72            nodes: vec![],
73            start_node_index: 0,
74            end_node_index: 0,
75            fixed_start_position: false,
76        };
77
78        let idx = self.lines.len();
79        self.lines.push(line);
80        idx
81    }
82
83    pub fn new_capture_group(&mut self, name: Option<String>) -> usize {
84        let idx = self.capture_groups.len();
85        self.capture_groups.push(CaptureGroup { name });
86        idx
87    }
88
89    pub fn get_capture_group_index_by_name(&self, name: &str) -> Option<usize> {
90        self.capture_groups.iter().position(|e| match &e.name {
91            Some(n) => n == name,
92            None => false,
93        })
94    }
95
96    pub fn get_capture_group_name_by_index(&self, index:usize) -> Option<&str> {
97        let n = &self.capture_groups[index].name;
98        if let Some(m) = n {
99            Some(m.as_str())
100        }else {
101            None
102        }
103    }
104    // pub fn get_capture_group_names(&self) -> Vec<Option<&String>> {
105    //     self.capture_groups
106    //         .iter()
107    //         .map(|item| {
108    //             if let Some(name) = &item.name {
109    //                 Some(name)
110    //             } else {
111    //                 None
112    //             }
113    //         })
114    //         .collect()
115    // }
116
117    // pub fn get_number_of_capture_groups(&self) -> usize {
118    //     self.capture_groups.len()
119    // }
120
121    // pub fn get_number_of_counters(&self) -> usize {
122    //     self.number_of_counters
123    // }
124
125    // for debug
126    pub fn get_debug_text(&self) -> String {
127        let mut ss = vec![];
128
129        // lines
130        if self.lines.len() == 1 {
131            ss.push(self.lines[0].get_debug_text());
132        } else {
133            for (node_index, line) in self.lines.iter().enumerate() {
134                ss.push(format!("= ${}", node_index));
135                ss.push(line.get_debug_text());
136            }
137        }
138
139        // capture groups
140        for (capture_group_index, capture_group) in self.capture_groups.iter().enumerate() {
141            let s = if let Some(name) = &capture_group.name {
142                format!("# {{{}}}, {}", capture_group_index, name)
143            } else {
144                format!("# {{{}}}", capture_group_index)
145            };
146            ss.push(s);
147        }
148
149        ss.join("\n")
150    }
151}
152
153impl Default for Route {
154    fn default() -> Self {
155        Self::new()
156    }
157}
158
159impl Line {
160    // return the index of the new node
161    pub fn new_node(&mut self) -> usize {
162        let node = Node {
163            transition_items: vec![],
164        };
165        let idx = self.nodes.len();
166        self.nodes.push(node);
167
168        idx
169    }
170
171    pub fn append_transition(
172        &mut self,
173        source_node_index: usize,
174        target_node_index: usize,
175        transition: Transition,
176    ) -> usize {
177        let transition_node = TransitionItem {
178            transition,
179            target_node_index,
180        };
181
182        let idx = self.nodes[source_node_index].transition_items.len();
183
184        self.nodes[source_node_index]
185            .transition_items
186            .push(transition_node);
187
188        idx
189    }
190
191    // for debug
192    pub fn get_debug_text(&self) -> String {
193        let mut ss = vec![];
194
195        for (node_index, node) in self.nodes.iter().enumerate() {
196            // node
197            let prefix = if node_index == self.start_node_index {
198                '>'
199            } else if node_index == self.end_node_index {
200                '<'
201            } else {
202                '-'
203            };
204
205            let s = format!("{} {}", prefix, node_index);
206            ss.push(s);
207
208            // transition items
209            for transition_item in &node.transition_items {
210                let s = format!(
211                    "  -> {}, {}",
212                    transition_item.target_node_index, transition_item.transition
213                );
214                ss.push(s);
215            }
216        }
217
218        ss.join("\n")
219    }
220}
221
222#[cfg(test)]
223mod tests {
224    use pretty_assertions::{assert_eq, assert_str_eq};
225
226    use crate::{
227        route::Route,
228        transition::{CharTransition, Transition},
229    };
230
231    #[test]
232    fn test_route_new_line() {
233        let mut route = Route::new();
234
235        // create a line
236        {
237            let line_index = route.new_line();
238            let line = &mut route.lines[line_index];
239
240            // create a node
241            line.new_node();
242
243            assert_str_eq!(
244                line.get_debug_text(),
245                "\
246> 0"
247            );
248
249            // create other nodes
250            let idx1 = line.new_node();
251            let _idx2 = line.new_node();
252            let idx3 = line.new_node();
253
254            line.start_node_index = idx1;
255            line.end_node_index = idx3;
256
257            assert_str_eq!(
258                line.get_debug_text(),
259                "\
260- 0
261> 1
262- 2
263< 3"
264            );
265        }
266
267        // create another line
268        {
269            let line_index = route.new_line();
270            let line = &mut route.lines[line_index];
271
272            // create a node
273            line.new_node();
274
275            assert_str_eq!(
276                route.get_debug_text(),
277                "\
278= $0
279- 0
280> 1
281- 2
282< 3
283= $1
284> 0"
285            );
286        }
287    }
288
289    #[test]
290    fn test_route_new_capture_group() {
291        let mut route = Route::new();
292        let line_index = route.new_line();
293        let line = &mut route.lines[line_index];
294
295        line.new_node();
296        line.new_node();
297
298        route.new_capture_group(None);
299        route.new_capture_group(Some("foo".to_owned()));
300        route.new_capture_group(None);
301
302        assert_str_eq!(
303            route.get_debug_text(),
304            "\
305> 0
306- 1
307# {0}
308# {1}, foo
309# {2}"
310        );
311
312        assert_eq!(route.get_capture_group_index_by_name("foo"), Some(1));
313        assert!(route.get_capture_group_index_by_name("bar").is_none());
314    }
315
316    #[test]
317    fn test_line_append_transition() {
318        let mut route = Route::new();
319        let line_index = route.new_line();
320        let line = &mut route.lines[line_index];
321
322        let node_idx0 = line.new_node();
323        let node_idx1 = line.new_node();
324        let node_idx2 = line.new_node();
325        let node_idx3 = line.new_node();
326
327        let trans_idx0 = line.append_transition(
328            node_idx0,
329            node_idx1,
330            Transition::Char(CharTransition::new('a')),
331        );
332
333        assert_str_eq!(
334            line.get_debug_text(),
335            "\
336> 0
337  -> 1, Char 'a'
338- 1
339- 2
340- 3"
341        );
342
343        assert_eq!(trans_idx0, 0);
344
345        let trans_idx1 = line.append_transition(
346            node_idx0,
347            node_idx2,
348            Transition::Char(CharTransition::new('b')),
349        );
350
351        let trans_idx2 = line.append_transition(
352            node_idx0,
353            node_idx3,
354            Transition::Char(CharTransition::new('c')),
355        );
356
357        assert_str_eq!(
358            line.get_debug_text(),
359            "\
360> 0
361  -> 1, Char 'a'
362  -> 2, Char 'b'
363  -> 3, Char 'c'
364- 1
365- 2
366- 3"
367        );
368
369        assert_eq!(trans_idx1, 1);
370        assert_eq!(trans_idx2, 2);
371
372        let trans_idx3 = line.append_transition(
373            node_idx1,
374            node_idx2,
375            Transition::Char(CharTransition::new('x')),
376        );
377
378        assert_str_eq!(
379            line.get_debug_text(),
380            "\
381> 0
382  -> 1, Char 'a'
383  -> 2, Char 'b'
384  -> 3, Char 'c'
385- 1
386  -> 2, Char 'x'
387- 2
388- 3"
389        );
390
391        assert_eq!(trans_idx3, 0);
392    }
393}