leetcode_rust/problems/p000_0xx/
p000_010.rs

1//! # Description
2//!
3//! Given an input string `s` and a pattern `p`, implement regular expression
4//! matching with support for '`.`' and '`*`' where:
5//!
6//! - '`.`' Matches any single character.​​​​
7//! - '`*`' Matches zero or more of the preceding element.
8//! The matching should cover the **entire** input string (not partial).
9//!
10//! Example 1:
11//!
12//! ```plain
13//! Input: s = "aa", p = "a"
14//! Output: false
15//! Explanation: "a" does not match the entire string "aa".
16//! ```
17//!
18//! Example 2:
19//!
20//! ```plain
21//! Input: s = "aa", p = "a*"
22//! Output: true
23//! Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes "aa".
24//! ```
25//!
26//! Example 3:
27//!
28//! ```plain
29//! Input: s = "ab", p = ".*"
30//! Output: true
31//! Explanation: ".*" means "zero or more (*) of any character (.)".
32//! ```
33//!
34//! Constraints:
35//!
36//! - `1 $\leqslant$ s.length $\leqslant$ 20`
37//! - `1 $\leqslant$ p.length $\leqslant$ 30`
38//! - `s` contains only lowercase English letters.
39//! - `p` contains only lowercase English letters, '`.`', and '`*`'.
40//! - It is guaranteed for each appearance of the character '`*`', there will be a previous valid character to match.
41//! 
42//! Source: <https://leetcode.com/problems/regular-expression-matching/>
43
44////////////////////////////////////////////////////////////////////////////////
45
46use std::collections::HashMap;
47
48/// Simplified regular expression match algorithm
49///
50/// # Arguments
51/// * `s` - input string
52/// * `p` - pattern
53pub fn is_match(s: String, p: String) -> bool {
54    alg_1(s, p)
55    // TODO: Try to implement with another more effecient approach in the future.
56    // alg_3_from_leetcode(s, p) // This approach comes from LeetCode.
57}
58
59/// Node struct representing each match pattern segment.
60#[derive(Debug)]
61struct MatchRuleNode {
62    allow_empty: bool,  // Symbol "*"
63    allow_repeat: bool, // Symbol "*"
64    allow_any: bool,    // Symbol "."
65    u8: u8,
66}
67
68/// A struct holding parsed pattern.
69///
70/// Call `match_next` with the whole input and parsed nodes to run matching.
71#[derive(Debug)]
72struct MatchTree {
73    labeled_results: HashMap<(usize, usize), bool>,
74}
75impl MatchTree {
76    /// Match next character (English letters only)
77    ///
78    /// #TODO: document this function
79    ///
80    /// # Arguments
81    /// * `s` - input string of each step in bytes form
82    /// * `nodes` - pattern nodes left to match
83    fn match_next(&mut self, s: &[u8], nodes: &[MatchRuleNode]) -> bool {
84        macro_rules! label_pair {
85            ($s:expr, $p:expr) => {{
86                if !self.labeled_results.contains_key(&($s, $p)) {
87                    self.labeled_results.insert(($s, $p), false);
88                }
89                false
90            }};
91        }
92
93        macro_rules! match_if_not_labeled {
94            ($s:expr, $n:expr) => {
95                if self.labeled_results.contains_key(&($s.len(), $n.len())) {
96                    *self.labeled_results.get(&($s.len(), $n.len())).unwrap()
97                } else {
98                    self.match_next($s, $n)
99                }
100            };
101        }
102
103        if nodes.len() == 0 && s.len() > 0 {
104            label_pair!(s.len(), nodes.len())
105        } else if s.len() == 0 && nodes.len() > 0 {
106            for n in nodes {
107                if !n.allow_empty {
108                    return label_pair!(s.len(), nodes.len());
109                }
110            }
111            true
112        } else if s.len() == 0 && nodes.len() == 0 {
113            true
114        } else {
115            if nodes.len() == 1 && !nodes[0].allow_any {
116                // Try fast exit
117                for ch in s {
118                    if *ch != nodes[0].u8 {
119                        return label_pair!(s.len(), nodes.len());
120                    }
121                }
122            }
123
124            if nodes[0].allow_any {
125                if nodes[0].allow_repeat {
126                    match_if_not_labeled!(&s[1..], &nodes[1..])
127                        || match_if_not_labeled!(s, &nodes[1..])
128                        || match_if_not_labeled!(&s[1..], nodes)
129                } else {
130                    match_if_not_labeled!(&s[1..], &nodes[1..])
131                }
132            } else if nodes[0].u8 == s[0] {
133                if nodes[0].allow_repeat {
134                    match_if_not_labeled!(&s[1..], &nodes[1..])
135                        || match_if_not_labeled!(s, &nodes[1..])
136                        || match_if_not_labeled!(&s[1..], nodes)
137                } else {
138                    match_if_not_labeled!(&s[1..], &nodes[1..])
139                }
140            } else if nodes[0].allow_empty {
141                match_if_not_labeled!(s, &nodes[1..])
142            } else {
143                label_pair!(s.len(), nodes.len())
144            }
145        }
146    }
147}
148
149/// Create a new MatchRuleNode vector.
150///
151/// #TODO: document this function
152///
153/// # Arguments
154/// * `p` - pattern
155fn parse_pattern(p: &str) -> Vec<MatchRuleNode> {
156    let mut nodes = vec![];
157
158    let mut idx = 0;
159    let pbytes = p.as_bytes();
160    loop {
161        if idx == pbytes.len() {
162            break;
163        }
164        let mut temp_node = MatchRuleNode {
165            allow_empty: true,
166            allow_repeat: true,
167            allow_any: pbytes[idx] == 46,
168            u8: pbytes[idx],
169        };
170        if pbytes.len() - 1 > idx && pbytes[idx + 1] == 42 {
171            idx += 2;
172        } else {
173            temp_node.allow_empty = false;
174            temp_node.allow_repeat = false;
175            idx += 1;
176        }
177        if nodes.len() > 0 {
178            let last: &MatchRuleNode = nodes.last().unwrap();
179            if last.u8 == temp_node.u8
180                && temp_node.allow_repeat == true
181                && last.allow_any == temp_node.allow_any
182                && last.allow_repeat == temp_node.allow_repeat
183                && last.allow_empty == temp_node.allow_empty
184            {
185                // Skip this
186                continue;
187            }
188        }
189        nodes.push(temp_node);
190    }
191
192    nodes
193}
194
195/// Using struct to parse patterns and match input string.
196///
197/// # Arguments
198/// * `s` - input string
199/// * `p` - pattern
200#[allow(unused)]
201fn alg_1(s: String, p: String) -> bool {
202    let mut nodes = parse_pattern(p.as_str());
203    let mut tree = MatchTree {
204        labeled_results: HashMap::new(),
205    };
206
207    tree.match_next(&s.as_bytes(), nodes.as_slice())
208}
209
210/// A function based variant of algorithm 1.
211///
212/// # Arguments
213/// * `s` - input string
214/// * `p` - pattern
215#[deprecated(
216    since = "0.2.1",
217    note = "This function will cause \"Time Limit Exceeded\" error"
218)]
219#[allow(unused)]
220fn alg_2(s: String, p: String) -> bool {
221    let mut match_tree: Vec<(bool, bool, bool, u8)> = vec![];
222
223    let mut idx = 0;
224    let pbytes = p.as_bytes();
225    loop {
226        if idx == pbytes.len() {
227            break;
228        }
229        let mut temp_node = (true, true, pbytes[idx] == 46, pbytes[idx]);
230        if pbytes.len() - 1 > idx && pbytes[idx + 1] == 42 {
231            idx += 2;
232        } else {
233            temp_node.0 = false;
234            temp_node.1 = false;
235            idx += 1;
236        }
237        if match_tree.len() > 1 {
238            let last = match_tree.last().unwrap();
239            if last.3 == temp_node.3
240                && temp_node.1 == true
241                && last.2 == temp_node.2
242                && last.1 == temp_node.1
243                && last.0 == temp_node.0
244            {
245                // Skip this
246                continue;
247            }
248        }
249        match_tree.push(temp_node);
250    }
251
252    fn recur(s: &[u8], nodes: &[(bool, bool, bool, u8)]) -> bool {
253        if nodes.len() == 0 && s.len() > 0 {
254            false
255        } else if s.len() == 0 && nodes.len() > 0 {
256            for n in nodes {
257                if !n.0 {
258                    return false;
259                }
260            }
261            true
262        } else if s.len() == 0 && nodes.len() == 0 {
263            true
264        } else {
265            if nodes[0].2 {
266                if nodes[0].1 {
267                    recur(&s[1..], nodes) || recur(&s[1..], &nodes[1..]) || recur(s, &nodes[1..])
268                } else {
269                    recur(&s[1..], &nodes[1..])
270                }
271            } else if nodes[0].3 == s[0] {
272                if nodes[0].1 {
273                    recur(&s[1..], nodes) || recur(&s[1..], &nodes[1..]) || recur(s, &nodes[1..])
274                } else {
275                    recur(&s[1..], &nodes[1..])
276                }
277            } else if nodes[0].0 {
278                recur(s, &nodes[1..])
279            } else {
280                false
281            }
282        }
283    }
284
285    recur(s.as_bytes(), &match_tree)
286}
287
288////////////////////////////////////////////////////////////////////////////////
289
290struct State {
291    ch: u8,
292    wildcard: bool,
293}
294
295/// A demo submission from LeetCode
296///
297/// Source: <https://leetcode.com/problems/regular-expression-matching/submissions/889316368/>
298///
299/// # Arguments
300/// * `s` - input string
301/// * `p` - pattern to match
302#[allow(unused)]
303fn alg_3_from_leetcode(s: String, p: String) -> bool {
304    let states: Vec<State> = p
305        .bytes()
306        .enumerate()
307        .filter_map(|(i, ch)| {
308            if ch == b'*' {
309                None
310            } else {
311                Some(State {
312                    ch,
313                    wildcard: p.as_bytes().get(i + 1).copied() == Some(b'*'),
314                })
315            }
316        })
317        .chain([State {
318            ch: 0,
319            wildcard: false,
320        }])
321        .collect();
322    let state_count = states.len();
323
324    let mut cur_states = vec![false; state_count];
325    let add_state = |dest: &mut Vec<bool>, mut i: usize| {
326        while !dest[i] {
327            dest[i] = true;
328            if states[i].wildcard {
329                i += 1;
330            } else {
331                break;
332            }
333        }
334    };
335
336    // Initialization
337    add_state(&mut cur_states, 0);
338
339    // Iterate through s.
340    for ch in s.into_bytes() {
341        if cur_states.is_empty() {
342            break;
343        }
344        let mut new_states = vec![false; state_count];
345        for i in 0..state_count {
346            if cur_states[i] && (states[i].ch == b'.' || states[i].ch == ch) {
347                if states[i].wildcard {
348                    add_state(&mut new_states, i);
349                } else {
350                    add_state(&mut new_states, i + 1);
351                }
352            }
353        }
354        cur_states = new_states;
355    }
356
357    cur_states.last().copied().unwrap_or(false)
358}