cedar_policy_core/ast/
pattern.rs

1/*
2 * Copyright 2022-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use std::sync::Arc;
18
19use serde::{Deserialize, Serialize};
20
21/// Represent an element in a pattern literal (the RHS of the like operation)
22#[derive(Deserialize, Hash, Debug, Clone, Copy, PartialEq, Eq)]
23// We need special serialization implementation for CedarDRT because Rust's
24// unicode escape sequences (e.g., `\u{1234}`) can appear in serialized strings
25// and it's difficult to parse them into Dafny characters.
26// Instead we serialize the unicode values of Rust characters and leverage
27// Dafny's type conversion to retrieve the characters.
28#[cfg_attr(not(fuzzing), derive(Serialize))]
29#[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))]
30pub enum PatternElem {
31    /// A character literal
32    Char(char),
33    /// The wildcard `*`
34    Wildcard,
35}
36
37#[cfg(fuzzing)]
38impl Serialize for PatternElem {
39    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
40    where
41        S: serde::Serializer,
42    {
43        // Helper enum for serialization
44        #[derive(Debug, Serialize)]
45        enum PatternElemForDafny {
46            Char(u32),
47            Wildcard,
48        }
49        match self {
50            Self::Char(c) => PatternElemForDafny::Char(*c as u32).serialize(serializer),
51            Self::Wildcard => PatternElemForDafny::Wildcard.serialize(serializer),
52        }
53    }
54}
55
56/// Represent a pattern literal (the RHS of the like operator)
57/// Also provides an implementation of the Display trait as well as a wildcard matching method.
58#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
59#[serde(transparent)]
60pub struct Pattern {
61    /// A vector of pattern elements
62    elems: Arc<Vec<PatternElem>>,
63}
64
65impl Pattern {
66    /// Explicitly create a pattern literal out of a vector of pattern elements
67    pub fn new(elems: impl IntoIterator<Item = PatternElem>) -> Self {
68        Self {
69            elems: Arc::new(elems.into_iter().collect()),
70        }
71    }
72
73    /// Getter to the wrapped vector
74    pub fn get_elems(&self) -> &[PatternElem] {
75        &self.elems
76    }
77
78    /// Iterate over pattern elements
79    pub fn iter(&self) -> impl Iterator<Item = &PatternElem> {
80        self.elems.iter()
81    }
82}
83
84impl std::fmt::Display for Pattern {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        for pc in self.elems.as_ref() {
87            match pc {
88                PatternElem::Char(c) if c == &'*' => write!(f, r#"\*"#)?,
89                PatternElem::Char(c) => write!(f, "{}", c.escape_debug())?,
90                PatternElem::Wildcard => write!(f, r#"*"#)?,
91            }
92        }
93        Ok(())
94    }
95}
96
97impl PatternElem {
98    fn match_char(&self, text_char: &char) -> bool {
99        match self {
100            PatternElem::Char(c) => text_char == c,
101            PatternElem::Wildcard => true,
102        }
103    }
104    fn is_wildcard(&self) -> bool {
105        matches!(self, PatternElem::Wildcard)
106    }
107}
108
109impl Pattern {
110    /// Find if the argument text matches the pattern
111    pub fn wildcard_match(&self, text: &str) -> bool {
112        let pattern = self.get_elems();
113        if pattern.is_empty() {
114            return text.is_empty();
115        }
116
117        // Copying the strings into vectors requires extra space, but has two benefits:
118        // 1. It makes accessing elements more efficient. The alternative (i.e.,
119        //    chars().nth()) needs to re-scan the string for each invocation. Note
120        //    that a simple iterator will not work here since we move both forward
121        //    and backward through the string.
122        // 2. It provides an unambiguous length. In general for a string s,
123        //    s.len() is not the same as s.chars().count(). The length of these
124        //    created vectors will match .chars().count()
125        let text: Vec<char> = text.chars().collect();
126
127        let mut i: usize = 0; // index into text
128        let mut j: usize = 0; // index into pattern
129        let mut star_idx: usize = 0; // index in pattern (j) of the most recent *
130        let mut tmp_idx: usize = 0; // index in text (i) of the most recent *
131        let mut contains_star: bool = false; // does the pattern contain *?
132
133        let text_len = text.len();
134        let pattern_len = pattern.len();
135
136        while i < text_len && (!contains_star || star_idx != pattern_len - 1) {
137            if j < pattern_len && pattern[j].is_wildcard() {
138                contains_star = true;
139                star_idx = j;
140                tmp_idx = i;
141                j += 1;
142            } else if j < pattern_len && pattern[j].match_char(&text[i]) {
143                i += 1;
144                j += 1;
145            } else if contains_star {
146                j = star_idx + 1;
147                i = tmp_idx + 1;
148                tmp_idx = i;
149            } else {
150                return false;
151            }
152        }
153
154        while j < pattern_len && pattern[j].is_wildcard() {
155            j += 1;
156        }
157
158        j == pattern_len
159    }
160}
161
162#[cfg(test)]
163pub mod test {
164    use super::*;
165
166    impl std::ops::Add for Pattern {
167        type Output = Pattern;
168        fn add(self, rhs: Self) -> Self::Output {
169            let elems = [self.get_elems(), rhs.get_elems()].concat();
170            Pattern::new(elems)
171        }
172    }
173
174    // Map a string into a pattern literal with `PatternElem::Char`
175    fn string_map(text: &str) -> Pattern {
176        Pattern::new(text.chars().map(PatternElem::Char))
177    }
178
179    // Create a star pattern literal
180    fn star() -> Pattern {
181        Pattern::new(vec![PatternElem::Wildcard])
182    }
183
184    // Create an empty pattern literal
185    fn empty() -> Pattern {
186        Pattern::new(vec![])
187    }
188
189    #[test]
190    fn test_wildcard_match_basic() {
191        // Patterns that match "foo bar"
192        assert!((string_map("foo") + star()).wildcard_match("foo bar"));
193        assert!((star() + string_map("bar")).wildcard_match("foo bar"));
194        assert!((star() + string_map("o b") + star()).wildcard_match("foo bar"));
195        assert!((string_map("f") + star() + string_map(" bar")).wildcard_match("foo bar"));
196        assert!((string_map("f") + star() + star() + string_map("r")).wildcard_match("foo bar"));
197        assert!((star() + string_map("f") + star() + star() + star()).wildcard_match("foo bar"));
198
199        // Patterns that do not match "foo bar"
200        assert!(!(star() + string_map("foo")).wildcard_match("foo bar"));
201        assert!(!(string_map("bar") + star()).wildcard_match("foo bar"));
202        assert!(!(star() + string_map("bo") + star()).wildcard_match("foo bar"));
203        assert!(!(string_map("f") + star() + string_map("br")).wildcard_match("foo bar"));
204        assert!(!(star() + string_map("x") + star() + star() + star()).wildcard_match("foo bar"));
205        assert!(!empty().wildcard_match("foo bar"));
206
207        // Patterns that match ""
208        assert!(empty().wildcard_match(""));
209        assert!(star().wildcard_match(""));
210
211        // Patterns that do not match ""
212        assert!(!string_map("foo bar").wildcard_match(""));
213
214        // Patterns that match "*"
215        assert!(string_map("*").wildcard_match("*"));
216        assert!(star().wildcard_match("*"));
217
218        // Patterns that do not match "*"
219        assert!(!string_map("\u{0000}").wildcard_match("*"));
220        assert!(!string_map(r#"\u{0000}"#).wildcard_match("*"));
221    }
222
223    #[test]
224    fn test_wildcard_match_unicode() {
225        // Patterns that match "y̆"
226        assert!((string_map("y") + star()).wildcard_match("y̆"));
227        assert!(string_map("y̆").wildcard_match("y̆"));
228
229        // Patterns that do not match "y̆"
230        assert!(!(star() + string_map("p") + star()).wildcard_match("y̆"));
231
232        // Patterns that match "ḛ̶͑͝x̶͔͛a̵̰̯͛m̴͉̋́p̷̠͂l̵͇̍̔ȩ̶̣͝"
233        assert!((star() + string_map("p") + star()).wildcard_match("ḛ̶͑͝x̶͔͛a̵̰̯͛m̴͉̋́p̷̠͂l̵͇̍̔ȩ̶̣͝"));
234        assert!((star() + string_map("a̵̰̯͛m̴͉̋́") + star()).wildcard_match("ḛ̶͑͝x̶͔͛a̵̰̯͛m̴͉̋́p̷̠͂l̵͇̍̔ȩ̶̣͝"));
235
236        // Patterns that do not match "ḛ̶͑͝x̶͔͛a̵̰̯͛m̴͉̋́p̷̠͂l̵͇̍̔ȩ̶̣͝"
237        assert!(!(string_map("y") + star()).wildcard_match("ḛ̶͑͝x̶͔͛a̵̰̯͛m̴͉̋́p̷̠͂l̵͇̍̔ȩ̶̣͝"));
238    }
239}