zenoh_keyexpr/key_expr/
canon.rs

1//
2// Copyright (c) 2023 ZettaScale Technology
3//
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
7// which is available at https://www.apache.org/licenses/LICENSE-2.0.
8//
9// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
10//
11// Contributors:
12//   ZettaScale Zenoh Team, <zenoh@zettascale.tech>
13//
14use alloc::string::String;
15
/// In-place canonization of a textual key expression.
///
/// The implementations in this file (`&mut str` and `String`) hand their
/// underlying bytes to the private `canonize` function and then shrink
/// themselves to the length it returns.
pub trait Canonize {
    /// Rewrites `self` into its canonical form without allocating a new buffer.
    ///
    /// The result may be shorter than the original value: the `String`
    /// implementation truncates itself, while the `&mut str` implementation
    /// narrows the slice and zero-fills the bytes that are no longer covered.
    fn canonize(&mut self);
}
19
/// Rewrites the key expression stored in `bytes` into its canonical form, in place.
///
/// Rewrites applied while scanning the input chunk by chunk (chunks are
/// separated by `/`):
/// - a run of consecutive `**` chunks collapses into a single `**` chunk;
/// - `**/*` is reordered into `*/**`;
/// - a chunk made only of `$*` repetitions becomes a `*` chunk;
/// - any run of consecutive `$*` inside a chunk collapses into a single `$*`.
///
/// The canonical text is written at the start of `bytes` and is never longer
/// than the input (the write cursor never overtakes the read cursor). Bytes
/// located after the returned length are stale leftovers: the caller is
/// expected to truncate or overwrite them.
///
/// Returns the length of the canonized string.
fn canonize(bytes: &mut [u8]) -> usize {
    // Read cursor: start of the part of the input that is still to be examined.
    let mut index = 0;
    // Write cursor: length of the canonical output produced so far.
    let mut written = 0;
    // Set when one or more `**/` have been read but not emitted yet. Emission is
    // delayed so that duplicates can be dropped and `**/*` can be reordered.
    let mut double_wild = false;
    loop {
        match &bytes[index..] {
            // The input ends with a `**` chunk: emit it once (this also absorbs
            // any pending `**/`) and stop.
            [b'*', b'*'] => {
                bytes[written..written + 2].copy_from_slice(b"**");
                written += 2;
                return written;
            }
            // A `**` chunk followed by more chunks: remember it, emit it later.
            [b'*', b'*', b'/', ..] => {
                double_wild = true;
                index += 3;
            }
            // A chunk that is exactly `*` or `$*`: always emitted as `*`, and
            // emitted *before* any pending `**` so that `**/*` becomes `*/**`.
            [b'*', r @ ..] | [b'$', b'*', r @ ..] if r.is_empty() || r.starts_with(b"/") => {
                let (end, len) = (!r.starts_with(b"/"), r.len());
                bytes[written] = b'*';
                written += 1;
                if end {
                    if double_wild {
                        bytes[written..written + 3].copy_from_slice(b"/**");
                        written += 3;
                    }
                    return written;
                }
                bytes[written] = b'/';
                written += 1;
                // `len` is the size of the remainder starting at the `/`;
                // resume right after that separator.
                index = bytes.len() - len + 1;
            }
            // Handle chunks with only repeated "$*": drop the leading `$*` so the
            // next iteration sees a shorter run (eventually a lone `$*` chunk, or a
            // regular chunk starting with a single `$*`).
            [b'$', b'*', b'$', b'*', ..] => {
                index += 2;
            }
            // Any other chunk: copy it through, collapsing inner `$*` runs.
            _ => {
                if double_wild && &bytes[index..] != b"**" {
                    // Flush the pending `**` chunk before the regular chunk.
                    bytes[written..written + 3].copy_from_slice(b"**/");
                    written += 3;
                    double_wild = false;
                }
                // Start of the span that still has to be copied to the output.
                let mut write_start = index;
                loop {
                    match bytes.get(index) {
                        // End of chunk: copy it including its separator and go
                        // back to chunk-level matching.
                        Some(b'/') => {
                            index += 1;
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            break;
                        }
                        // Start of a `$*$*...` run inside the chunk.
                        Some(b'$') if matches!(bytes.get(index + 1..index + 4), Some(b"*$*")) => {
                            // Keep everything up to and including the first `$*`...
                            index += 2;
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            // ...then drop every `$*` that directly follows it.
                            // `index` sits on the second `$*` (guaranteed by the
                            // guard above), so the slice is always in bounds and at
                            // least one pair is skipped. Non-overlapping pairs are
                            // counted so that runs of any length are consumed and
                            // nothing but `$*` is ever skipped.
                            let redundant = bytes[index..]
                                .chunks_exact(2)
                                .take_while(|pair| pair == b"$*")
                                .count();
                            index += redundant * 2;
                            write_start = index;
                        }
                        Some(_) => index += 1,
                        // End of input: copy what is left of the chunk and stop.
                        None => {
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            return written;
                        }
                    }
                }
            }
        }
    }
}
93
94impl Canonize for &mut str {
95    fn canonize(&mut self) {
96        // SAFETY: canonize leave an UTF8 string within the returned length,
97        // and remaining garbage bytes are zeroed
98        let bytes = unsafe { self.as_bytes_mut() };
99        let length = canonize(bytes);
100        bytes[length..].fill(b'\0');
101        *self = &mut core::mem::take(self)[..length];
102    }
103}
104
105impl Canonize for String {
106    fn canonize(&mut self) {
107        // SAFETY: canonize leave an UTF8 string within the returned length,
108        // and remaining garbage bytes are truncated
109        let bytes = unsafe { self.as_mut_vec() };
110        let length = canonize(bytes);
111        bytes.truncate(length);
112    }
113}
114
#[test]
fn canonizer() {
    use super::OwnedKeyExpr;

    // `autocanonize` must reject every one of these inputs (`unwrap_err` panics
    // on `Ok`); the errors are printed to ease debugging.
    dbg!(OwnedKeyExpr::autocanonize(String::from("/a/b/")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("/a/b")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/*$*")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/$**")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/**$*")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/*$**")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/*$***")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/**$**")).unwrap_err());
    dbg!(OwnedKeyExpr::autocanonize(String::from("a/b/**$***")).unwrap_err());

    //
    // Check statements declared in https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md
    //
    // Each entry is `(input, expected canonical form)`.
    const CASES: &[(&str, &str)] = &[
        // Any contiguous sequence of $*s is replaced by a single $*
        ("hello/foo$*$*/bar", "hello/foo$*/bar"),
        // Any contiguous sequence of ** chunks is replaced by a single ** chunk
        ("hello/**/**/bye", "hello/**/bye"),
        ("hello/**/**", "hello/**"),
        // Any $* chunk is replaced by a * chunk
        ("hello/$*/bye", "hello/*/bye"),
        ("hello/$*$*/bye", "hello/*/bye"),
        ("$*/hello/$*/bye", "*/hello/*/bye"),
        ("$*$*$*/hello/$*/bye/$*", "*/hello/*/bye/*"),
        ("$*$*$*/hello/$*$*/bye/$*$*", "*/hello/*/bye/*"),
        // **/* is replaced by */**
        ("hello/**/*", "hello/*/**"),
    ];
    for &(input, expected) in CASES {
        let mut key = String::from(input);
        key.canonize();
        assert_eq!(key, expected);
    }

    // &mut str remaining part is zeroed: the narrowed slice holds the canonical
    // form while the owning String keeps its length, padded with NUL bytes.
    let mut owner = String::from("$*$*$*/hello/$*$*/bye/$*$*");
    let mut view = owner.as_mut_str();
    view.canonize();
    assert_eq!(view, "*/hello/*/bye/*");
    assert_eq!(owner, "*/hello/*/bye/*\0\0\0\0\0\0\0\0\0\0\0");
}