zenoh_keyexpr/key_expr/
canon.rs

1//
2// Copyright (c) 2023 ZettaScale Technology
3//
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
7// which is available at https://www.apache.org/licenses/LICENSE-2.0.
8//
9// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
10//
11// Contributors:
12//   ZettaScale Zenoh Team, <zenoh@zettascale.tech>
13//
14use alloc::string::String;
15
/// In-place canonization of a key expression.
///
/// Implemented by the types that `KeyExpr::autocanonize()` is able to
/// canonize in place.
pub trait Canonize {
    /// Rewrites `self` into its canonical form, in place.
    fn canonize(&mut self);
}
21
/// Canonizes the key expression held in `bytes` in place.
///
/// Returns the length of the canonical form, which occupies the front of
/// `bytes`. Bytes past that length are leftovers that the caller is expected
/// to zero or truncate.
///
/// Rewrites applied:
/// - a chunk consisting only of `$*`s becomes a `*` chunk;
/// - a run of `$*`s inside a chunk becomes a single `$*`;
/// - a run of `**` chunks becomes a single `**` chunk;
/// - `**/*` becomes `*/**`.
fn canonize(bytes: &mut [u8]) -> usize {
    // Read cursor.
    let mut index = 0;
    // Write cursor; it never overtakes `index`.
    let mut written = 0;
    // True while one or more `**/` chunks have been read but not yet emitted.
    let mut double_wild = false;
    loop {
        match &bytes[index..] {
            // Final `**` chunk: it also absorbs any pending `**/`.
            [b'*', b'*'] => {
                bytes[written..written + 2].copy_from_slice(b"**");
                written += 2;
                return written;
            }
            // `**/`: defer emission so that repeats collapse and a following
            // `*` chunk can be moved in front of it.
            [b'*', b'*', b'/', ..] => {
                double_wild = true;
                index += 3;
            }
            // A whole chunk that is `*` or `$*`: emit `*`.
            [b'*', r @ ..] | [b'$', b'*', r @ ..] if r.is_empty() || r.starts_with(b"/") => {
                let (end, len) = (!r.starts_with(b"/"), r.len());
                bytes[written] = b'*';
                written += 1;
                if end {
                    // `**/*` at the end of the expression becomes `*/**`.
                    if double_wild {
                        bytes[written..written + 3].copy_from_slice(b"/**");
                        written += 3;
                    }
                    return written;
                }
                bytes[written] = b'/';
                written += 1;
                // Resume right after the `/` that starts `r`.
                index = bytes.len() - len + 1;
            }
            // Handle chunks with only repeated "$*"
            [b'$', b'*', b'$', b'*', ..] => {
                index += 2;
            }
            // Any other chunk is copied, collapsing inner `$*` runs.
            _ => {
                // Flush the pending `**/` in front of this chunk.
                if double_wild && &bytes[index..] != b"**" {
                    bytes[written..written + 3].copy_from_slice(b"**/");
                    written += 3;
                    double_wild = false;
                }
                let mut write_start = index;
                loop {
                    match bytes.get(index) {
                        // End of chunk: copy it together with its `/`.
                        Some(b'/') => {
                            index += 1;
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            break;
                        }
                        // Start of a `$*$*…` run.
                        Some(b'$') if matches!(bytes.get(index + 1..index + 4), Some(b"*$*")) => {
                            // Keep the first `$*` of the run...
                            index += 2;
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            // ...and drop every `$*` that directly follows it.
                            // Counting non-overlapping pairs from `index`
                            // stays in bounds when the run ends the input and
                            // collapses runs of any length.
                            let skip = bytes[index..]
                                .chunks_exact(2)
                                .take_while(|s| s == b"$*")
                                .count();
                            index += skip * 2;
                            write_start = index;
                        }
                        Some(_) => index += 1,
                        // End of input: copy what is left of the chunk.
                        None => {
                            bytes.copy_within(write_start..index, written);
                            written += index - write_start;
                            return written;
                        }
                    }
                }
            }
        }
    }
}
95
96impl Canonize for &mut str {
97    fn canonize(&mut self) {
98        // SAFETY: canonize leave an UTF8 string within the returned length,
99        // and remaining garbage bytes are zeroed
100        let bytes = unsafe { self.as_bytes_mut() };
101        let length = canonize(bytes);
102        bytes[length..].fill(b'\0');
103        *self = &mut core::mem::take(self)[..length];
104    }
105}
106
107impl Canonize for String {
108    fn canonize(&mut self) {
109        // SAFETY: canonize leave an UTF8 string within the returned length,
110        // and remaining garbage bytes are truncated
111        let bytes = unsafe { self.as_mut_vec() };
112        let length = canonize(bytes);
113        bytes.truncate(length);
114    }
115}
116
#[test]
fn canonizer() {
    use super::OwnedKeyExpr;

    // Canonizes an owned copy of `input` and returns the result.
    fn canonized(input: &str) -> String {
        let mut s = String::from(input);
        s.canonize();
        s
    }

    // These inputs must still be rejected by `autocanonize`.
    for invalid in [
        "/a/b/",
        "/a/b",
        "a/b/",
        "a/b/*$*",
        "a/b/$**",
        "a/b/**$*",
        "a/b/*$**",
        "a/b/*$***",
        "a/b/**$**",
        "a/b/**$***",
    ] {
        assert!(
            OwnedKeyExpr::autocanonize(String::from(invalid)).is_err(),
            "`{}` should be rejected",
            invalid
        );
    }

    //
    // Check statements declared in https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md
    //
    // Any contiguous sequence of $*s is replaced by a single $*
    assert_eq!(canonized("hello/foo$*$*/bar"), "hello/foo$*/bar");

    // Any contiguous sequence of ** chunks is replaced by a single ** chunk
    assert_eq!(canonized("hello/**/**/bye"), "hello/**/bye");
    assert_eq!(canonized("hello/**/**"), "hello/**");

    // Any $* chunk is replaced by a * chunk
    assert_eq!(canonized("hello/$*/bye"), "hello/*/bye");
    assert_eq!(canonized("hello/$*$*/bye"), "hello/*/bye");
    assert_eq!(canonized("$*/hello/$*/bye"), "*/hello/*/bye");
    assert_eq!(canonized("$*$*$*/hello/$*/bye/$*"), "*/hello/*/bye/*");
    assert_eq!(canonized("$*$*$*/hello/$*$*/bye/$*$*"), "*/hello/*/bye/*");

    // **/* is replaced by */**
    assert_eq!(canonized("hello/**/*"), "hello/*/**");

    // &mut str remaining part is zeroed
    let mut s = String::from("$*$*$*/hello/$*$*/bye/$*$*");
    let mut s_mut = s.as_mut_str();
    s_mut.canonize();
    assert_eq!(s_mut, "*/hello/*/bye/*");
    assert_eq!(s, "*/hello/*/bye/*\0\0\0\0\0\0\0\0\0\0\0");
}