netidx_netproto/
glob.rs

1use anyhow::Result;
2use arcstr::ArcStr;
3use bytes::{Buf, BufMut};
4use globset;
5use netidx_core::{
6    pack::{Pack, PackError},
7    path::Path,
8    pool::{Pool, Pooled},
9    utils,
10};
11use smallvec::SmallVec;
12use std::{
13    cmp::{Eq, PartialEq},
14    ops::Deref,
15    result,
16    sync::Arc,
17};
18
19use crate::value::{FromValue, Value};
20
/// Depth limit attached to a glob.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scope {
    /// No depth limit; every level count is contained.
    Subtree,
    /// Contains level counts up to and including the wrapped value.
    Finite(usize),
}

impl Scope {
    /// Returns true if `levels` falls inside this scope. `Subtree`
    /// contains everything, `Finite(n)` contains any `levels <= n`.
    pub fn contains(&self, levels: usize) -> bool {
        if let Scope::Finite(limit) = *self {
            levels <= limit
        } else {
            true
        }
    }
}
35
36/// Unix style globs for matching paths in the resolver. All common
37/// unix globing features are supported.
38/// * ? matches any character except the path separator
39/// * \* matches zero or more characters, but not the path separator
40/// * \** recursively matches containers. It's only legal uses are
41/// /**/foo, /foo/**/bar, and /foo/bar/**, which match respectively,
42/// any path ending in foo, any path starting with /foo and ending in
43/// bar, and any path starting with /foo/bar.
44/// * {a, b}, matches a or b where a and b are glob patterns, {} can't be nested however.
45/// * [ab], [!ab], matches respectively the char a or b, and any char but a or b.
46/// * any of the above metacharacters can be escaped with a \, a
47/// literal \ may be produced with \\.
48///
49/// e.g.
50/// `/solar/{stats,settings}/*` -> all leaf paths under /solar/stats or /solar/settings.
51/// `/s*/s*/**` -> any path who's first two levels start with s.
52/// `/**/?` -> any path who's final component is a single character
53/// `/marketdata/{IBM,MSFT,AMZN}/last`
54#[derive(Debug, Clone, PartialEq, Eq)]
55pub struct Glob {
56    raw: ArcStr,
57    base: Path,
58    scope: Scope,
59    glob: globset::Glob,
60}
61
62impl Glob {
63    /// return the longest plain string before the first glob char
64    pub fn plain(&self) -> &str {
65        match Self::first_glob_char(&*self.raw) {
66            None => &*self.raw,
67            Some(i) => &self.raw[0..i],
68        }
69    }
70
71    /// returns true if the specified string contains any non escaped
72    /// glob meta chars.
73    pub fn is_glob(s: &str) -> bool {
74        Self::first_glob_char(s).is_some()
75    }
76
77    /// returns the index of the first glob special char or None if raw is a plain string
78    pub fn first_glob_char(mut s: &str) -> Option<usize> {
79        loop {
80            if s.is_empty() {
81                break None;
82            } else {
83                match s.find(&['?', '*', '{', '['][..]) {
84                    None => break None,
85                    Some(i) => {
86                        if utils::is_escaped(s, '\\', i) {
87                            s = &s[i + 1..];
88                        } else {
89                            break Some(i);
90                        }
91                    }
92                }
93            }
94        }
95    }
96
97    pub fn new(raw: ArcStr) -> Result<Glob> {
98        if !Path::is_absolute(&raw) {
99            bail!("glob paths must be absolute")
100        }
101        let base = {
102            let mut cur = "/";
103            let mut iter = Path::dirnames(&raw);
104            loop {
105                match iter.next() {
106                    None => break cur,
107                    Some(p) => {
108                        if Glob::is_glob(p) {
109                            break cur;
110                        } else {
111                            cur = p;
112                        }
113                    }
114                }
115            }
116        };
117        let lvl = Path::levels(base);
118        let base = Path::from(ArcStr::from(base));
119        let scope =
120            if Path::dirnames(&raw).skip(lvl).any(|p| Path::basename(p) == Some("**")) {
121                Scope::Subtree
122            } else {
123                Scope::Finite(Path::levels(&raw))
124            };
125        let glob = globset::Glob::new(&*raw)?;
126        Ok(Glob { raw, base, scope, glob })
127    }
128
129    pub fn base(&self) -> &str {
130        &self.base
131    }
132
133    pub fn scope(&self) -> &Scope {
134        &self.scope
135    }
136
137    pub fn glob(&self) -> &globset::Glob {
138        &self.glob
139    }
140
141    pub fn into_glob(self) -> globset::Glob {
142        self.glob
143    }
144
145    pub fn raw(&self) -> &ArcStr {
146        &self.raw
147    }
148}
149
150impl Pack for Glob {
151    fn encoded_len(&self) -> usize {
152        Pack::encoded_len(&self.raw)
153    }
154
155    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
156        Pack::encode(&self.raw, buf)
157    }
158
159    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
160        Glob::new(Pack::decode(buf)?).map_err(|_| PackError::InvalidFormat)
161    }
162}
163
164impl Into<Value> for Glob {
165    fn into(self) -> Value {
166        self.raw.into()
167    }
168}
169
170impl FromValue for Glob {
171    fn from_value(v: Value) -> Result<Self> {
172        match v {
173            Value::String(s) => Self::new(s),
174            x => bail!("Glob::from_value expected String got {x:?}"),
175        }
176    }
177}
178
179#[derive(Debug)]
180struct GlobSetInner {
181    raw: Pooled<Vec<Glob>>,
182    published_only: bool,
183    glob: globset::GlobSet,
184}
185
186#[derive(Debug, Clone)]
187pub struct GlobSet(Arc<GlobSetInner>);
188
189impl PartialEq for GlobSet {
190    fn eq(&self, other: &Self) -> bool {
191        &self.0.raw == &other.0.raw
192    }
193}
194
195impl Eq for GlobSet {}
196
197impl Into<Value> for GlobSet {
198    fn into(self) -> Value {
199        (self.0.published_only, self.0.raw.clone()).into()
200    }
201}
202
203impl FromValue for GlobSet {
204    fn from_value(v: Value) -> Result<Self> {
205        let (published_only, globs): (bool, Vec<Glob>) = FromValue::from_value(v)?;
206        Self::new(published_only, globs)
207    }
208}
209
210impl<'a> TryFrom<&'a [String]> for GlobSet {
211    type Error = anyhow::Error;
212
213    fn try_from(value: &[String]) -> result::Result<Self, Self::Error> {
214        let v: SmallVec<[Glob; 8]> =
215            value.iter().map(|c| Glob::new(ArcStr::from(&**c))).collect::<Result<_>>()?;
216        GlobSet::new(false, v)
217    }
218}
219
220impl<'a> TryFrom<&'a [ArcStr]> for GlobSet {
221    type Error = anyhow::Error;
222
223    fn try_from(value: &[ArcStr]) -> result::Result<Self, Self::Error> {
224        let v: SmallVec<[Glob; 8]> = value
225            .iter()
226            .map(|c| Glob::new(ArcStr::from(c.clone())))
227            .collect::<Result<_>>()?;
228        GlobSet::new(false, v)
229    }
230}
231
232impl TryFrom<Vec<String>> for GlobSet {
233    type Error = anyhow::Error;
234
235    fn try_from(value: Vec<String>) -> result::Result<Self, Self::Error> {
236        let v: SmallVec<[Glob; 8]> = value
237            .into_iter()
238            .map(|c| Glob::new(ArcStr::from(&*c)))
239            .collect::<Result<_>>()?;
240        GlobSet::new(false, v)
241    }
242}
243
244impl TryFrom<Vec<ArcStr>> for GlobSet {
245    type Error = anyhow::Error;
246
247    fn try_from(value: Vec<ArcStr>) -> result::Result<Self, Self::Error> {
248        let v: SmallVec<[Glob; 8]> = value
249            .into_iter()
250            .map(|c| Glob::new(ArcStr::from(c.clone())))
251            .collect::<Result<_>>()?;
252        GlobSet::new(false, v)
253    }
254}
255
256impl GlobSet {
257    /// create a new globset from the specified globs. if
258    /// published_only is true, then the globset will only match
259    /// published paths, otherwise it will match both structural and
260    /// published paths.
261    pub fn new(
262        published_only: bool,
263        globs: impl IntoIterator<Item = Glob>,
264    ) -> Result<GlobSet> {
265        lazy_static! {
266            static ref GLOB: Pool<Vec<Glob>> = Pool::new(10, 100);
267        }
268        let mut builder = globset::GlobSetBuilder::new();
269        let mut raw = GLOB.take();
270        for glob in globs {
271            builder.add(glob.glob.clone());
272            raw.push(glob);
273        }
274        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
275        Ok(GlobSet(Arc::new(GlobSetInner {
276            raw,
277            published_only,
278            glob: builder.build()?,
279        })))
280    }
281
282    pub fn is_match(&self, path: &Path) -> bool {
283        self.0.glob.is_match(path.as_ref())
284    }
285
286    pub fn published_only(&self) -> bool {
287        self.0.published_only
288    }
289
290    /// return the raw glob strings
291    pub fn raw(&self) -> Vec<ArcStr> {
292        self.0.raw.iter().map(|g| g.raw.clone()).collect()
293    }
294
295    /// disjoint globsets will never both match a given path. However
296    /// non disjoint globsets might not match the same paths. So in
297    /// other words this will only return turn if the two globsets
298    /// definitely will not match any of the same paths. It is
299    /// possible that this returns true and the globsets are in fact
300    /// disjoint.
301    pub fn disjoint(&self, other: &Self) -> bool {
302        for g0 in self.0.raw.iter() {
303            let plain0 = g0.plain();
304            for g1 in other.0.raw.iter() {
305                let plain1 = g1.plain();
306                if plain0.starts_with(plain1)
307                    || plain1.starts_with(plain0)
308                    || plain0 == plain1
309                {
310                    return false;
311                }
312            }
313        }
314        return true;
315    }
316}
317
318impl Deref for GlobSet {
319    type Target = Vec<Glob>;
320
321    fn deref(&self) -> &Self::Target {
322        &*self.0.raw
323    }
324}
325
326impl Pack for GlobSet {
327    fn encoded_len(&self) -> usize {
328        <bool as Pack>::encoded_len(&self.0.published_only)
329            + <Pooled<Vec<Glob>> as Pack>::encoded_len(&self.0.raw)
330    }
331
332    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
333        <bool as Pack>::encode(&self.0.published_only, buf)?;
334        <Pooled<Vec<Glob>> as Pack>::encode(&self.0.raw, buf)
335    }
336
337    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
338        let published_only = <bool as Pack>::decode(buf)?;
339        let mut raw = <Pooled<Vec<Glob>> as Pack>::decode(buf)?;
340        let mut builder = globset::GlobSetBuilder::new();
341        for glob in raw.iter() {
342            builder.add(glob.glob.clone());
343        }
344        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
345        let glob = builder.build().map_err(|_| PackError::InvalidFormat)?;
346        Ok(GlobSet(Arc::new(GlobSetInner { raw, published_only, glob })))
347    }
348}