netidx_netproto/
glob.rs

1use anyhow::Result;
2use arcstr::ArcStr;
3use bytes::{Buf, BufMut};
4use globset;
5use netidx_core::{
6    pack::{Pack, PackError},
7    path::Path,
8};
9use poolshark::global::{GPooled, Pool};
10use smallvec::SmallVec;
11use std::{
12    cmp::{Eq, PartialEq},
13    ops::Deref,
14    result,
15    sync::{Arc, LazyLock},
16};
17
18use crate::value::{FromValue, Value};
19
/// How many path levels a glob can reach.
///
/// * `Subtree` places no limit on the number of levels.
/// * `Finite(n)` covers at most `n` levels.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Scope {
    Subtree,
    Finite(usize),
}

impl Scope {
    /// Return true if a path with `levels` levels falls inside this scope.
    ///
    /// `Subtree` contains every level count; `Finite(n)` contains any
    /// level count that is less than or equal to `n`.
    pub fn contains(&self, levels: usize) -> bool {
        match *self {
            Scope::Finite(max) => levels <= max,
            Scope::Subtree => true,
        }
    }
}
34
35/// Unix style globs for matching paths in the resolver.
36///
37/// All common unix globing features are supported.
38/// * ? matches any character except the path separator
39/// * \* matches zero or more characters, but not the path separator
40/// * \** recursively matches containers. It's only legal uses are
41/// /**/foo, /foo/**/bar, and /foo/bar/**, which match respectively,
42/// any path ending in foo, any path starting with /foo and ending in
43/// bar, and any path starting with /foo/bar.
44/// * {a, b}, matches a or b where a and b are glob patterns, {} can't be nested however.
45/// * \[ab\], \[!ab\], matches respectively the char a or b, and any char but a or b.
46/// * any of the above metacharacters can be escaped with a \, a
47/// literal \ may be produced with \\.
48///
49/// e.g.
50/// `/solar/{stats,settings}/*` -> all leaf paths under /solar/stats or /solar/settings.
51/// `/s*/s*/**` -> any path who's first two levels start with s.
52/// `/**/?` -> any path who's final component is a single character
53/// `/marketdata/{IBM,MSFT,AMZN}/last`
54#[derive(Debug, Clone, PartialEq, Eq)]
55pub struct Glob {
56    raw: ArcStr,
57    base: Path,
58    scope: Scope,
59    glob: globset::Glob,
60}
61
62impl Glob {
63    /// return the longest plain string before the first glob char
64    pub fn plain(&self) -> &str {
65        match Self::first_glob_char(&*self.raw) {
66            None => &*self.raw,
67            Some(i) => &self.raw[0..i],
68        }
69    }
70
71    /// returns true if the specified string contains any non escaped
72    /// glob meta chars.
73    pub fn is_glob(s: &str) -> bool {
74        Self::first_glob_char(s).is_some()
75    }
76
77    /// returns the index of the first glob special char or None if raw is a plain string
78    pub fn first_glob_char(mut s: &str) -> Option<usize> {
79        loop {
80            if s.is_empty() {
81                break None;
82            } else {
83                match s.find(&['?', '*', '{', '['][..]) {
84                    None => break None,
85                    Some(i) => {
86                        if escaping::is_escaped(s, '\\', i) {
87                            s = &s[i + 1..];
88                        } else {
89                            break Some(i);
90                        }
91                    }
92                }
93            }
94        }
95    }
96
97    pub fn new(raw: ArcStr) -> Result<Glob> {
98        if !Path::is_absolute(&raw) {
99            bail!("glob paths must be absolute")
100        }
101        let base = {
102            let mut cur = "/";
103            let mut iter = Path::dirnames(&raw);
104            loop {
105                match iter.next() {
106                    None => break cur,
107                    Some(p) => {
108                        if Glob::is_glob(p) {
109                            break cur;
110                        } else {
111                            cur = p;
112                        }
113                    }
114                }
115            }
116        };
117        let lvl = Path::levels(base);
118        let base = Path::from(ArcStr::from(base));
119        let scope =
120            if Path::dirnames(&raw).skip(lvl).any(|p| Path::basename(p) == Some("**")) {
121                Scope::Subtree
122            } else {
123                Scope::Finite(Path::levels(&raw))
124            };
125        let glob = globset::Glob::new(&*raw)?;
126        Ok(Glob { raw, base, scope, glob })
127    }
128
129    pub fn base(&self) -> &str {
130        &self.base
131    }
132
133    pub fn scope(&self) -> &Scope {
134        &self.scope
135    }
136
137    pub fn glob(&self) -> &globset::Glob {
138        &self.glob
139    }
140
141    pub fn into_glob(self) -> globset::Glob {
142        self.glob
143    }
144
145    pub fn raw(&self) -> &ArcStr {
146        &self.raw
147    }
148}
149
150impl Pack for Glob {
151    fn encoded_len(&self) -> usize {
152        Pack::encoded_len(&self.raw)
153    }
154
155    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
156        Pack::encode(&self.raw, buf)
157    }
158
159    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
160        Glob::new(Pack::decode(buf)?).map_err(|_| PackError::InvalidFormat)
161    }
162}
163
164impl Into<Value> for Glob {
165    fn into(self) -> Value {
166        self.raw.into()
167    }
168}
169
170impl FromValue for Glob {
171    fn from_value(v: Value) -> Result<Self> {
172        match v {
173            Value::String(s) => Self::new(s),
174            x => bail!("Glob::from_value expected String got {x:?}"),
175        }
176    }
177}
178
179#[derive(Debug)]
180struct GlobSetInner {
181    raw: GPooled<Vec<Glob>>,
182    published_only: bool,
183    glob: globset::GlobSet,
184}
185
186/// A set of globs that are evaluated at the same time
187#[derive(Debug, Clone)]
188pub struct GlobSet(Arc<GlobSetInner>);
189
190impl PartialEq for GlobSet {
191    fn eq(&self, other: &Self) -> bool {
192        &self.0.raw == &other.0.raw
193    }
194}
195
196impl Eq for GlobSet {}
197
198impl Into<Value> for GlobSet {
199    fn into(self) -> Value {
200        (self.0.published_only, self.0.raw.clone()).into()
201    }
202}
203
204impl FromValue for GlobSet {
205    fn from_value(v: Value) -> Result<Self> {
206        let (published_only, globs): (bool, Vec<Glob>) = FromValue::from_value(v)?;
207        Self::new(published_only, globs)
208    }
209}
210
211impl<'a> TryFrom<&'a [String]> for GlobSet {
212    type Error = anyhow::Error;
213
214    fn try_from(value: &[String]) -> result::Result<Self, Self::Error> {
215        let v: SmallVec<[Glob; 8]> =
216            value.iter().map(|c| Glob::new(ArcStr::from(&**c))).collect::<Result<_>>()?;
217        GlobSet::new(false, v)
218    }
219}
220
221impl<'a> TryFrom<&'a [ArcStr]> for GlobSet {
222    type Error = anyhow::Error;
223
224    fn try_from(value: &[ArcStr]) -> result::Result<Self, Self::Error> {
225        let v: SmallVec<[Glob; 8]> = value
226            .iter()
227            .map(|c| Glob::new(ArcStr::from(c.clone())))
228            .collect::<Result<_>>()?;
229        GlobSet::new(false, v)
230    }
231}
232
233impl TryFrom<Vec<String>> for GlobSet {
234    type Error = anyhow::Error;
235
236    fn try_from(value: Vec<String>) -> result::Result<Self, Self::Error> {
237        let v: SmallVec<[Glob; 8]> = value
238            .into_iter()
239            .map(|c| Glob::new(ArcStr::from(&*c)))
240            .collect::<Result<_>>()?;
241        GlobSet::new(false, v)
242    }
243}
244
245impl TryFrom<Vec<ArcStr>> for GlobSet {
246    type Error = anyhow::Error;
247
248    fn try_from(value: Vec<ArcStr>) -> result::Result<Self, Self::Error> {
249        let v: SmallVec<[Glob; 8]> = value
250            .into_iter()
251            .map(|c| Glob::new(ArcStr::from(c.clone())))
252            .collect::<Result<_>>()?;
253        GlobSet::new(false, v)
254    }
255}
256
257impl GlobSet {
258    /// create a new globset from the specified globs
259    ///
260    /// if published_only is true, then the globset will only match
261    /// published paths, otherwise it will match both structural and
262    /// published paths.
263    pub fn new(
264        published_only: bool,
265        globs: impl IntoIterator<Item = Glob>,
266    ) -> Result<GlobSet> {
267        static GLOB: LazyLock<Pool<Vec<Glob>>> = LazyLock::new(|| Pool::new(10, 100));
268        let mut builder = globset::GlobSetBuilder::new();
269        let mut raw = GLOB.take();
270        for glob in globs {
271            builder.add(glob.glob.clone());
272            raw.push(glob);
273        }
274        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
275        Ok(GlobSet(Arc::new(GlobSetInner {
276            raw,
277            published_only,
278            glob: builder.build()?,
279        })))
280    }
281
282    pub fn is_match(&self, path: &Path) -> bool {
283        self.0.glob.is_match(path.as_ref())
284    }
285
286    pub fn published_only(&self) -> bool {
287        self.0.published_only
288    }
289
290    /// return the raw glob strings
291    pub fn raw(&self) -> Vec<ArcStr> {
292        self.0.raw.iter().map(|g| g.raw.clone()).collect()
293    }
294
295    /// Return true if the two globsets are disjoint.
296    ///
297    /// Disjoint globsets will never both match a given path. However
298    /// non disjoint globsets might not match the same paths. So in
299    /// other words this will only return turn if the two globsets
300    /// definitely will not match any of the same paths. It is
301    /// possible that this returns true and the globsets are in fact
302    /// disjoint.
303    pub fn disjoint(&self, other: &Self) -> bool {
304        for g0 in self.0.raw.iter() {
305            let plain0 = g0.plain();
306            for g1 in other.0.raw.iter() {
307                let plain1 = g1.plain();
308                if plain0.starts_with(plain1)
309                    || plain1.starts_with(plain0)
310                    || plain0 == plain1
311                {
312                    return false;
313                }
314            }
315        }
316        return true;
317    }
318}
319
320impl Deref for GlobSet {
321    type Target = Vec<Glob>;
322
323    fn deref(&self) -> &Self::Target {
324        &*self.0.raw
325    }
326}
327
328impl Pack for GlobSet {
329    fn encoded_len(&self) -> usize {
330        <bool as Pack>::encoded_len(&self.0.published_only)
331            + <GPooled<Vec<Glob>> as Pack>::encoded_len(&self.0.raw)
332    }
333
334    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
335        <bool as Pack>::encode(&self.0.published_only, buf)?;
336        <GPooled<Vec<Glob>> as Pack>::encode(&self.0.raw, buf)
337    }
338
339    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
340        let published_only = <bool as Pack>::decode(buf)?;
341        let mut raw = <GPooled<Vec<Glob>> as Pack>::decode(buf)?;
342        let mut builder = globset::GlobSetBuilder::new();
343        for glob in raw.iter() {
344            builder.add(glob.glob.clone());
345        }
346        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
347        let glob = builder.build().map_err(|_| PackError::InvalidFormat)?;
348        Ok(GlobSet(Arc::new(GlobSetInner { raw, published_only, glob })))
349    }
350}