netidx_netproto/
glob.rs

1use anyhow::Result;
2use arcstr::ArcStr;
3use bytes::{Buf, BufMut};
4use globset;
5use netidx_core::{
6    pack::{Pack, PackError},
7    path::Path,
8};
9use poolshark::global::{GPooled, Pool};
10use smallvec::SmallVec;
11use std::{
12    cmp::{Eq, PartialEq},
13    ops::Deref,
14    result,
15    sync::{Arc, LazyLock},
16};
17
18use crate::value::{FromValue, Value};
19
/// Depth limit attached to a glob.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scope {
    /// No depth limit, every level count is contained.
    Subtree,
    /// Contains every level count up to and including the given number.
    Finite(usize),
}

impl Scope {
    /// Returns true if `levels` falls within this scope. `Subtree`
    /// contains every value, `Finite(n)` contains every value that is
    /// less than or equal to `n`.
    pub fn contains(&self, levels: usize) -> bool {
        if let Scope::Finite(limit) = *self {
            return limit >= levels;
        }
        true
    }
}
34
/// Unix style globs for matching paths in the resolver. All common
/// unix globbing features are supported.
/// * ? matches any character except the path separator
/// * \* matches zero or more characters, but not the path separator
/// * \** recursively matches containers. Its only legal uses are
/// /**/foo, /foo/**/bar, and /foo/bar/**, which match respectively,
/// any path ending in foo, any path starting with /foo and ending in
/// bar, and any path starting with /foo/bar.
/// * {a, b}, matches a or b where a and b are glob patterns, {} can't be nested however.
/// * [ab], [!ab], matches respectively the char a or b, and any char but a or b.
/// * any of the above metacharacters can be escaped with a \, a
/// literal \ may be produced with \\.
///
/// e.g.
/// `/solar/{stats,settings}/*` -> all leaf paths under /solar/stats or /solar/settings.
/// `/s*/s*/**` -> any path whose first two levels start with s.
/// `/**/?` -> any path whose final component is a single character
/// `/marketdata/{IBM,MSFT,AMZN}/last`
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Glob {
    /// the pattern string exactly as it was passed to `Glob::new`
    raw: ArcStr,
    /// the path prefix computed by `Glob::new`: the last dirname of
    /// `raw` seen before the first one containing an unescaped glob
    /// meta char, or "/" if there is none
    base: Path,
    /// `Subtree` if `Glob::new` found a `**` component after the base,
    /// otherwise `Finite` with the number of levels in `raw`
    scope: Scope,
    /// `raw` compiled by `globset::Glob::new`
    glob: globset::Glob,
}
60
61impl Glob {
62    /// return the longest plain string before the first glob char
63    pub fn plain(&self) -> &str {
64        match Self::first_glob_char(&*self.raw) {
65            None => &*self.raw,
66            Some(i) => &self.raw[0..i],
67        }
68    }
69
70    /// returns true if the specified string contains any non escaped
71    /// glob meta chars.
72    pub fn is_glob(s: &str) -> bool {
73        Self::first_glob_char(s).is_some()
74    }
75
76    /// returns the index of the first glob special char or None if raw is a plain string
77    pub fn first_glob_char(mut s: &str) -> Option<usize> {
78        loop {
79            if s.is_empty() {
80                break None;
81            } else {
82                match s.find(&['?', '*', '{', '['][..]) {
83                    None => break None,
84                    Some(i) => {
85                        if escaping::is_escaped(s, '\\', i) {
86                            s = &s[i + 1..];
87                        } else {
88                            break Some(i);
89                        }
90                    }
91                }
92            }
93        }
94    }
95
96    pub fn new(raw: ArcStr) -> Result<Glob> {
97        if !Path::is_absolute(&raw) {
98            bail!("glob paths must be absolute")
99        }
100        let base = {
101            let mut cur = "/";
102            let mut iter = Path::dirnames(&raw);
103            loop {
104                match iter.next() {
105                    None => break cur,
106                    Some(p) => {
107                        if Glob::is_glob(p) {
108                            break cur;
109                        } else {
110                            cur = p;
111                        }
112                    }
113                }
114            }
115        };
116        let lvl = Path::levels(base);
117        let base = Path::from(ArcStr::from(base));
118        let scope =
119            if Path::dirnames(&raw).skip(lvl).any(|p| Path::basename(p) == Some("**")) {
120                Scope::Subtree
121            } else {
122                Scope::Finite(Path::levels(&raw))
123            };
124        let glob = globset::Glob::new(&*raw)?;
125        Ok(Glob { raw, base, scope, glob })
126    }
127
128    pub fn base(&self) -> &str {
129        &self.base
130    }
131
132    pub fn scope(&self) -> &Scope {
133        &self.scope
134    }
135
136    pub fn glob(&self) -> &globset::Glob {
137        &self.glob
138    }
139
140    pub fn into_glob(self) -> globset::Glob {
141        self.glob
142    }
143
144    pub fn raw(&self) -> &ArcStr {
145        &self.raw
146    }
147}
148
149impl Pack for Glob {
150    fn encoded_len(&self) -> usize {
151        Pack::encoded_len(&self.raw)
152    }
153
154    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
155        Pack::encode(&self.raw, buf)
156    }
157
158    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
159        Glob::new(Pack::decode(buf)?).map_err(|_| PackError::InvalidFormat)
160    }
161}
162
163impl Into<Value> for Glob {
164    fn into(self) -> Value {
165        self.raw.into()
166    }
167}
168
169impl FromValue for Glob {
170    fn from_value(v: Value) -> Result<Self> {
171        match v {
172            Value::String(s) => Self::new(s),
173            x => bail!("Glob::from_value expected String got {x:?}"),
174        }
175    }
176}
177
/// The shared state behind a `GlobSet`.
#[derive(Debug)]
struct GlobSetInner {
    /// the individual globs, the constructors in this file sort them by base
    raw: GPooled<Vec<Glob>>,
    /// the flag passed to `GlobSet::new`, returned by `GlobSet::published_only`
    published_only: bool,
    /// all the globs in `raw` compiled into one `globset::GlobSet`
    glob: globset::GlobSet,
}
184
/// A set of globs that can be matched against a path in one step.
/// The contents live behind an `Arc`, so a clone shares them.
#[derive(Debug, Clone)]
pub struct GlobSet(Arc<GlobSetInner>);
187
188impl PartialEq for GlobSet {
189    fn eq(&self, other: &Self) -> bool {
190        &self.0.raw == &other.0.raw
191    }
192}
193
194impl Eq for GlobSet {}
195
196impl Into<Value> for GlobSet {
197    fn into(self) -> Value {
198        (self.0.published_only, self.0.raw.clone()).into()
199    }
200}
201
202impl FromValue for GlobSet {
203    fn from_value(v: Value) -> Result<Self> {
204        let (published_only, globs): (bool, Vec<Glob>) = FromValue::from_value(v)?;
205        Self::new(published_only, globs)
206    }
207}
208
209impl<'a> TryFrom<&'a [String]> for GlobSet {
210    type Error = anyhow::Error;
211
212    fn try_from(value: &[String]) -> result::Result<Self, Self::Error> {
213        let v: SmallVec<[Glob; 8]> =
214            value.iter().map(|c| Glob::new(ArcStr::from(&**c))).collect::<Result<_>>()?;
215        GlobSet::new(false, v)
216    }
217}
218
219impl<'a> TryFrom<&'a [ArcStr]> for GlobSet {
220    type Error = anyhow::Error;
221
222    fn try_from(value: &[ArcStr]) -> result::Result<Self, Self::Error> {
223        let v: SmallVec<[Glob; 8]> = value
224            .iter()
225            .map(|c| Glob::new(ArcStr::from(c.clone())))
226            .collect::<Result<_>>()?;
227        GlobSet::new(false, v)
228    }
229}
230
231impl TryFrom<Vec<String>> for GlobSet {
232    type Error = anyhow::Error;
233
234    fn try_from(value: Vec<String>) -> result::Result<Self, Self::Error> {
235        let v: SmallVec<[Glob; 8]> = value
236            .into_iter()
237            .map(|c| Glob::new(ArcStr::from(&*c)))
238            .collect::<Result<_>>()?;
239        GlobSet::new(false, v)
240    }
241}
242
243impl TryFrom<Vec<ArcStr>> for GlobSet {
244    type Error = anyhow::Error;
245
246    fn try_from(value: Vec<ArcStr>) -> result::Result<Self, Self::Error> {
247        let v: SmallVec<[Glob; 8]> = value
248            .into_iter()
249            .map(|c| Glob::new(ArcStr::from(c.clone())))
250            .collect::<Result<_>>()?;
251        GlobSet::new(false, v)
252    }
253}
254
255impl GlobSet {
256    /// create a new globset from the specified globs. if
257    /// published_only is true, then the globset will only match
258    /// published paths, otherwise it will match both structural and
259    /// published paths.
260    pub fn new(
261        published_only: bool,
262        globs: impl IntoIterator<Item = Glob>,
263    ) -> Result<GlobSet> {
264        static GLOB: LazyLock<Pool<Vec<Glob>>> = LazyLock::new(|| Pool::new(10, 100));
265        let mut builder = globset::GlobSetBuilder::new();
266        let mut raw = GLOB.take();
267        for glob in globs {
268            builder.add(glob.glob.clone());
269            raw.push(glob);
270        }
271        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
272        Ok(GlobSet(Arc::new(GlobSetInner {
273            raw,
274            published_only,
275            glob: builder.build()?,
276        })))
277    }
278
279    pub fn is_match(&self, path: &Path) -> bool {
280        self.0.glob.is_match(path.as_ref())
281    }
282
283    pub fn published_only(&self) -> bool {
284        self.0.published_only
285    }
286
287    /// return the raw glob strings
288    pub fn raw(&self) -> Vec<ArcStr> {
289        self.0.raw.iter().map(|g| g.raw.clone()).collect()
290    }
291
292    /// disjoint globsets will never both match a given path. However
293    /// non disjoint globsets might not match the same paths. So in
294    /// other words this will only return turn if the two globsets
295    /// definitely will not match any of the same paths. It is
296    /// possible that this returns true and the globsets are in fact
297    /// disjoint.
298    pub fn disjoint(&self, other: &Self) -> bool {
299        for g0 in self.0.raw.iter() {
300            let plain0 = g0.plain();
301            for g1 in other.0.raw.iter() {
302                let plain1 = g1.plain();
303                if plain0.starts_with(plain1)
304                    || plain1.starts_with(plain0)
305                    || plain0 == plain1
306                {
307                    return false;
308                }
309            }
310        }
311        return true;
312    }
313}
314
315impl Deref for GlobSet {
316    type Target = Vec<Glob>;
317
318    fn deref(&self) -> &Self::Target {
319        &*self.0.raw
320    }
321}
322
323impl Pack for GlobSet {
324    fn encoded_len(&self) -> usize {
325        <bool as Pack>::encoded_len(&self.0.published_only)
326            + <GPooled<Vec<Glob>> as Pack>::encoded_len(&self.0.raw)
327    }
328
329    fn encode(&self, buf: &mut impl BufMut) -> result::Result<(), PackError> {
330        <bool as Pack>::encode(&self.0.published_only, buf)?;
331        <GPooled<Vec<Glob>> as Pack>::encode(&self.0.raw, buf)
332    }
333
334    fn decode(buf: &mut impl Buf) -> result::Result<Self, PackError> {
335        let published_only = <bool as Pack>::decode(buf)?;
336        let mut raw = <GPooled<Vec<Glob>> as Pack>::decode(buf)?;
337        let mut builder = globset::GlobSetBuilder::new();
338        for glob in raw.iter() {
339            builder.add(glob.glob.clone());
340        }
341        raw.sort_unstable_by(|g0, g1| g0.base().cmp(g1.base()));
342        let glob = builder.build().map_err(|_| PackError::InvalidFormat)?;
343        Ok(GlobSet(Arc::new(GlobSetInner { raw, published_only, glob })))
344    }
345}