forest/ipld/selector/
mod.rs

1// Copyright 2019-2025 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4mod empty_map;
5use std::ops::SubAssign;
6
7use indexmap::IndexMap;
8use serde::{Deserialize, Serialize};
9
10#[cfg(test)]
11use super::Ipld;
12#[cfg(test)]
13use Selector::*;
14
15/// Selectors are expressions that identify and select a subset of data from an
16/// IPLD DAG. Selectors are themselves IPLD and can be serialized and
17/// de-serialized as such.
18#[allow(dead_code)]
19#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
20pub enum Selector {
21    /// `Matcher` marks a node to be included in the "result" set.
22    /// (All nodes traversed by a selector are in the "covered" set (which is
23    /// a.k.a. "the Merkle proof"); the "result" set is a subset of the
24    /// "covered" set.)
25    ///
26    /// In libraries using selectors, the "result" set is typically provided to
27    /// some user-specified callback.
28    ///
29    /// A selector tree with only "explore*"-type selectors and no `Matcher`
30    /// selectors is valid; it will just generate a "covered" set of nodes
31    /// and no "result" set.
32    #[serde(rename = ".", with = "empty_map")]
33    Matcher,
34
35    /// `ExploreAll` is similar to a `*` -- it traverses all elements of an
36    /// array, or all entries in a map, and applies a next selector to the
37    /// reached nodes.
38    #[serde(rename = "a")]
39    ExploreAll {
40        #[serde(rename = ">")]
41        next: Box<Selector>,
42    },
43
44    /// `ExploreFields` traverses named fields in a map (or equivalently,
45    /// structure, if traversing on typed/schema nodes) and applies a next
46    /// selector to the reached nodes.
47    ///
48    /// Note that a concept of exploring a whole path (e.g. "path/to/file") can
49    /// be represented as a set of three nested `ExploreFields` selectors,
50    /// each specifying one field.
51    ///
52    /// Fields insertion order is maintained and traversed using that order.
53    #[serde(rename = "f")]
54    ExploreFields {
55        #[serde(rename = "f>")]
56        fields: IndexMap<String, Selector>,
57    },
58
59    /// `ExploreIndex` traverses a specific index in a list, and applies a next
60    /// selector to the reached node.
61    #[serde(rename = "i")]
62    ExploreIndex {
63        #[serde(rename = "i")]
64        index: usize,
65        #[serde(rename = ">")]
66        next: Box<Selector>,
67    },
68
69    /// `ExploreRange` traverses a list, and for each element in the range
70    /// specified, will apply a next selector to those reached nodes.
71    #[serde(rename = "r")]
72    ExploreRange {
73        #[serde(rename = "^")]
74        start: usize,
75        #[serde(rename = "$")]
76        end: usize,
77        #[serde(rename = ">")]
78        next: Box<Selector>,
79    },
80
81    /// `ExploreRecursive` traverses some structure recursively.
82    /// To guide this exploration, it uses a "sequence", which is another
83    /// Selector tree; some leaf node in this sequence should contain an
84    /// `ExploreRecursiveEdge` selector, which denotes the place recursion
85    /// should occur.
86    ///
87    /// In implementation, whenever evaluation reaches an `ExploreRecursiveEdge`
88    /// marker in the recursion sequence's Selector tree, the implementation
89    /// logically produces another new Selector which is a copy of the
90    /// original `ExploreRecursive` selector, but with a decremented depth
91    /// parameter for limit (if limit is of type depth), and continues
92    /// evaluation.
93    ///
94    /// It is not valid for an `ExploreRecursive` selector's sequence to contain
95    /// no instances of `ExploreRecursiveEdge`; it *is* valid for it to contain
96    /// more than one `ExploreRecursiveEdge`.
97    ///
98    /// `ExploreRecursive` can contain a nested `ExploreRecursive`!
99    /// This is comparable to a nested for-loop.
100    /// In these cases, any `ExploreRecursiveEdge` instance always refers to the
101    /// nearest parent `ExploreRecursive` (in other words,
102    /// `ExploreRecursiveEdge` can be thought of like the 'continue'
103    /// statement, or end of a for-loop body; it is *not* a `goto`
104    /// statement).
105    ///
106    /// Be careful when using `ExploreRecursive` with a large depth limit
107    /// parameter; it can easily cause very large traversals (especially if
108    /// used in combination with selectors like `ExploreAll` inside the
109    /// sequence).
110    ///
111    /// limit is a union type -- it can have an integer depth value (key
112    /// "depth") or no value (key "none"). If limit has no value it is up to
113    /// the implementation library using selectors to identify an
114    /// appropriate max depth as necessary so that recursion is not infinite
115    #[serde(rename = "R")]
116    ExploreRecursive {
117        #[serde(rename = ":>")]
118        sequence: Box<Selector>,
119        #[serde(rename = "l")]
120        limit: RecursionLimit,
121        /// if a node matches, we won't match it nor explore its children.
122        #[serde(rename = "!")]
123        stop_at: Option<Condition>,
124        #[serde(skip_deserializing)]
125        /// Used to index current
126        current: Option<Box<Selector>>,
127    },
128
129    /// `ExploreUnion` allows selection to continue with two or more distinct
130    /// selectors while exploring the same tree of data.
131    ///
132    /// `ExploreUnion` can be used to apply a `Matcher` on one node (causing it
133    /// to be considered part of a (possibly labeled) result set), while
134    /// simultaneously continuing to explore deeper parts of the tree with
135    /// another selector, for example.
136    #[serde(rename = "|")]
137    ExploreUnion(Vec<Selector>),
138
139    /// `ExploreRecursiveEdge` is a special sentinel value which is used to mark
140    /// the end of a sequence started by an `ExploreRecursive` selector: the
141    /// recursion goes back to the initial state of the earlier
142    /// `ExploreRecursive` selector, and proceeds again (with a decremented
143    /// `maxDepth` value).
144    ///
145    /// An `ExploreRecursive` selector that doesn't contain an
146    /// `ExploreRecursiveEdge` is nonsensical.  Containing more than one
147    /// `ExploreRecursiveEdge` is valid. An `ExploreRecursiveEdge` without
148    /// an enclosing `ExploreRecursive` is an error.
149    #[serde(rename = "@", with = "empty_map")]
150    ExploreRecursiveEdge,
151    //* No conditional explore impl exists, ignore for now
152    // #[serde(rename = "&")]
153    // ExploreConditional {
154    //     #[serde(rename = "&")]
155    //     condition: Option<Condition>,
156    //     #[serde(rename = ">")]
157    //     next: Box<Selector>,
158    // },
159}
160
161#[allow(dead_code)]
162#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Copy)]
163pub enum RecursionLimit {
164    #[serde(rename = "none", with = "empty_map")]
165    None,
166    #[serde(rename = "depth")]
167    Depth(u64),
168}
169
170impl SubAssign<u64> for RecursionLimit {
171    fn sub_assign(&mut self, other: u64) {
172        if let RecursionLimit::Depth(v) = self {
173            *v -= other;
174        }
175    }
176}
177
178/// Condition is expresses a predicate with a boolean result.
179///
180/// Condition clauses are used several places:
181///   - in `Matcher`, to determine if a node is selected.
182///   - in `ExploreRecursive`, to halt exploration.
183///   - in `ExploreConditional`,
184#[allow(dead_code)]
185#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Copy)]
186pub enum Condition {
187    #[serde(rename = "hasField")]
188    HasField,
189    #[serde(rename = "=")]
190    HasValue,
191    #[serde(rename = "%")]
192    HasKind,
193    #[serde(rename = "/")]
194    IsLink,
195    #[serde(rename = "greaterThan")]
196    GreaterThan,
197    #[serde(rename = "lessThan")]
198    LessThan,
199    #[serde(rename = "and")]
200    And,
201    #[serde(rename = "or")]
202    Or,
203}
204
205#[cfg(test)]
206impl Selector {
207    /// Processes and returns resultant selector node
208    pub fn explore(self, ipld: &Ipld, p: &str) -> Option<Selector> {
209        match self {
210            ExploreAll { next } => Some(*next),
211            ExploreFields { mut fields } => match ipld {
212                Ipld::Map(m) => {
213                        m.get(p)?;
214                        fields.swap_remove(p)
215                    }
216                ,
217                // Using ExploreFields for list is supported feature in go impl
218                Ipld::List(l) => {
219                    // Check to make sure index is within bounds
220                    if p.parse::<usize>().ok()? >= l.len() {
221                        return None;
222                    }
223                    fields.swap_remove(p)
224                }
225                _ => None,
226            },
227            ExploreIndex { index, next } => match ipld {
228                Ipld::List(l) => {
229                    let i = p.parse::<usize>().ok()?;
230                    if i != index || i >= l.len() {
231                        None
232                    } else {
233                        // Path segment matches selector index
234                        Some(*next)
235                    }
236                }
237                _ => None,
238            },
239            ExploreRange { start, end, next } => {
240                match ipld {
241                    Ipld::List(l) => {
242                        let i = p.parse::<usize>().ok()?;
243                        // Check to make sure index is within list bounds
244                        if i < start || i >= end || i >= l.len() {
245                            None
246                        } else {
247                            // Path segment is within the selector range
248                            Some(*next)
249                        }
250                    }
251                    _ => None,
252                }
253            }
254            ExploreRecursive {
255                current,
256                sequence,
257                mut limit,
258                stop_at,
259            } => {
260                let next = current
261                    .unwrap_or_else(|| sequence.clone())
262                    .explore(ipld, p)?;
263
264                if !has_recursive_edge(&next) {
265                    return Some(ExploreRecursive {
266                        sequence,
267                        current: Some(next.into()),
268                        limit,
269                        stop_at,
270                    });
271                }
272
273                if let RecursionLimit::Depth(depth) = limit {
274                    if depth < 2 {
275                        // Replaces recursive edge with None on last iteration
276                        return replace_recursive_edge(next, None);
277                    }
278                    limit -= 1;
279                }
280
281                Some(ExploreRecursive {
282                    current: replace_recursive_edge(next, Some(*sequence.clone())).map(Box::new),
283                    sequence,
284                    limit,
285                    stop_at,
286                })
287            }
288            ExploreUnion(selectors) => {
289                // Push all valid explored selectors to new vector
290                let replace_selectors: Vec<_> = selectors
291                    .into_iter()
292                    .filter_map(|s| s.explore(ipld, p))
293                    .collect();
294
295                Selector::from_selectors(replace_selectors)
296            }
297            // Go impl panics here, but panic on exploring malformed selector seems bad
298            ExploreRecursiveEdge => None,
299            // Matcher is terminal selector
300            Matcher => None,
301        }
302    }
303
304    fn from_selectors(mut vec: Vec<Self>) -> Option<Self> {
305        match vec.len() {
306            0 | 1 => vec.pop(),
307            _ => Some(ExploreUnion(vec)),
308        }
309    }
310}
311
312#[cfg(test)]
313fn replace_recursive_edge(next_sel: Selector, replace: Option<Selector>) -> Option<Selector> {
314    match next_sel {
315        ExploreRecursiveEdge => replace,
316        ExploreUnion(selectors) => {
317            // Push all valid explored selectors to new vector
318            let replace_selectors: Vec<_> = selectors
319                .into_iter()
320                .filter_map(|s| replace_recursive_edge(s, replace.clone()))
321                .collect();
322
323            Selector::from_selectors(replace_selectors)
324        }
325        _ => Some(next_sel),
326    }
327}
328
329#[cfg(test)]
330fn has_recursive_edge(next_sel: &Selector) -> bool {
331    match next_sel {
332        ExploreRecursiveEdge => true,
333        ExploreUnion(selectors) => selectors.iter().any(has_recursive_edge),
334        _ => false,
335    }
336}