forest/ipld/selector/mod.rs
1// Copyright 2019-2025 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4mod empty_map;
5use std::ops::SubAssign;
6
7use indexmap::IndexMap;
8use serde::{Deserialize, Serialize};
9
10#[cfg(test)]
11use super::Ipld;
12#[cfg(test)]
13use Selector::*;
14
15/// Selectors are expressions that identify and select a subset of data from an
16/// IPLD DAG. Selectors are themselves IPLD and can be serialized and
17/// de-serialized as such.
18#[allow(dead_code)]
19#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
20pub enum Selector {
21 /// `Matcher` marks a node to be included in the "result" set.
22 /// (All nodes traversed by a selector are in the "covered" set (which is
23 /// a.k.a. "the Merkle proof"); the "result" set is a subset of the
24 /// "covered" set.)
25 ///
26 /// In libraries using selectors, the "result" set is typically provided to
27 /// some user-specified callback.
28 ///
29 /// A selector tree with only "explore*"-type selectors and no `Matcher`
30 /// selectors is valid; it will just generate a "covered" set of nodes
31 /// and no "result" set.
32 #[serde(rename = ".", with = "empty_map")]
33 Matcher,
34
35 /// `ExploreAll` is similar to a `*` -- it traverses all elements of an
36 /// array, or all entries in a map, and applies a next selector to the
37 /// reached nodes.
38 #[serde(rename = "a")]
39 ExploreAll {
40 #[serde(rename = ">")]
41 next: Box<Selector>,
42 },
43
44 /// `ExploreFields` traverses named fields in a map (or equivalently,
45 /// structure, if traversing on typed/schema nodes) and applies a next
46 /// selector to the reached nodes.
47 ///
48 /// Note that a concept of exploring a whole path (e.g. "path/to/file") can
49 /// be represented as a set of three nested `ExploreFields` selectors,
50 /// each specifying one field.
51 ///
52 /// Fields insertion order is maintained and traversed using that order.
53 #[serde(rename = "f")]
54 ExploreFields {
55 #[serde(rename = "f>")]
56 fields: IndexMap<String, Selector>,
57 },
58
59 /// `ExploreIndex` traverses a specific index in a list, and applies a next
60 /// selector to the reached node.
61 #[serde(rename = "i")]
62 ExploreIndex {
63 #[serde(rename = "i")]
64 index: usize,
65 #[serde(rename = ">")]
66 next: Box<Selector>,
67 },
68
69 /// `ExploreRange` traverses a list, and for each element in the range
70 /// specified, will apply a next selector to those reached nodes.
71 #[serde(rename = "r")]
72 ExploreRange {
73 #[serde(rename = "^")]
74 start: usize,
75 #[serde(rename = "$")]
76 end: usize,
77 #[serde(rename = ">")]
78 next: Box<Selector>,
79 },
80
81 /// `ExploreRecursive` traverses some structure recursively.
82 /// To guide this exploration, it uses a "sequence", which is another
83 /// Selector tree; some leaf node in this sequence should contain an
84 /// `ExploreRecursiveEdge` selector, which denotes the place recursion
85 /// should occur.
86 ///
87 /// In implementation, whenever evaluation reaches an `ExploreRecursiveEdge`
88 /// marker in the recursion sequence's Selector tree, the implementation
89 /// logically produces another new Selector which is a copy of the
90 /// original `ExploreRecursive` selector, but with a decremented depth
91 /// parameter for limit (if limit is of type depth), and continues
92 /// evaluation.
93 ///
94 /// It is not valid for an `ExploreRecursive` selector's sequence to contain
95 /// no instances of `ExploreRecursiveEdge`; it *is* valid for it to contain
96 /// more than one `ExploreRecursiveEdge`.
97 ///
98 /// `ExploreRecursive` can contain a nested `ExploreRecursive`!
99 /// This is comparable to a nested for-loop.
100 /// In these cases, any `ExploreRecursiveEdge` instance always refers to the
101 /// nearest parent `ExploreRecursive` (in other words,
102 /// `ExploreRecursiveEdge` can be thought of like the 'continue'
103 /// statement, or end of a for-loop body; it is *not* a `goto`
104 /// statement).
105 ///
106 /// Be careful when using `ExploreRecursive` with a large depth limit
107 /// parameter; it can easily cause very large traversals (especially if
108 /// used in combination with selectors like `ExploreAll` inside the
109 /// sequence).
110 ///
111 /// limit is a union type -- it can have an integer depth value (key
112 /// "depth") or no value (key "none"). If limit has no value it is up to
113 /// the implementation library using selectors to identify an
114 /// appropriate max depth as necessary so that recursion is not infinite
115 #[serde(rename = "R")]
116 ExploreRecursive {
117 #[serde(rename = ":>")]
118 sequence: Box<Selector>,
119 #[serde(rename = "l")]
120 limit: RecursionLimit,
121 /// if a node matches, we won't match it nor explore its children.
122 #[serde(rename = "!")]
123 stop_at: Option<Condition>,
124 #[serde(skip_deserializing)]
125 /// Used to index current
126 current: Option<Box<Selector>>,
127 },
128
129 /// `ExploreUnion` allows selection to continue with two or more distinct
130 /// selectors while exploring the same tree of data.
131 ///
132 /// `ExploreUnion` can be used to apply a `Matcher` on one node (causing it
133 /// to be considered part of a (possibly labeled) result set), while
134 /// simultaneously continuing to explore deeper parts of the tree with
135 /// another selector, for example.
136 #[serde(rename = "|")]
137 ExploreUnion(Vec<Selector>),
138
139 /// `ExploreRecursiveEdge` is a special sentinel value which is used to mark
140 /// the end of a sequence started by an `ExploreRecursive` selector: the
141 /// recursion goes back to the initial state of the earlier
142 /// `ExploreRecursive` selector, and proceeds again (with a decremented
143 /// `maxDepth` value).
144 ///
145 /// An `ExploreRecursive` selector that doesn't contain an
146 /// `ExploreRecursiveEdge` is nonsensical. Containing more than one
147 /// `ExploreRecursiveEdge` is valid. An `ExploreRecursiveEdge` without
148 /// an enclosing `ExploreRecursive` is an error.
149 #[serde(rename = "@", with = "empty_map")]
150 ExploreRecursiveEdge,
151 //* No conditional explore impl exists, ignore for now
152 // #[serde(rename = "&")]
153 // ExploreConditional {
154 // #[serde(rename = "&")]
155 // condition: Option<Condition>,
156 // #[serde(rename = ">")]
157 // next: Box<Selector>,
158 // },
159}
160
161#[allow(dead_code)]
162#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Copy)]
163pub enum RecursionLimit {
164 #[serde(rename = "none", with = "empty_map")]
165 None,
166 #[serde(rename = "depth")]
167 Depth(u64),
168}
169
170impl SubAssign<u64> for RecursionLimit {
171 fn sub_assign(&mut self, other: u64) {
172 if let RecursionLimit::Depth(v) = self {
173 *v -= other;
174 }
175 }
176}
177
178/// Condition is expresses a predicate with a boolean result.
179///
180/// Condition clauses are used several places:
181/// - in `Matcher`, to determine if a node is selected.
182/// - in `ExploreRecursive`, to halt exploration.
183/// - in `ExploreConditional`,
184#[allow(dead_code)]
185#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Copy)]
186pub enum Condition {
187 #[serde(rename = "hasField")]
188 HasField,
189 #[serde(rename = "=")]
190 HasValue,
191 #[serde(rename = "%")]
192 HasKind,
193 #[serde(rename = "/")]
194 IsLink,
195 #[serde(rename = "greaterThan")]
196 GreaterThan,
197 #[serde(rename = "lessThan")]
198 LessThan,
199 #[serde(rename = "and")]
200 And,
201 #[serde(rename = "or")]
202 Or,
203}
204
205#[cfg(test)]
206impl Selector {
207 /// Processes and returns resultant selector node
208 pub fn explore(self, ipld: &Ipld, p: &str) -> Option<Selector> {
209 match self {
210 ExploreAll { next } => Some(*next),
211 ExploreFields { mut fields } => match ipld {
212 Ipld::Map(m) => {
213 m.get(p)?;
214 fields.swap_remove(p)
215 }
216 ,
217 // Using ExploreFields for list is supported feature in go impl
218 Ipld::List(l) => {
219 // Check to make sure index is within bounds
220 if p.parse::<usize>().ok()? >= l.len() {
221 return None;
222 }
223 fields.swap_remove(p)
224 }
225 _ => None,
226 },
227 ExploreIndex { index, next } => match ipld {
228 Ipld::List(l) => {
229 let i = p.parse::<usize>().ok()?;
230 if i != index || i >= l.len() {
231 None
232 } else {
233 // Path segment matches selector index
234 Some(*next)
235 }
236 }
237 _ => None,
238 },
239 ExploreRange { start, end, next } => {
240 match ipld {
241 Ipld::List(l) => {
242 let i = p.parse::<usize>().ok()?;
243 // Check to make sure index is within list bounds
244 if i < start || i >= end || i >= l.len() {
245 None
246 } else {
247 // Path segment is within the selector range
248 Some(*next)
249 }
250 }
251 _ => None,
252 }
253 }
254 ExploreRecursive {
255 current,
256 sequence,
257 mut limit,
258 stop_at,
259 } => {
260 let next = current
261 .unwrap_or_else(|| sequence.clone())
262 .explore(ipld, p)?;
263
264 if !has_recursive_edge(&next) {
265 return Some(ExploreRecursive {
266 sequence,
267 current: Some(next.into()),
268 limit,
269 stop_at,
270 });
271 }
272
273 if let RecursionLimit::Depth(depth) = limit {
274 if depth < 2 {
275 // Replaces recursive edge with None on last iteration
276 return replace_recursive_edge(next, None);
277 }
278 limit -= 1;
279 }
280
281 Some(ExploreRecursive {
282 current: replace_recursive_edge(next, Some(*sequence.clone())).map(Box::new),
283 sequence,
284 limit,
285 stop_at,
286 })
287 }
288 ExploreUnion(selectors) => {
289 // Push all valid explored selectors to new vector
290 let replace_selectors: Vec<_> = selectors
291 .into_iter()
292 .filter_map(|s| s.explore(ipld, p))
293 .collect();
294
295 Selector::from_selectors(replace_selectors)
296 }
297 // Go impl panics here, but panic on exploring malformed selector seems bad
298 ExploreRecursiveEdge => None,
299 // Matcher is terminal selector
300 Matcher => None,
301 }
302 }
303
304 fn from_selectors(mut vec: Vec<Self>) -> Option<Self> {
305 match vec.len() {
306 0 | 1 => vec.pop(),
307 _ => Some(ExploreUnion(vec)),
308 }
309 }
310}
311
312#[cfg(test)]
313fn replace_recursive_edge(next_sel: Selector, replace: Option<Selector>) -> Option<Selector> {
314 match next_sel {
315 ExploreRecursiveEdge => replace,
316 ExploreUnion(selectors) => {
317 // Push all valid explored selectors to new vector
318 let replace_selectors: Vec<_> = selectors
319 .into_iter()
320 .filter_map(|s| replace_recursive_edge(s, replace.clone()))
321 .collect();
322
323 Selector::from_selectors(replace_selectors)
324 }
325 _ => Some(next_sel),
326 }
327}
328
329#[cfg(test)]
330fn has_recursive_edge(next_sel: &Selector) -> bool {
331 match next_sel {
332 ExploreRecursiveEdge => true,
333 ExploreUnion(selectors) => selectors.iter().any(has_recursive_edge),
334 _ => false,
335 }
336}