wdl_engine/eval/
trie.rs

1//! Implementation of the inputs prefix trie.
2//!
3//! The inputs prefix trie is used to map input host paths to guest paths for
4//! task evaluation.
5
6use std::collections::HashMap;
7use std::path::Component;
8use std::path::PathBuf;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::bail;
13use url::Url;
14
15use crate::GuestPath;
16use crate::Input;
17use crate::InputKind;
18use crate::eval::ROOT_NAME;
19use crate::path::EvaluationPath;
20
21/// Represents a node in an input trie.
22#[derive(Debug)]
23struct InputTrieNode {
24    /// The children of this node.
25    children: HashMap<String, Self>,
26    /// The identifier of the node in the trie.
27    ///
28    /// A node's identifier is used when formatting guest paths of children.
29    id: usize,
30    /// The index into the trie's `inputs` collection.
31    ///
32    /// This is `Some` only for terminal nodes in the trie.
33    index: Option<usize>,
34}
35
36impl InputTrieNode {
37    /// Constructs a new input trie node with the given id.
38    fn new(id: usize) -> Self {
39        Self {
40            children: Default::default(),
41            id,
42            index: None,
43        }
44    }
45}
46
47/// Represents a prefix trie based on input paths.
48///
49/// This is used to determine guest paths for inputs.
50///
51/// From the root to a terminal node represents a unique input.
52#[derive(Debug)]
53pub struct InputTrie {
54    /// The guest inputs root directory.
55    ///
56    /// This is `None` for backends that don't use containers.
57    guest_inputs_dir: Option<&'static str>,
58    /// The URL path children of the tree.
59    ///
60    /// The key in the map is the scheme of each URL.
61    urls: HashMap<String, InputTrieNode>,
62    /// The local path children of the tree.
63    ///
64    /// The key in the map is the first component of each path.
65    paths: HashMap<String, InputTrieNode>,
66    /// The inputs in the trie.
67    inputs: Vec<Input>,
68    /// The next node identifier.
69    next_id: usize,
70}
71
72impl InputTrie {
73    /// Constructs a new inputs trie without a guest inputs directory.
74    ///
75    /// Terminal nodes in the trie will not be mapped to guest paths.
76    pub fn new() -> Self {
77        Self {
78            guest_inputs_dir: None,
79            urls: Default::default(),
80            paths: Default::default(),
81            inputs: Default::default(),
82            // The first id starts at 1 as 0 is considered the "virtual root" of the trie
83            next_id: 1,
84        }
85    }
86
87    /// Constructs a new inputs trie with a guest inputs directory.
88    ///
89    /// Inputs with a host path will be mapped to a guest path relative to the
90    /// guest inputs directory.
91    ///
92    /// Note: a guest inputs directory is always a Unix-style path.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the guest inputs directory does not end with a slash.
97    pub fn new_with_guest_dir(guest_inputs_dir: &'static str) -> Self {
98        assert!(guest_inputs_dir.ends_with('/'));
99
100        let mut trie = Self::new();
101        trie.guest_inputs_dir = Some(guest_inputs_dir);
102        trie
103    }
104
105    /// Inserts a new input into the trie.
106    ///
107    /// The path is either a local or remote input path.
108    ///
109    /// Relative paths are made absolute via the provided base path.
110    ///
111    /// If an input was added, returns `Ok(Some(index))` where `index` is the
112    /// index of the input in the trie.
113    ///
114    /// Returns `Ok(None)` if the provided path was already a guest input path.
115    ///
116    /// Returns an error for an invalid input path.
117    pub fn insert(
118        &mut self,
119        kind: InputKind,
120        path: &str,
121        base_dir: &EvaluationPath,
122    ) -> Result<Option<usize>> {
123        let path = base_dir.join(path)?;
124        match path {
125            EvaluationPath::Local(path) => {
126                // Check to see if the path being inserted is already a guest path
127                if let Some(dir) = self.guest_inputs_dir
128                    && path.starts_with(dir)
129                {
130                    return Ok(None);
131                }
132
133                self.insert_path(kind, path).map(Some)
134            }
135            EvaluationPath::Remote(url) => Ok(Some(self.insert_url(kind, url))),
136        }
137    }
138
139    /// Gets the inputs of the trie as a slice.
140    pub fn as_slice(&self) -> &[Input] {
141        &self.inputs
142    }
143
144    /// Gets the inputs of the trie as a mutable slice.
145    pub fn as_slice_mut(&mut self) -> &mut [Input] {
146        &mut self.inputs
147    }
148
149    /// Inserts an input with a local path into the trie.
150    fn insert_path(&mut self, kind: InputKind, path: PathBuf) -> Result<usize> {
151        let mut components = path.components();
152
153        let component = components
154            .next()
155            .context("input path cannot be empty")?
156            .as_os_str()
157            .to_str()
158            .with_context(|| format!("input path `{path}` is not UTF-8", path = path.display()))?;
159
160        let mut parent_id = 0;
161        let mut node = self.paths.entry(component.to_string()).or_insert_with(|| {
162            let node = InputTrieNode::new(self.next_id);
163            self.next_id += 1;
164            node
165        });
166
167        let mut last_component = None;
168        for component in components {
169            match component {
170                Component::CurDir | Component::ParentDir => {
171                    bail!(
172                        "input path `{path}` may not contain `.` or `..`",
173                        path = path.display()
174                    );
175                }
176                _ => {}
177            }
178
179            let component = component.as_os_str().to_str().with_context(|| {
180                format!("input path `{path}` is not UTF-8", path = path.display())
181            })?;
182
183            parent_id = node.id;
184
185            node = node
186                .children
187                .entry(component.to_string())
188                .or_insert_with(|| {
189                    let node = InputTrieNode::new(self.next_id);
190                    self.next_id += 1;
191                    node
192                });
193
194            last_component = Some(component);
195        }
196
197        // Check to see if the input already exists in the trie
198        if let Some(index) = node.index {
199            return Ok(index);
200        }
201
202        let guest_path = self.guest_inputs_dir.map(|d| {
203            GuestPath::new(format!(
204                "{d}{parent_id}/{last}",
205                // On Windows, `last_component` might be `Some` despite being a root due to the
206                // prefix (e.g. `C:`); instead check if the path has a parent
207                last = if path.parent().is_none() {
208                    ROOT_NAME
209                } else {
210                    last_component.unwrap_or(ROOT_NAME)
211                }
212            ))
213        });
214
215        let index = self.inputs.len();
216        self.inputs
217            .push(Input::new(kind, EvaluationPath::Local(path), guest_path));
218        node.index = Some(index);
219        Ok(index)
220    }
221
222    /// Inserts an input with a URL into the trie.
223    fn insert_url(&mut self, kind: InputKind, url: Url) -> usize {
224        // Insert for scheme
225        let mut node = self
226            .urls
227            .entry(url.scheme().to_string())
228            .or_insert_with(|| {
229                let node = InputTrieNode::new(self.next_id);
230                self.next_id += 1;
231                node
232            });
233
234        // Insert the authority; if the URL's path is empty, we'll
235        let mut parent_id = node.id;
236        node = node
237            .children
238            .entry(url.authority().to_string())
239            .or_insert_with(|| {
240                let node = InputTrieNode::new(self.next_id);
241                self.next_id += 1;
242                node
243            });
244
245        // Insert the path segments
246        let mut last_segment = None;
247        if let Some(segments) = url.path_segments() {
248            for segment in segments {
249                parent_id = node.id;
250                node = node.children.entry(segment.to_string()).or_insert_with(|| {
251                    let node = InputTrieNode::new(self.next_id);
252                    self.next_id += 1;
253                    node
254                });
255
256                if !segment.is_empty() {
257                    last_segment = Some(segment);
258                }
259            }
260        }
261
262        // Check to see if the input already exists in the trie
263        if let Some(index) = node.index {
264            return index;
265        }
266
267        let guest_path = self.guest_inputs_dir.as_ref().map(|d| {
268            GuestPath::new(format!(
269                "{d}{parent_id}/{last}",
270                last = last_segment.unwrap_or(ROOT_NAME)
271            ))
272        });
273
274        let index = self.inputs.len();
275        self.inputs
276            .push(Input::new(kind, EvaluationPath::Remote(url), guest_path));
277        node.index = Some(index);
278        index
279    }
280}
281
282#[cfg(test)]
283mod test {
284    use pretty_assertions::assert_eq;
285
286    use super::*;
287
288    #[test]
289    fn empty_trie() {
290        let empty = InputTrie::new();
291        assert!(empty.as_slice().is_empty());
292    }
293
294    #[cfg(unix)]
295    #[test]
296    fn unmapped_inputs_unix() {
297        let mut trie = InputTrie::new();
298        let base_dir: EvaluationPath = "/base".parse().unwrap();
299        trie.insert(InputKind::File, "/foo/bar/baz", &base_dir)
300            .unwrap();
301        assert_eq!(trie.as_slice().len(), 1);
302        assert_eq!(trie.as_slice()[0].path().to_str(), Some("/foo/bar/baz"));
303        assert!(trie.as_slice()[0].guest_path().is_none());
304    }
305
306    #[cfg(windows)]
307    #[test]
308    fn unmapped_inputs_windows() {
309        let mut trie = InputTrie::new();
310        let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
311        trie.insert(InputKind::File, "C:\\foo\\bar\\baz", &base_dir)
312            .unwrap();
313        assert_eq!(trie.as_slice().len(), 1);
314        assert_eq!(
315            trie.as_slice()[0].path().to_str(),
316            Some("C:\\foo\\bar\\baz")
317        );
318        assert!(trie.as_slice()[0].guest_path().is_none());
319    }
320
321    #[cfg(unix)]
322    #[test]
323    fn non_empty_trie_unix() {
324        let mut trie = InputTrie::new_with_guest_dir("/inputs/");
325        let base_dir: EvaluationPath = "/base".parse().unwrap();
326        trie.insert(InputKind::Directory, "/", &base_dir)
327            .unwrap()
328            .unwrap();
329        trie.insert(InputKind::File, "/foo/bar/foo.txt", &base_dir)
330            .unwrap()
331            .unwrap();
332        trie.insert(InputKind::File, "/foo/bar/bar.txt", &base_dir)
333            .unwrap()
334            .unwrap();
335        trie.insert(InputKind::File, "/foo/baz/foo.txt", &base_dir)
336            .unwrap()
337            .unwrap();
338        trie.insert(InputKind::File, "/foo/baz/bar.txt", &base_dir)
339            .unwrap()
340            .unwrap();
341        trie.insert(InputKind::File, "/bar/foo/foo.txt", &base_dir)
342            .unwrap()
343            .unwrap();
344        trie.insert(InputKind::File, "/bar/foo/bar.txt", &base_dir)
345            .unwrap()
346            .unwrap();
347        trie.insert(InputKind::Directory, "/baz", &base_dir)
348            .unwrap()
349            .unwrap();
350        trie.insert(InputKind::File, "https://example.com/", &base_dir)
351            .unwrap()
352            .unwrap();
353        trie.insert(
354            InputKind::File,
355            "https://example.com/foo/bar/foo.txt",
356            &base_dir,
357        )
358        .unwrap()
359        .unwrap();
360        trie.insert(
361            InputKind::File,
362            "https://example.com/foo/bar/bar.txt",
363            &base_dir,
364        )
365        .unwrap()
366        .unwrap();
367        trie.insert(
368            InputKind::File,
369            "https://example.com/foo/baz/foo.txt",
370            &base_dir,
371        )
372        .unwrap()
373        .unwrap();
374        trie.insert(
375            InputKind::File,
376            "https://example.com/foo/baz/bar.txt",
377            &base_dir,
378        )
379        .unwrap()
380        .unwrap();
381        trie.insert(
382            InputKind::File,
383            "https://example.com/bar/foo/foo.txt",
384            &base_dir,
385        )
386        .unwrap()
387        .unwrap();
388        trie.insert(
389            InputKind::File,
390            "https://example.com/bar/foo/bar.txt",
391            &base_dir,
392        )
393        .unwrap()
394        .unwrap();
395        trie.insert(InputKind::File, "https://foo.com/bar", &base_dir)
396            .unwrap()
397            .unwrap();
398        trie.insert(InputKind::File, "foo.txt", &base_dir)
399            .unwrap()
400            .unwrap();
401
402        // The important part of the guest paths are:
403        // 1) The guest file name should be the same (or `.root` if the path is
404        //    considered to be root)
405        // 2) Paths with the same parent should have the same guest parent
406        let paths: Vec<_> = trie
407            .as_slice()
408            .iter()
409            .map(|i| {
410                (
411                    i.path().to_str().expect("should be a string"),
412                    i.guest_path().expect("should have guest path").as_str(),
413                )
414            })
415            .collect();
416
417        assert_eq!(
418            paths,
419            [
420                ("/", "/inputs/0/.root"),
421                ("/foo/bar/foo.txt", "/inputs/3/foo.txt"),
422                ("/foo/bar/bar.txt", "/inputs/3/bar.txt"),
423                ("/foo/baz/foo.txt", "/inputs/6/foo.txt"),
424                ("/foo/baz/bar.txt", "/inputs/6/bar.txt"),
425                ("/bar/foo/foo.txt", "/inputs/10/foo.txt"),
426                ("/bar/foo/bar.txt", "/inputs/10/bar.txt"),
427                ("/baz", "/inputs/1/baz"),
428                ("https://example.com/", "/inputs/15/.root"),
429                ("https://example.com/foo/bar/foo.txt", "/inputs/18/foo.txt"),
430                ("https://example.com/foo/bar/bar.txt", "/inputs/18/bar.txt"),
431                ("https://example.com/foo/baz/foo.txt", "/inputs/21/foo.txt"),
432                ("https://example.com/foo/baz/bar.txt", "/inputs/21/bar.txt"),
433                ("https://example.com/bar/foo/foo.txt", "/inputs/25/foo.txt"),
434                ("https://example.com/bar/foo/bar.txt", "/inputs/25/bar.txt"),
435                ("https://foo.com/bar", "/inputs/28/bar"),
436                ("/base/foo.txt", "/inputs/30/foo.txt"),
437            ]
438        );
439    }
440
441    #[cfg(windows)]
442    #[test]
443    fn non_empty_trie_windows() {
444        let mut trie = InputTrie::new_with_guest_dir("/inputs/");
445        let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
446        trie.insert(InputKind::Directory, "C:\\", &base_dir)
447            .unwrap()
448            .unwrap();
449        trie.insert(InputKind::File, "C:\\foo\\bar\\foo.txt", &base_dir)
450            .unwrap()
451            .unwrap();
452        trie.insert(InputKind::File, "C:\\foo\\bar\\bar.txt", &base_dir)
453            .unwrap()
454            .unwrap();
455        trie.insert(InputKind::File, "C:\\foo\\baz\\foo.txt", &base_dir)
456            .unwrap()
457            .unwrap();
458        trie.insert(InputKind::File, "C:\\foo\\baz\\bar.txt", &base_dir)
459            .unwrap()
460            .unwrap();
461        trie.insert(InputKind::File, "C:\\bar\\foo\\foo.txt", &base_dir)
462            .unwrap()
463            .unwrap();
464        trie.insert(InputKind::File, "C:\\bar\\foo\\bar.txt", &base_dir)
465            .unwrap()
466            .unwrap();
467        trie.insert(InputKind::Directory, "C:\\baz", &base_dir)
468            .unwrap()
469            .unwrap();
470        trie.insert(InputKind::File, "https://example.com/", &base_dir)
471            .unwrap()
472            .unwrap();
473        trie.insert(
474            InputKind::File,
475            "https://example.com/foo/bar/foo.txt",
476            &base_dir,
477        )
478        .unwrap()
479        .unwrap();
480        trie.insert(
481            InputKind::File,
482            "https://example.com/foo/bar/bar.txt",
483            &base_dir,
484        )
485        .unwrap()
486        .unwrap();
487        trie.insert(
488            InputKind::File,
489            "https://example.com/foo/baz/foo.txt",
490            &base_dir,
491        )
492        .unwrap()
493        .unwrap();
494        trie.insert(
495            InputKind::File,
496            "https://example.com/foo/baz/bar.txt",
497            &base_dir,
498        )
499        .unwrap()
500        .unwrap();
501        trie.insert(
502            InputKind::File,
503            "https://example.com/bar/foo/foo.txt",
504            &base_dir,
505        )
506        .unwrap()
507        .unwrap();
508        trie.insert(
509            InputKind::File,
510            "https://example.com/bar/foo/bar.txt",
511            &base_dir,
512        )
513        .unwrap()
514        .unwrap();
515        trie.insert(InputKind::File, "https://foo.com/bar", &base_dir)
516            .unwrap()
517            .unwrap();
518        trie.insert(InputKind::File, "foo.txt", &base_dir)
519            .unwrap()
520            .unwrap();
521
522        // The important part of the guest paths are:
523        // 1) The guest file name should be the same (or `.root` if the path is
524        //    considered to be root)
525        // 2) Paths with the same parent should have the same guest parent
526        let paths: Vec<_> = trie
527            .as_slice()
528            .iter()
529            .map(|i| {
530                (
531                    i.path().to_str().expect("should be a string"),
532                    i.guest_path().expect("should have guest path").as_str(),
533                )
534            })
535            .collect();
536
537        assert_eq!(
538            paths,
539            [
540                ("C:\\", "/inputs/1/.root"),
541                ("C:\\foo\\bar\\foo.txt", "/inputs/4/foo.txt"),
542                ("C:\\foo\\bar\\bar.txt", "/inputs/4/bar.txt"),
543                ("C:\\foo\\baz\\foo.txt", "/inputs/7/foo.txt"),
544                ("C:\\foo\\baz\\bar.txt", "/inputs/7/bar.txt"),
545                ("C:\\bar\\foo\\foo.txt", "/inputs/11/foo.txt"),
546                ("C:\\bar\\foo\\bar.txt", "/inputs/11/bar.txt"),
547                ("C:\\baz", "/inputs/2/baz"),
548                ("https://example.com/", "/inputs/16/.root"),
549                ("https://example.com/foo/bar/foo.txt", "/inputs/19/foo.txt"),
550                ("https://example.com/foo/bar/bar.txt", "/inputs/19/bar.txt"),
551                ("https://example.com/foo/baz/foo.txt", "/inputs/22/foo.txt"),
552                ("https://example.com/foo/baz/bar.txt", "/inputs/22/bar.txt"),
553                ("https://example.com/bar/foo/foo.txt", "/inputs/26/foo.txt"),
554                ("https://example.com/bar/foo/bar.txt", "/inputs/26/bar.txt"),
555                ("https://foo.com/bar", "/inputs/29/bar"),
556                ("C:\\base\\foo.txt", "/inputs/31/foo.txt"),
557            ]
558        );
559    }
560}