wdl_engine/eval/
trie.rs

1//! Implementation of the inputs prefix trie.
2//!
3//! The inputs prefix trie is used to map input host paths to guest paths for
4//! task evaluation.
5
6use std::collections::HashMap;
7use std::path::Component;
8use std::path::PathBuf;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::bail;
13use url::Url;
14
15use crate::ContentKind;
16use crate::GuestPath;
17use crate::Input;
18use crate::eval::ROOT_NAME;
19use crate::path::EvaluationPath;
20
21/// Represents a node in an input trie.
22#[derive(Debug)]
23struct InputTrieNode {
24    /// The children of this node.
25    children: HashMap<String, Self>,
26    /// The identifier of the node in the trie.
27    ///
28    /// A node's identifier is used when formatting guest paths of children.
29    id: usize,
30    /// The index into the trie's `inputs` collection.
31    ///
32    /// This is `Some` only for terminal nodes in the trie.
33    index: Option<usize>,
34}
35
36impl InputTrieNode {
37    /// Constructs a new input trie node with the given id.
38    fn new(id: usize) -> Self {
39        Self {
40            children: Default::default(),
41            id,
42            index: None,
43        }
44    }
45}
46
47/// Represents a prefix trie based on input paths.
48///
49/// This is used to determine guest paths for inputs.
50///
51/// From the root to a terminal node represents a unique input.
52#[derive(Debug)]
53pub struct InputTrie {
54    /// The guest inputs root directory.
55    ///
56    /// This is `None` for backends that don't use containers.
57    guest_inputs_dir: Option<&'static str>,
58    /// The URL path children of the tree.
59    ///
60    /// The key in the map is the scheme of each URL.
61    urls: HashMap<String, InputTrieNode>,
62    /// The local path children of the tree.
63    ///
64    /// The key in the map is the first component of each path.
65    paths: HashMap<String, InputTrieNode>,
66    /// The inputs in the trie.
67    inputs: Vec<Input>,
68    /// The next node identifier.
69    next_id: usize,
70}
71
72impl InputTrie {
73    /// Constructs a new inputs trie without a guest inputs directory.
74    ///
75    /// Terminal nodes in the trie will not be mapped to guest paths.
76    pub fn new() -> Self {
77        Self {
78            guest_inputs_dir: None,
79            urls: Default::default(),
80            paths: Default::default(),
81            inputs: Default::default(),
82            // The first id starts at 1 as 0 is considered the "virtual root" of the trie
83            next_id: 1,
84        }
85    }
86
87    /// Constructs a new inputs trie with a guest inputs directory.
88    ///
89    /// Inputs with a host path will be mapped to a guest path relative to the
90    /// guest inputs directory.
91    ///
92    /// Note: a guest inputs directory is always a Unix-style path.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the guest inputs directory does not end with a slash.
97    pub fn new_with_guest_dir(guest_inputs_dir: &'static str) -> Self {
98        assert!(guest_inputs_dir.ends_with('/'));
99
100        let mut trie = Self::new();
101        trie.guest_inputs_dir = Some(guest_inputs_dir);
102        trie
103    }
104
105    /// Inserts a new input into the trie.
106    ///
107    /// The path is either a local or remote input path.
108    ///
109    /// Relative paths are made absolute via the provided base path.
110    ///
111    /// If an input was added, returns `Ok(Some(index))` where `index` is the
112    /// index of the input in the trie.
113    ///
114    /// Returns `Ok(None)` if the provided path was already a guest input path.
115    ///
116    /// Returns an error for an invalid input path.
117    pub fn insert(
118        &mut self,
119        kind: ContentKind,
120        path: &str,
121        base_dir: &EvaluationPath,
122    ) -> Result<Option<usize>> {
123        let path = base_dir.join(path)?;
124        match path {
125            EvaluationPath::Local(path) => {
126                // Check to see if the path being inserted is already a guest path
127                if let Some(dir) = self.guest_inputs_dir
128                    && path.starts_with(dir)
129                {
130                    return Ok(None);
131                }
132
133                self.insert_path(kind, path).map(Some)
134            }
135            EvaluationPath::Remote(url) => Ok(Some(self.insert_url(kind, url))),
136        }
137    }
138
139    /// Gets the inputs of the trie as a slice.
140    pub fn as_slice(&self) -> &[Input] {
141        &self.inputs
142    }
143
144    /// Gets the inputs of the trie as a mutable slice.
145    pub fn as_slice_mut(&mut self) -> &mut [Input] {
146        &mut self.inputs
147    }
148
149    /// Inserts an input with a local path into the trie.
150    fn insert_path(&mut self, kind: ContentKind, path: PathBuf) -> Result<usize> {
151        let mut components = path.components();
152
153        let component = components
154            .next()
155            .context("input path cannot be empty")?
156            .as_os_str()
157            .to_str()
158            .with_context(|| format!("input path `{path}` is not UTF-8", path = path.display()))?;
159
160        let mut parent_id = 0;
161        let mut node = self.paths.entry(component.to_string()).or_insert_with(|| {
162            let node = InputTrieNode::new(self.next_id);
163            self.next_id += 1;
164            node
165        });
166
167        let mut last_component = None;
168        for component in components {
169            match component {
170                Component::CurDir | Component::ParentDir => {
171                    bail!(
172                        "input path `{path}` may not contain `.` or `..`",
173                        path = path.display()
174                    );
175                }
176                _ => {}
177            }
178
179            let component = component.as_os_str().to_str().with_context(|| {
180                format!("input path `{path}` is not UTF-8", path = path.display())
181            })?;
182
183            parent_id = node.id;
184
185            node = node
186                .children
187                .entry(component.to_string())
188                .or_insert_with(|| {
189                    let node = InputTrieNode::new(self.next_id);
190                    self.next_id += 1;
191                    node
192                });
193
194            last_component = Some(component);
195        }
196
197        // Check to see if the input already exists in the trie
198        if let Some(index) = node.index {
199            return Ok(index);
200        }
201
202        let guest_path = self.guest_inputs_dir.map(|d| {
203            GuestPath::new(format!(
204                "{d}{parent_id}/{last}",
205                // On Windows, `last_component` might be `Some` despite being a root due to the
206                // prefix (e.g. `C:`); instead check if the path has a parent
207                last = if path.parent().is_none() {
208                    ROOT_NAME
209                } else {
210                    last_component.unwrap_or(ROOT_NAME)
211                }
212            ))
213        });
214
215        let index = self.inputs.len();
216        self.inputs
217            .push(Input::new(kind, EvaluationPath::Local(path), guest_path));
218        node.index = Some(index);
219        Ok(index)
220    }
221
222    /// Inserts an input with a URL into the trie.
223    fn insert_url(&mut self, kind: ContentKind, url: Url) -> usize {
224        // Insert for scheme
225        let mut node = self
226            .urls
227            .entry(url.scheme().to_string())
228            .or_insert_with(|| {
229                let node = InputTrieNode::new(self.next_id);
230                self.next_id += 1;
231                node
232            });
233
234        // Insert the authority; if the URL's path is empty, we'll
235        let mut parent_id = node.id;
236        node = node
237            .children
238            .entry(url.authority().to_string())
239            .or_insert_with(|| {
240                let node = InputTrieNode::new(self.next_id);
241                self.next_id += 1;
242                node
243            });
244
245        // Insert the path segments
246        let mut last_segment = None;
247        if let Some(segments) = url.path_segments() {
248            for segment in segments {
249                parent_id = node.id;
250                node = node.children.entry(segment.to_string()).or_insert_with(|| {
251                    let node = InputTrieNode::new(self.next_id);
252                    self.next_id += 1;
253                    node
254                });
255
256                if !segment.is_empty() {
257                    last_segment = Some(segment);
258                }
259            }
260        }
261
262        // Check to see if the input already exists in the trie
263        if let Some(index) = node.index {
264            return index;
265        }
266
267        let guest_path = self.guest_inputs_dir.as_ref().map(|d| {
268            GuestPath::new(format!(
269                "{d}{parent_id}/{last}",
270                last = last_segment.unwrap_or(ROOT_NAME)
271            ))
272        });
273
274        let index = self.inputs.len();
275        self.inputs
276            .push(Input::new(kind, EvaluationPath::Remote(url), guest_path));
277        node.index = Some(index);
278        index
279    }
280}
281
282#[cfg(test)]
283mod test {
284    use pretty_assertions::assert_eq;
285
286    use super::*;
287
288    #[test]
289    fn empty_trie() {
290        let empty = InputTrie::new();
291        assert!(empty.as_slice().is_empty());
292    }
293
294    #[cfg(unix)]
295    #[test]
296    fn unmapped_inputs_unix() {
297        let mut trie = InputTrie::new();
298        let base_dir: EvaluationPath = "/base".parse().unwrap();
299        trie.insert(ContentKind::File, "/foo/bar/baz", &base_dir)
300            .unwrap();
301        assert_eq!(trie.as_slice().len(), 1);
302        assert_eq!(trie.as_slice()[0].path().to_string(), "/foo/bar/baz");
303        assert!(trie.as_slice()[0].guest_path().is_none());
304    }
305
306    #[cfg(windows)]
307    #[test]
308    fn unmapped_inputs_windows() {
309        let mut trie = InputTrie::new();
310        let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
311        trie.insert(ContentKind::File, "C:\\foo\\bar\\baz", &base_dir)
312            .unwrap();
313        assert_eq!(trie.as_slice().len(), 1);
314        assert_eq!(trie.as_slice()[0].path().to_string(), "C:\\foo\\bar\\baz");
315        assert!(trie.as_slice()[0].guest_path().is_none());
316    }
317
318    #[cfg(unix)]
319    #[test]
320    fn non_empty_trie_unix() {
321        let mut trie = InputTrie::new_with_guest_dir("/inputs/");
322        let base_dir: EvaluationPath = "/base".parse().unwrap();
323        trie.insert(ContentKind::Directory, "/", &base_dir)
324            .unwrap()
325            .unwrap();
326        trie.insert(ContentKind::File, "/foo/bar/foo.txt", &base_dir)
327            .unwrap()
328            .unwrap();
329        trie.insert(ContentKind::File, "/foo/bar/bar.txt", &base_dir)
330            .unwrap()
331            .unwrap();
332        trie.insert(ContentKind::File, "/foo/baz/foo.txt", &base_dir)
333            .unwrap()
334            .unwrap();
335        trie.insert(ContentKind::File, "/foo/baz/bar.txt", &base_dir)
336            .unwrap()
337            .unwrap();
338        trie.insert(ContentKind::File, "/bar/foo/foo.txt", &base_dir)
339            .unwrap()
340            .unwrap();
341        trie.insert(ContentKind::File, "/bar/foo/bar.txt", &base_dir)
342            .unwrap()
343            .unwrap();
344        trie.insert(ContentKind::Directory, "/baz", &base_dir)
345            .unwrap()
346            .unwrap();
347        trie.insert(ContentKind::File, "https://example.com/", &base_dir)
348            .unwrap()
349            .unwrap();
350        trie.insert(
351            ContentKind::File,
352            "https://example.com/foo/bar/foo.txt",
353            &base_dir,
354        )
355        .unwrap()
356        .unwrap();
357        trie.insert(
358            ContentKind::File,
359            "https://example.com/foo/bar/bar.txt",
360            &base_dir,
361        )
362        .unwrap()
363        .unwrap();
364        trie.insert(
365            ContentKind::File,
366            "https://example.com/foo/baz/foo.txt",
367            &base_dir,
368        )
369        .unwrap()
370        .unwrap();
371        trie.insert(
372            ContentKind::File,
373            "https://example.com/foo/baz/bar.txt",
374            &base_dir,
375        )
376        .unwrap()
377        .unwrap();
378        trie.insert(
379            ContentKind::File,
380            "https://example.com/bar/foo/foo.txt",
381            &base_dir,
382        )
383        .unwrap()
384        .unwrap();
385        trie.insert(
386            ContentKind::File,
387            "https://example.com/bar/foo/bar.txt",
388            &base_dir,
389        )
390        .unwrap()
391        .unwrap();
392        trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
393            .unwrap()
394            .unwrap();
395        trie.insert(ContentKind::File, "foo.txt", &base_dir)
396            .unwrap()
397            .unwrap();
398
399        // The important part of the guest paths are:
400        // 1) The guest file name should be the same (or `.root` if the path is
401        //    considered to be root)
402        // 2) Paths with the same parent should have the same guest parent
403        let paths: Vec<_> = trie
404            .as_slice()
405            .iter()
406            .map(|i| {
407                (
408                    i.path().to_string(),
409                    i.guest_path().expect("should have guest path").as_str(),
410                )
411            })
412            .collect();
413
414        assert_eq!(
415            paths,
416            [
417                ("/".to_string(), "/inputs/0/.root"),
418                ("/foo/bar/foo.txt".to_string(), "/inputs/3/foo.txt"),
419                ("/foo/bar/bar.txt".to_string(), "/inputs/3/bar.txt"),
420                ("/foo/baz/foo.txt".to_string(), "/inputs/6/foo.txt"),
421                ("/foo/baz/bar.txt".to_string(), "/inputs/6/bar.txt"),
422                ("/bar/foo/foo.txt".to_string(), "/inputs/10/foo.txt"),
423                ("/bar/foo/bar.txt".to_string(), "/inputs/10/bar.txt"),
424                ("/baz".to_string(), "/inputs/1/baz"),
425                ("https://example.com/".to_string(), "/inputs/15/.root"),
426                (
427                    "https://example.com/foo/bar/foo.txt".to_string(),
428                    "/inputs/18/foo.txt"
429                ),
430                (
431                    "https://example.com/foo/bar/bar.txt".to_string(),
432                    "/inputs/18/bar.txt"
433                ),
434                (
435                    "https://example.com/foo/baz/foo.txt".to_string(),
436                    "/inputs/21/foo.txt"
437                ),
438                (
439                    "https://example.com/foo/baz/bar.txt".to_string(),
440                    "/inputs/21/bar.txt"
441                ),
442                (
443                    "https://example.com/bar/foo/foo.txt".to_string(),
444                    "/inputs/25/foo.txt"
445                ),
446                (
447                    "https://example.com/bar/foo/bar.txt".to_string(),
448                    "/inputs/25/bar.txt"
449                ),
450                ("https://foo.com/bar".to_string(), "/inputs/28/bar"),
451                ("/base/foo.txt".to_string(), "/inputs/30/foo.txt"),
452            ]
453        );
454    }
455
456    #[cfg(windows)]
457    #[test]
458    fn non_empty_trie_windows() {
459        let mut trie = InputTrie::new_with_guest_dir("/inputs/");
460        let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
461        trie.insert(ContentKind::Directory, "C:\\", &base_dir)
462            .unwrap()
463            .unwrap();
464        trie.insert(ContentKind::File, "C:\\foo\\bar\\foo.txt", &base_dir)
465            .unwrap()
466            .unwrap();
467        trie.insert(ContentKind::File, "C:\\foo\\bar\\bar.txt", &base_dir)
468            .unwrap()
469            .unwrap();
470        trie.insert(ContentKind::File, "C:\\foo\\baz\\foo.txt", &base_dir)
471            .unwrap()
472            .unwrap();
473        trie.insert(ContentKind::File, "C:\\foo\\baz\\bar.txt", &base_dir)
474            .unwrap()
475            .unwrap();
476        trie.insert(ContentKind::File, "C:\\bar\\foo\\foo.txt", &base_dir)
477            .unwrap()
478            .unwrap();
479        trie.insert(ContentKind::File, "C:\\bar\\foo\\bar.txt", &base_dir)
480            .unwrap()
481            .unwrap();
482        trie.insert(ContentKind::Directory, "C:\\baz", &base_dir)
483            .unwrap()
484            .unwrap();
485        trie.insert(ContentKind::File, "https://example.com/", &base_dir)
486            .unwrap()
487            .unwrap();
488        trie.insert(
489            ContentKind::File,
490            "https://example.com/foo/bar/foo.txt",
491            &base_dir,
492        )
493        .unwrap()
494        .unwrap();
495        trie.insert(
496            ContentKind::File,
497            "https://example.com/foo/bar/bar.txt",
498            &base_dir,
499        )
500        .unwrap()
501        .unwrap();
502        trie.insert(
503            ContentKind::File,
504            "https://example.com/foo/baz/foo.txt",
505            &base_dir,
506        )
507        .unwrap()
508        .unwrap();
509        trie.insert(
510            ContentKind::File,
511            "https://example.com/foo/baz/bar.txt",
512            &base_dir,
513        )
514        .unwrap()
515        .unwrap();
516        trie.insert(
517            ContentKind::File,
518            "https://example.com/bar/foo/foo.txt",
519            &base_dir,
520        )
521        .unwrap()
522        .unwrap();
523        trie.insert(
524            ContentKind::File,
525            "https://example.com/bar/foo/bar.txt",
526            &base_dir,
527        )
528        .unwrap()
529        .unwrap();
530        trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
531            .unwrap()
532            .unwrap();
533        trie.insert(ContentKind::File, "foo.txt", &base_dir)
534            .unwrap()
535            .unwrap();
536
537        // The important part of the guest paths are:
538        // 1) The guest file name should be the same (or `.root` if the path is
539        //    considered to be root)
540        // 2) Paths with the same parent should have the same guest parent
541        let paths: Vec<_> = trie
542            .as_slice()
543            .iter()
544            .map(|i| {
545                (
546                    i.path().to_string(),
547                    i.guest_path().expect("should have guest path").as_str(),
548                )
549            })
550            .collect();
551
552        assert_eq!(
553            paths,
554            [
555                ("C:\\".to_string(), "/inputs/1/.root"),
556                ("C:\\foo\\bar\\foo.txt".to_string(), "/inputs/4/foo.txt"),
557                ("C:\\foo\\bar\\bar.txt".to_string(), "/inputs/4/bar.txt"),
558                ("C:\\foo\\baz\\foo.txt".to_string(), "/inputs/7/foo.txt"),
559                ("C:\\foo\\baz\\bar.txt".to_string(), "/inputs/7/bar.txt"),
560                ("C:\\bar\\foo\\foo.txt".to_string(), "/inputs/11/foo.txt"),
561                ("C:\\bar\\foo\\bar.txt".to_string(), "/inputs/11/bar.txt"),
562                ("C:\\baz".to_string(), "/inputs/2/baz"),
563                ("https://example.com/".to_string(), "/inputs/16/.root"),
564                (
565                    "https://example.com/foo/bar/foo.txt".to_string(),
566                    "/inputs/19/foo.txt"
567                ),
568                (
569                    "https://example.com/foo/bar/bar.txt".to_string(),
570                    "/inputs/19/bar.txt"
571                ),
572                (
573                    "https://example.com/foo/baz/foo.txt".to_string(),
574                    "/inputs/22/foo.txt"
575                ),
576                (
577                    "https://example.com/foo/baz/bar.txt".to_string(),
578                    "/inputs/22/bar.txt"
579                ),
580                (
581                    "https://example.com/bar/foo/foo.txt".to_string(),
582                    "/inputs/26/foo.txt"
583                ),
584                (
585                    "https://example.com/bar/foo/bar.txt".to_string(),
586                    "/inputs/26/bar.txt"
587                ),
588                ("https://foo.com/bar".to_string(), "/inputs/29/bar"),
589                ("C:\\base\\foo.txt".to_string(), "/inputs/31/foo.txt"),
590            ]
591        );
592    }
593}