1use std::collections::HashMap;
7use std::path::Component;
8use std::path::PathBuf;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::bail;
13use url::Url;
14
15use crate::ContentKind;
16use crate::EvaluationPath;
17use crate::EvaluationPathKind;
18use crate::GuestPath;
19use crate::backend::Input;
20use crate::eval::ROOT_NAME;
21
22#[derive(Debug)]
24struct InputTrieNode {
25 children: HashMap<String, Self>,
27 id: usize,
31 index: Option<usize>,
35}
36
37impl InputTrieNode {
38 fn new(id: usize) -> Self {
40 Self {
41 children: Default::default(),
42 id,
43 index: None,
44 }
45 }
46}
47
48#[derive(Debug)]
54pub struct InputTrie {
55 guest_inputs_dir: Option<&'static str>,
59 urls: HashMap<String, InputTrieNode>,
63 paths: HashMap<String, InputTrieNode>,
67 inputs: Vec<Input>,
69 next_id: usize,
71}
72
73impl InputTrie {
74 pub fn new() -> Self {
78 Self {
79 guest_inputs_dir: None,
80 urls: Default::default(),
81 paths: Default::default(),
82 inputs: Default::default(),
83 next_id: 1,
85 }
86 }
87
88 pub fn new_with_guest_dir(guest_inputs_dir: &'static str) -> Self {
99 assert!(guest_inputs_dir.ends_with('/'));
100
101 let mut trie = Self::new();
102 trie.guest_inputs_dir = Some(guest_inputs_dir);
103 trie
104 }
105
106 pub fn insert(
119 &mut self,
120 kind: ContentKind,
121 path: &str,
122 base_dir: &EvaluationPath,
123 ) -> Result<Option<usize>> {
124 match base_dir.join(path)?.into_kind() {
125 EvaluationPathKind::Local(path) => {
126 if let Some(dir) = self.guest_inputs_dir
128 && path.starts_with(dir)
129 {
130 return Ok(None);
131 }
132
133 self.insert_path(kind, path).map(Some)
134 }
135 EvaluationPathKind::Remote(url) => self.insert_url(kind, url).map(Some),
136 }
137 }
138
139 pub fn as_slice(&self) -> &[Input] {
141 &self.inputs
142 }
143
144 pub fn as_slice_mut(&mut self) -> &mut [Input] {
146 &mut self.inputs
147 }
148
149 fn insert_path(&mut self, kind: ContentKind, path: PathBuf) -> Result<usize> {
151 let mut components = path.components();
152
153 let component = components
154 .next()
155 .context("input path cannot be empty")?
156 .as_os_str()
157 .to_str()
158 .with_context(|| format!("input path `{path}` is not UTF-8", path = path.display()))?;
159
160 let mut parent_id = 0;
161 let mut node = self.paths.entry(component.to_string()).or_insert_with(|| {
162 let node = InputTrieNode::new(self.next_id);
163 self.next_id += 1;
164 node
165 });
166
167 let mut last_component = None;
168 for component in components {
169 match component {
170 Component::CurDir | Component::ParentDir => {
171 bail!(
172 "input path `{path}` may not contain `.` or `..`",
173 path = path.display()
174 );
175 }
176 _ => {}
177 }
178
179 let component = component.as_os_str().to_str().with_context(|| {
180 format!("input path `{path}` is not UTF-8", path = path.display())
181 })?;
182
183 parent_id = node.id;
184
185 node = node
186 .children
187 .entry(component.to_string())
188 .or_insert_with(|| {
189 let node = InputTrieNode::new(self.next_id);
190 self.next_id += 1;
191 node
192 });
193
194 last_component = Some(component);
195 }
196
197 if let Some(index) = node.index {
199 return Ok(index);
200 }
201
202 let guest_path = self.guest_inputs_dir.map(|d| {
203 GuestPath::new(format!(
204 "{d}{parent_id}/{last}",
205 last = if path.parent().is_none() {
208 ROOT_NAME
209 } else {
210 last_component.unwrap_or(ROOT_NAME)
211 }
212 ))
213 });
214
215 let index = self.inputs.len();
216 self.inputs.push(Input::new(
217 kind,
218 EvaluationPath::from_local_path(path),
219 guest_path,
220 ));
221 node.index = Some(index);
222 Ok(index)
223 }
224
225 fn insert_url(&mut self, kind: ContentKind, url: Url) -> Result<usize> {
227 let mut node = self
229 .urls
230 .entry(url.scheme().to_string())
231 .or_insert_with(|| {
232 let node = InputTrieNode::new(self.next_id);
233 self.next_id += 1;
234 node
235 });
236
237 let mut parent_id = node.id;
239 node = node
240 .children
241 .entry(url.authority().to_string())
242 .or_insert_with(|| {
243 let node = InputTrieNode::new(self.next_id);
244 self.next_id += 1;
245 node
246 });
247
248 let mut last_segment = None;
250 if let Some(segments) = url.path_segments() {
251 for segment in segments {
252 parent_id = node.id;
253 node = node.children.entry(segment.to_string()).or_insert_with(|| {
254 let node = InputTrieNode::new(self.next_id);
255 self.next_id += 1;
256 node
257 });
258
259 if !segment.is_empty() {
260 last_segment = Some(segment);
261 }
262 }
263 }
264
265 if let Some(index) = node.index {
267 return Ok(index);
268 }
269
270 let guest_path = self.guest_inputs_dir.as_ref().map(|d| {
271 GuestPath::new(format!(
272 "{d}{parent_id}/{last}",
273 last = last_segment.unwrap_or(ROOT_NAME)
274 ))
275 });
276
277 let index = self.inputs.len();
278 self.inputs
279 .push(Input::new(kind, EvaluationPath::try_from(url)?, guest_path));
280 node.index = Some(index);
281 Ok(index)
282 }
283}
284
285#[cfg(test)]
286mod test {
287 use pretty_assertions::assert_eq;
288
289 use super::*;
290
291 #[test]
292 fn empty_trie() {
293 let empty = InputTrie::new();
294 assert!(empty.as_slice().is_empty());
295 }
296
297 #[cfg(unix)]
298 #[test]
299 fn unmapped_inputs_unix() {
300 let mut trie = InputTrie::new();
301 let base_dir: EvaluationPath = "/base".parse().unwrap();
302 trie.insert(ContentKind::File, "/foo/bar/baz", &base_dir)
303 .unwrap();
304 assert_eq!(trie.as_slice().len(), 1);
305 assert_eq!(trie.as_slice()[0].path().to_string(), "/foo/bar/baz");
306 assert!(trie.as_slice()[0].guest_path().is_none());
307 }
308
309 #[cfg(windows)]
310 #[test]
311 fn unmapped_inputs_windows() {
312 let mut trie = InputTrie::new();
313 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
314 trie.insert(ContentKind::File, "C:\\foo\\bar\\baz", &base_dir)
315 .unwrap();
316 assert_eq!(trie.as_slice().len(), 1);
317 assert_eq!(trie.as_slice()[0].path().to_string(), "C:\\foo\\bar\\baz");
318 assert!(trie.as_slice()[0].guest_path().is_none());
319 }
320
321 #[cfg(unix)]
322 #[test]
323 fn non_empty_trie_unix() {
324 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
325 let base_dir: EvaluationPath = "/base".parse().unwrap();
326 trie.insert(ContentKind::Directory, "/", &base_dir)
327 .unwrap()
328 .unwrap();
329 trie.insert(ContentKind::File, "/foo/bar/foo.txt", &base_dir)
330 .unwrap()
331 .unwrap();
332 trie.insert(ContentKind::File, "/foo/bar/bar.txt", &base_dir)
333 .unwrap()
334 .unwrap();
335 trie.insert(ContentKind::File, "/foo/baz/foo.txt", &base_dir)
336 .unwrap()
337 .unwrap();
338 trie.insert(ContentKind::File, "/foo/baz/bar.txt", &base_dir)
339 .unwrap()
340 .unwrap();
341 trie.insert(ContentKind::File, "/bar/foo/foo.txt", &base_dir)
342 .unwrap()
343 .unwrap();
344 trie.insert(ContentKind::File, "/bar/foo/bar.txt", &base_dir)
345 .unwrap()
346 .unwrap();
347 trie.insert(ContentKind::Directory, "/baz", &base_dir)
348 .unwrap()
349 .unwrap();
350 trie.insert(ContentKind::File, "https://example.com/", &base_dir)
351 .unwrap()
352 .unwrap();
353 trie.insert(
354 ContentKind::File,
355 "https://example.com/foo/bar/foo.txt",
356 &base_dir,
357 )
358 .unwrap()
359 .unwrap();
360 trie.insert(
361 ContentKind::File,
362 "https://example.com/foo/bar/bar.txt",
363 &base_dir,
364 )
365 .unwrap()
366 .unwrap();
367 trie.insert(
368 ContentKind::File,
369 "https://example.com/foo/baz/foo.txt",
370 &base_dir,
371 )
372 .unwrap()
373 .unwrap();
374 trie.insert(
375 ContentKind::File,
376 "https://example.com/foo/baz/bar.txt",
377 &base_dir,
378 )
379 .unwrap()
380 .unwrap();
381 trie.insert(
382 ContentKind::File,
383 "https://example.com/bar/foo/foo.txt",
384 &base_dir,
385 )
386 .unwrap()
387 .unwrap();
388 trie.insert(
389 ContentKind::File,
390 "https://example.com/bar/foo/bar.txt",
391 &base_dir,
392 )
393 .unwrap()
394 .unwrap();
395 trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
396 .unwrap()
397 .unwrap();
398 trie.insert(ContentKind::File, "foo.txt", &base_dir)
399 .unwrap()
400 .unwrap();
401
402 let paths: Vec<_> = trie
407 .as_slice()
408 .iter()
409 .map(|i| {
410 (
411 i.path().to_string(),
412 i.guest_path().expect("should have guest path").as_str(),
413 )
414 })
415 .collect();
416
417 assert_eq!(
418 paths,
419 [
420 ("/".to_string(), "/inputs/0/.root"),
421 ("/foo/bar/foo.txt".to_string(), "/inputs/3/foo.txt"),
422 ("/foo/bar/bar.txt".to_string(), "/inputs/3/bar.txt"),
423 ("/foo/baz/foo.txt".to_string(), "/inputs/6/foo.txt"),
424 ("/foo/baz/bar.txt".to_string(), "/inputs/6/bar.txt"),
425 ("/bar/foo/foo.txt".to_string(), "/inputs/10/foo.txt"),
426 ("/bar/foo/bar.txt".to_string(), "/inputs/10/bar.txt"),
427 ("/baz".to_string(), "/inputs/1/baz"),
428 ("https://example.com/".to_string(), "/inputs/15/.root"),
429 (
430 "https://example.com/foo/bar/foo.txt".to_string(),
431 "/inputs/18/foo.txt"
432 ),
433 (
434 "https://example.com/foo/bar/bar.txt".to_string(),
435 "/inputs/18/bar.txt"
436 ),
437 (
438 "https://example.com/foo/baz/foo.txt".to_string(),
439 "/inputs/21/foo.txt"
440 ),
441 (
442 "https://example.com/foo/baz/bar.txt".to_string(),
443 "/inputs/21/bar.txt"
444 ),
445 (
446 "https://example.com/bar/foo/foo.txt".to_string(),
447 "/inputs/25/foo.txt"
448 ),
449 (
450 "https://example.com/bar/foo/bar.txt".to_string(),
451 "/inputs/25/bar.txt"
452 ),
453 ("https://foo.com/bar".to_string(), "/inputs/28/bar"),
454 ("/base/foo.txt".to_string(), "/inputs/30/foo.txt"),
455 ]
456 );
457 }
458
459 #[cfg(windows)]
460 #[test]
461 fn non_empty_trie_windows() {
462 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
463 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
464 trie.insert(ContentKind::Directory, "C:\\", &base_dir)
465 .unwrap()
466 .unwrap();
467 trie.insert(ContentKind::File, "C:\\foo\\bar\\foo.txt", &base_dir)
468 .unwrap()
469 .unwrap();
470 trie.insert(ContentKind::File, "C:\\foo\\bar\\bar.txt", &base_dir)
471 .unwrap()
472 .unwrap();
473 trie.insert(ContentKind::File, "C:\\foo\\baz\\foo.txt", &base_dir)
474 .unwrap()
475 .unwrap();
476 trie.insert(ContentKind::File, "C:\\foo\\baz\\bar.txt", &base_dir)
477 .unwrap()
478 .unwrap();
479 trie.insert(ContentKind::File, "C:\\bar\\foo\\foo.txt", &base_dir)
480 .unwrap()
481 .unwrap();
482 trie.insert(ContentKind::File, "C:\\bar\\foo\\bar.txt", &base_dir)
483 .unwrap()
484 .unwrap();
485 trie.insert(ContentKind::Directory, "C:\\baz", &base_dir)
486 .unwrap()
487 .unwrap();
488 trie.insert(ContentKind::File, "https://example.com/", &base_dir)
489 .unwrap()
490 .unwrap();
491 trie.insert(
492 ContentKind::File,
493 "https://example.com/foo/bar/foo.txt",
494 &base_dir,
495 )
496 .unwrap()
497 .unwrap();
498 trie.insert(
499 ContentKind::File,
500 "https://example.com/foo/bar/bar.txt",
501 &base_dir,
502 )
503 .unwrap()
504 .unwrap();
505 trie.insert(
506 ContentKind::File,
507 "https://example.com/foo/baz/foo.txt",
508 &base_dir,
509 )
510 .unwrap()
511 .unwrap();
512 trie.insert(
513 ContentKind::File,
514 "https://example.com/foo/baz/bar.txt",
515 &base_dir,
516 )
517 .unwrap()
518 .unwrap();
519 trie.insert(
520 ContentKind::File,
521 "https://example.com/bar/foo/foo.txt",
522 &base_dir,
523 )
524 .unwrap()
525 .unwrap();
526 trie.insert(
527 ContentKind::File,
528 "https://example.com/bar/foo/bar.txt",
529 &base_dir,
530 )
531 .unwrap()
532 .unwrap();
533 trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
534 .unwrap()
535 .unwrap();
536 trie.insert(ContentKind::File, "foo.txt", &base_dir)
537 .unwrap()
538 .unwrap();
539
540 let paths: Vec<_> = trie
545 .as_slice()
546 .iter()
547 .map(|i| {
548 (
549 i.path().to_string(),
550 i.guest_path().expect("should have guest path").as_str(),
551 )
552 })
553 .collect();
554
555 assert_eq!(
556 paths,
557 [
558 ("C:\\".to_string(), "/inputs/1/.root"),
559 ("C:\\foo\\bar\\foo.txt".to_string(), "/inputs/4/foo.txt"),
560 ("C:\\foo\\bar\\bar.txt".to_string(), "/inputs/4/bar.txt"),
561 ("C:\\foo\\baz\\foo.txt".to_string(), "/inputs/7/foo.txt"),
562 ("C:\\foo\\baz\\bar.txt".to_string(), "/inputs/7/bar.txt"),
563 ("C:\\bar\\foo\\foo.txt".to_string(), "/inputs/11/foo.txt"),
564 ("C:\\bar\\foo\\bar.txt".to_string(), "/inputs/11/bar.txt"),
565 ("C:\\baz".to_string(), "/inputs/2/baz"),
566 ("https://example.com/".to_string(), "/inputs/16/.root"),
567 (
568 "https://example.com/foo/bar/foo.txt".to_string(),
569 "/inputs/19/foo.txt"
570 ),
571 (
572 "https://example.com/foo/bar/bar.txt".to_string(),
573 "/inputs/19/bar.txt"
574 ),
575 (
576 "https://example.com/foo/baz/foo.txt".to_string(),
577 "/inputs/22/foo.txt"
578 ),
579 (
580 "https://example.com/foo/baz/bar.txt".to_string(),
581 "/inputs/22/bar.txt"
582 ),
583 (
584 "https://example.com/bar/foo/foo.txt".to_string(),
585 "/inputs/26/foo.txt"
586 ),
587 (
588 "https://example.com/bar/foo/bar.txt".to_string(),
589 "/inputs/26/bar.txt"
590 ),
591 ("https://foo.com/bar".to_string(), "/inputs/29/bar"),
592 ("C:\\base\\foo.txt".to_string(), "/inputs/31/foo.txt"),
593 ]
594 );
595 }
596}