1use std::collections::HashMap;
7use std::path::Component;
8use std::path::PathBuf;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::bail;
13use url::Url;
14
15use crate::ContentKind;
16use crate::GuestPath;
17use crate::Input;
18use crate::eval::ROOT_NAME;
19use crate::path::EvaluationPath;
20
21#[derive(Debug)]
23struct InputTrieNode {
24 children: HashMap<String, Self>,
26 id: usize,
30 index: Option<usize>,
34}
35
36impl InputTrieNode {
37 fn new(id: usize) -> Self {
39 Self {
40 children: Default::default(),
41 id,
42 index: None,
43 }
44 }
45}
46
47#[derive(Debug)]
53pub struct InputTrie {
54 guest_inputs_dir: Option<&'static str>,
58 urls: HashMap<String, InputTrieNode>,
62 paths: HashMap<String, InputTrieNode>,
66 inputs: Vec<Input>,
68 next_id: usize,
70}
71
72impl InputTrie {
73 pub fn new() -> Self {
77 Self {
78 guest_inputs_dir: None,
79 urls: Default::default(),
80 paths: Default::default(),
81 inputs: Default::default(),
82 next_id: 1,
84 }
85 }
86
87 pub fn new_with_guest_dir(guest_inputs_dir: &'static str) -> Self {
98 assert!(guest_inputs_dir.ends_with('/'));
99
100 let mut trie = Self::new();
101 trie.guest_inputs_dir = Some(guest_inputs_dir);
102 trie
103 }
104
105 pub fn insert(
118 &mut self,
119 kind: ContentKind,
120 path: &str,
121 base_dir: &EvaluationPath,
122 ) -> Result<Option<usize>> {
123 let path = base_dir.join(path)?;
124 match path {
125 EvaluationPath::Local(path) => {
126 if let Some(dir) = self.guest_inputs_dir
128 && path.starts_with(dir)
129 {
130 return Ok(None);
131 }
132
133 self.insert_path(kind, path).map(Some)
134 }
135 EvaluationPath::Remote(url) => Ok(Some(self.insert_url(kind, url))),
136 }
137 }
138
139 pub fn as_slice(&self) -> &[Input] {
141 &self.inputs
142 }
143
144 pub fn as_slice_mut(&mut self) -> &mut [Input] {
146 &mut self.inputs
147 }
148
149 fn insert_path(&mut self, kind: ContentKind, path: PathBuf) -> Result<usize> {
151 let mut components = path.components();
152
153 let component = components
154 .next()
155 .context("input path cannot be empty")?
156 .as_os_str()
157 .to_str()
158 .with_context(|| format!("input path `{path}` is not UTF-8", path = path.display()))?;
159
160 let mut parent_id = 0;
161 let mut node = self.paths.entry(component.to_string()).or_insert_with(|| {
162 let node = InputTrieNode::new(self.next_id);
163 self.next_id += 1;
164 node
165 });
166
167 let mut last_component = None;
168 for component in components {
169 match component {
170 Component::CurDir | Component::ParentDir => {
171 bail!(
172 "input path `{path}` may not contain `.` or `..`",
173 path = path.display()
174 );
175 }
176 _ => {}
177 }
178
179 let component = component.as_os_str().to_str().with_context(|| {
180 format!("input path `{path}` is not UTF-8", path = path.display())
181 })?;
182
183 parent_id = node.id;
184
185 node = node
186 .children
187 .entry(component.to_string())
188 .or_insert_with(|| {
189 let node = InputTrieNode::new(self.next_id);
190 self.next_id += 1;
191 node
192 });
193
194 last_component = Some(component);
195 }
196
197 if let Some(index) = node.index {
199 return Ok(index);
200 }
201
202 let guest_path = self.guest_inputs_dir.map(|d| {
203 GuestPath::new(format!(
204 "{d}{parent_id}/{last}",
205 last = if path.parent().is_none() {
208 ROOT_NAME
209 } else {
210 last_component.unwrap_or(ROOT_NAME)
211 }
212 ))
213 });
214
215 let index = self.inputs.len();
216 self.inputs
217 .push(Input::new(kind, EvaluationPath::Local(path), guest_path));
218 node.index = Some(index);
219 Ok(index)
220 }
221
222 fn insert_url(&mut self, kind: ContentKind, url: Url) -> usize {
224 let mut node = self
226 .urls
227 .entry(url.scheme().to_string())
228 .or_insert_with(|| {
229 let node = InputTrieNode::new(self.next_id);
230 self.next_id += 1;
231 node
232 });
233
234 let mut parent_id = node.id;
236 node = node
237 .children
238 .entry(url.authority().to_string())
239 .or_insert_with(|| {
240 let node = InputTrieNode::new(self.next_id);
241 self.next_id += 1;
242 node
243 });
244
245 let mut last_segment = None;
247 if let Some(segments) = url.path_segments() {
248 for segment in segments {
249 parent_id = node.id;
250 node = node.children.entry(segment.to_string()).or_insert_with(|| {
251 let node = InputTrieNode::new(self.next_id);
252 self.next_id += 1;
253 node
254 });
255
256 if !segment.is_empty() {
257 last_segment = Some(segment);
258 }
259 }
260 }
261
262 if let Some(index) = node.index {
264 return index;
265 }
266
267 let guest_path = self.guest_inputs_dir.as_ref().map(|d| {
268 GuestPath::new(format!(
269 "{d}{parent_id}/{last}",
270 last = last_segment.unwrap_or(ROOT_NAME)
271 ))
272 });
273
274 let index = self.inputs.len();
275 self.inputs
276 .push(Input::new(kind, EvaluationPath::Remote(url), guest_path));
277 node.index = Some(index);
278 index
279 }
280}
281
282#[cfg(test)]
283mod test {
284 use pretty_assertions::assert_eq;
285
286 use super::*;
287
288 #[test]
289 fn empty_trie() {
290 let empty = InputTrie::new();
291 assert!(empty.as_slice().is_empty());
292 }
293
294 #[cfg(unix)]
295 #[test]
296 fn unmapped_inputs_unix() {
297 let mut trie = InputTrie::new();
298 let base_dir: EvaluationPath = "/base".parse().unwrap();
299 trie.insert(ContentKind::File, "/foo/bar/baz", &base_dir)
300 .unwrap();
301 assert_eq!(trie.as_slice().len(), 1);
302 assert_eq!(trie.as_slice()[0].path().to_string(), "/foo/bar/baz");
303 assert!(trie.as_slice()[0].guest_path().is_none());
304 }
305
306 #[cfg(windows)]
307 #[test]
308 fn unmapped_inputs_windows() {
309 let mut trie = InputTrie::new();
310 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
311 trie.insert(ContentKind::File, "C:\\foo\\bar\\baz", &base_dir)
312 .unwrap();
313 assert_eq!(trie.as_slice().len(), 1);
314 assert_eq!(trie.as_slice()[0].path().to_string(), "C:\\foo\\bar\\baz");
315 assert!(trie.as_slice()[0].guest_path().is_none());
316 }
317
318 #[cfg(unix)]
319 #[test]
320 fn non_empty_trie_unix() {
321 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
322 let base_dir: EvaluationPath = "/base".parse().unwrap();
323 trie.insert(ContentKind::Directory, "/", &base_dir)
324 .unwrap()
325 .unwrap();
326 trie.insert(ContentKind::File, "/foo/bar/foo.txt", &base_dir)
327 .unwrap()
328 .unwrap();
329 trie.insert(ContentKind::File, "/foo/bar/bar.txt", &base_dir)
330 .unwrap()
331 .unwrap();
332 trie.insert(ContentKind::File, "/foo/baz/foo.txt", &base_dir)
333 .unwrap()
334 .unwrap();
335 trie.insert(ContentKind::File, "/foo/baz/bar.txt", &base_dir)
336 .unwrap()
337 .unwrap();
338 trie.insert(ContentKind::File, "/bar/foo/foo.txt", &base_dir)
339 .unwrap()
340 .unwrap();
341 trie.insert(ContentKind::File, "/bar/foo/bar.txt", &base_dir)
342 .unwrap()
343 .unwrap();
344 trie.insert(ContentKind::Directory, "/baz", &base_dir)
345 .unwrap()
346 .unwrap();
347 trie.insert(ContentKind::File, "https://example.com/", &base_dir)
348 .unwrap()
349 .unwrap();
350 trie.insert(
351 ContentKind::File,
352 "https://example.com/foo/bar/foo.txt",
353 &base_dir,
354 )
355 .unwrap()
356 .unwrap();
357 trie.insert(
358 ContentKind::File,
359 "https://example.com/foo/bar/bar.txt",
360 &base_dir,
361 )
362 .unwrap()
363 .unwrap();
364 trie.insert(
365 ContentKind::File,
366 "https://example.com/foo/baz/foo.txt",
367 &base_dir,
368 )
369 .unwrap()
370 .unwrap();
371 trie.insert(
372 ContentKind::File,
373 "https://example.com/foo/baz/bar.txt",
374 &base_dir,
375 )
376 .unwrap()
377 .unwrap();
378 trie.insert(
379 ContentKind::File,
380 "https://example.com/bar/foo/foo.txt",
381 &base_dir,
382 )
383 .unwrap()
384 .unwrap();
385 trie.insert(
386 ContentKind::File,
387 "https://example.com/bar/foo/bar.txt",
388 &base_dir,
389 )
390 .unwrap()
391 .unwrap();
392 trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
393 .unwrap()
394 .unwrap();
395 trie.insert(ContentKind::File, "foo.txt", &base_dir)
396 .unwrap()
397 .unwrap();
398
399 let paths: Vec<_> = trie
404 .as_slice()
405 .iter()
406 .map(|i| {
407 (
408 i.path().to_string(),
409 i.guest_path().expect("should have guest path").as_str(),
410 )
411 })
412 .collect();
413
414 assert_eq!(
415 paths,
416 [
417 ("/".to_string(), "/inputs/0/.root"),
418 ("/foo/bar/foo.txt".to_string(), "/inputs/3/foo.txt"),
419 ("/foo/bar/bar.txt".to_string(), "/inputs/3/bar.txt"),
420 ("/foo/baz/foo.txt".to_string(), "/inputs/6/foo.txt"),
421 ("/foo/baz/bar.txt".to_string(), "/inputs/6/bar.txt"),
422 ("/bar/foo/foo.txt".to_string(), "/inputs/10/foo.txt"),
423 ("/bar/foo/bar.txt".to_string(), "/inputs/10/bar.txt"),
424 ("/baz".to_string(), "/inputs/1/baz"),
425 ("https://example.com/".to_string(), "/inputs/15/.root"),
426 (
427 "https://example.com/foo/bar/foo.txt".to_string(),
428 "/inputs/18/foo.txt"
429 ),
430 (
431 "https://example.com/foo/bar/bar.txt".to_string(),
432 "/inputs/18/bar.txt"
433 ),
434 (
435 "https://example.com/foo/baz/foo.txt".to_string(),
436 "/inputs/21/foo.txt"
437 ),
438 (
439 "https://example.com/foo/baz/bar.txt".to_string(),
440 "/inputs/21/bar.txt"
441 ),
442 (
443 "https://example.com/bar/foo/foo.txt".to_string(),
444 "/inputs/25/foo.txt"
445 ),
446 (
447 "https://example.com/bar/foo/bar.txt".to_string(),
448 "/inputs/25/bar.txt"
449 ),
450 ("https://foo.com/bar".to_string(), "/inputs/28/bar"),
451 ("/base/foo.txt".to_string(), "/inputs/30/foo.txt"),
452 ]
453 );
454 }
455
456 #[cfg(windows)]
457 #[test]
458 fn non_empty_trie_windows() {
459 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
460 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
461 trie.insert(ContentKind::Directory, "C:\\", &base_dir)
462 .unwrap()
463 .unwrap();
464 trie.insert(ContentKind::File, "C:\\foo\\bar\\foo.txt", &base_dir)
465 .unwrap()
466 .unwrap();
467 trie.insert(ContentKind::File, "C:\\foo\\bar\\bar.txt", &base_dir)
468 .unwrap()
469 .unwrap();
470 trie.insert(ContentKind::File, "C:\\foo\\baz\\foo.txt", &base_dir)
471 .unwrap()
472 .unwrap();
473 trie.insert(ContentKind::File, "C:\\foo\\baz\\bar.txt", &base_dir)
474 .unwrap()
475 .unwrap();
476 trie.insert(ContentKind::File, "C:\\bar\\foo\\foo.txt", &base_dir)
477 .unwrap()
478 .unwrap();
479 trie.insert(ContentKind::File, "C:\\bar\\foo\\bar.txt", &base_dir)
480 .unwrap()
481 .unwrap();
482 trie.insert(ContentKind::Directory, "C:\\baz", &base_dir)
483 .unwrap()
484 .unwrap();
485 trie.insert(ContentKind::File, "https://example.com/", &base_dir)
486 .unwrap()
487 .unwrap();
488 trie.insert(
489 ContentKind::File,
490 "https://example.com/foo/bar/foo.txt",
491 &base_dir,
492 )
493 .unwrap()
494 .unwrap();
495 trie.insert(
496 ContentKind::File,
497 "https://example.com/foo/bar/bar.txt",
498 &base_dir,
499 )
500 .unwrap()
501 .unwrap();
502 trie.insert(
503 ContentKind::File,
504 "https://example.com/foo/baz/foo.txt",
505 &base_dir,
506 )
507 .unwrap()
508 .unwrap();
509 trie.insert(
510 ContentKind::File,
511 "https://example.com/foo/baz/bar.txt",
512 &base_dir,
513 )
514 .unwrap()
515 .unwrap();
516 trie.insert(
517 ContentKind::File,
518 "https://example.com/bar/foo/foo.txt",
519 &base_dir,
520 )
521 .unwrap()
522 .unwrap();
523 trie.insert(
524 ContentKind::File,
525 "https://example.com/bar/foo/bar.txt",
526 &base_dir,
527 )
528 .unwrap()
529 .unwrap();
530 trie.insert(ContentKind::File, "https://foo.com/bar", &base_dir)
531 .unwrap()
532 .unwrap();
533 trie.insert(ContentKind::File, "foo.txt", &base_dir)
534 .unwrap()
535 .unwrap();
536
537 let paths: Vec<_> = trie
542 .as_slice()
543 .iter()
544 .map(|i| {
545 (
546 i.path().to_string(),
547 i.guest_path().expect("should have guest path").as_str(),
548 )
549 })
550 .collect();
551
552 assert_eq!(
553 paths,
554 [
555 ("C:\\".to_string(), "/inputs/1/.root"),
556 ("C:\\foo\\bar\\foo.txt".to_string(), "/inputs/4/foo.txt"),
557 ("C:\\foo\\bar\\bar.txt".to_string(), "/inputs/4/bar.txt"),
558 ("C:\\foo\\baz\\foo.txt".to_string(), "/inputs/7/foo.txt"),
559 ("C:\\foo\\baz\\bar.txt".to_string(), "/inputs/7/bar.txt"),
560 ("C:\\bar\\foo\\foo.txt".to_string(), "/inputs/11/foo.txt"),
561 ("C:\\bar\\foo\\bar.txt".to_string(), "/inputs/11/bar.txt"),
562 ("C:\\baz".to_string(), "/inputs/2/baz"),
563 ("https://example.com/".to_string(), "/inputs/16/.root"),
564 (
565 "https://example.com/foo/bar/foo.txt".to_string(),
566 "/inputs/19/foo.txt"
567 ),
568 (
569 "https://example.com/foo/bar/bar.txt".to_string(),
570 "/inputs/19/bar.txt"
571 ),
572 (
573 "https://example.com/foo/baz/foo.txt".to_string(),
574 "/inputs/22/foo.txt"
575 ),
576 (
577 "https://example.com/foo/baz/bar.txt".to_string(),
578 "/inputs/22/bar.txt"
579 ),
580 (
581 "https://example.com/bar/foo/foo.txt".to_string(),
582 "/inputs/26/foo.txt"
583 ),
584 (
585 "https://example.com/bar/foo/bar.txt".to_string(),
586 "/inputs/26/bar.txt"
587 ),
588 ("https://foo.com/bar".to_string(), "/inputs/29/bar"),
589 ("C:\\base\\foo.txt".to_string(), "/inputs/31/foo.txt"),
590 ]
591 );
592 }
593}