1use std::collections::HashMap;
7use std::path::Component;
8use std::path::PathBuf;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::bail;
13use url::Url;
14
15use crate::GuestPath;
16use crate::Input;
17use crate::InputKind;
18use crate::eval::ROOT_NAME;
19use crate::path::EvaluationPath;
20
21#[derive(Debug)]
23struct InputTrieNode {
24 children: HashMap<String, Self>,
26 id: usize,
30 index: Option<usize>,
34}
35
36impl InputTrieNode {
37 fn new(id: usize) -> Self {
39 Self {
40 children: Default::default(),
41 id,
42 index: None,
43 }
44 }
45}
46
47#[derive(Debug)]
53pub struct InputTrie {
54 guest_inputs_dir: Option<&'static str>,
58 urls: HashMap<String, InputTrieNode>,
62 paths: HashMap<String, InputTrieNode>,
66 inputs: Vec<Input>,
68 next_id: usize,
70}
71
72impl InputTrie {
73 pub fn new() -> Self {
77 Self {
78 guest_inputs_dir: None,
79 urls: Default::default(),
80 paths: Default::default(),
81 inputs: Default::default(),
82 next_id: 1,
84 }
85 }
86
87 pub fn new_with_guest_dir(guest_inputs_dir: &'static str) -> Self {
98 assert!(guest_inputs_dir.ends_with('/'));
99
100 let mut trie = Self::new();
101 trie.guest_inputs_dir = Some(guest_inputs_dir);
102 trie
103 }
104
105 pub fn insert(
118 &mut self,
119 kind: InputKind,
120 path: &str,
121 base_dir: &EvaluationPath,
122 ) -> Result<Option<usize>> {
123 let path = base_dir.join(path)?;
124 match path {
125 EvaluationPath::Local(path) => {
126 if let Some(dir) = self.guest_inputs_dir
128 && path.starts_with(dir)
129 {
130 return Ok(None);
131 }
132
133 self.insert_path(kind, path).map(Some)
134 }
135 EvaluationPath::Remote(url) => Ok(Some(self.insert_url(kind, url))),
136 }
137 }
138
139 pub fn as_slice(&self) -> &[Input] {
141 &self.inputs
142 }
143
144 pub fn as_slice_mut(&mut self) -> &mut [Input] {
146 &mut self.inputs
147 }
148
149 fn insert_path(&mut self, kind: InputKind, path: PathBuf) -> Result<usize> {
151 let mut components = path.components();
152
153 let component = components
154 .next()
155 .context("input path cannot be empty")?
156 .as_os_str()
157 .to_str()
158 .with_context(|| format!("input path `{path}` is not UTF-8", path = path.display()))?;
159
160 let mut parent_id = 0;
161 let mut node = self.paths.entry(component.to_string()).or_insert_with(|| {
162 let node = InputTrieNode::new(self.next_id);
163 self.next_id += 1;
164 node
165 });
166
167 let mut last_component = None;
168 for component in components {
169 match component {
170 Component::CurDir | Component::ParentDir => {
171 bail!(
172 "input path `{path}` may not contain `.` or `..`",
173 path = path.display()
174 );
175 }
176 _ => {}
177 }
178
179 let component = component.as_os_str().to_str().with_context(|| {
180 format!("input path `{path}` is not UTF-8", path = path.display())
181 })?;
182
183 parent_id = node.id;
184
185 node = node
186 .children
187 .entry(component.to_string())
188 .or_insert_with(|| {
189 let node = InputTrieNode::new(self.next_id);
190 self.next_id += 1;
191 node
192 });
193
194 last_component = Some(component);
195 }
196
197 if let Some(index) = node.index {
199 return Ok(index);
200 }
201
202 let guest_path = self.guest_inputs_dir.map(|d| {
203 GuestPath::new(format!(
204 "{d}{parent_id}/{last}",
205 last = if path.parent().is_none() {
208 ROOT_NAME
209 } else {
210 last_component.unwrap_or(ROOT_NAME)
211 }
212 ))
213 });
214
215 let index = self.inputs.len();
216 self.inputs
217 .push(Input::new(kind, EvaluationPath::Local(path), guest_path));
218 node.index = Some(index);
219 Ok(index)
220 }
221
222 fn insert_url(&mut self, kind: InputKind, url: Url) -> usize {
224 let mut node = self
226 .urls
227 .entry(url.scheme().to_string())
228 .or_insert_with(|| {
229 let node = InputTrieNode::new(self.next_id);
230 self.next_id += 1;
231 node
232 });
233
234 let mut parent_id = node.id;
236 node = node
237 .children
238 .entry(url.authority().to_string())
239 .or_insert_with(|| {
240 let node = InputTrieNode::new(self.next_id);
241 self.next_id += 1;
242 node
243 });
244
245 let mut last_segment = None;
247 if let Some(segments) = url.path_segments() {
248 for segment in segments {
249 parent_id = node.id;
250 node = node.children.entry(segment.to_string()).or_insert_with(|| {
251 let node = InputTrieNode::new(self.next_id);
252 self.next_id += 1;
253 node
254 });
255
256 if !segment.is_empty() {
257 last_segment = Some(segment);
258 }
259 }
260 }
261
262 if let Some(index) = node.index {
264 return index;
265 }
266
267 let guest_path = self.guest_inputs_dir.as_ref().map(|d| {
268 GuestPath::new(format!(
269 "{d}{parent_id}/{last}",
270 last = last_segment.unwrap_or(ROOT_NAME)
271 ))
272 });
273
274 let index = self.inputs.len();
275 self.inputs
276 .push(Input::new(kind, EvaluationPath::Remote(url), guest_path));
277 node.index = Some(index);
278 index
279 }
280}
281
282#[cfg(test)]
283mod test {
284 use pretty_assertions::assert_eq;
285
286 use super::*;
287
288 #[test]
289 fn empty_trie() {
290 let empty = InputTrie::new();
291 assert!(empty.as_slice().is_empty());
292 }
293
294 #[cfg(unix)]
295 #[test]
296 fn unmapped_inputs_unix() {
297 let mut trie = InputTrie::new();
298 let base_dir: EvaluationPath = "/base".parse().unwrap();
299 trie.insert(InputKind::File, "/foo/bar/baz", &base_dir)
300 .unwrap();
301 assert_eq!(trie.as_slice().len(), 1);
302 assert_eq!(trie.as_slice()[0].path().to_str(), Some("/foo/bar/baz"));
303 assert!(trie.as_slice()[0].guest_path().is_none());
304 }
305
306 #[cfg(windows)]
307 #[test]
308 fn unmapped_inputs_windows() {
309 let mut trie = InputTrie::new();
310 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
311 trie.insert(InputKind::File, "C:\\foo\\bar\\baz", &base_dir)
312 .unwrap();
313 assert_eq!(trie.as_slice().len(), 1);
314 assert_eq!(
315 trie.as_slice()[0].path().to_str(),
316 Some("C:\\foo\\bar\\baz")
317 );
318 assert!(trie.as_slice()[0].guest_path().is_none());
319 }
320
321 #[cfg(unix)]
322 #[test]
323 fn non_empty_trie_unix() {
324 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
325 let base_dir: EvaluationPath = "/base".parse().unwrap();
326 trie.insert(InputKind::Directory, "/", &base_dir)
327 .unwrap()
328 .unwrap();
329 trie.insert(InputKind::File, "/foo/bar/foo.txt", &base_dir)
330 .unwrap()
331 .unwrap();
332 trie.insert(InputKind::File, "/foo/bar/bar.txt", &base_dir)
333 .unwrap()
334 .unwrap();
335 trie.insert(InputKind::File, "/foo/baz/foo.txt", &base_dir)
336 .unwrap()
337 .unwrap();
338 trie.insert(InputKind::File, "/foo/baz/bar.txt", &base_dir)
339 .unwrap()
340 .unwrap();
341 trie.insert(InputKind::File, "/bar/foo/foo.txt", &base_dir)
342 .unwrap()
343 .unwrap();
344 trie.insert(InputKind::File, "/bar/foo/bar.txt", &base_dir)
345 .unwrap()
346 .unwrap();
347 trie.insert(InputKind::Directory, "/baz", &base_dir)
348 .unwrap()
349 .unwrap();
350 trie.insert(InputKind::File, "https://example.com/", &base_dir)
351 .unwrap()
352 .unwrap();
353 trie.insert(
354 InputKind::File,
355 "https://example.com/foo/bar/foo.txt",
356 &base_dir,
357 )
358 .unwrap()
359 .unwrap();
360 trie.insert(
361 InputKind::File,
362 "https://example.com/foo/bar/bar.txt",
363 &base_dir,
364 )
365 .unwrap()
366 .unwrap();
367 trie.insert(
368 InputKind::File,
369 "https://example.com/foo/baz/foo.txt",
370 &base_dir,
371 )
372 .unwrap()
373 .unwrap();
374 trie.insert(
375 InputKind::File,
376 "https://example.com/foo/baz/bar.txt",
377 &base_dir,
378 )
379 .unwrap()
380 .unwrap();
381 trie.insert(
382 InputKind::File,
383 "https://example.com/bar/foo/foo.txt",
384 &base_dir,
385 )
386 .unwrap()
387 .unwrap();
388 trie.insert(
389 InputKind::File,
390 "https://example.com/bar/foo/bar.txt",
391 &base_dir,
392 )
393 .unwrap()
394 .unwrap();
395 trie.insert(InputKind::File, "https://foo.com/bar", &base_dir)
396 .unwrap()
397 .unwrap();
398 trie.insert(InputKind::File, "foo.txt", &base_dir)
399 .unwrap()
400 .unwrap();
401
402 let paths: Vec<_> = trie
407 .as_slice()
408 .iter()
409 .map(|i| {
410 (
411 i.path().to_str().expect("should be a string"),
412 i.guest_path().expect("should have guest path").as_str(),
413 )
414 })
415 .collect();
416
417 assert_eq!(
418 paths,
419 [
420 ("/", "/inputs/0/.root"),
421 ("/foo/bar/foo.txt", "/inputs/3/foo.txt"),
422 ("/foo/bar/bar.txt", "/inputs/3/bar.txt"),
423 ("/foo/baz/foo.txt", "/inputs/6/foo.txt"),
424 ("/foo/baz/bar.txt", "/inputs/6/bar.txt"),
425 ("/bar/foo/foo.txt", "/inputs/10/foo.txt"),
426 ("/bar/foo/bar.txt", "/inputs/10/bar.txt"),
427 ("/baz", "/inputs/1/baz"),
428 ("https://example.com/", "/inputs/15/.root"),
429 ("https://example.com/foo/bar/foo.txt", "/inputs/18/foo.txt"),
430 ("https://example.com/foo/bar/bar.txt", "/inputs/18/bar.txt"),
431 ("https://example.com/foo/baz/foo.txt", "/inputs/21/foo.txt"),
432 ("https://example.com/foo/baz/bar.txt", "/inputs/21/bar.txt"),
433 ("https://example.com/bar/foo/foo.txt", "/inputs/25/foo.txt"),
434 ("https://example.com/bar/foo/bar.txt", "/inputs/25/bar.txt"),
435 ("https://foo.com/bar", "/inputs/28/bar"),
436 ("/base/foo.txt", "/inputs/30/foo.txt"),
437 ]
438 );
439 }
440
441 #[cfg(windows)]
442 #[test]
443 fn non_empty_trie_windows() {
444 let mut trie = InputTrie::new_with_guest_dir("/inputs/");
445 let base_dir: EvaluationPath = "C:\\base".parse().unwrap();
446 trie.insert(InputKind::Directory, "C:\\", &base_dir)
447 .unwrap()
448 .unwrap();
449 trie.insert(InputKind::File, "C:\\foo\\bar\\foo.txt", &base_dir)
450 .unwrap()
451 .unwrap();
452 trie.insert(InputKind::File, "C:\\foo\\bar\\bar.txt", &base_dir)
453 .unwrap()
454 .unwrap();
455 trie.insert(InputKind::File, "C:\\foo\\baz\\foo.txt", &base_dir)
456 .unwrap()
457 .unwrap();
458 trie.insert(InputKind::File, "C:\\foo\\baz\\bar.txt", &base_dir)
459 .unwrap()
460 .unwrap();
461 trie.insert(InputKind::File, "C:\\bar\\foo\\foo.txt", &base_dir)
462 .unwrap()
463 .unwrap();
464 trie.insert(InputKind::File, "C:\\bar\\foo\\bar.txt", &base_dir)
465 .unwrap()
466 .unwrap();
467 trie.insert(InputKind::Directory, "C:\\baz", &base_dir)
468 .unwrap()
469 .unwrap();
470 trie.insert(InputKind::File, "https://example.com/", &base_dir)
471 .unwrap()
472 .unwrap();
473 trie.insert(
474 InputKind::File,
475 "https://example.com/foo/bar/foo.txt",
476 &base_dir,
477 )
478 .unwrap()
479 .unwrap();
480 trie.insert(
481 InputKind::File,
482 "https://example.com/foo/bar/bar.txt",
483 &base_dir,
484 )
485 .unwrap()
486 .unwrap();
487 trie.insert(
488 InputKind::File,
489 "https://example.com/foo/baz/foo.txt",
490 &base_dir,
491 )
492 .unwrap()
493 .unwrap();
494 trie.insert(
495 InputKind::File,
496 "https://example.com/foo/baz/bar.txt",
497 &base_dir,
498 )
499 .unwrap()
500 .unwrap();
501 trie.insert(
502 InputKind::File,
503 "https://example.com/bar/foo/foo.txt",
504 &base_dir,
505 )
506 .unwrap()
507 .unwrap();
508 trie.insert(
509 InputKind::File,
510 "https://example.com/bar/foo/bar.txt",
511 &base_dir,
512 )
513 .unwrap()
514 .unwrap();
515 trie.insert(InputKind::File, "https://foo.com/bar", &base_dir)
516 .unwrap()
517 .unwrap();
518 trie.insert(InputKind::File, "foo.txt", &base_dir)
519 .unwrap()
520 .unwrap();
521
522 let paths: Vec<_> = trie
527 .as_slice()
528 .iter()
529 .map(|i| {
530 (
531 i.path().to_str().expect("should be a string"),
532 i.guest_path().expect("should have guest path").as_str(),
533 )
534 })
535 .collect();
536
537 assert_eq!(
538 paths,
539 [
540 ("C:\\", "/inputs/1/.root"),
541 ("C:\\foo\\bar\\foo.txt", "/inputs/4/foo.txt"),
542 ("C:\\foo\\bar\\bar.txt", "/inputs/4/bar.txt"),
543 ("C:\\foo\\baz\\foo.txt", "/inputs/7/foo.txt"),
544 ("C:\\foo\\baz\\bar.txt", "/inputs/7/bar.txt"),
545 ("C:\\bar\\foo\\foo.txt", "/inputs/11/foo.txt"),
546 ("C:\\bar\\foo\\bar.txt", "/inputs/11/bar.txt"),
547 ("C:\\baz", "/inputs/2/baz"),
548 ("https://example.com/", "/inputs/16/.root"),
549 ("https://example.com/foo/bar/foo.txt", "/inputs/19/foo.txt"),
550 ("https://example.com/foo/bar/bar.txt", "/inputs/19/bar.txt"),
551 ("https://example.com/foo/baz/foo.txt", "/inputs/22/foo.txt"),
552 ("https://example.com/foo/baz/bar.txt", "/inputs/22/bar.txt"),
553 ("https://example.com/bar/foo/foo.txt", "/inputs/26/foo.txt"),
554 ("https://example.com/bar/foo/bar.txt", "/inputs/26/bar.txt"),
555 ("https://foo.com/bar", "/inputs/29/bar"),
556 ("C:\\base\\foo.txt", "/inputs/31/foo.txt"),
557 ]
558 );
559 }
560}