flatlake/dredger/
mod.rs

1use std::path::PathBuf;
2
3use wax::{Glob, LinkBehavior, WalkBehavior, WalkEntry};
4
5use crate::{LakeContext, Tributary, Watershed};
6
7impl Tributary {
8    pub fn new_relative_to(
9        file_path: PathBuf,
10        root_path: PathBuf,
11        collection_id: usize,
12        input_id: usize,
13        collection_name: String,
14    ) -> Self {
15        Self {
16            collection_id,
17            input_id,
18            collection_name,
19            file_path: Some(file_path),
20            root_path: Some(root_path),
21            output_url: None,
22        }
23    }
24}
25
26pub async fn walk_for_files(ctx: &LakeContext) -> Vec<Tributary> {
27    let log = &ctx.logger;
28    let mut tributaries = vec![];
29
30    log.status("[Walking collections]");
31
32    for (collection_id, collection) in ctx.params.collections.iter().enumerate() {
33        for (input_id, input) in collection.inputs.iter().enumerate() {
34            let collection_source: PathBuf = ctx.params.source.join(&input.path);
35            log.v_info(format!(
36                "Looking for {} in {:?}",
37                input.glob, collection_source
38            ));
39
40            if let Ok(glob) = Glob::new(&input.glob) {
41                let collection_files = glob
42                    .walk_with_behavior(
43                        &collection_source,
44                        WalkBehavior {
45                            depth: usize::MAX,
46                            link: LinkBehavior::ReadTarget,
47                        },
48                    )
49                    .filter_map(Result::ok)
50                    .map(WalkEntry::into_path)
51                    .map(|file_path| {
52                        Tributary::new_relative_to(
53                            file_path,
54                            collection_source.clone(),
55                            collection_id,
56                            input_id,
57                            collection.output_key.clone(),
58                        )
59                    });
60
61                tributaries.extend(collection_files);
62            } else {
63                log.error(format!(
64                    "Error: Provided glob \"{}\" did not parse as a valid glob.",
65                    input.glob
66                ));
67                // TODO: Bubble this error back to the Node API if applicable
68                std::process::exit(1);
69            }
70        }
71    }
72
73    tributaries
74}