Skip to main content

numi_core/
parse_files.rs

1use crate::input_filters::should_ignore_directory_entry;
2use camino::Utf8PathBuf;
3use numi_ir::{EntryKind, Metadata, RawEntry};
4use serde_json::Value;
5use std::{
6    fs, io,
7    path::{Path, PathBuf},
8};
9
/// Failures reported while turning a files input path into raw IR entries.
#[derive(Debug)]
pub enum ParseFilesError {
    /// A directory, or one of the entries inside it, could not be read.
    ReadDirectory {
        /// Location that was being read when the failure happened.
        path: PathBuf,
        /// Underlying I/O failure; it is rendered as part of the `Display` message.
        source: io::Error,
    },
    /// The input path is neither a file nor a directory.
    InvalidPath {
        /// The rejected input path.
        path: PathBuf,
    },
    /// The path is not valid UTF-8.
    InvalidUtf8Path {
        /// The offending path.
        path: PathBuf,
    },
}

impl std::fmt::Display for ParseFilesError {
    /// Writes a one-line, human-readable description that always names the affected path.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every variant carries a `path`, so pull it out once up front.
        let (Self::ReadDirectory { path, .. }
        | Self::InvalidPath { path }
        | Self::InvalidUtf8Path { path }) = self;
        let shown = path.display();

        match self {
            Self::ReadDirectory { source, .. } => write!(
                f,
                "failed to read files input directory {}: {source}",
                shown
            ),
            Self::InvalidPath { .. } => write!(
                f,
                "files input path {} is not a file or directory",
                shown
            ),
            Self::InvalidUtf8Path { .. } => write!(
                f,
                "files input path {} is not valid UTF-8 and cannot be represented in the IR",
                shown
            ),
        }
    }
}

// `source()` keeps its default (`None`): the underlying I/O error is already part of the
// `Display` output for `ReadDirectory`.
impl std::error::Error for ParseFilesError {}
46
47pub fn parse_files(path: &Path) -> Result<Vec<RawEntry>, ParseFilesError> {
48    if path.is_file() {
49        return Ok(vec![parse_single_file_entry(path)?]);
50    }
51
52    if path.is_dir() {
53        let mut entries = Vec::new();
54        collect_files(path, path, &mut entries)?;
55        entries.sort_by(|left, right| left.path.cmp(&right.path));
56        return Ok(entries);
57    }
58
59    Err(ParseFilesError::InvalidPath {
60        path: path.to_path_buf(),
61    })
62}
63
64fn collect_files(
65    root: &Path,
66    directory: &Path,
67    entries: &mut Vec<RawEntry>,
68) -> Result<(), ParseFilesError> {
69    let read_dir = fs::read_dir(directory).map_err(|source| ParseFilesError::ReadDirectory {
70        path: directory.to_path_buf(),
71        source,
72    })?;
73
74    for entry in read_dir {
75        let entry = entry.map_err(|source| ParseFilesError::ReadDirectory {
76            path: directory.to_path_buf(),
77            source,
78        })?;
79        let path = entry.path();
80
81        if should_ignore_directory_entry(&path) {
82            continue;
83        }
84
85        let file_type = entry
86            .file_type()
87            .map_err(|source| ParseFilesError::ReadDirectory {
88                path: path.clone(),
89                source,
90            })?;
91
92        if file_type.is_dir() {
93            collect_files(root, &path, entries)?;
94            continue;
95        }
96
97        if !file_type.is_file() {
98            continue;
99        }
100
101        entries.push(parse_file_entry(root, &path)?);
102    }
103
104    Ok(())
105}
106
107fn parse_file_entry(root: &Path, file_path: &Path) -> Result<RawEntry, ParseFilesError> {
108    let relative = file_path
109        .strip_prefix(root)
110        .expect("files should be discovered under input root");
111    let relative_path = relative
112        .iter()
113        .map(|part| {
114            part.to_str()
115                .ok_or_else(|| ParseFilesError::InvalidUtf8Path {
116                    path: file_path.to_path_buf(),
117                })
118                .map(ToOwned::to_owned)
119        })
120        .collect::<Result<Vec<_>, _>>()?
121        .join("/");
122    let file_name = file_path
123        .file_name()
124        .and_then(|name| name.to_str())
125        .ok_or_else(|| ParseFilesError::InvalidUtf8Path {
126            path: file_path.to_path_buf(),
127        })?
128        .to_owned();
129    let file_stem = file_path
130        .file_stem()
131        .and_then(|stem| stem.to_str())
132        .ok_or_else(|| ParseFilesError::InvalidUtf8Path {
133            path: file_path.to_path_buf(),
134        })?
135        .to_owned();
136    let path_extension = file_path
137        .extension()
138        .and_then(|extension| extension.to_str())
139        .unwrap_or("")
140        .to_owned();
141
142    Ok(RawEntry {
143        path: relative_path.clone(),
144        source_path: Utf8PathBuf::from_path_buf(file_path.to_path_buf())
145            .map_err(|path| ParseFilesError::InvalidUtf8Path { path })?,
146        kind: EntryKind::Data,
147        properties: Metadata::from([
148            ("relativePath".to_string(), Value::String(relative_path)),
149            ("fileName".to_string(), Value::String(file_name)),
150            ("fileStem".to_string(), Value::String(file_stem)),
151            ("pathExtension".to_string(), Value::String(path_extension)),
152        ]),
153    })
154}
155
156fn parse_single_file_entry(file_path: &Path) -> Result<RawEntry, ParseFilesError> {
157    let relative_path = file_path
158        .file_name()
159        .and_then(|name| name.to_str())
160        .ok_or_else(|| ParseFilesError::InvalidUtf8Path {
161            path: file_path.to_path_buf(),
162        })?
163        .to_owned();
164    let file_name = relative_path.clone();
165    let file_stem = file_path
166        .file_stem()
167        .and_then(|stem| stem.to_str())
168        .ok_or_else(|| ParseFilesError::InvalidUtf8Path {
169            path: file_path.to_path_buf(),
170        })?
171        .to_owned();
172    let path_extension = file_path
173        .extension()
174        .and_then(|extension| extension.to_str())
175        .unwrap_or("")
176        .to_owned();
177
178    Ok(RawEntry {
179        path: relative_path.clone(),
180        source_path: Utf8PathBuf::from_path_buf(file_path.to_path_buf())
181            .map_err(|path| ParseFilesError::InvalidUtf8Path { path })?,
182        kind: EntryKind::Data,
183        properties: Metadata::from([
184            ("relativePath".to_string(), Value::String(relative_path)),
185            ("fileName".to_string(), Value::String(file_name)),
186            ("fileStem".to_string(), Value::String(file_stem)),
187            ("pathExtension".to_string(), Value::String(path_extension)),
188        ]),
189    })
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Scratch directory that is deleted when the guard goes out of scope, so a failing
    /// assertion does not leave fixtures behind in the system temp directory.
    struct TempDir(PathBuf);

    impl TempDir {
        /// Location of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempDir {
        fn drop(&mut self) {
            // Best effort only: panicking here while a failed assertion is already
            // unwinding would abort the whole test process.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Creates a scratch directory whose name combines the test name, the process id and
    /// the current time in nanoseconds.
    fn make_temp_dir(test_name: &str) -> TempDir {
        let unique = format!(
            "numi-{test_name}-{}-{}",
            std::process::id(),
            SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .expect("clock should be after epoch")
                .as_nanos()
        );
        let path = std::env::temp_dir().join(unique);
        fs::create_dir_all(&path).expect("temp dir should be created");
        TempDir(path)
    }

    /// A file input yields exactly one entry that is named after the file itself.
    #[test]
    fn parses_single_file_input() {
        let temp_dir = make_temp_dir("parse-files-single");
        let file_path = temp_dir.path().join("Single.txt");
        fs::write(&file_path, "binary").expect("file should be written");

        let entries = parse_files(&file_path).expect("single file input should parse");

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path, "Single.txt");
        assert_eq!(entries[0].kind, EntryKind::Data);
        assert_eq!(
            entries[0].source_path,
            Utf8PathBuf::from_path_buf(file_path).expect("utf8 path")
        );
        assert_eq!(
            entries[0].properties["relativePath"],
            Value::String("Single.txt".to_string())
        );
        assert_eq!(
            entries[0].properties["fileName"],
            Value::String("Single.txt".to_string())
        );
        assert_eq!(
            entries[0].properties["fileStem"],
            Value::String("Single".to_string())
        );
        assert_eq!(
            entries[0].properties["pathExtension"],
            Value::String("txt".to_string())
        );
    }

    /// A directory input is walked recursively, sorted by relative path, and skips the
    /// dotfile and the hidden directory.
    #[test]
    fn parses_recursive_directory_input() {
        let temp_dir = make_temp_dir("parse-files-recursive");
        let assets_dir = temp_dir.path().join("Resources").join("Assets");
        fs::create_dir_all(assets_dir.join("Nested")).expect("directories should be created");
        let first_file = assets_dir.join("zeta.txt");
        let second_file = assets_dir.join("Nested").join("alpha.json");
        let ignored = assets_dir.join(".DS_Store");
        let hidden_dir = assets_dir.join(".Snapshots");
        fs::write(&first_file, "one").expect("first file should be written");
        fs::write(&second_file, "two").expect("second file should be written");
        fs::create_dir_all(&hidden_dir).expect("hidden directory should be created");
        fs::write(hidden_dir.join("preview.txt"), "hidden").expect("hidden file should be written");
        fs::write(&ignored, "ignored").expect("noise file should be written");

        let entries = parse_files(&assets_dir).expect("directory input should parse");

        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].path, "Nested/alpha.json");
        assert_eq!(
            entries[0].properties["relativePath"],
            Value::String("Nested/alpha.json".to_string())
        );
        assert_eq!(
            entries[0].properties["fileName"],
            Value::String("alpha.json".to_string())
        );
        assert_eq!(
            entries[0].properties["fileStem"],
            Value::String("alpha".to_string())
        );
        assert_eq!(
            entries[0].properties["pathExtension"],
            Value::String("json".to_string())
        );
        assert_eq!(entries[1].path, "zeta.txt");
        assert_eq!(
            entries[1].properties["fileName"],
            Value::String("zeta.txt".to_string())
        );
        assert_eq!(
            entries[1].properties["pathExtension"],
            Value::String("txt".to_string())
        );
        assert!(entries.iter().all(|entry| entry.path != ".DS_Store"));
        assert!(
            entries
                .iter()
                .all(|entry| !entry.path.starts_with(".Snapshots/"))
        );
    }

    /// A directory that only contains ignored content parses successfully to no entries.
    #[test]
    fn hidden_only_directory_is_treated_as_empty() {
        let temp_dir = make_temp_dir("parse-files-hidden-only");
        let files_dir = temp_dir.path().join("Resources").join("Assets");
        let hidden_dir = files_dir.join(".Snapshots");
        fs::create_dir_all(&hidden_dir).expect("hidden directory should be created");
        fs::write(files_dir.join(".DS_Store"), "ignored").expect("dotfile should be written");
        fs::write(hidden_dir.join("preview.txt"), "hidden").expect("hidden file should be written");

        let entries = parse_files(&files_dir).expect("hidden-only directory should parse");

        assert!(
            entries.is_empty(),
            "hidden-only folders should not emit entries"
        );
    }
}
318}