Skip to main content

common/
filegen.rs

1use anyhow::{anyhow, Context};
2use async_recursion::async_recursion;
3use tracing::instrument;
4
5use crate::progress;
6
7/// Error type for filegen operations that preserves operation summary even on failure.
8///
9/// # Logging Convention
10/// When logging this error, use `{:#}` or `{:?}` format to preserve the error chain:
11/// ```ignore
12/// tracing::error!("operation failed: {:#}", &error); // ✅ Shows full chain
13/// tracing::error!("operation failed: {:?}", &error); // ✅ Shows full chain
14/// ```
15/// The Display implementation also shows the full chain, but workspace linting enforces `{:#}`
16/// for consistency.
17#[derive(Debug, thiserror::Error)]
18#[error("{source:#}")]
19pub struct Error {
20    #[source]
21    pub source: anyhow::Error,
22    pub summary: Summary,
23}
24
25impl Error {
26    #[must_use]
27    pub fn new(source: anyhow::Error, summary: Summary) -> Self {
28        Error { source, summary }
29    }
30}
31
32#[derive(Copy, Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
33pub struct Summary {
34    pub files_created: usize,
35    pub directories_created: usize,
36    pub bytes_written: u64,
37}
38
39impl std::ops::Add for Summary {
40    type Output = Self;
41    fn add(self, other: Self) -> Self {
42        Self {
43            files_created: self.files_created + other.files_created,
44            directories_created: self.directories_created + other.directories_created,
45            bytes_written: self.bytes_written + other.bytes_written,
46        }
47    }
48}
49
50impl std::fmt::Display for Summary {
51    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
52        write!(
53            f,
54            "files created: {}\n\
55            directories created: {}\n\
56            bytes written: {}",
57            self.files_created,
58            self.directories_created,
59            bytesize::ByteSize(self.bytes_written)
60        )
61    }
62}
63
/// Parameters controlling a file generation run.
#[derive(Debug, Clone)]
pub struct FileGenConfig {
    /// Directory under which everything is generated.
    pub root: std::path::PathBuf,
    /// Number of subdirectories to create at each nesting level; the first entry
    /// applies directly under `root`.
    pub dirwidth: Vec<usize>,
    /// How many files to generate in each directory that receives files.
    pub numfiles: usize,
    /// Size of every generated file, in bytes.
    pub filesize: usize,
    /// Size of the buffer used when writing a file, in bytes.
    pub writebuf: usize,
    /// Chunk size used for I/O throttling.
    pub chunk_size: u64,
    /// When `true`, files are generated in leaf directories only.
    pub leaf_files: bool,
}
82
83impl FileGenConfig {
84    /// Create a new file generation configuration
85    pub fn new(
86        root: impl Into<std::path::PathBuf>,
87        dirwidth: Vec<usize>,
88        numfiles: usize,
89        filesize: usize,
90    ) -> Self {
91        Self {
92            root: root.into(),
93            dirwidth,
94            numfiles,
95            filesize,
96            writebuf: 1024 * 1024, // 1MB default
97            chunk_size: 0,
98            leaf_files: false,
99        }
100    }
101}
102
103#[instrument(skip(prog_track))]
104pub async fn write_file(
105    prog_track: &'static progress::Progress,
106    path: std::path::PathBuf,
107    mut filesize: usize,
108    bufsize: usize,
109    chunk_size: u64,
110) -> Result<Summary, Error> {
111    use tokio::io::AsyncWriteExt;
112    let _permit = throttle::open_file_permit().await;
113    throttle::get_file_iops_tokens(chunk_size, filesize as u64).await;
114    let _ops_guard = prog_track.ops.guard();
115    let original_filesize = filesize;
116    let mut bytes = vec![0u8; bufsize];
117    let mut file = tokio::fs::OpenOptions::new()
118        .write(true)
119        .create(true)
120        .truncate(false)
121        .open(&path)
122        .await
123        .with_context(|| format!("Error opening {:?}", &path))
124        .map_err(|err| Error::new(err, Default::default()))?;
125    while filesize > 0 {
126        {
127            // make sure rng falls out of scope before await
128            rand::fill(&mut bytes[..]);
129        }
130        let writesize = std::cmp::min(filesize, bufsize);
131        file.write_all(&bytes[..writesize])
132            .await
133            .with_context(|| format!("Error writing to {:?}", &path))
134            .map_err(|err| Error::new(err, Default::default()))?;
135        filesize -= writesize;
136        prog_track.bytes_copied.add(writesize as u64);
137    }
138    prog_track.files_copied.inc();
139    Ok(Summary {
140        files_created: 1,
141        bytes_written: original_filesize as u64,
142        ..Default::default()
143    })
144}
145
146#[async_recursion]
147#[instrument(skip(prog_track))]
148pub async fn filegen(
149    prog_track: &'static progress::Progress,
150    config: &FileGenConfig,
151) -> Result<Summary, Error> {
152    let FileGenConfig {
153        root,
154        dirwidth,
155        numfiles,
156        filesize,
157        writebuf,
158        chunk_size,
159        leaf_files,
160    } = config;
161    let numdirs = *dirwidth.first().unwrap_or(&0);
162    let mut join_set = tokio::task::JoinSet::new();
163    // generate directories and recurse into them
164    for i in 0..numdirs {
165        let path = root.join(format!("dir{i}"));
166        let next_dirwidth = dirwidth[1..].to_vec();
167        let recurse_config = FileGenConfig {
168            root: path.clone(),
169            dirwidth: next_dirwidth,
170            numfiles: *numfiles,
171            filesize: *filesize,
172            writebuf: *writebuf,
173            chunk_size: *chunk_size,
174            leaf_files: *leaf_files,
175        };
176        let recurse = || async move {
177            tokio::fs::create_dir(&path)
178                .await
179                .with_context(|| format!("Error creating directory {:?}", &path))
180                .map_err(|err| Error::new(err, Default::default()))?;
181            prog_track.directories_created.inc();
182            let dir_summary = Summary {
183                directories_created: 1,
184                ..Default::default()
185            };
186            let recurse_summary = filegen(prog_track, &recurse_config).await?;
187            Ok(dir_summary + recurse_summary)
188        };
189        join_set.spawn(recurse());
190    }
191    // generate files (only if we're not in leaf_files mode, or if we are a leaf directory)
192    // a directory is a leaf when dirwidth is empty (no more subdirectories to create)
193    let is_leaf = dirwidth.is_empty();
194    let should_generate_files = !leaf_files || is_leaf;
195    if should_generate_files {
196        for i in 0..*numfiles {
197            // it's better to await the token here so that we throttle how many tasks we spawn. the
198            // ops-throttle will never cause a deadlock (unlike max-open-files limit) so it's safe to
199            // do here.
200            throttle::get_ops_token().await;
201            let path = root.join(format!("file{i}"));
202            join_set.spawn(write_file(
203                prog_track,
204                path,
205                *filesize,
206                *writebuf,
207                *chunk_size,
208            ));
209        }
210    }
211    let mut success = true;
212    let mut last_error: Option<anyhow::Error> = None;
213    let mut filegen_summary = Summary::default();
214    while let Some(res) = join_set.join_next().await {
215        match res.map_err(|err| Error::new(err.into(), Default::default()))? {
216            Ok(summary) => filegen_summary = filegen_summary + summary,
217            Err(error) => {
218                tracing::error!("filegen: {:?} failed with: {:#}", root, &error);
219                filegen_summary = filegen_summary + error.summary;
220                if last_error.is_none() {
221                    last_error = Some(error.source);
222                }
223                success = false;
224            }
225        }
226    }
227    if !success {
228        let error = if let Some(error) = last_error {
229            error.context(format!("filegen: {:?} failed!", &root))
230        } else {
231            anyhow!("filegen: {:?} failed!", &root)
232        };
233        return Err(Error::new(error, filegen_summary));
234    }
235    Ok(filegen_summary)
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testutils;
    use std::os::unix::fs::PermissionsExt;
    use tracing_test::traced_test;

    // `filegen` requires a `'static` progress tracker, so all tests share this one
    static PROGRESS: std::sync::LazyLock<progress::Progress> =
        std::sync::LazyLock::new(progress::Progress::new);

    /// Builds a configuration rooted at `root` with `chunk_size` set to 0.
    fn test_config(
        root: &std::path::Path,
        dirwidth: &[usize],
        numfiles: usize,
        filesize: usize,
        writebuf: usize,
        leaf_files: bool,
    ) -> FileGenConfig {
        FileGenConfig {
            root: root.to_path_buf(),
            dirwidth: dirwidth.to_vec(),
            numfiles,
            filesize,
            writebuf,
            chunk_size: 0,
            leaf_files,
        }
    }

    #[tokio::test]
    #[traced_test]
    async fn test_basic_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate 2 subdirectories with 3 files per directory (including root)
        let config = test_config(test_path, &[2], 3, 100, 50, false);
        let summary = filegen(&PROGRESS, &config).await?;
        // verify summary
        // files: 3 (in root) + 3 (in dir0) + 3 (in dir1) = 9 files
        // directories: 2 (dir0, dir1)
        // bytes: 100 bytes × 9 files = 900 bytes
        assert_eq!(summary.files_created, 9);
        assert_eq!(summary.directories_created, 2);
        assert_eq!(summary.bytes_written, 900);
        // verify files were actually created
        assert!(test_path.join("file0").exists()); // root level files
        for dir in ["dir0", "dir1"] {
            for file in ["file0", "file1", "file2"] {
                assert!(test_path.join(dir).join(file).exists());
            }
        }
        // verify file sizes
        let metadata = tokio::fs::metadata(test_path.join("dir0").join("file0")).await?;
        assert_eq!(metadata.len(), 100);
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_nested_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate nested structure: 2 top-level dirs, each with 3 subdirs, 4 files per dir, 50 bytes each
        let config = test_config(test_path, &[2, 3], 4, 50, 25, false);
        let summary = filegen(&PROGRESS, &config).await?;
        // calculate expected values:
        // directories: 2 top-level + (2 × 3) subdirs = 8 total
        // files: 4 (in root) + 4×2 (in dir0, dir1) + 4×2×3 (in all leaf dirs) = 4 + 8 + 24 = 36 files
        // bytes: 50 bytes × 36 files = 1800 bytes
        assert_eq!(summary.files_created, 36);
        assert_eq!(summary.directories_created, 8);
        assert_eq!(summary.bytes_written, 1800);
        // spot check some files exist
        assert!(test_path.join("file0").exists()); // root files
        assert!(test_path.join("dir0").join("file0").exists()); // top-level dir files
        assert!(test_path.join("dir0").join("dir0").join("file0").exists());
        assert!(test_path.join("dir0").join("dir2").join("file3").exists());
        assert!(test_path.join("dir1").join("dir1").join("file2").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_deeply_nested_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate 3 levels: 2,2,2 with 2 files each, 10 bytes per file
        let config = test_config(test_path, &[2, 2, 2], 2, 10, 10, false);
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 2 + (2×2) + (2×2×2) = 2 + 4 + 8 = 14 dirs
        // files: 2 (root) + 2×2 (level 1) + 2×2×2 (level 2) + 2×2×2×2 (level 3) = 2 + 4 + 8 + 16 = 30 files
        // bytes: 10 bytes × 30 files = 300 bytes
        assert_eq!(summary.files_created, 30);
        assert_eq!(summary.directories_created, 14);
        assert_eq!(summary.bytes_written, 300);
        // verify deep nesting works
        assert!(test_path.join("file0").exists()); // root files
        assert!(test_path
            .join("dir0")
            .join("dir0")
            .join("dir0")
            .join("file0")
            .exists());
        assert!(test_path
            .join("dir1")
            .join("dir1")
            .join("dir1")
            .join("file1")
            .exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_single_file() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate just files, no directories
        let config = test_config(test_path, &[], 5, 200, 100, false);
        let summary = filegen(&PROGRESS, &config).await?;
        assert_eq!(summary.files_created, 5);
        assert_eq!(summary.directories_created, 0);
        assert_eq!(summary.bytes_written, 1000); // 200 × 5
        for i in 0..5 {
            // verify files
            let file_path = test_path.join(format!("file{i}"));
            assert!(file_path.exists());
            let metadata = tokio::fs::metadata(&file_path).await?;
            assert_eq!(metadata.len(), 200);
        }
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_zero_files() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate only directories, no files
        let config = test_config(test_path, &[3, 2], 0, 100, 50, false);
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 3 + (3×2) = 9 dirs
        assert_eq!(summary.files_created, 0);
        assert_eq!(summary.directories_created, 9);
        assert_eq!(summary.bytes_written, 0);
        // verify directories exist but no files
        assert!(test_path.join("dir0").join("dir0").exists());
        assert!(test_path.join("dir2").join("dir1").exists());
        assert!(!test_path.join("dir0").join("file0").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_leaf_files_only() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate with leaf_files=true, meaning files only in deepest directories
        let config = test_config(test_path, &[2, 3], 4, 50, 25, true);
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 2 top-level + (2 × 3) subdirs = 8 total
        // files: ONLY in leaf dirs (6 leaf dirs) × 4 files each = 24 files
        // bytes: 50 bytes × 24 files = 1200 bytes
        assert_eq!(summary.files_created, 24);
        assert_eq!(summary.directories_created, 8);
        assert_eq!(summary.bytes_written, 1200);
        // verify NO files in root or intermediate directories
        assert!(!test_path.join("file0").exists()); // no root files
        assert!(!test_path.join("dir0").join("file0").exists()); // no intermediate files
        assert!(!test_path.join("dir1").join("file0").exists());
        // verify files ONLY in leaf directories
        assert!(test_path.join("dir0").join("dir0").join("file0").exists());
        assert!(test_path.join("dir0").join("dir0").join("file3").exists());
        assert!(test_path.join("dir0").join("dir2").join("file0").exists());
        assert!(test_path.join("dir1").join("dir1").join("file0").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    #[tokio::test]
    #[traced_test]
    async fn test_permission_error_includes_root_cause() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let root = tmp_dir.join("readonly");
        tokio::fs::create_dir(&root).await?;
        tokio::fs::set_permissions(&root, std::fs::Permissions::from_mode(0o555)).await?;

        let config = test_config(&root, &[], 1, 10, 10, false);
        let result = filegen(&PROGRESS, &config).await;

        // restore permissions to allow cleanup
        tokio::fs::set_permissions(&root, std::fs::Permissions::from_mode(0o755)).await?;
        // cleanup before asserting, so the temp directory is removed even if an assertion fails
        tokio::fs::remove_dir_all(tmp_dir.as_path()).await?;

        assert!(
            result.is_err(),
            "filegen inside read-only directory should fail"
        );
        let err = result.unwrap_err();
        let err_msg = format!("{:#}", err.source);
        assert!(
            err_msg.to_lowercase().contains("permission denied") || err_msg.contains("EACCES"),
            "Error message must include permission denied text. Got: {}",
            err_msg
        );
        Ok(())
    }
}